diff --git a/src/modules/meson.build b/src/modules/meson.build index f90878c82..c267e681c 100644 --- a/src/modules/meson.build +++ b/src/modules/meson.build @@ -61,33 +61,36 @@ simd_cargs = [] simd_dependencies = [] if have_sse - pffft_sse = static_library('pffft_sse', - ['module-filter-chain/pffft.c' ], + filter_chain_sse = static_library('filter_chain_sse', + ['module-filter-chain/pffft.c', + 'module-filter-chain/dsp-ops-sse.c' ], c_args : [sse_args, '-O3', '-DHAVE_SSE'], dependencies : [ spa_dep ], install : false ) simd_cargs += ['-DHAVE_SSE'] - simd_dependencies += pffft_sse + simd_dependencies += filter_chain_sse endif if have_neon - pffft_neon = static_library('pffft_neon', + filter_chain_neon = static_library('filter_chain_neon', ['module-filter-chain/pffft.c' ], c_args : [neon_args, '-O3', '-DHAVE_NEON'], dependencies : [ spa_dep ], install : false ) simd_cargs += ['-DHAVE_NEON'] - simd_dependencies += pffft_neon + simd_dependencies += filter_chain_neon endif -pffft_c = static_library('pffft_c', - ['module-filter-chain/pffft.c' ], +filter_chain_c = static_library('filter_chain_c', + ['module-filter-chain/pffft.c', + 'module-filter-chain/dsp-ops.c', + 'module-filter-chain/dsp-ops-c.c' ], c_args : [simd_cargs, '-O3', '-DPFFFT_SIMD_DISABLE'], dependencies : [ spa_dep ], install : false ) -simd_dependencies += pffft_c +simd_dependencies += filter_chain_c filter_chain_sources = [ 'module-filter-chain.c', diff --git a/src/modules/module-filter-chain/builtin_plugin.c b/src/modules/module-filter-chain/builtin_plugin.c index 3966422c3..6918ad91d 100644 --- a/src/modules/module-filter-chain/builtin_plugin.c +++ b/src/modules/module-filter-chain/builtin_plugin.c @@ -40,6 +40,9 @@ #include "biquad.h" #include "pffft.h" #include "convolver.h" +#include "dsp-ops.h" + +static struct dsp_ops dsp_ops; struct builtin { unsigned long rate; @@ -82,7 +85,7 @@ static void copy_run(void * Instance, unsigned long SampleCount) { struct builtin *impl = Instance; float *in = 
impl->port[1], *out = impl->port[0]; - memcpy(out, in, SampleCount * sizeof(float)); + dsp_ops_copy(&dsp_ops, out, in, SampleCount); } static struct fc_port copy_ports[] = { @@ -112,10 +115,10 @@ static const struct fc_descriptor copy_desc = { static void mixer_run(void * Instance, unsigned long SampleCount) { struct builtin *impl = Instance; - int i; - unsigned long j; + int i, n_src = 0; float *out = impl->port[0]; - bool first = true; + const void *src[8]; + float gains[8]; if (out == NULL) return; @@ -127,24 +130,10 @@ static void mixer_run(void * Instance, unsigned long SampleCount) if (in == NULL || gain == 0.0f) continue; - if (first) { - if (gain == 1.0f) - memcpy(out, in, SampleCount * sizeof(float)); - else - for (j = 0; j < SampleCount; j++) - out[j] = in[j] * gain; - first = false; - } else { - if (gain == 1.0f) - for (j = 0; j < SampleCount; j++) - out[j] += in[j]; - else - for (j = 0; j < SampleCount; j++) - out[j] += in[j] * gain; - } + src[n_src] = in; + gains[n_src++] = gain; } - if (first) - memset(out, 0, SampleCount * sizeof(float)); + dsp_ops_mix_gain(&dsp_ops, out, src, gains, n_src, SampleCount); } static struct fc_port mixer_ports[] = { @@ -941,7 +930,11 @@ struct fc_plugin *load_builtin_plugin(const struct spa_support *support, uint32_ const char *plugin, const char *config) { struct spa_cpu *cpu_iface; + uint32_t cpu_flags; cpu_iface = spa_support_find(support, n_support, SPA_TYPE_INTERFACE_CPU); - pffft_select_cpu(cpu_iface ? spa_cpu_get_flags(cpu_iface) : 0); + cpu_flags = cpu_iface ? 
spa_cpu_get_flags(cpu_iface) : 0; + dsp_ops.cpu_flags = cpu_flags; + dsp_ops_init(&dsp_ops); + pffft_select_cpu(cpu_flags); return &builtin_plugin; } diff --git a/src/modules/module-filter-chain/dsp-ops-c.c b/src/modules/module-filter-chain/dsp-ops-c.c new file mode 100644 index 000000000..885c8cd38 --- /dev/null +++ b/src/modules/module-filter-chain/dsp-ops-c.c @@ -0,0 +1,100 @@ +/* Spa + * + * Copyright © 2022 Wim Taymans + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#include <spa/utils/defs.h>
+
+#include "dsp-ops.h"
+
+/* Zero the first n_samples floats of dst. */
+static inline void dsp_clear_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples)
+{
+	memset(dst, 0, sizeof(float) * n_samples);
+}
+
+/* Accumulate src into dst: dst[i] += src[i]. */
+static inline void dsp_add_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
+		const void * SPA_RESTRICT src, uint32_t n_samples)
+{
+	const float *s = src;
+	float *d = dst;
+	for (uint32_t i = 0; i < n_samples; i++)
+		d[i] += s[i];
+}
+
+/* Scaled copy: dst[i] = src[i] * gain. */
+static inline void dsp_gain_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
+		const void * SPA_RESTRICT src, float gain, uint32_t n_samples)
+{
+	const float *s = src;
+	float *d = dst;
+	for (uint32_t i = 0; i < n_samples; i++)
+		d[i] = s[i] * gain;
+}
+
+/* Scaled accumulate: dst[i] += src[i] * gain. */
+static inline void dsp_gain_add_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
+		const void * SPA_RESTRICT src, float gain, uint32_t n_samples)
+{
+	const float *s = src;
+	float *d = dst;
+	for (uint32_t i = 0; i < n_samples; i++)
+		d[i] += s[i] * gain;
+}
+
+/* Copy n_samples floats from src to dst; nothing is done when both are the same buffer. */
+void dsp_copy_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
+		const void * SPA_RESTRICT src, uint32_t n_samples)
+{
+	if (dst != src)
+		spa_memcpy(dst, src, sizeof(float) * n_samples);
+}
+
+/* Mix n_src float buffers into dst, scaling source i by gain[i]. dst is
+ * cleared when n_src is 0. The first source initializes dst, so a single
+ * source also has its gain applied instead of being copied unscaled. */
+void dsp_mix_gain_c(struct dsp_ops *ops,
+		void * SPA_RESTRICT dst,
+		const void * SPA_RESTRICT src[],
+		float gain[], uint32_t n_src, uint32_t n_samples)
+{
+	if (n_src == 0) {
+		dsp_clear_c(ops, dst, n_samples);
+	} else {
+		if (gain[0] == 1.0f)
+			dsp_copy_c(ops, dst, src[0], n_samples);
+		else
+			dsp_gain_c(ops, dst, src[0], gain[0], n_samples);
+
+		for (uint32_t i = 1; i < n_src; i++) {
+			if (gain[i] == 1.0f)
+				dsp_add_c(ops, dst, src[i], n_samples);
+			else
+				dsp_gain_add_c(ops, dst, src[i], gain[i], n_samples);
+		}
+	}
+}
diff --git a/src/modules/module-filter-chain/dsp-ops-sse.c b/src/modules/module-filter-chain/dsp-ops-sse.c
new file mode 100644
index 000000000..cabcae340
--- /dev/null
+++ 
b/src/modules/module-filter-chain/dsp-ops-sse.c @@ -0,0 +1,91 @@ +/* Spa + * + * Copyright © 2022 Wim Taymans + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#include <spa/utils/defs.h>
+
+#include "dsp-ops.h"
+
+#include <xmmintrin.h>
+
+/* Mix n_src float buffers into dst, scaling source i by gain[i]; dst is
+ * zeroed when n_src is 0. Packed SSE is used when every buffer is aligned. */
+void dsp_mix_gain_sse(struct dsp_ops *ops, void * SPA_RESTRICT dst,
+		const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples)
+{
+	if (n_src == 0) {
+		memset(dst, 0, n_samples * sizeof(float));
+	} else if (n_src == 1 && gain[0] == 1.0f) {
+		/* Only a lone unity-gain source is a plain copy; any other gain is applied below. */
+		if (dst != src[0])
+			spa_memcpy(dst, src[0], n_samples * sizeof(float));
+	} else {
+		uint32_t n, i, unrolled = 0;
+		__m128 in[4], g;
+		const float **s = (const float **)src;
+		float *d = dst;
+
+		/* _mm_load_ps/_mm_store_ps need 16-byte alignment on dst and every source. */
+		if (SPA_LIKELY(SPA_IS_ALIGNED(dst, 16))) {
+			unrolled = n_samples & ~15;
+			for (i = 0; i < n_src; i++) {
+				if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 16))) {
+					unrolled = 0;
+					break;
+				}
+			}
+		}
+		/* Packed body, 16 samples per step; the scalar loop after it handles the rest. */
+		for (n = 0; n < unrolled; n += 16) {
+			g = _mm_set1_ps(gain[0]);
+			in[0] = _mm_mul_ps(g, _mm_load_ps(&s[0][n+ 0]));
+			in[1] = _mm_mul_ps(g, _mm_load_ps(&s[0][n+ 4]));
+			in[2] = _mm_mul_ps(g, _mm_load_ps(&s[0][n+ 8]));
+			in[3] = _mm_mul_ps(g, _mm_load_ps(&s[0][n+12]));
+			for (i = 1; i < n_src; i++) {
+				g = _mm_set1_ps(gain[i]);
+				in[0] = _mm_add_ps(in[0], _mm_mul_ps(g, _mm_load_ps(&s[i][n+ 0])));
+				in[1] = _mm_add_ps(in[1], _mm_mul_ps(g, _mm_load_ps(&s[i][n+ 4])));
+				in[2] = _mm_add_ps(in[2], _mm_mul_ps(g, _mm_load_ps(&s[i][n+ 8])));
+				in[3] = _mm_add_ps(in[3], _mm_mul_ps(g, _mm_load_ps(&s[i][n+12])));
+			}
+			_mm_store_ps(&d[n+ 0], in[0]);
+			_mm_store_ps(&d[n+ 4], in[1]);
+			_mm_store_ps(&d[n+ 8], in[2]);
+			_mm_store_ps(&d[n+12], in[3]);
+		}
+		for (; n < n_samples; n++) {
+			g = _mm_set_ss(gain[0]);
+			in[0] = _mm_mul_ss(g, _mm_load_ss(&s[0][n]));
+			for (i = 1; i < n_src; i++) {
+				g = _mm_set_ss(gain[i]);
+				in[0] = _mm_add_ss(in[0], _mm_mul_ss(g, _mm_load_ss(&s[i][n])));
+			}
+			_mm_store_ss(&d[n], in[0]);
+		}
+	}
+}
diff --git a/src/modules/module-filter-chain/dsp-ops.c b/src/modules/module-filter-chain/dsp-ops.c
new file mode 100644
index 000000000..b40278bf6
--- /dev/null
+++ b/src/modules/module-filter-chain/dsp-ops.c
@@ -0,0 
+1,92 @@ +/* Spa + * + * Copyright © 2022 Wim Taymans + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#include <spa/support/cpu.h>
+#include <spa/utils/defs.h>
+#include <spa/param/audio/format-utils.h>
+
+#include "dsp-ops.h"
+
+/* One set of implementations plus the CPU flags it requires (0 = none). */
+struct dsp_info {
+	uint32_t cpu_flags;
+	void (*copy) (struct dsp_ops *ops, void * SPA_RESTRICT dst,
+			const void * SPA_RESTRICT src, uint32_t n_samples);
+	void (*mix_gain) (struct dsp_ops *ops, void * SPA_RESTRICT dst,
+			const void * SPA_RESTRICT src[],
+			float gain[], uint32_t n_src, uint32_t n_samples);
+};
+
+/* Searched in order, so the SSE entry precedes the unconditional C fallback. */
+static const struct dsp_info dsp_table[] =
+{
+#if defined (HAVE_SSE)
+	{ SPA_CPU_FLAG_SSE,
+		.copy = dsp_copy_c,
+		.mix_gain = dsp_mix_gain_sse,
+	},
+#endif
+	{ 0,
+		.copy = dsp_copy_c,
+		.mix_gain = dsp_mix_gain_c,
+	},
+};
+
+/* True when a requires nothing, or every flag in a is also set in b. */
+#define MATCH_CPU_FLAGS(a,b)	((a) == 0 || ((a) & (b)) == (a))
+
+/* Return the first table entry whose requirements cpu_flags satisfies, else NULL. */
+static const struct dsp_info *find_dsp_info(uint32_t cpu_flags)
+{
+	SPA_FOR_EACH_ELEMENT_VAR(dsp_table, t) {
+		if (MATCH_CPU_FLAGS(t->cpu_flags, cpu_flags))
+			return t;
+	}
+	return NULL;
+}
+
+/* free hook installed by dsp_ops_init(): zero the whole ops struct. */
+static void impl_dsp_ops_free(struct dsp_ops *ops)
+{
+	spa_zero(*ops);
+}
+
+/* Pick the implementation matching ops->cpu_flags (set by the caller) and store it in
+ * ops; cpu_flags becomes the entry's flags, clear is not assigned. 0 or -ENOTSUP. */
+int dsp_ops_init(struct dsp_ops *ops)
+{
+	const struct dsp_info *info = find_dsp_info(ops->cpu_flags);
+	if (info == NULL)
+		return -ENOTSUP;
+	ops->priv = info;
+	ops->cpu_flags = info->cpu_flags;
+	ops->copy = info->copy;
+	ops->mix_gain = info->mix_gain;
+	ops->free = impl_dsp_ops_free;
+	return 0;
+}
diff --git a/src/modules/module-filter-chain/dsp-ops.h b/src/modules/module-filter-chain/dsp-ops.h
new file mode 100644
index 000000000..ffbca6ce7
--- /dev/null
+++ b/src/modules/module-filter-chain/dsp-ops.h
@@ -0,0 +1,62 @@
+/* Spa
+ *
+ * Copyright © 2022 Wim Taymans
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, 
subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DSP_OPS_H
+#define DSP_OPS_H
+#include <spa/utils/defs.h>
+
+/* DSP function table; the caller sets cpu_flags, then dsp_ops_init() fills the rest. */
+struct dsp_ops {
+	uint32_t cpu_flags;
+	void (*clear) (struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples);
+	void (*copy) (struct dsp_ops *ops, void * SPA_RESTRICT dst,
+			const void * SPA_RESTRICT src, uint32_t n_samples);
+	void (*mix_gain) (struct dsp_ops *ops, void * SPA_RESTRICT dst,
+			const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples);
+	void (*free) (struct dsp_ops *ops);
+	const void *priv;
+};
+
+/* Select implementations for ops->cpu_flags; returns 0 or a negative errno value. */
+int dsp_ops_init(struct dsp_ops *ops);
+
+/* Call through the table; note that the ops argument is expanded twice. */
+#define dsp_ops_copy(ops,...)		(ops)->copy(ops, __VA_ARGS__)
+#define dsp_ops_mix_gain(ops,...)	(ops)->mix_gain(ops, __VA_ARGS__)
+#define dsp_ops_free(ops)		(ops)->free(ops)
+
+/* Prototype generators for the per-architecture implementations (dsp_<op>_<arch>). */
+#define MAKE_COPY_FUNC(arch) \
+void dsp_copy_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \
+		const void * SPA_RESTRICT src, uint32_t n_samples)
+#define MAKE_MIX_GAIN_FUNC(arch) \
+void dsp_mix_gain_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \
+		const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples)
+
+MAKE_COPY_FUNC(c);
+MAKE_MIX_GAIN_FUNC(c);
+#if defined (HAVE_SSE)
+MAKE_MIX_GAIN_FUNC(sse);
+#endif
+#endif /* DSP_OPS_H */