From 123fe3d1c54d9c9543eb6b01e23510fad877874e Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Tue, 24 Aug 2021 12:04:09 +0200 Subject: [PATCH] filter-chain: improve pffft compilation and CPU support Compile different pffft versions per CPU. Plug the right version depending on the runtime CPU. See #1543 --- src/modules/meson.build | 35 ++- src/modules/module-filter-chain.c | 9 +- .../module-filter-chain/builtin_plugin.c | 6 + src/modules/module-filter-chain/convolver.c | 27 +- src/modules/module-filter-chain/pffft.c | 252 +++++++++++++----- src/modules/module-filter-chain/pffft.h | 3 + src/modules/module-filter-chain/plugin.h | 1 + 7 files changed, 243 insertions(+), 90 deletions(-) diff --git a/src/modules/meson.build b/src/modules/meson.build index 755c8a084..77d05c559 100644 --- a/src/modules/meson.build +++ b/src/modules/meson.build @@ -45,17 +45,50 @@ pipewire_module_loopback = shared_library('pipewire-module-loopback', dependencies : [mathlib, dl_lib, pipewire_dep], ) +simd_cargs = [] +simd_dependencies = [] + +if have_sse + pffft_sse = static_library('pffft_sse', + ['module-filter-chain/pffft.c' ], + c_args : [sse_args, '-O3', '-DHAVE_SSE'], + include_directories : [spa_inc], + install : false + ) + simd_cargs += ['-DHAVE_SSE'] + simd_dependencies += pffft_sse +endif +if have_neon + pffft_neon = static_library('pffft_neon', + ['module-filter-chain/pffft.c' ], + c_args : [neon_args, '-O3', '-DHAVE_NEON'], + include_directories : [spa_inc], + install : false + ) + simd_cargs += ['-DHAVE_NEON'] + simd_dependencies += pffft_neon +endif + +pffft_c = static_library('pffft_c', + ['module-filter-chain/pffft.c' ], + c_args : [simd_cargs, '-O3', '-DPFFFT_SIMD_DISABLE'], + include_directories : [spa_inc], + install : false +) +simd_dependencies += pffft_c + + pipewire_module_filter_chain = shared_library('pipewire-module-filter-chain', [ 'module-filter-chain.c', 'module-filter-chain/biquad.c', 'module-filter-chain/ladspa_plugin.c', 'module-filter-chain/builtin_plugin.c', - 'module-filter-chain/pffft.c', 'module-filter-chain/convolver.c' ], include_directories : [configinc, spa_inc], install : true, install_dir : modules_install_dir, install_rpath: modules_install_dir, + link_with : simd_dependencies, dependencies : [mathlib, dl_lib, pipewire_dep, sndfile_dep], ) diff --git a/src/modules/module-filter-chain.c b/src/modules/module-filter-chain.c index b74b36fda..8633b002e 100644 --- a/src/modules/module-filter-chain.c +++ b/src/modules/module-filter-chain.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1629,6 +1630,9 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args) struct impl *impl; uint32_t id = pw_global_get_id(pw_impl_module_get_global(module)); const char *str; + const struct spa_support *support; + uint32_t n_support; + struct spa_cpu *cpu_iface; int res; impl = calloc(1, sizeof(struct impl)); @@ -1637,6 +1641,10 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args) pw_log_debug("module %p: new %s", impl, args); + support = pw_context_get_support(context, &n_support); + cpu_iface = spa_support_find(support, n_support, SPA_TYPE_INTERFACE_CPU); + init_builtin_plugin(cpu_iface ? spa_cpu_get_flags(cpu_iface) : 0); + if (args) props = pw_properties_new_string(args); else @@ -1727,7 +1735,6 @@ int pipewire__module_init(struct pw_impl_module *module, const char *args) pw_log_error("can't connect: %m"); goto error; } - pw_properties_free(props); pw_proxy_add_listener((struct pw_proxy*)impl->core, diff --git a/src/modules/module-filter-chain/builtin_plugin.c b/src/modules/module-filter-chain/builtin_plugin.c index 8ccaff382..d306cf05b 100644 --- a/src/modules/module-filter-chain/builtin_plugin.c +++ b/src/modules/module-filter-chain/builtin_plugin.c @@ -34,6 +34,7 @@ #include "plugin.h" #include "biquad.h" +#include "pffft.h" #include "convolver.h" struct builtin { @@ -622,3 +623,8 @@ struct fc_plugin *load_builtin_plugin(const char *plugin, const char *config) { return &builtin_plugin; } + +void init_builtin_plugin(uint32_t cpu_flags) +{ + pffft_select_cpu(cpu_flags); +} diff --git a/src/modules/module-filter-chain/convolver.c b/src/modules/module-filter-chain/convolver.c index e58a4531d..c87cd70ad 100644 --- a/src/modules/module-filter-chain/convolver.c +++ b/src/modules/module-filter-chain/convolver.c @@ -29,7 +29,6 @@ #include #include -#include #include "pffft.h" @@ -132,6 +131,11 @@ static inline void fft_convolve_accum(void *fft, struct fft_cpx *r, pffft_zconvolve_accumulate(fft, a->v, b->v, r->v, scale); } +static inline void fft_sum(float *r, const float *a, const float *b,int len) +{ + pffft_sum(a, b, r, len); +} + static struct convolver1 *convolver1_new(int block, const float *ir, int irlen) { struct convolver1 *conv; @@ -211,25 +215,6 @@ static void convolver1_free(struct convolver1 *conv) free(conv); } -void Sum(float* result, const float* a, const float* b, int len) -{ - int i; -#if defined (__SSE__) - const int end4 = 4 * (len / 4); - for (i = 0; i < end4; i += 4) { - const __m128 va = _mm_load_ps(&a[i]); - const __m128 vb = _mm_load_ps(&b[i]); - _mm_store_ps(&result[i], _mm_add_ps(va,vb)); - } - for (i = end4; i < len; ++i) { - result[i] = a[i] + b[i]; - } -#else - for (i = 0; i < len; i++) - result[i] = a[i] + b[i]; -#endif -} - static int convolver1_run(struct convolver1 *conv, const float *input, float *output, int len) { int i, processed = 0; @@ -270,7 +255,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou ifft_run(conv->ifft, &conv->conv, conv->fft_buffer); - Sum(output + processed, conv->fft_buffer + inputBufferPos, conv->overlap + inputBufferPos, processing); + fft_sum(output + processed, conv->fft_buffer + inputBufferPos, conv->overlap + inputBufferPos, processing); conv->inputBufferFill += processing; if (conv->inputBufferFill == conv->blockSize) { diff --git a/src/modules/module-filter-chain/pffft.c b/src/modules/module-filter-chain/pffft.c index 308dc97bc..aee30de44 100644 --- a/src/modules/module-filter-chain/pffft.c +++ b/src/modules/module-filter-chain/pffft.c @@ -60,8 +60,11 @@ #include #include #include +#include #include +#include + /* detect compiler flavour */ #if defined(_MSC_VER) #define COMPILER_MSVC @@ -81,19 +84,19 @@ #define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) #endif -/* +/* vector support macros: the rest of the code is independant of SSE/Altivec/NEON -- adding support for other platforms with 4-element - vectors should be limited to these macros + vectors should be limited to these macros */ // define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code //#define PFFFT_SIMD_DISABLE /* - Altivec support macros + Altivec support macros */ -#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__)) +#if !defined(PFFFT_SIMD_DISABLE) && (defined(HAVE_ALTIVEC)) typedef vector float v4sf; #define SIMD_SZ 4 #define VZERO() ((vector float) vec_splat_u8(0)) @@ -125,12 +128,18 @@ inline v4sf ld_ps1(const float *p) x3 = vec_mergel(y1, y3); \ } #define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15)) -#define VALIGNED(ptr) ((((long long)(ptr)) & 0xF) == 0) +#define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0xF) == 0) +#define pffft_funcs pffft_funcs_altivec +#define new_setup_simd new_setup_altivec +#define zreorder_simd zreorder_altivec +#define zconvolve_accumulate_simd zconvolve_accumulate_altivec +#define transform_simd transform_altivec +#define sum_simd sum_altivec /* SSE1 support macros */ -#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(i386) || defined(_M_IX86)) +#elif !defined(PFFFT_SIMD_DISABLE) && (defined(HAVE_SSE)) #include typedef __m128 v4sf; @@ -145,12 +154,18 @@ typedef __m128 v4sf; #define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; } #define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3) #define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0)) -#define VALIGNED(ptr) ((((long long)(ptr)) & 0xF) == 0) +#define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0xF) == 0) +#define pffft_funcs pffft_funcs_sse +#define new_setup_simd new_setup_sse +#define zreorder_simd zreorder_sse +#define zconvolve_accumulate_simd zconvolve_accumulate_sse +#define transform_simd transform_sse +#define sum_simd sum_sse /* ARM NEON support macros */ -#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__arm__) || defined(__aarch64__) || defined(__arm64__)) +#elif !defined(PFFFT_SIMD_DISABLE) && (defined(HAVE_NEON)) #include typedef float32x4_t v4sf; #define SIMD_SZ 4 @@ -172,7 +187,13 @@ typedef float32x4_t v4sf; // marginally faster version //# define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); } #define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a)) -#define VALIGNED(ptr) ((((long long)(ptr)) & 0x3) == 0) +#define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0x3) == 0) +#define pffft_funcs pffft_funcs_neon +#define new_setup_simd new_setup_neon +#define zreorder_simd zreorder_neon +#define zconvolve_accumulate_simd zconvolve_accumulate_neon +#define transform_simd transform_neon +#define sum_simd sum_neon #else #if !defined(PFFFT_SIMD_DISABLE) #warning "building with simd disabled !\n"; @@ -190,7 +211,13 @@ typedef float v4sf; #define VMADD(a,b,c) ((a)*(b)+(c)) #define VSUB(a,b) ((a)-(b)) #define LD_PS1(p) (p) -#define VALIGNED(ptr) ((((long long)(ptr)) & 0x3) == 0) +#define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0x3) == 0) +#define pffft_funcs pffft_funcs_c +#define new_setup_simd new_setup_c +#define zreorder_simd zreorder_c +#define zconvolve_accumulate_simd zconvolve_accumulate_c +#define transform_simd transform_c +#define sum_simd sum_c #endif // shortcuts for complex multiplcations @@ -212,7 +239,7 @@ typedef union v4sf_union { #define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3)) /* detect bugs with the vector support macros */ -void validate_pffft_simd() +static void validate_pffft_simd() { float f[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; v4sf_union a0, a1, a2, a3, t, u; @@ -270,35 +297,11 @@ void validate_pffft_simd() assertv4(a3, 3, 7, 11, 15); } #else -void validate_pffft_simd() +static void validate_pffft_simd() { } // allow test_pffft.c to call this function even when simd is not available.. #endif //!PFFFT_SIMD_DISABLE -/* SSE and co like 16-bytes aligned pointers */ -#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... -void *pffft_aligned_malloc(size_t nb_bytes) -{ - void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); - if (!p0) - return (void *)0; - p = (void *)(((size_t)p0 + MALLOC_V4SF_ALIGNMENT) & - (~((size_t)(MALLOC_V4SF_ALIGNMENT - 1)))); - *((void **)p - 1) = p0; - return p; -} - -void pffft_aligned_free(void *p) -{ - if (p) - free(*((void **)p - 1)); -} - -int pffft_simd_size() -{ - return SIMD_SZ; -} - /* passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 */ @@ -1297,7 +1300,7 @@ static void rffti1_ps(int n, float *wa, int *ifac) } } /* rffti1 */ -void cffti1_ps(int n, float *wa, int *ifac) +static void cffti1_ps(int n, float *wa, int *ifac) { static const int ntryh[] = { 5, 3, 4, 2, 0 }; int k1, j, ii; @@ -1335,7 +1338,7 @@ void cffti1_ps(int n, float *wa, int *ifac) } } /* cffti1 */ -v4sf *cfftf1_ps(int n, const v4sf * input_readonly, v4sf * work1, v4sf * work2, +static v4sf *cfftf1_ps(int n, const v4sf * input_readonly, v4sf * work1, v4sf * work2, const float *wa, const int *ifac, int isign) { v4sf *in = (v4sf *) input_readonly; @@ -1399,7 +1402,17 @@ struct PFFFT_Setup { float *twiddle; // points into 'data', N/4 elements }; -PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) +struct funcs { + PFFFT_Setup * (*new_setup) (int N, pffft_transform_t transform); + void (*transform) (PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction, int ordered); + void (*zreorder)(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); + void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); + void (*sum)(const float *a, const float *b, float *ab, int len); + int (*simd_size)(void); + void (*validate)(void); +}; + +static PFFFT_Setup *new_setup_simd(int N, pffft_transform_t transform) { PFFFT_Setup *s = (PFFFT_Setup *) malloc(sizeof(PFFFT_Setup)); int k, m; @@ -1462,12 +1475,6 @@ PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) return s; } -void pffft_destroy_setup(PFFFT_Setup * s) -{ - pffft_aligned_free(s->data); - free(s); -} - #if !defined(PFFFT_SIMD_DISABLE) /* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */ @@ -1512,7 +1519,7 @@ static void unreversed_copy(int N, const v4sf * in, v4sf * out, int out_stride) UNINTERLEAVE2(h0, g1, out[0], out[1]); } -void pffft_zreorder(PFFFT_Setup * setup, const float *in, float *out, +static void zreorder_simd(PFFFT_Setup * setup, const float *in, float *out, pffft_direction_t direction) { int k, N = setup->N, Ncvec = setup->Ncvec; @@ -1563,7 +1570,7 @@ void pffft_zreorder(PFFFT_Setup * setup, const float *in, float *out, } } -void pffft_cplx_finalize(int Ncvec, const v4sf * in, v4sf * out, const v4sf * e) +static void pffft_cplx_finalize(int Ncvec, const v4sf * in, v4sf * out, const v4sf * e) { int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks v4sf r0, i0, r1, i1, r2, i2, r3, i3; @@ -1626,7 +1633,7 @@ void pffft_cplx_finalize(int Ncvec, const v4sf * in, v4sf * out, const v4sf * e) } } -void pffft_cplx_preprocess(int Ncvec, const v4sf * in, v4sf * out, +static void pffft_cplx_preprocess(int Ncvec, const v4sf * in, v4sf * out, const v4sf * e) { int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks @@ -1908,8 +1915,8 @@ static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf * in, uout[2 * Ncvec - 1].f[3] = ci3; } -void pffft_transform_internal(PFFFT_Setup * setup, const float *finput, - float *foutput, v4sf * scratch, +static void transform_simd(PFFFT_Setup * setup, const float *finput, + float *foutput, float * scratch, pffft_direction_t direction, int ordered) { int k, Ncvec = setup->Ncvec; @@ -1921,7 +1928,7 @@ void pffft_transform_internal(PFFFT_Setup * setup, const float *finput, const v4sf *vinput = (const v4sf *)finput; v4sf *voutput = (v4sf *) foutput; - v4sf *buff[2] = { voutput, scratch ? scratch : scratch_on_stack }; + v4sf *buff[2] = { voutput, scratch ? (v4sf*)scratch : scratch_on_stack }; int ib = (nf_odd ^ ordered ? 1 : 0); assert(VALIGNED(finput) && VALIGNED(foutput)); @@ -1998,7 +2005,7 @@ void pffft_transform_internal(PFFFT_Setup * setup, const float *finput, assert(buff[ib] == voutput); } -void pffft_zconvolve_accumulate(PFFFT_Setup * s, const float *a, const float *b, +static void zconvolve_accumulate_simd(PFFFT_Setup * s, const float *a, const float *b, float *ab, float scaling) { int Ncvec = s->Ncvec; @@ -2101,12 +2108,25 @@ void pffft_zconvolve_accumulate(PFFFT_Setup * s, const float *a, const float *b, } } +static void sum_simd(const float *a, const float *b, float *ab, int len) +{ + const v4sf *RESTRICT va = (const v4sf *)a; + const v4sf *RESTRICT vb = (const v4sf *)b; + v4sf *RESTRICT vab = (v4sf *) ab; + int i; + const int end4 = len / SIMD_SZ; + + for (i = 0; i < end4; i += 1) + vab[i] = VADD(va[i],vb[i]); + for (i = i * 4; i < len; ++i) + ab[i] = a[i] + b[i]; +} + #else // defined(PFFFT_SIMD_DISABLE) // standard routine using scalar floats, without SIMD stuff. -#define pffft_zreorder_nosimd pffft_zreorder -void pffft_zreorder_nosimd(PFFFT_Setup * setup, const float *in, float *out, +static void zreorder_simd(PFFFT_Setup * setup, const float *in, float *out, pffft_direction_t direction) { int k, N = setup->N; @@ -2129,8 +2149,7 @@ void pffft_zreorder_nosimd(PFFFT_Setup * setup, const float *in, float *out, } } -#define pffft_transform_internal_nosimd pffft_transform_internal -void pffft_transform_internal_nosimd(PFFFT_Setup * setup, const float *input, +static void transform_simd(PFFFT_Setup * setup, const float *input, float *output, float *scratch, pffft_direction_t direction, int ordered) { @@ -2198,8 +2217,7 @@ void pffft_transform_internal_nosimd(PFFFT_Setup * setup, const float *input, assert(buff[ib] == output); } -#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate -void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup * s, const float *a, +static void zconvolve_accumulate_simd(PFFFT_Setup * s, const float *a, const float *b, float *ab, float scaling) { int i, Ncvec = s->Ncvec; @@ -2225,20 +2243,120 @@ void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup * s, const float *a, ab[2 * i + 1] += ai * scaling; } } +static void sum_simd(const float *a, const float *b, float *ab, int len) +{ + int i; + for (i = 0; i < len; ++i) + ab[i] = VADD(a[i], b[i]); +} #endif // defined(PFFFT_SIMD_DISABLE) -void pffft_transform(PFFFT_Setup * setup, const float *input, float *output, - float *work, pffft_direction_t direction) +static int simd_size_simd(void) { - pffft_transform_internal(setup, input, output, (v4sf *) work, direction, - 0); + return SIMD_SZ; } -void pffft_transform_ordered(PFFFT_Setup * setup, const float *input, - float *output, float *work, - pffft_direction_t direction) +struct funcs pffft_funcs = { + .new_setup = new_setup_simd, + .transform = transform_simd, + .zreorder = zreorder_simd, + .zconvolve_accumulate = zconvolve_accumulate_simd, + .sum = sum_simd, + .simd_size = simd_size_simd, + .validate = validate_pffft_simd, +}; + +#if defined(PFFFT_SIMD_DISABLE) + +extern struct funcs pffft_funcs_c; +#if (defined(HAVE_SSE)) +extern struct funcs pffft_funcs_sse; +#endif +#if (defined(HAVE_ALTIVEC)) +extern struct funcs pffft_funcs_altivec; +#endif +#if (defined(HAVE_NEON)) +extern struct funcs pffft_funcs_neon; +#endif + +static struct funcs *funcs = &pffft_funcs_c; + +/* SSE and co like 16-bytes aligned pointers */ +#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... +void *pffft_aligned_malloc(size_t nb_bytes) { - pffft_transform_internal(setup, input, output, (v4sf *) work, direction, - 1); + void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); + if (!p0) + return (void *)0; + p = (void *)(((size_t)p0 + MALLOC_V4SF_ALIGNMENT) & + (~((size_t)(MALLOC_V4SF_ALIGNMENT - 1)))); + *((void **)p - 1) = p0; + return p; } + +void pffft_aligned_free(void *p) +{ + if (p) + free(*((void **)p - 1)); +} + +int pffft_simd_size(void) +{ + return funcs->simd_size(); +} + +PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) +{ + return funcs->new_setup(N, transform); +} + +void pffft_destroy_setup(PFFFT_Setup * s) +{ + pffft_aligned_free(s->data); + free(s); +} + +void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) +{ + return funcs->transform(setup, input, output, work, direction, 0); +} + +void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) +{ + return funcs->transform(setup, input, output, work, direction, 1); +} + +void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction) +{ + return funcs->zreorder(setup, input, output, direction); +} + +void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling) +{ + return funcs->zconvolve_accumulate(setup, dft_a, dft_b, dft_ab, scaling); +} + +void pffft_sum(const float *a, const float *b, float *ab, int len) +{ + return funcs->sum(a, b, ab, len); +} + +void pffft_select_cpu(int flags) +{ + funcs = &pffft_funcs_c; +#if defined(HAVE_SSE) + if (flags & SPA_CPU_FLAG_SSE) + funcs = &pffft_funcs_sse; +#endif +#if defined(HAVE_NEON) + if (flags & SPA_CPU_FLAG_NEON) + funcs = &pffft_funcs_neon; +#endif +#if defined(HAVE_ALTIVEC) + if (flags & SPA_CPU_FLAG_ALTIVEC) + funcs = &pffft_funcs_altivec; +#endif +} + +#endif diff --git a/src/modules/module-filter-chain/pffft.h b/src/modules/module-filter-chain/pffft.h index dd554fe93..cf83833f2 100644 --- a/src/modules/module-filter-chain/pffft.h +++ b/src/modules/module-filter-chain/pffft.h @@ -159,6 +159,7 @@ extern "C" { */ void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); + void pffft_sum(const float *a, const float *b, float *ab, int len); /* the float buffers must have the correct alignment (16-byte boundary on intel and powerpc). This function may be used to obtain such @@ -170,6 +171,8 @@ extern "C" { /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */ int pffft_simd_size(); + void pffft_select_cpu(int flags); + #ifdef __cplusplus } #endif diff --git a/src/modules/module-filter-chain/plugin.h b/src/modules/module-filter-chain/plugin.h index ab596778a..801230884 100644 --- a/src/modules/module-filter-chain/plugin.h +++ b/src/modules/module-filter-chain/plugin.h @@ -94,3 +94,4 @@ static inline void fc_descriptor_free(struct fc_descriptor *desc) struct fc_plugin *load_ladspa_plugin(const char *path, const char *config); struct fc_plugin *load_builtin_plugin(const char *path, const char *config); +void init_builtin_plugin(uint32_t cpu_flags);