From 538b6ce35e978d00da4ac3e7ad57e54074330957 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Fri, 16 Dec 2022 10:22:01 +0100 Subject: [PATCH] filter-chain: use dsp sum/copy/clear functions --- .../module-filter-chain/builtin_plugin.c | 2 +- src/modules/module-filter-chain/convolver.c | 57 ++++++++----------- src/modules/module-filter-chain/convolver.h | 4 +- src/modules/module-filter-chain/pffft.c | 37 ------------ src/modules/module-filter-chain/pffft.h | 1 - 5 files changed, 27 insertions(+), 74 deletions(-) diff --git a/src/modules/module-filter-chain/builtin_plugin.c b/src/modules/module-filter-chain/builtin_plugin.c index 9ad5314c9..45867b36d 100644 --- a/src/modules/module-filter-chain/builtin_plugin.c +++ b/src/modules/module-filter-chain/builtin_plugin.c @@ -731,7 +731,7 @@ static void * convolver_instantiate(const struct fc_descriptor * Descriptor, impl->rate = SampleRate; - impl->conv = convolver_new(blocksize, tailsize, samples, n_samples); + impl->conv = convolver_new(dsp_ops, blocksize, tailsize, samples, n_samples); if (impl->conv == NULL) goto error; diff --git a/src/modules/module-filter-chain/convolver.c b/src/modules/module-filter-chain/convolver.c index f834e7a10..3d1d6ac19 100644 --- a/src/modules/module-filter-chain/convolver.c +++ b/src/modules/module-filter-chain/convolver.c @@ -32,6 +32,8 @@ #include "pffft.h" +static struct dsp_ops *dsp; + struct fft_cpx { float *v; }; @@ -61,16 +63,6 @@ struct convolver1 { float scale; }; -static inline void fft_copy(void *dst, const void *src, int size) -{ - memcpy(dst, src, size * sizeof(float)); -} - -static void fft_clear(void *data, int size) -{ - memset(data, 0, size * sizeof(float)); -} - static void *fft_alloc(int size) { return pffft_aligned_malloc(size * sizeof(float)); @@ -80,9 +72,9 @@ static void fft_free(void *data) pffft_aligned_free(data); } -static void fft_cpx_clear(struct fft_cpx *cpx, int size) +static inline void fft_cpx_clear(struct fft_cpx *cpx, int size) { - fft_clear(cpx->v, size * 2); + dsp_ops_clear(dsp, cpx->v, size * 2); } static void fft_cpx_init(struct fft_cpx *cpx, int size) { @@ -138,18 +130,13 @@ static inline void fft_convolve_accum(void *fft, struct fft_cpx *r, pffft_zconvolve_accumulate(fft, a->v, b->v, c->v, r->v, scale); } -static inline void fft_sum(float *r, const float *a, const float *b,int len) -{ - pffft_sum(a, b, r, len); -} - static void convolver1_reset(struct convolver1 *conv) { int i; for (i = 0; i < conv->segCount; i++) fft_cpx_clear(&conv->segments[i], conv->fftComplexSize); - fft_clear(conv->overlap, conv->blockSize); - fft_clear(conv->inputBuffer, conv->segSize); + dsp_ops_clear(dsp, conv->overlap, conv->blockSize); + dsp_ops_clear(dsp, conv->inputBuffer, conv->segSize); fft_cpx_clear(&conv->pre_mult, conv->fftComplexSize); fft_cpx_clear(&conv->conv, conv->fftComplexSize); conv->inputBufferFill = 0; @@ -200,9 +187,9 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen) fft_cpx_init(&conv->segments[i], conv->fftComplexSize); fft_cpx_init(&conv->segmentsIr[i], conv->fftComplexSize); - fft_copy(conv->fft_buffer, &ir[i * conv->blockSize], copy); + dsp_ops_copy(dsp, conv->fft_buffer, &ir[i * conv->blockSize], copy); if (copy < conv->segSize) - fft_clear(conv->fft_buffer + copy, conv->segSize - copy); + dsp_ops_clear(dsp, conv->fft_buffer + copy, conv->segSize - copy); fft_run(conv->fft, conv->fft_buffer, &conv->segmentsIr[i]); } @@ -252,7 +239,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou int i, processed = 0; if (conv == NULL || conv->segCount == 0) { - fft_clear(output, len); + dsp_ops_clear(dsp, output, len); return len; } @@ -260,9 +247,9 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou const int processing = SPA_MIN(len - processed, conv->blockSize - conv->inputBufferFill); const int inputBufferPos = conv->inputBufferFill; - fft_copy(conv->inputBuffer + inputBufferPos, input + processed, processing); + dsp_ops_copy(dsp, conv->inputBuffer + inputBufferPos, input + processed, processing); if (inputBufferPos == 0 && processing < conv->blockSize) - fft_clear(conv->inputBuffer + processing, conv->blockSize - processing); + dsp_ops_clear(dsp, conv->inputBuffer + processing, conv->blockSize - processing); fft_run(conv->fft, conv->inputBuffer, &conv->segments[conv->current]); @@ -301,14 +288,14 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou ifft_run(conv->ifft, &conv->conv, conv->fft_buffer); - fft_sum(output + processed, conv->fft_buffer + inputBufferPos, + dsp_ops_sum(dsp, output + processed, conv->fft_buffer + inputBufferPos, conv->overlap + inputBufferPos, processing); conv->inputBufferFill += processing; if (conv->inputBufferFill == conv->blockSize) { conv->inputBufferFill = 0; - fft_copy(conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize); + dsp_ops_copy(dsp, conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize); conv->current = (conv->current > 0) ? (conv->current - 1) : (conv->segCount - 1); } @@ -340,23 +327,25 @@ void convolver_reset(struct convolver *conv) convolver1_reset(conv->headConvolver); if (conv->tailConvolver0) { convolver1_reset(conv->tailConvolver0); - fft_clear(conv->tailOutput0, conv->tailBlockSize); - fft_clear(conv->tailPrecalculated0, conv->tailBlockSize); + dsp_ops_clear(dsp, conv->tailOutput0, conv->tailBlockSize); + dsp_ops_clear(dsp, conv->tailPrecalculated0, conv->tailBlockSize); } if (conv->tailConvolver) { convolver1_reset(conv->tailConvolver); - fft_clear(conv->tailOutput, conv->tailBlockSize); - fft_clear(conv->tailPrecalculated, conv->tailBlockSize); + dsp_ops_clear(dsp, conv->tailOutput, conv->tailBlockSize); + dsp_ops_clear(dsp, conv->tailPrecalculated, conv->tailBlockSize); } conv->tailInputFill = 0; conv->precalculatedPos = 0; } -struct convolver *convolver_new(int head_block, int tail_block, const float *ir, int irlen) +struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tail_block, const float *ir, int irlen) { struct convolver *conv; int head_ir_len; + dsp = dsp_ops; + if (head_block == 0 || tail_block == 0) return NULL; @@ -430,16 +419,16 @@ int convolver_run(struct convolver *conv, const float *input, float *output, int int processing = SPA_MIN(remaining, conv->headBlockSize - (conv->tailInputFill % conv->headBlockSize)); if (conv->tailPrecalculated0) - fft_sum(&output[processed], &output[processed], + dsp_ops_sum(dsp, &output[processed], &output[processed], &conv->tailPrecalculated0[conv->precalculatedPos], processing); if (conv->tailPrecalculated) - fft_sum(&output[processed], &output[processed], + dsp_ops_sum(dsp, &output[processed], &output[processed], &conv->tailPrecalculated[conv->precalculatedPos], processing); conv->precalculatedPos += processing; - fft_copy(conv->tailInput + conv->tailInputFill, input + processed, processing); + dsp_ops_copy(dsp, conv->tailInput + conv->tailInputFill, input + processed, processing); conv->tailInputFill += processing; if (conv->tailPrecalculated0 && (conv->tailInputFill % conv->headBlockSize == 0)) { diff --git a/src/modules/module-filter-chain/convolver.h b/src/modules/module-filter-chain/convolver.h index ebd5415ca..1988114cd 100644 --- a/src/modules/module-filter-chain/convolver.h +++ b/src/modules/module-filter-chain/convolver.h @@ -25,7 +25,9 @@ #include #include -struct convolver *convolver_new(int block, int tail, const float *ir, int irlen); +#include "dsp-ops.h" + +struct convolver *convolver_new(struct dsp_ops *dsp, int block, int tail, const float *ir, int irlen); void convolver_free(struct convolver *conv); void convolver_reset(struct convolver *conv); diff --git a/src/modules/module-filter-chain/pffft.c b/src/modules/module-filter-chain/pffft.c index 2f65e3fcd..037019140 100644 --- a/src/modules/module-filter-chain/pffft.c +++ b/src/modules/module-filter-chain/pffft.c @@ -135,7 +135,6 @@ inline v4sf ld_ps1(const float *p) #define zconvolve_accumulate_simd zconvolve_accumulate_altivec #define zconvolve_simd zconvolve_altivec #define transform_simd transform_altivec -#define sum_simd sum_altivec /* SSE1 support macros @@ -162,7 +161,6 @@ typedef __m128 v4sf; #define zconvolve_accumulate_simd zconvolve_accumulate_sse #define zconvolve_simd zconvolve_sse #define transform_simd transform_sse -#define sum_simd sum_sse /* ARM NEON support macros @@ -196,7 +194,6 @@ typedef float32x4_t v4sf; #define zconvolve_accumulate_simd zconvolve_accumulate_neon #define zconvolve_simd zconvolve_neon #define transform_simd transform_neon -#define sum_simd sum_neon #else #if !defined(PFFFT_SIMD_DISABLE) #warning "building with simd disabled !\n"; @@ -221,7 +218,6 @@ typedef float v4sf; #define zconvolve_accumulate_simd zconvolve_accumulate_c #define zconvolve_simd zconvolve_c #define transform_simd transform_c -#define sum_simd sum_c #endif // shortcuts for complex multiplcations @@ -1412,7 +1408,6 @@ struct funcs { void (*zreorder)(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *dft_c, float *dft_ab, float scaling); void (*zconvolve)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); - void (*sum)(const float *a, const float *b, float *ab, int len); int (*simd_size)(void); void (*validate)(void); }; @@ -2125,25 +2120,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a, const float *b, } } - -static void sum_simd(const float *a, const float *b, float *ab, int len) -{ - int i = 0; - - if (VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)) { - const v4sf *RESTRICT va = (const v4sf *)a; - const v4sf *RESTRICT vb = (const v4sf *)b; - v4sf *RESTRICT vab = (v4sf *) ab; - const int end4 = len / SIMD_SZ; - - for (; i < end4; i += 1) - vab[i] = VADD(va[i],vb[i]); - i *= 4; - } - for (; i < len; ++i) - ab[i] = a[i] + b[i]; -} - #else // defined(PFFFT_SIMD_DISABLE) // standard routine using scalar floats, without SIMD stuff. @@ -2293,13 +2269,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a, ab[i + 1] = ai * scaling; } } -static void sum_simd(const float *a, const float *b, float *ab, int len) -{ - int i; - for (i = 0; i < len; ++i) - ab[i] = VADD(a[i], b[i]); -} - #endif // defined(PFFFT_SIMD_DISABLE) static int simd_size_simd(void) @@ -2313,7 +2282,6 @@ struct funcs pffft_funcs = { .zreorder = zreorder_simd, .zconvolve_accumulate = zconvolve_accumulate_simd, .zconvolve = zconvolve_simd, - .sum = sum_simd, .simd_size = simd_size_simd, .validate = validate_pffft_simd, }; @@ -2393,11 +2361,6 @@ void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, return funcs->zconvolve(setup, dft_a, dft_b, dft_ab, scaling); } -void pffft_sum(const float *a, const float *b, float *ab, int len) -{ - return funcs->sum(a, b, ab, len); -} - void pffft_select_cpu(int flags) { funcs = &pffft_funcs_c; diff --git a/src/modules/module-filter-chain/pffft.h b/src/modules/module-filter-chain/pffft.h index ca58676fb..ac8e271fa 100644 --- a/src/modules/module-filter-chain/pffft.h +++ b/src/modules/module-filter-chain/pffft.h @@ -161,7 +161,6 @@ extern "C" { void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); - void pffft_sum(const float *a, const float *b, float *ab, int len); /* the float buffers must have the correct alignment (16-byte boundary on intel and powerpc). This function may be used to obtain such