filter-chain: use dsp sum/copy/clear functions

2025-11-04 13:30:12 -05:00 · 2022-12-16 10:22:01 +01:00 · 2022-12-16 10:22:01 +01:00 · 538b6ce35e
commit 538b6ce35e
parent 0f2f113bdc
5 changed files with 27 additions and 74 deletions
--- a/src/modules/module-filter-chain/builtin_plugin.c
+++ b/src/modules/module-filter-chain/builtin_plugin.c
@@ -731,7 +731,7 @@ static void * convolver_instantiate(const struct fc_descriptor * Descriptor,

 	impl->rate = SampleRate;

-	impl->conv = convolver_new(blocksize, tailsize, samples, n_samples);
+	impl->conv = convolver_new(dsp_ops, blocksize, tailsize, samples, n_samples);
 	if (impl->conv == NULL)
 		goto error;

--- a/src/modules/module-filter-chain/convolver.c
+++ b/src/modules/module-filter-chain/convolver.c
@@ -32,6 +32,8 @@

 #include "pffft.h"

+static struct dsp_ops *dsp;
+
 struct fft_cpx {
 	float *v;
 };
@@ -61,16 +63,6 @@ struct convolver1 {
 	float scale;
 };

-static inline void fft_copy(void *dst, const void *src, int size)
-{
-	memcpy(dst, src, size * sizeof(float));
-}
-
-static void fft_clear(void *data, int size)
-{
-	memset(data, 0, size * sizeof(float));
-}
-
 static void *fft_alloc(int size)
 {
 	return pffft_aligned_malloc(size * sizeof(float));
@@ -80,9 +72,9 @@ static void fft_free(void *data)
 	pffft_aligned_free(data);
 }

-static void fft_cpx_clear(struct fft_cpx *cpx, int size)
+static inline void fft_cpx_clear(struct fft_cpx *cpx, int size)
 {
-	fft_clear(cpx->v, size * 2);
+	dsp_ops_clear(dsp, cpx->v, size * 2);
 }
 static void fft_cpx_init(struct fft_cpx *cpx, int size)
 {
@@ -138,18 +130,13 @@ static inline void fft_convolve_accum(void *fft, struct fft_cpx *r,
 	pffft_zconvolve_accumulate(fft, a->v, b->v, c->v, r->v, scale);
 }

-static inline void fft_sum(float *r, const float *a, const float *b,int len)
-{
-	pffft_sum(a, b, r, len);
-}
-
 static void convolver1_reset(struct convolver1 *conv)
 {
 	int i;
 	for (i = 0; i < conv->segCount; i++)
 		fft_cpx_clear(&conv->segments[i], conv->fftComplexSize);
-	fft_clear(conv->overlap, conv->blockSize);
-	fft_clear(conv->inputBuffer, conv->segSize);
+	dsp_ops_clear(dsp, conv->overlap, conv->blockSize);
+	dsp_ops_clear(dsp, conv->inputBuffer, conv->segSize);
 	fft_cpx_clear(&conv->pre_mult, conv->fftComplexSize);
 	fft_cpx_clear(&conv->conv, conv->fftComplexSize);
 	conv->inputBufferFill = 0;
@@ -200,9 +187,9 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
 		fft_cpx_init(&conv->segments[i], conv->fftComplexSize);
 		fft_cpx_init(&conv->segmentsIr[i], conv->fftComplexSize);

-		fft_copy(conv->fft_buffer, &ir[i * conv->blockSize], copy);
+		dsp_ops_copy(dsp, conv->fft_buffer, &ir[i * conv->blockSize], copy);
 		if (copy < conv->segSize)
-			fft_clear(conv->fft_buffer + copy, conv->segSize - copy);
+			dsp_ops_clear(dsp, conv->fft_buffer + copy, conv->segSize - copy);

 	        fft_run(conv->fft, conv->fft_buffer, &conv->segmentsIr[i]);
 	}
@@ -252,7 +239,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
 	int i, processed = 0;

 	if (conv == NULL || conv->segCount == 0) {
-		fft_clear(output, len);
+		dsp_ops_clear(dsp, output, len);
 		return len;
 	}

@@ -260,9 +247,9 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
 		const int processing = SPA_MIN(len - processed, conv->blockSize - conv->inputBufferFill);
 		const int inputBufferPos = conv->inputBufferFill;

-		fft_copy(conv->inputBuffer + inputBufferPos, input + processed, processing);
+		dsp_ops_copy(dsp, conv->inputBuffer + inputBufferPos, input + processed, processing);
 		if (inputBufferPos == 0 && processing < conv->blockSize)
-			fft_clear(conv->inputBuffer + processing, conv->blockSize - processing);
+			dsp_ops_clear(dsp, conv->inputBuffer + processing, conv->blockSize - processing);

 		fft_run(conv->fft, conv->inputBuffer, &conv->segments[conv->current]);

@@ -301,14 +288,14 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou

 		ifft_run(conv->ifft, &conv->conv, conv->fft_buffer);

-		fft_sum(output + processed, conv->fft_buffer + inputBufferPos,
+		dsp_ops_sum(dsp, output + processed, conv->fft_buffer + inputBufferPos,
 				conv->overlap + inputBufferPos, processing);

 		conv->inputBufferFill += processing;
 		if (conv->inputBufferFill == conv->blockSize) {
 			conv->inputBufferFill = 0;

-			fft_copy(conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize);
+			dsp_ops_copy(dsp, conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize);

 			conv->current = (conv->current > 0) ? (conv->current - 1) : (conv->segCount - 1);
 		}
@@ -340,23 +327,25 @@ void convolver_reset(struct convolver *conv)
 		convolver1_reset(conv->headConvolver);
 	if (conv->tailConvolver0) {
 		convolver1_reset(conv->tailConvolver0);
-		fft_clear(conv->tailOutput0, conv->tailBlockSize);
-		fft_clear(conv->tailPrecalculated0, conv->tailBlockSize);
+		dsp_ops_clear(dsp, conv->tailOutput0, conv->tailBlockSize);
+		dsp_ops_clear(dsp, conv->tailPrecalculated0, conv->tailBlockSize);
 	}
 	if (conv->tailConvolver) {
 		convolver1_reset(conv->tailConvolver);
-		fft_clear(conv->tailOutput, conv->tailBlockSize);
-		fft_clear(conv->tailPrecalculated, conv->tailBlockSize);
+		dsp_ops_clear(dsp, conv->tailOutput, conv->tailBlockSize);
+		dsp_ops_clear(dsp, conv->tailPrecalculated, conv->tailBlockSize);
 	}
 	conv->tailInputFill = 0;
 	conv->precalculatedPos = 0;
 }

-struct convolver *convolver_new(int head_block, int tail_block, const float *ir, int irlen)
+struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tail_block, const float *ir, int irlen)
 {
 	struct convolver *conv;
 	int head_ir_len;

+	dsp = dsp_ops;
+
 	if (head_block == 0 || tail_block == 0)
 		return NULL;

@@ -430,16 +419,16 @@ int convolver_run(struct convolver *conv, const float *input, float *output, int
 			int processing = SPA_MIN(remaining, conv->headBlockSize - (conv->tailInputFill % conv->headBlockSize));

 			if (conv->tailPrecalculated0)
-				fft_sum(&output[processed], &output[processed],
+				dsp_ops_sum(dsp, &output[processed], &output[processed],
 						&conv->tailPrecalculated0[conv->precalculatedPos],
 						processing);
 			if (conv->tailPrecalculated)
-				fft_sum(&output[processed], &output[processed],
+				dsp_ops_sum(dsp, &output[processed], &output[processed],
 						&conv->tailPrecalculated[conv->precalculatedPos],
 						processing);
 			conv->precalculatedPos += processing;

-			fft_copy(conv->tailInput + conv->tailInputFill, input + processed, processing);
+			dsp_ops_copy(dsp, conv->tailInput + conv->tailInputFill, input + processed, processing);
 			conv->tailInputFill += processing;

 			if (conv->tailPrecalculated0 && (conv->tailInputFill % conv->headBlockSize == 0)) {
--- a/src/modules/module-filter-chain/convolver.h
+++ b/src/modules/module-filter-chain/convolver.h
@@ -25,7 +25,9 @@
 #include <stdint.h>
 #include <stddef.h>

-struct convolver *convolver_new(int block, int tail, const float *ir, int irlen);
+#include "dsp-ops.h"
+
+struct convolver *convolver_new(struct dsp_ops *dsp, int block, int tail, const float *ir, int irlen);
 void convolver_free(struct convolver *conv);

 void convolver_reset(struct convolver *conv);
--- a/src/modules/module-filter-chain/pffft.c
+++ b/src/modules/module-filter-chain/pffft.c
@@ -135,7 +135,6 @@ inline v4sf ld_ps1(const float *p)
 #define zconvolve_accumulate_simd zconvolve_accumulate_altivec
 #define zconvolve_simd zconvolve_altivec
 #define transform_simd transform_altivec
-#define sum_simd sum_altivec

 /*
  SSE1 support macros
@@ -162,7 +161,6 @@ typedef __m128 v4sf;
 #define zconvolve_accumulate_simd zconvolve_accumulate_sse
 #define zconvolve_simd zconvolve_sse
 #define transform_simd transform_sse
-#define sum_simd sum_sse

 /*
  ARM NEON support macros
@@ -196,7 +194,6 @@ typedef float32x4_t v4sf;
 #define zconvolve_accumulate_simd zconvolve_accumulate_neon
 #define zconvolve_simd zconvolve_neon
 #define transform_simd transform_neon
-#define sum_simd sum_neon
 #else
 #if !defined(PFFFT_SIMD_DISABLE)
 #warning "building with simd disabled !\n";
@@ -221,7 +218,6 @@ typedef float v4sf;
 #define zconvolve_accumulate_simd zconvolve_accumulate_c
 #define zconvolve_simd zconvolve_c
 #define transform_simd transform_c
-#define sum_simd sum_c
 #endif

 // shortcuts for complex multiplcations
@@ -1412,7 +1408,6 @@ struct funcs {
  void (*zreorder)(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
  void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *dft_c, float *dft_ab, float scaling);
  void (*zconvolve)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
-  void (*sum)(const float *a, const float *b, float *ab, int len);
  int (*simd_size)(void);
  void (*validate)(void);
 };
@@ -2125,25 +2120,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a, const float *b,
 	}
 }

-
-static void sum_simd(const float *a, const float *b, float *ab, int len)
-{
-	int i = 0;
-
-	if (VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)) {
-		const v4sf *RESTRICT va = (const v4sf *)a;
-		const v4sf *RESTRICT vb = (const v4sf *)b;
-		v4sf *RESTRICT vab = (v4sf *) ab;
-		const int end4 = len / SIMD_SZ;
-
-		for (; i < end4; i += 1)
-			vab[i] = VADD(va[i],vb[i]);
-		i *= 4;
-	}
-	for (; i < len; ++i)
-		ab[i] = a[i] + b[i];
-}
-
 #else				// defined(PFFFT_SIMD_DISABLE)

 // standard routine using scalar floats, without SIMD stuff.
@@ -2293,13 +2269,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a,
 		ab[i + 1] = ai * scaling;
 	}
 }
-static void sum_simd(const float *a, const float *b, float *ab, int len)
-{
-	int i;
-	for (i = 0; i < len; ++i)
-		ab[i] = VADD(a[i], b[i]);
-}
-
 #endif				// defined(PFFFT_SIMD_DISABLE)

 static int simd_size_simd(void)
@@ -2313,7 +2282,6 @@ struct funcs pffft_funcs = {
 	.zreorder = zreorder_simd,
 	.zconvolve_accumulate = zconvolve_accumulate_simd,
 	.zconvolve = zconvolve_simd,
-	.sum = sum_simd,
 	.simd_size = simd_size_simd,
 	.validate = validate_pffft_simd,
 };
@@ -2393,11 +2361,6 @@ void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b,
 	return funcs->zconvolve(setup, dft_a, dft_b, dft_ab, scaling);
 }

-void pffft_sum(const float *a, const float *b, float *ab, int len)
-{
-	return funcs->sum(a, b, ab, len);
-}
-
 void pffft_select_cpu(int flags)
 {
 	funcs = &pffft_funcs_c;
--- a/src/modules/module-filter-chain/pffft.h
+++ b/src/modules/module-filter-chain/pffft.h
@@ -161,7 +161,6 @@ extern "C" {

  void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);

-  void pffft_sum(const float *a, const float *b, float *ab, int len);
  /*
    the float buffers must have the correct alignment (16-byte boundary
    on intel and powerpc). This function may be used to obtain such