From f4f316b951b0dc766a56e1b600d0769504ba0720 Mon Sep 17 00:00:00 2001
From: Wim Taymans <wtaymans@redhat.com>
Date: Fri, 16 Dec 2022 11:30:18 +0100
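Subject: [PATCH] filter-chain: move fft to dsp_ops

Replace the static pffft wrappers in convolver.c with fft_new, fft_free,
fft_run, fft_cmul and fft_cmuladd entries in struct dsp_ops_funcs, with
the C implementations in dsp-ops-c.c. Also drop the stray trailing
semicolon from MAKE_SUM_FUNC.
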
---
 src/modules/module-filter-chain/convolver.c | 65 +++++----------------
 src/modules/module-filter-chain/dsp-ops-c.c | 31 ++++++++++
 src/modules/module-filter-chain/dsp-ops.c   | 15 +++++
 src/modules/module-filter-chain/dsp-ops.h   | 43 +++++++++++++-
 4 files changed, 102 insertions(+), 52 deletions(-)

diff --git a/src/modules/module-filter-chain/convolver.c b/src/modules/module-filter-chain/convolver.c
index e1a484a86..6bed5b140 100644
--- a/src/modules/module-filter-chain/convolver.c
+++ b/src/modules/module-filter-chain/convolver.c
@@ -30,8 +30,6 @@
 
 #include <math.h>
 
-#include "pffft.h"
-
 static struct dsp_ops *dsp;
 
 struct convolver1 {
@@ -98,42 +96,6 @@ static int next_power_of_two(int val)
 	return r;
 }
 
-static inline void *fft_new(int size)
-{
-	return pffft_new_setup(size, PFFFT_REAL);
-}
-
-static inline void *ifft_new(int size)
-{
-	return pffft_new_setup(size, PFFFT_REAL);
-}
-
-static inline void fft_destroy(void *fft)
-{
-	pffft_destroy_setup(fft);
-}
-
-static inline void fft_run(void *fft, const float *in, float *out)
-{
-	pffft_transform(fft, in, out, NULL, PFFFT_FORWARD);
-}
-
-static inline void ifft_run(void *ifft, const float *in, float *out)
-{
-	pffft_transform(ifft, in, out, NULL, PFFFT_BACKWARD);
-}
-
-static inline void fft_convolve(void *fft, float *r,
-		const float *a, const float *b, int len, float scale)
-{
-	pffft_zconvolve(fft, a, b, r, scale);
-}
-static inline void fft_convolve_accum(void *fft, float *r,
-		const float *c, const float *a, const float *b, int len, float scale)
-{
-	pffft_zconvolve_accumulate(fft, a, b, c, r, scale);
-}
-
 static void convolver1_reset(struct convolver1 *conv)
 {
 	int i;
@@ -170,10 +132,10 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
 	conv->segCount = (irlen + conv->blockSize-1) / conv->blockSize;
 	conv->fftComplexSize = (conv->segSize / 2) + 1;
 
-	conv->fft = fft_new(conv->segSize);
+	conv->fft = dsp_ops_fft_new(dsp, conv->segSize, true);
 	if (conv->fft == NULL)
 		goto error;
-	conv->ifft = ifft_new(conv->segSize);
+	conv->ifft = dsp_ops_fft_new(dsp, conv->segSize, true);
 	if (conv->ifft == NULL)
 		goto error;
 
@@ -195,7 +157,7 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
 		if (copy < conv->segSize)
 			dsp_ops_clear(dsp, conv->fft_buffer + copy, conv->segSize - copy);
 
-	        fft_run(conv->fft, conv->fft_buffer, conv->segmentsIr[i]);
+	        dsp_ops_fft_run(dsp, conv->fft, 1, conv->fft_buffer, conv->segmentsIr[i]);
 	}
 	conv->pre_mult = fft_cpx_alloc(conv->fftComplexSize);
 	conv->conv = fft_cpx_alloc(conv->fftComplexSize);
@@ -207,9 +169,9 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
 	return conv;
 error:
 	if (conv->fft)
-		fft_destroy(conv->fft);
+		dsp_ops_fft_free(dsp, conv->fft);
 	if (conv->ifft)
-		fft_destroy(conv->ifft);
+		dsp_ops_fft_free(dsp, conv->ifft);
 	if (conv->fft_buffer)
 		fft_free(conv->fft_buffer);
 	free(conv);
@@ -224,9 +186,9 @@ static void convolver1_free(struct convolver1 *conv)
 		fft_cpx_free(conv->segmentsIr[i]);
 	}
 	if (conv->fft)
-		fft_destroy(conv->fft);
+		dsp_ops_fft_free(dsp, conv->fft);
 	if (conv->ifft)
-		fft_destroy(conv->ifft);
+		dsp_ops_fft_free(dsp, conv->ifft);
 	if (conv->fft_buffer)
 		fft_free(conv->fft_buffer);
 	free(conv->segments);
@@ -255,13 +217,13 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
 		if (inputBufferPos == 0 && processing < conv->blockSize)
 			dsp_ops_clear(dsp, conv->inputBuffer + processing, conv->blockSize - processing);
 
-		fft_run(conv->fft, conv->inputBuffer, conv->segments[conv->current]);
+		dsp_ops_fft_run(dsp, conv->fft, 1, conv->inputBuffer, conv->segments[conv->current]);
 
 		if (conv->segCount > 1) {
 			if (conv->inputBufferFill == 0) {
 				int indexAudio = (conv->current + 1) % conv->segCount;
 
-				fft_convolve(conv->fft, conv->pre_mult,
+				dsp_ops_fft_cmul(dsp, conv->fft, conv->pre_mult,
 						conv->segmentsIr[1],
 						conv->segments[indexAudio],
 						conv->fftComplexSize, conv->scale);
@@ -269,7 +231,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
 				for (i = 2; i < conv->segCount; i++) {
 					indexAudio = (conv->current + i) % conv->segCount;
 
-					fft_convolve_accum(conv->fft,
+					dsp_ops_fft_cmuladd(dsp, conv->fft,
 							conv->pre_mult,
 							conv->pre_mult,
 							conv->segmentsIr[i],
@@ -277,20 +239,21 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
 							conv->fftComplexSize, conv->scale);
 				}
 			}
-			fft_convolve_accum(conv->fft,
+			dsp_ops_fft_cmuladd(dsp, conv->fft,
 					conv->conv,
 					conv->pre_mult,
 					conv->segments[conv->current],
 					conv->segmentsIr[0],
 					conv->fftComplexSize, conv->scale);
 		} else {
-			fft_convolve(conv->fft, conv->conv,
+			dsp_ops_fft_cmul(dsp, conv->fft,
+					conv->conv,
 					conv->segments[conv->current],
 					conv->segmentsIr[0],
 					conv->fftComplexSize, conv->scale);
 		}
 
-		ifft_run(conv->ifft, conv->conv, conv->fft_buffer);
+		dsp_ops_fft_run(dsp, conv->ifft, -1, conv->conv, conv->fft_buffer);
 
 		dsp_ops_sum(dsp, output + processed, conv->fft_buffer + inputBufferPos,
 				conv->overlap + inputBufferPos, processing);
diff --git a/src/modules/module-filter-chain/dsp-ops-c.c b/src/modules/module-filter-chain/dsp-ops-c.c
index 576ab7582..a913f5af7 100644
--- a/src/modules/module-filter-chain/dsp-ops-c.c
+++ b/src/modules/module-filter-chain/dsp-ops-c.c
@@ -29,6 +29,7 @@
 
 #include <spa/utils/defs.h>
 
+#include "pffft.h"
 #include "dsp-ops.h"
 
 void dsp_clear_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples)
@@ -141,3 +142,33 @@ void dsp_sum_c(struct dsp_ops *ops, float * dst,
 		dst[i] = a[i] + b[i];
 }
 
+void *dsp_fft_new_c(struct dsp_ops *ops, int32_t size, bool real)	/* create a pffft setup; ops is unused */
+{
+	return pffft_new_setup(size, real ? PFFFT_REAL : PFFFT_COMPLEX);	/* result is returned as-is; the convolver treats NULL as failure */
+}
+
+void dsp_fft_free_c(struct dsp_ops *ops, void *fft)	/* destroy a pffft setup; ops is unused */
+{
+	pffft_destroy_setup(fft);	/* fft is expected to come from dsp_fft_new_c() */
+}
+void dsp_fft_run_c(struct dsp_ops *ops, void *fft, int direction,
+	const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
+{	/* transform src into dst with pffft_transform(), passing NULL for its fourth argument */
+	pffft_transform(fft, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);	/* negative direction: backward, otherwise forward */
+}
+
+void dsp_fft_cmul_c(struct dsp_ops *ops, void *fft,
+	float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
+	const float * SPA_RESTRICT b, uint32_t len, const float scale)
+{	/* forward a, b, dst and scale to pffft_zconvolve(); ops and len are unused here */
+	pffft_zconvolve(fft, a, b, dst, scale);
+}
+
+void dsp_fft_cmuladd_c(struct dsp_ops *ops, void *fft,
+	float * dst, const float * src,	/* not restrict: the convolver passes the same buffer for both */
+	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
+	uint32_t len, const float scale)
+{	/* forward a, b, src, dst and scale to pffft_zconvolve_accumulate(); ops and len are unused here */
+	pffft_zconvolve_accumulate(fft, a, b, src, dst, scale);
+}
+
diff --git a/src/modules/module-filter-chain/dsp-ops.c b/src/modules/module-filter-chain/dsp-ops.c
index f5fa7f694..d35d5c936 100644
--- a/src/modules/module-filter-chain/dsp-ops.c
+++ b/src/modules/module-filter-chain/dsp-ops.c
@@ -47,6 +47,11 @@ static struct dsp_info dsp_table[] =
 		.funcs.mix_gain = dsp_mix_gain_sse,
 		.funcs.biquad_run = dsp_biquad_run_c,
 		.funcs.sum = dsp_sum_avx,
+		.funcs.fft_new = dsp_fft_new_c,
+		.funcs.fft_free = dsp_fft_free_c,
+		.funcs.fft_run = dsp_fft_run_c,
+		.funcs.fft_cmul = dsp_fft_cmul_c,
+		.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
 	},
 #endif
 #if defined (HAVE_SSE)
@@ -56,6 +61,11 @@ static struct dsp_info dsp_table[] =
 		.funcs.mix_gain = dsp_mix_gain_sse,
 		.funcs.biquad_run = dsp_biquad_run_c,
 		.funcs.sum = dsp_sum_sse,
+		.funcs.fft_new = dsp_fft_new_c,
+		.funcs.fft_free = dsp_fft_free_c,
+		.funcs.fft_run = dsp_fft_run_c,
+		.funcs.fft_cmul = dsp_fft_cmul_c,
+		.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
 	},
 #endif
 	{ 0,
@@ -64,6 +74,11 @@ static struct dsp_info dsp_table[] =
 		.funcs.mix_gain = dsp_mix_gain_c,
 		.funcs.biquad_run = dsp_biquad_run_c,
 		.funcs.sum = dsp_sum_c,
+		.funcs.fft_new = dsp_fft_new_c,
+		.funcs.fft_free = dsp_fft_free_c,
+		.funcs.fft_run = dsp_fft_run_c,
+		.funcs.fft_cmul = dsp_fft_cmul_c,
+		.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
 	},
 };
 
diff --git a/src/modules/module-filter-chain/dsp-ops.h b/src/modules/module-filter-chain/dsp-ops.h
index 19fdc05c6..bd77062ad 100644
--- a/src/modules/module-filter-chain/dsp-ops.h
+++ b/src/modules/module-filter-chain/dsp-ops.h
@@ -45,6 +45,18 @@ struct dsp_ops_funcs {
 	void (*sum) (struct dsp_ops *ops,
 			float * dst, const float * SPA_RESTRICT a,
 			const float * SPA_RESTRICT b, uint32_t n_samples);
+
+	void *(*fft_new) (struct dsp_ops *ops, int32_t size, bool real);	/* create an FFT setup of the given size */
+	void (*fft_free) (struct dsp_ops *ops, void *fft);	/* release a setup returned by fft_new */
+	void (*fft_run) (struct dsp_ops *ops, void *fft, int direction,	/* the convolver passes 1 for the forward and -1 for the inverse transform */
+			const float * SPA_RESTRICT src, float * SPA_RESTRICT dst);
+	void (*fft_cmul) (struct dsp_ops *ops, void *fft,
+			float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
+			const float * SPA_RESTRICT b, uint32_t len, const float scale);
+	void (*fft_cmuladd) (struct dsp_ops *ops, void *fft,
+			float * dst, const float * src,	/* dst and src carry no restrict: the convolver passes the same buffer for both */
+			const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
+			uint32_t len, const float scale);
 };
 
 struct dsp_ops {
@@ -67,6 +79,12 @@ int dsp_ops_init(struct dsp_ops *ops);
 #define dsp_ops_biquad_run(ops,...)	(ops)->funcs.biquad_run(ops, __VA_ARGS__)
 #define dsp_ops_sum(ops,...)		(ops)->funcs.sum(ops, __VA_ARGS__)
 
+#define dsp_ops_fft_new(ops,...)	(ops)->funcs.fft_new(ops, __VA_ARGS__)	/* FFT entry points: each expands to a call through (ops)->funcs with ops as first argument */
+#define dsp_ops_fft_free(ops,...)	(ops)->funcs.fft_free(ops, __VA_ARGS__)
+#define dsp_ops_fft_run(ops,...)	(ops)->funcs.fft_run(ops, __VA_ARGS__)
+#define dsp_ops_fft_cmul(ops,...)	(ops)->funcs.fft_cmul(ops, __VA_ARGS__)
+#define dsp_ops_fft_cmuladd(ops,...)	(ops)->funcs.fft_cmuladd(ops, __VA_ARGS__)
+
 #define MAKE_CLEAR_FUNC(arch) \
 void dsp_clear_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples)
 #define MAKE_COPY_FUNC(arch) \
@@ -80,14 +98,37 @@ void dsp_biquad_run_##arch (struct dsp_ops *ops, struct biquad *bq,	\
 	float *out, const float *in, uint32_t n_samples)
 #define MAKE_SUM_FUNC(arch) \
 void dsp_sum_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \
-	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples);
+	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples)
 
+#define MAKE_FFT_NEW_FUNC(arch) \
+void *dsp_fft_new_##arch(struct dsp_ops *ops, int32_t size, bool real)	/* prototype generators: MAKE_FFT_*_FUNC(arch) expands to the signature of dsp_fft_*_<arch> */
+#define MAKE_FFT_FREE_FUNC(arch) \
+void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft)
+#define MAKE_FFT_RUN_FUNC(arch) \
+void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \
+	const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
+#define MAKE_FFT_CMUL_FUNC(arch) \
+void dsp_fft_cmul_##arch(struct dsp_ops *ops, void *fft, \
+	float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, \
+	const float * SPA_RESTRICT b, uint32_t len, const float scale)
+#define MAKE_FFT_CMULADD_FUNC(arch) \
+void dsp_fft_cmuladd_##arch(struct dsp_ops *ops, void *fft,		\
+	float * dst, const float * src,					\
+	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,	\
+	uint32_t len, const float scale)	/* dst and src carry no restrict: the convolver passes the same buffer for both */
 
 MAKE_CLEAR_FUNC(c);
 MAKE_COPY_FUNC(c);
 MAKE_MIX_GAIN_FUNC(c);
 MAKE_BIQUAD_RUN_FUNC(c);
 MAKE_SUM_FUNC(c);
+
+MAKE_FFT_NEW_FUNC(c);
+MAKE_FFT_FREE_FUNC(c);
+MAKE_FFT_RUN_FUNC(c);
+MAKE_FFT_CMUL_FUNC(c);
+MAKE_FFT_CMULADD_FUNC(c);
+
 #if defined (HAVE_SSE)
 MAKE_MIX_GAIN_FUNC(sse);
 MAKE_SUM_FUNC(sse);