filter-chain: move some biquad functions around

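Keep just one biquad function in dsp-ops: the multi-filter biquadn_run is renamed to biquad_run and the old single-filter entry point is dropped. Move the implementation prototypes to a separate header, dsp-ops-impl.h.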
2025-11-03 09:01:54 -05:00 · 2024-11-11 17:59:10 +01:00 · 2024-11-11 17:59:10 +01:00 · d17c3fb72d
commit d17c3fb72d
parent aad3d1eafa
7 changed files with 115 additions and 155 deletions
--- a/src/modules/module-filter-chain.c
+++ b/src/modules/module-filter-chain.c
@ -30,6 +30,8 @@
 #include <pipewire/impl.h>
 #include <pipewire/extensions/profiler.h>
 #include "module-filter-chain/dsp-ops-impl.h"
 #define NAME "filter-chain"
 PW_LOG_TOPIC_STATIC(mod_topic, "mod." NAME);
--- a/src/modules/module-filter-chain/builtin_plugin.c
+++ b/src/modules/module-filter-chain/builtin_plugin.c
@ -538,7 +538,7 @@ static void bq_run(void *Instance, unsigned long samples)
 		if (impl->freq != freq || impl->Q != Q || impl->gain != gain)
 			bq_freq_update(impl, impl->type, freq, Q, gain);
 	}
-	dsp_ops_biquad_run(impl->dsp, bq, out, in, samples);
+	dsp_ops_biquad_run(impl->dsp, bq, 1, 0, &out, (const float **)&in, 1, samples);
 }
 /** bq_lowpass */
@ -1991,7 +1991,7 @@ static void param_eq_connect_port(void * Instance, unsigned long Port,
 /* Run callback for a param_eq instance: hands impl->bq to the dsp_ops biquad
  * entry point with impl->n_bq filters and PARAM_EQ_MAX as the biquad stride.
  * impl->port[0..7] are passed as the inputs and impl->port[8..] as the
  * outputs (n_src = 8), for SampleCount samples. */
 static void param_eq_run(void * Instance, unsigned long SampleCount)
 {
 	struct param_eq_impl *impl = Instance;
-	dsp_ops_biquadn_run(impl->dsp, impl->bq, impl->n_bq, PARAM_EQ_MAX,
+	dsp_ops_biquad_run(impl->dsp, impl->bq, impl->n_bq, PARAM_EQ_MAX,
 			&impl->port[8], (const float**)impl->port, 8, SampleCount);
 }
--- a/src/modules/module-filter-chain/dsp-ops-c.c
+++ b/src/modules/module-filter-chain/dsp-ops-c.c
@ -16,13 +16,20 @@
 #else
 #include "pffft.h"
 #endif
-#include "dsp-ops.h"
+#include "dsp-ops-impl.h"
 /* Zero n_samples floats at dst (a memset of sizeof(float) * n_samples bytes).
  * The ops argument is not used. */
 void dsp_clear_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples)
 {
 	memset(dst, 0, sizeof(float) * n_samples);
 }
 /* Copy n_samples floats from src to dst with spa_memcpy().
  * Nothing is done when dst and src are the same pointer.
  * The ops argument is not used. */
 void dsp_copy_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
 			const void * SPA_RESTRICT src, uint32_t n_samples)
 {
 	/* in-place call: skip the copy entirely */
 	if (dst != src)
 		spa_memcpy(dst, src, sizeof(float) * n_samples);
 }
 static inline void dsp_add_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
 			const void * SPA_RESTRICT src, uint32_t n_samples)
 {
@ -67,13 +74,6 @@ static inline void dsp_gain_add_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
 }
 /* Copy n_samples floats from src to dst with spa_memcpy().
  * Nothing is done when dst and src are the same pointer.
  * The ops argument is not used. */
 void dsp_copy_c(struct dsp_ops *ops, void * SPA_RESTRICT dst,
 			const void * SPA_RESTRICT src, uint32_t n_samples)
 {
 	/* in-place call: skip the copy entirely */
 	if (dst != src)
 		spa_memcpy(dst, src, sizeof(float) * n_samples);
 }
 void dsp_mix_gain_c(struct dsp_ops *ops,
 		void * SPA_RESTRICT dst,
 		const void * SPA_RESTRICT src[],
@ -114,7 +114,7 @@ void dsp_mult_c(struct dsp_ops *ops,
 	}
 }
-void dsp_biquad_run_c(struct dsp_ops *ops, struct biquad *bq,
+static void biquad_run_c(struct dsp_ops *ops, struct biquad *bq,
 		float *out, const float *in, uint32_t n_samples)
 {
 	float x, y, x1, x2;
@ -146,7 +146,7 @@ void dsp_biquad_run_c(struct dsp_ops *ops, struct biquad *bq,
 #undef F
 }
-void dsp_biquadn_run_c(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
+void dsp_biquad_run_c(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
 		float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[],
 		uint32_t n_src, uint32_t n_samples)
 {
@ -159,9 +159,9 @@ void dsp_biquadn_run_c(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, ui
 		if (s == NULL || d == NULL)
 			continue;
 		if (n_bq > 0)
-			dsp_biquad_run_c(ops, &bq[0], d, s, n_samples);
+			biquad_run_c(ops, &bq[0], d, s, n_samples);
 		for (j = 1; j < n_bq; j++)
-			dsp_biquad_run_c(ops, &bq[j], d, d, n_samples);
+			biquad_run_c(ops, &bq[j], d, d, n_samples);
 	}
 }
--- a/src/modules/module-filter-chain/dsp-ops-impl.h
+++ b/src/modules/module-filter-chain/dsp-ops-impl.h
@ -0,0 +1,93 @@
 /* Spa */
 /* SPDX-FileCopyrightText: Copyright © 2022 Wim Taymans */
 /* SPDX-License-Identifier: MIT */
 /* Prototypes for dsp_ops_init() and for the per-architecture implementations
  * of the dsp_ops functions. Pulls in dsp-ops.h for the types used below. */
 #ifndef DSP_OPS_IMPL_H
 #define DSP_OPS_IMPL_H
 #include "dsp-ops.h"
 /* Fill in ops for the given cpu_flags. */
 int dsp_ops_init(struct dsp_ops *ops, uint32_t cpu_flags);
 /* Each MAKE_<OP>_FUNC(arch) below expands to the prototype of
  * dsp_<op>_<arch>; the terminating ';' is supplied at the point of use. */
 #define MAKE_CLEAR_FUNC(arch) \
 void dsp_clear_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples)
 #define MAKE_COPY_FUNC(arch) \
 void dsp_copy_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \
 	const void * SPA_RESTRICT src, uint32_t n_samples)
 #define MAKE_MIX_GAIN_FUNC(arch) \
 void dsp_mix_gain_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst,	\
 	const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples)
 #define MAKE_SUM_FUNC(arch) \
 void dsp_sum_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \
 	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples)
 #define MAKE_LINEAR_FUNC(arch) \
 void dsp_linear_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \
 	const float * SPA_RESTRICT src, const float mult, const float add, uint32_t n_samples)
 #define MAKE_MULT_FUNC(arch) \
 void dsp_mult_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst,	\
 	const void * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples)
 /* The single biquad entry point: takes an array of biquads (n_bq, bq_stride)
  * and arrays of output/input buffers (n_src entries each). */
 #define MAKE_BIQUAD_RUN_FUNC(arch) \
 void dsp_biquad_run_##arch (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, \
 	float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples)
 #define MAKE_DELAY_FUNC(arch) \
 void dsp_delay_##arch (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, \
 		uint32_t delay, float *dst, const float *src, uint32_t n_samples)
 #define MAKE_FFT_NEW_FUNC(arch) \
 void *dsp_fft_new_##arch(struct dsp_ops *ops, uint32_t size, bool real)
 #define MAKE_FFT_FREE_FUNC(arch) \
 void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft)
 #define MAKE_FFT_MEMALLOC_FUNC(arch) \
 void *dsp_fft_memalloc_##arch(struct dsp_ops *ops, uint32_t size, bool real)
 #define MAKE_FFT_MEMFREE_FUNC(arch) \
 void dsp_fft_memfree_##arch(struct dsp_ops *ops, void *mem)
 #define MAKE_FFT_MEMCLEAR_FUNC(arch) \
 void dsp_fft_memclear_##arch(struct dsp_ops *ops, void *mem, uint32_t size, bool real)
 #define MAKE_FFT_RUN_FUNC(arch) \
 void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \
 	const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
 #define MAKE_FFT_CMUL_FUNC(arch) \
 void dsp_fft_cmul_##arch(struct dsp_ops *ops, void *fft, \
 	float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, \
 	const float * SPA_RESTRICT b, uint32_t len, const float scale)
 #define MAKE_FFT_CMULADD_FUNC(arch) \
 void dsp_fft_cmuladd_##arch(struct dsp_ops *ops, void *fft,		\
 	float * dst, const float * src,					\
 	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,	\
 	uint32_t len, const float scale)
 /* Plain C variants: every operation is declared. */
 MAKE_CLEAR_FUNC(c);
 MAKE_COPY_FUNC(c);
 MAKE_MIX_GAIN_FUNC(c);
 MAKE_SUM_FUNC(c);
 MAKE_LINEAR_FUNC(c);
 MAKE_MULT_FUNC(c);
 MAKE_BIQUAD_RUN_FUNC(c);
 MAKE_DELAY_FUNC(c);
 MAKE_FFT_NEW_FUNC(c);
 MAKE_FFT_FREE_FUNC(c);
 MAKE_FFT_MEMALLOC_FUNC(c);
 MAKE_FFT_MEMFREE_FUNC(c);
 MAKE_FFT_MEMCLEAR_FUNC(c);
 MAKE_FFT_RUN_FUNC(c);
 MAKE_FFT_CMUL_FUNC(c);
 MAKE_FFT_CMULADD_FUNC(c);
 /* SSE variants: only mix_gain, sum, biquad_run, delay, fft_cmul and
  * fft_cmuladd are declared. */
 #if defined (HAVE_SSE)
 MAKE_MIX_GAIN_FUNC(sse);
 MAKE_SUM_FUNC(sse);
 MAKE_BIQUAD_RUN_FUNC(sse);
 MAKE_DELAY_FUNC(sse);
 MAKE_FFT_CMUL_FUNC(sse);
 MAKE_FFT_CMULADD_FUNC(sse);
 #endif
 /* AVX variants: only mix_gain, sum, fft_cmul and fft_cmuladd are declared. */
 #if defined (HAVE_AVX)
 MAKE_MIX_GAIN_FUNC(avx);
 MAKE_SUM_FUNC(avx);
 MAKE_FFT_CMUL_FUNC(avx);
 MAKE_FFT_CMULADD_FUNC(avx);
 #endif
 #endif /* DSP_OPS_IMPL_H */
--- a/src/modules/module-filter-chain/dsp-ops-sse.c
+++ b/src/modules/module-filter-chain/dsp-ops-sse.c
@ -15,7 +15,7 @@
 #include "pffft.h"
 #endif
-#include "dsp-ops.h"
+#include "dsp-ops-impl.h"
 #include <xmmintrin.h>
@ -128,7 +128,7 @@ void dsp_sum_sse(struct dsp_ops *ops, float *r, const float *a, const float *b,
 	}
 }
-void dsp_biquad_run_sse(struct dsp_ops *ops, struct biquad *bq,
+static void dsp_biquad_run1_sse(struct dsp_ops *ops, struct biquad *bq,
 		float *out, const float *in, uint32_t n_samples)
 {
 	__m128 x, y, z;
@ -432,7 +432,7 @@ static void dsp_biquad2_run4_sse(struct dsp_ops *ops, struct biquad *bq, uint32_
 #undef F
 }
-void dsp_biquadn_run_sse(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
+void dsp_biquad_run_sse(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
 		float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[],
 		uint32_t n_src, uint32_t n_samples)
 {
@ -502,7 +502,7 @@ void dsp_biquadn_run_sse(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq,
 			dsp_biquad2_run_sse(ops, &bq[j], d, s, n_samples);
 		}
 		if (j < n_bq) {
-			dsp_biquad_run_sse(ops, &bq[j], d, s, n_samples);
+			dsp_biquad_run1_sse(ops, &bq[j], d, s, n_samples);
 		}
 	}
 }
--- a/src/modules/module-filter-chain/dsp-ops.c
+++ b/src/modules/module-filter-chain/dsp-ops.c
@ -11,7 +11,7 @@
 #include <spa/utils/defs.h>
 #include <spa/param/audio/format-utils.h>
-#include "dsp-ops.h"
+#include "dsp-ops-impl.h"
 struct dsp_info {
 	uint32_t cpu_flags;
@ -38,7 +38,6 @@ static struct dsp_info dsp_table[] =
 		.funcs.fft_run = dsp_fft_run_c,
 		.funcs.fft_cmul = dsp_fft_cmul_avx,
 		.funcs.fft_cmuladd = dsp_fft_cmuladd_avx,
 		.funcs.biquadn_run = dsp_biquadn_run_sse,
 		.funcs.delay = dsp_delay_sse,
 	},
 #endif
@ -59,7 +58,6 @@ static struct dsp_info dsp_table[] =
 		.funcs.fft_run = dsp_fft_run_c,
 		.funcs.fft_cmul = dsp_fft_cmul_sse,
 		.funcs.fft_cmuladd = dsp_fft_cmuladd_sse,
 		.funcs.biquadn_run = dsp_biquadn_run_sse,
 		.funcs.delay = dsp_delay_sse,
 	},
 #endif
@ -79,7 +77,6 @@ static struct dsp_info dsp_table[] =
 		.funcs.fft_run = dsp_fft_run_c,
 		.funcs.fft_cmul = dsp_fft_cmul_c,
 		.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
 		.funcs.biquadn_run = dsp_biquadn_run_c,
 		.funcs.delay = dsp_delay_c,
 	},
 };
@ -115,43 +112,3 @@ int dsp_ops_init(struct dsp_ops *ops, uint32_t cpu_flags)
 	return 0;
 }
 /* Ad-hoc timing of the biquad path. Initialises three dsp_ops with cpu flags
  * 0, SPA_CPU_FLAG_SSE and SPA_CPU_FLAG_AVX, calls dsp_ops_biquad_run() 8192
  * times on 2048 samples with each of them, and prints the three elapsed
  * times in nanoseconds to stderr followed by (t2-t1)/(t3-t2), i.e. the first
  * run's time divided by the second's. Always returns 0.
  *
  * NOTE(review): bq and in are not initialised anywhere in this function
  * before being handed to the filter, and the return values of
  * dsp_ops_init() are ignored. */
 int dsp_ops_benchmark(void)
 {
 	struct dsp_ops ops[3];
 	uint32_t i;
 	struct biquad bq;
 	float in[2048], out[2048];
 	struct timespec ts;
 	uint64_t t1, t2, t3, t4;
 	dsp_ops_init(&ops[0], 0);
 	dsp_ops_init(&ops[1], SPA_CPU_FLAG_SSE);
 	dsp_ops_init(&ops[2], SPA_CPU_FLAG_AVX);
 	/* t1..t4 are monotonic-clock timestamps bracketing the three loops */
 	clock_gettime(CLOCK_MONOTONIC, &ts);
        t1 = SPA_TIMESPEC_TO_NSEC(&ts);
 	for (i = 0; i < 8192; i++)
 		dsp_ops_biquad_run(&ops[0], &bq, out, in, 2048);
 	clock_gettime(CLOCK_MONOTONIC, &ts);
        t2 = SPA_TIMESPEC_TO_NSEC(&ts);
 	for (i = 0; i < 8192; i++)
 		dsp_ops_biquad_run(&ops[1], &bq, out, in, 2048);
 	clock_gettime(CLOCK_MONOTONIC, &ts);
        t3 = SPA_TIMESPEC_TO_NSEC(&ts);
 	for (i = 0; i < 8192; i++)
 		dsp_ops_biquad_run(&ops[2], &bq, out, in, 2048);
 	clock_gettime(CLOCK_MONOTONIC, &ts);
        t4 = SPA_TIMESPEC_TO_NSEC(&ts);
 	/* the printed "speedup" compares only the first two runs */
 	fprintf(stderr, "%"PRIu64" %"PRIu64" %"PRIu64" speedup:%f\n",
 			t2-t1, t3-t2, t4-t3,
 			((double)(t2-t1))/(t3-t2));
 	return 0;
 }
--- a/src/modules/module-filter-chain/dsp-ops.h
+++ b/src/modules/module-filter-chain/dsp-ops.h
@ -20,8 +20,6 @@ struct dsp_ops_funcs {
 			void * SPA_RESTRICT dst,
 			const void * SPA_RESTRICT src[],
 			float gain[], uint32_t n_src, uint32_t n_samples);
 	void (*biquad_run) (struct dsp_ops *ops, struct biquad *bq,
 			float *out, const float *in, uint32_t n_samples);
 	void (*sum) (struct dsp_ops *ops,
 			float * dst, const float * SPA_RESTRICT a,
 			const float * SPA_RESTRICT b, uint32_t n_samples);
@ -46,7 +44,7 @@ struct dsp_ops_funcs {
 	void (*mult) (struct dsp_ops *ops,
 			void * SPA_RESTRICT dst,
 			const void * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples);
-	void (*biquadn_run) (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
+	void (*biquad_run) (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
 			float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[],
 			uint32_t n_src, uint32_t n_samples);
 	void (*delay) (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, uint32_t delay,
@ -63,9 +61,6 @@ struct dsp_ops {
 	const void *priv;
 };
 int dsp_ops_init(struct dsp_ops *ops, uint32_t cpu_flags);
 int dsp_ops_benchmark(void);
 #define dsp_ops_free(ops)		(ops)->free(ops)
 #define dsp_ops_clear(ops,...)		(ops)->funcs.clear(ops, __VA_ARGS__)
@ -75,7 +70,6 @@ int dsp_ops_benchmark(void);
 #define dsp_ops_sum(ops,...)		(ops)->funcs.sum(ops, __VA_ARGS__)
 #define dsp_ops_linear(ops,...)		(ops)->funcs.linear(ops, __VA_ARGS__)
 #define dsp_ops_mult(ops,...)		(ops)->funcs.mult(ops, __VA_ARGS__)
 #define dsp_ops_biquadn_run(ops,...)	(ops)->funcs.biquadn_run(ops, __VA_ARGS__)
 #define dsp_ops_delay(ops,...)		(ops)->funcs.delay(ops, __VA_ARGS__)
 #define dsp_ops_fft_new(ops,...)	(ops)->funcs.fft_new(ops, __VA_ARGS__)
@ -87,90 +81,4 @@ int dsp_ops_benchmark(void);
 #define dsp_ops_fft_cmul(ops,...)	(ops)->funcs.fft_cmul(ops, __VA_ARGS__)
 #define dsp_ops_fft_cmuladd(ops,...)	(ops)->funcs.fft_cmuladd(ops, __VA_ARGS__)
 #define MAKE_CLEAR_FUNC(arch) \
 void dsp_clear_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples)
 #define MAKE_COPY_FUNC(arch) \
 void dsp_copy_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \
 	const void * SPA_RESTRICT src, uint32_t n_samples)
 #define MAKE_MIX_GAIN_FUNC(arch) \
 void dsp_mix_gain_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst,	\
 	const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples)
 #define MAKE_BIQUAD_RUN_FUNC(arch) \
 void dsp_biquad_run_##arch (struct dsp_ops *ops, struct biquad *bq,	\
 	float *out, const float *in, uint32_t n_samples)
 #define MAKE_SUM_FUNC(arch) \
 void dsp_sum_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \
 	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples)
 #define MAKE_LINEAR_FUNC(arch) \
 void dsp_linear_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \
 	const float * SPA_RESTRICT src, const float mult, const float add, uint32_t n_samples)
 #define MAKE_MULT_FUNC(arch) \
 void dsp_mult_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst,	\
 	const void * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples)
 #define MAKE_BIQUADN_RUN_FUNC(arch) \
 void dsp_biquadn_run_##arch (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, \
 	float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples)
 #define MAKE_DELAY_FUNC(arch) \
 void dsp_delay_##arch (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, \
 		uint32_t delay, float *dst, const float *src, uint32_t n_samples)
 #define MAKE_FFT_NEW_FUNC(arch) \
 void *dsp_fft_new_##arch(struct dsp_ops *ops, uint32_t size, bool real)
 #define MAKE_FFT_FREE_FUNC(arch) \
 void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft)
 #define MAKE_FFT_MEMALLOC_FUNC(arch) \
 void *dsp_fft_memalloc_##arch(struct dsp_ops *ops, uint32_t size, bool real)
 #define MAKE_FFT_MEMFREE_FUNC(arch) \
 void dsp_fft_memfree_##arch(struct dsp_ops *ops, void *mem)
 #define MAKE_FFT_MEMCLEAR_FUNC(arch) \
 void dsp_fft_memclear_##arch(struct dsp_ops *ops, void *mem, uint32_t size, bool real)
 #define MAKE_FFT_RUN_FUNC(arch) \
 void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \
 	const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
 #define MAKE_FFT_CMUL_FUNC(arch) \
 void dsp_fft_cmul_##arch(struct dsp_ops *ops, void *fft, \
 	float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, \
 	const float * SPA_RESTRICT b, uint32_t len, const float scale)
 #define MAKE_FFT_CMULADD_FUNC(arch) \
 void dsp_fft_cmuladd_##arch(struct dsp_ops *ops, void *fft,		\
 	float * dst, const float * src,					\
 	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,	\
 	uint32_t len, const float scale)
 MAKE_CLEAR_FUNC(c);
 MAKE_COPY_FUNC(c);
 MAKE_MIX_GAIN_FUNC(c);
 MAKE_BIQUAD_RUN_FUNC(c);
 MAKE_SUM_FUNC(c);
 MAKE_LINEAR_FUNC(c);
 MAKE_MULT_FUNC(c);
 MAKE_BIQUADN_RUN_FUNC(c);
 MAKE_DELAY_FUNC(c);
 MAKE_FFT_NEW_FUNC(c);
 MAKE_FFT_FREE_FUNC(c);
 MAKE_FFT_MEMALLOC_FUNC(c);
 MAKE_FFT_MEMFREE_FUNC(c);
 MAKE_FFT_MEMCLEAR_FUNC(c);
 MAKE_FFT_RUN_FUNC(c);
 MAKE_FFT_CMUL_FUNC(c);
 MAKE_FFT_CMULADD_FUNC(c);
 #if defined (HAVE_SSE)
 MAKE_MIX_GAIN_FUNC(sse);
 MAKE_SUM_FUNC(sse);
 MAKE_BIQUAD_RUN_FUNC(sse);
 MAKE_BIQUADN_RUN_FUNC(sse);
 MAKE_DELAY_FUNC(sse);
 MAKE_FFT_CMUL_FUNC(sse);
 MAKE_FFT_CMULADD_FUNC(sse);
 #endif
 #if defined (HAVE_AVX)
 MAKE_MIX_GAIN_FUNC(avx);
 MAKE_SUM_FUNC(avx);
 MAKE_FFT_CMUL_FUNC(avx);
 MAKE_FFT_CMULADD_FUNC(avx);
 #endif
 #endif /* DSP_OPS_H */