From d17c3fb72d2504fc102849bc02c244b381e114c1 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Mon, 11 Nov 2024 17:59:10 +0100 Subject: [PATCH] filter-chain: move some biquad functions around Replace the single-filter biquad_run with the multi-filter biquadn_run, renamed to biquad_run, so that there is just 1 biquad function in dsp-ops. Move the implementation prototypes and the dsp_ops_init() declaration to a separate header, dsp-ops-impl.h. Remove dsp_ops_benchmark(). --- src/modules/module-filter-chain.c | 2 + .../module-filter-chain/builtin_plugin.c | 4 +- src/modules/module-filter-chain/dsp-ops-c.c | 24 ++--- .../module-filter-chain/dsp-ops-impl.h | 93 ++++++++++++++++++ src/modules/module-filter-chain/dsp-ops-sse.c | 8 +- src/modules/module-filter-chain/dsp-ops.c | 45 +-------- src/modules/module-filter-chain/dsp-ops.h | 94 +------------------ 7 files changed, 115 insertions(+), 155 deletions(-) create mode 100644 src/modules/module-filter-chain/dsp-ops-impl.h diff --git a/src/modules/module-filter-chain.c b/src/modules/module-filter-chain.c index 290cb091d..9aec83842 100644 --- a/src/modules/module-filter-chain.c +++ b/src/modules/module-filter-chain.c @@ -30,6 +30,8 @@ #include #include +#include "module-filter-chain/dsp-ops-impl.h" + #define NAME "filter-chain" PW_LOG_TOPIC_STATIC(mod_topic, "mod." 
NAME); diff --git a/src/modules/module-filter-chain/builtin_plugin.c b/src/modules/module-filter-chain/builtin_plugin.c index d6a5192b7..6e5e101eb 100644 --- a/src/modules/module-filter-chain/builtin_plugin.c +++ b/src/modules/module-filter-chain/builtin_plugin.c @@ -538,7 +538,7 @@ static void bq_run(void *Instance, unsigned long samples) if (impl->freq != freq || impl->Q != Q || impl->gain != gain) bq_freq_update(impl, impl->type, freq, Q, gain); } - dsp_ops_biquad_run(impl->dsp, bq, out, in, samples); + dsp_ops_biquad_run(impl->dsp, bq, 1, 0, &out, (const float **)&in, 1, samples); } /** bq_lowpass */ @@ -1991,7 +1991,7 @@ static void param_eq_connect_port(void * Instance, unsigned long Port, static void param_eq_run(void * Instance, unsigned long SampleCount) { struct param_eq_impl *impl = Instance; - dsp_ops_biquadn_run(impl->dsp, impl->bq, impl->n_bq, PARAM_EQ_MAX, + dsp_ops_biquad_run(impl->dsp, impl->bq, impl->n_bq, PARAM_EQ_MAX, &impl->port[8], (const float**)impl->port, 8, SampleCount); } diff --git a/src/modules/module-filter-chain/dsp-ops-c.c b/src/modules/module-filter-chain/dsp-ops-c.c index 99de489be..d5f681ce2 100644 --- a/src/modules/module-filter-chain/dsp-ops-c.c +++ b/src/modules/module-filter-chain/dsp-ops-c.c @@ -16,13 +16,20 @@ #else #include "pffft.h" #endif -#include "dsp-ops.h" +#include "dsp-ops-impl.h" void dsp_clear_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples) { memset(dst, 0, sizeof(float) * n_samples); } +void dsp_copy_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, + const void * SPA_RESTRICT src, uint32_t n_samples) +{ + if (dst != src) + spa_memcpy(dst, src, sizeof(float) * n_samples); +} + static inline void dsp_add_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src, uint32_t n_samples) { @@ -67,13 +74,6 @@ static inline void dsp_gain_add_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, } -void dsp_copy_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, - const void * SPA_RESTRICT src, 
uint32_t n_samples) -{ - if (dst != src) - spa_memcpy(dst, src, sizeof(float) * n_samples); -} - void dsp_mix_gain_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], @@ -114,7 +114,7 @@ void dsp_mult_c(struct dsp_ops *ops, } } -void dsp_biquad_run_c(struct dsp_ops *ops, struct biquad *bq, +static void biquad_run_c(struct dsp_ops *ops, struct biquad *bq, float *out, const float *in, uint32_t n_samples) { float x, y, x1, x2; @@ -146,7 +146,7 @@ void dsp_biquad_run_c(struct dsp_ops *ops, struct biquad *bq, #undef F } -void dsp_biquadn_run_c(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, +void dsp_biquad_run_c(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples) { @@ -159,9 +159,9 @@ void dsp_biquadn_run_c(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, ui if (s == NULL || d == NULL) continue; if (n_bq > 0) - dsp_biquad_run_c(ops, &bq[0], d, s, n_samples); + biquad_run_c(ops, &bq[0], d, s, n_samples); for (j = 1; j < n_bq; j++) - dsp_biquad_run_c(ops, &bq[j], d, d, n_samples); + biquad_run_c(ops, &bq[j], d, d, n_samples); } } diff --git a/src/modules/module-filter-chain/dsp-ops-impl.h b/src/modules/module-filter-chain/dsp-ops-impl.h new file mode 100644 index 000000000..205510daa --- /dev/null +++ b/src/modules/module-filter-chain/dsp-ops-impl.h @@ -0,0 +1,93 @@ +/* Spa */ +/* SPDX-FileCopyrightText: Copyright © 2022 Wim Taymans */ +/* SPDX-License-Identifier: MIT */ + +#ifndef DSP_OPS_IMPL_H +#define DSP_OPS_IMPL_H + +#include "dsp-ops.h" + +int dsp_ops_init(struct dsp_ops *ops, uint32_t cpu_flags); + +#define MAKE_CLEAR_FUNC(arch) \ +void dsp_clear_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples) +#define MAKE_COPY_FUNC(arch) \ +void dsp_copy_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \ + const void * SPA_RESTRICT src, uint32_t n_samples) +#define 
MAKE_MIX_GAIN_FUNC(arch) \ +void dsp_mix_gain_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \ + const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples) +#define MAKE_SUM_FUNC(arch) \ +void dsp_sum_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \ + const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples) +#define MAKE_LINEAR_FUNC(arch) \ +void dsp_linear_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \ + const float * SPA_RESTRICT src, const float mult, const float add, uint32_t n_samples) +#define MAKE_MULT_FUNC(arch) \ +void dsp_mult_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \ + const void * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples) +#define MAKE_BIQUAD_RUN_FUNC(arch) \ +void dsp_biquad_run_##arch (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, \ + float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples) +#define MAKE_DELAY_FUNC(arch) \ +void dsp_delay_##arch (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, \ + uint32_t delay, float *dst, const float *src, uint32_t n_samples) + +#define MAKE_FFT_NEW_FUNC(arch) \ +void *dsp_fft_new_##arch(struct dsp_ops *ops, uint32_t size, bool real) +#define MAKE_FFT_FREE_FUNC(arch) \ +void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft) +#define MAKE_FFT_MEMALLOC_FUNC(arch) \ +void *dsp_fft_memalloc_##arch(struct dsp_ops *ops, uint32_t size, bool real) +#define MAKE_FFT_MEMFREE_FUNC(arch) \ +void dsp_fft_memfree_##arch(struct dsp_ops *ops, void *mem) +#define MAKE_FFT_MEMCLEAR_FUNC(arch) \ +void dsp_fft_memclear_##arch(struct dsp_ops *ops, void *mem, uint32_t size, bool real) +#define MAKE_FFT_RUN_FUNC(arch) \ +void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \ + const float * SPA_RESTRICT src, float * SPA_RESTRICT dst) +#define MAKE_FFT_CMUL_FUNC(arch) \ +void dsp_fft_cmul_##arch(struct dsp_ops *ops, void *fft, \ + 
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, \ + const float * SPA_RESTRICT b, uint32_t len, const float scale) +#define MAKE_FFT_CMULADD_FUNC(arch) \ +void dsp_fft_cmuladd_##arch(struct dsp_ops *ops, void *fft, \ + float * dst, const float * src, \ + const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, \ + uint32_t len, const float scale) + + +MAKE_CLEAR_FUNC(c); +MAKE_COPY_FUNC(c); +MAKE_MIX_GAIN_FUNC(c); +MAKE_SUM_FUNC(c); +MAKE_LINEAR_FUNC(c); +MAKE_MULT_FUNC(c); +MAKE_BIQUAD_RUN_FUNC(c); +MAKE_DELAY_FUNC(c); + +MAKE_FFT_NEW_FUNC(c); +MAKE_FFT_FREE_FUNC(c); +MAKE_FFT_MEMALLOC_FUNC(c); +MAKE_FFT_MEMFREE_FUNC(c); +MAKE_FFT_MEMCLEAR_FUNC(c); +MAKE_FFT_RUN_FUNC(c); +MAKE_FFT_CMUL_FUNC(c); +MAKE_FFT_CMULADD_FUNC(c); + +#if defined (HAVE_SSE) +MAKE_MIX_GAIN_FUNC(sse); +MAKE_SUM_FUNC(sse); +MAKE_BIQUAD_RUN_FUNC(sse); +MAKE_DELAY_FUNC(sse); +MAKE_FFT_CMUL_FUNC(sse); +MAKE_FFT_CMULADD_FUNC(sse); +#endif +#if defined (HAVE_AVX) +MAKE_MIX_GAIN_FUNC(avx); +MAKE_SUM_FUNC(avx); +MAKE_FFT_CMUL_FUNC(avx); +MAKE_FFT_CMULADD_FUNC(avx); +#endif + +#endif /* DSP_OPS_IMPL_H */ diff --git a/src/modules/module-filter-chain/dsp-ops-sse.c b/src/modules/module-filter-chain/dsp-ops-sse.c index 9e7d7b753..c218f1bc6 100644 --- a/src/modules/module-filter-chain/dsp-ops-sse.c +++ b/src/modules/module-filter-chain/dsp-ops-sse.c @@ -15,7 +15,7 @@ #include "pffft.h" #endif -#include "dsp-ops.h" +#include "dsp-ops-impl.h" #include @@ -128,7 +128,7 @@ void dsp_sum_sse(struct dsp_ops *ops, float *r, const float *a, const float *b, } } -void dsp_biquad_run_sse(struct dsp_ops *ops, struct biquad *bq, +static void dsp_biquad_run1_sse(struct dsp_ops *ops, struct biquad *bq, float *out, const float *in, uint32_t n_samples) { __m128 x, y, z; @@ -432,7 +432,7 @@ static void dsp_biquad2_run4_sse(struct dsp_ops *ops, struct biquad *bq, uint32_ #undef F } -void dsp_biquadn_run_sse(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, +void dsp_biquad_run_sse(struct 
dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples) { @@ -502,7 +502,7 @@ void dsp_biquadn_run_sse(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, dsp_biquad2_run_sse(ops, &bq[j], d, s, n_samples); } if (j < n_bq) { - dsp_biquad_run_sse(ops, &bq[j], d, s, n_samples); + dsp_biquad_run1_sse(ops, &bq[j], d, s, n_samples); } } } diff --git a/src/modules/module-filter-chain/dsp-ops.c b/src/modules/module-filter-chain/dsp-ops.c index f5dcb28f8..ffd2983bf 100644 --- a/src/modules/module-filter-chain/dsp-ops.c +++ b/src/modules/module-filter-chain/dsp-ops.c @@ -11,7 +11,7 @@ #include #include -#include "dsp-ops.h" +#include "dsp-ops-impl.h" struct dsp_info { uint32_t cpu_flags; @@ -38,7 +38,6 @@ static struct dsp_info dsp_table[] = .funcs.fft_run = dsp_fft_run_c, .funcs.fft_cmul = dsp_fft_cmul_avx, .funcs.fft_cmuladd = dsp_fft_cmuladd_avx, - .funcs.biquadn_run = dsp_biquadn_run_sse, .funcs.delay = dsp_delay_sse, }, #endif @@ -59,7 +58,6 @@ static struct dsp_info dsp_table[] = .funcs.fft_run = dsp_fft_run_c, .funcs.fft_cmul = dsp_fft_cmul_sse, .funcs.fft_cmuladd = dsp_fft_cmuladd_sse, - .funcs.biquadn_run = dsp_biquadn_run_sse, .funcs.delay = dsp_delay_sse, }, #endif @@ -79,7 +77,6 @@ static struct dsp_info dsp_table[] = .funcs.fft_run = dsp_fft_run_c, .funcs.fft_cmul = dsp_fft_cmul_c, .funcs.fft_cmuladd = dsp_fft_cmuladd_c, - .funcs.biquadn_run = dsp_biquadn_run_c, .funcs.delay = dsp_delay_c, }, }; @@ -115,43 +112,3 @@ int dsp_ops_init(struct dsp_ops *ops, uint32_t cpu_flags) return 0; } - -int dsp_ops_benchmark(void) -{ - struct dsp_ops ops[3]; - uint32_t i; - struct biquad bq; - float in[2048], out[2048]; - struct timespec ts; - uint64_t t1, t2, t3, t4; - - dsp_ops_init(&ops[0], 0); - dsp_ops_init(&ops[1], SPA_CPU_FLAG_SSE); - dsp_ops_init(&ops[2], SPA_CPU_FLAG_AVX); - - clock_gettime(CLOCK_MONOTONIC, &ts); - t1 = SPA_TIMESPEC_TO_NSEC(&ts); - - for (i = 
0; i < 8192; i++) - dsp_ops_biquad_run(&ops[0], &bq, out, in, 2048); - - clock_gettime(CLOCK_MONOTONIC, &ts); - t2 = SPA_TIMESPEC_TO_NSEC(&ts); - - for (i = 0; i < 8192; i++) - dsp_ops_biquad_run(&ops[1], &bq, out, in, 2048); - - clock_gettime(CLOCK_MONOTONIC, &ts); - t3 = SPA_TIMESPEC_TO_NSEC(&ts); - - for (i = 0; i < 8192; i++) - dsp_ops_biquad_run(&ops[2], &bq, out, in, 2048); - - clock_gettime(CLOCK_MONOTONIC, &ts); - t4 = SPA_TIMESPEC_TO_NSEC(&ts); - - fprintf(stderr, "%"PRIu64" %"PRIu64" %"PRIu64" speedup:%f\n", - t2-t1, t3-t2, t4-t3, - ((double)(t2-t1))/(t3-t2)); - return 0; -} diff --git a/src/modules/module-filter-chain/dsp-ops.h b/src/modules/module-filter-chain/dsp-ops.h index 5621d5baa..1bf4eed96 100644 --- a/src/modules/module-filter-chain/dsp-ops.h +++ b/src/modules/module-filter-chain/dsp-ops.h @@ -20,8 +20,6 @@ struct dsp_ops_funcs { void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples); - void (*biquad_run) (struct dsp_ops *ops, struct biquad *bq, - float *out, const float *in, uint32_t n_samples); void (*sum) (struct dsp_ops *ops, float * dst, const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples); @@ -46,7 +44,7 @@ struct dsp_ops_funcs { void (*mult) (struct dsp_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples); - void (*biquadn_run) (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, + void (*biquad_run) (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples); void (*delay) (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, uint32_t delay, @@ -63,9 +61,6 @@ struct dsp_ops { const void *priv; }; -int dsp_ops_init(struct dsp_ops *ops, uint32_t cpu_flags); -int dsp_ops_benchmark(void); - #define dsp_ops_free(ops) (ops)->free(ops) #define dsp_ops_clear(ops,...) 
(ops)->funcs.clear(ops, __VA_ARGS__) @@ -75,7 +70,6 @@ int dsp_ops_benchmark(void); #define dsp_ops_sum(ops,...) (ops)->funcs.sum(ops, __VA_ARGS__) #define dsp_ops_linear(ops,...) (ops)->funcs.linear(ops, __VA_ARGS__) #define dsp_ops_mult(ops,...) (ops)->funcs.mult(ops, __VA_ARGS__) -#define dsp_ops_biquadn_run(ops,...) (ops)->funcs.biquadn_run(ops, __VA_ARGS__) #define dsp_ops_delay(ops,...) (ops)->funcs.delay(ops, __VA_ARGS__) #define dsp_ops_fft_new(ops,...) (ops)->funcs.fft_new(ops, __VA_ARGS__) @@ -87,90 +81,4 @@ int dsp_ops_benchmark(void); #define dsp_ops_fft_cmul(ops,...) (ops)->funcs.fft_cmul(ops, __VA_ARGS__) #define dsp_ops_fft_cmuladd(ops,...) (ops)->funcs.fft_cmuladd(ops, __VA_ARGS__) -#define MAKE_CLEAR_FUNC(arch) \ -void dsp_clear_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples) -#define MAKE_COPY_FUNC(arch) \ -void dsp_copy_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \ - const void * SPA_RESTRICT src, uint32_t n_samples) -#define MAKE_MIX_GAIN_FUNC(arch) \ -void dsp_mix_gain_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \ - const void * SPA_RESTRICT src[], float gain[], uint32_t n_src, uint32_t n_samples) -#define MAKE_BIQUAD_RUN_FUNC(arch) \ -void dsp_biquad_run_##arch (struct dsp_ops *ops, struct biquad *bq, \ - float *out, const float *in, uint32_t n_samples) -#define MAKE_SUM_FUNC(arch) \ -void dsp_sum_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \ - const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples) -#define MAKE_LINEAR_FUNC(arch) \ -void dsp_linear_##arch (struct dsp_ops *ops, float * SPA_RESTRICT dst, \ - const float * SPA_RESTRICT src, const float mult, const float add, uint32_t n_samples) -#define MAKE_MULT_FUNC(arch) \ -void dsp_mult_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \ - const void * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples) -#define MAKE_BIQUADN_RUN_FUNC(arch) \ -void dsp_biquadn_run_##arch (struct dsp_ops *ops, struct biquad 
*bq, uint32_t n_bq, uint32_t bq_stride, \ - float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples) -#define MAKE_DELAY_FUNC(arch) \ -void dsp_delay_##arch (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, \ - uint32_t delay, float *dst, const float *src, uint32_t n_samples) - -#define MAKE_FFT_NEW_FUNC(arch) \ -void *dsp_fft_new_##arch(struct dsp_ops *ops, uint32_t size, bool real) -#define MAKE_FFT_FREE_FUNC(arch) \ -void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft) -#define MAKE_FFT_MEMALLOC_FUNC(arch) \ -void *dsp_fft_memalloc_##arch(struct dsp_ops *ops, uint32_t size, bool real) -#define MAKE_FFT_MEMFREE_FUNC(arch) \ -void dsp_fft_memfree_##arch(struct dsp_ops *ops, void *mem) -#define MAKE_FFT_MEMCLEAR_FUNC(arch) \ -void dsp_fft_memclear_##arch(struct dsp_ops *ops, void *mem, uint32_t size, bool real) -#define MAKE_FFT_RUN_FUNC(arch) \ -void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \ - const float * SPA_RESTRICT src, float * SPA_RESTRICT dst) -#define MAKE_FFT_CMUL_FUNC(arch) \ -void dsp_fft_cmul_##arch(struct dsp_ops *ops, void *fft, \ - float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, \ - const float * SPA_RESTRICT b, uint32_t len, const float scale) -#define MAKE_FFT_CMULADD_FUNC(arch) \ -void dsp_fft_cmuladd_##arch(struct dsp_ops *ops, void *fft, \ - float * dst, const float * src, \ - const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, \ - uint32_t len, const float scale) - - -MAKE_CLEAR_FUNC(c); -MAKE_COPY_FUNC(c); -MAKE_MIX_GAIN_FUNC(c); -MAKE_BIQUAD_RUN_FUNC(c); -MAKE_SUM_FUNC(c); -MAKE_LINEAR_FUNC(c); -MAKE_MULT_FUNC(c); -MAKE_BIQUADN_RUN_FUNC(c); -MAKE_DELAY_FUNC(c); - -MAKE_FFT_NEW_FUNC(c); -MAKE_FFT_FREE_FUNC(c); -MAKE_FFT_MEMALLOC_FUNC(c); -MAKE_FFT_MEMFREE_FUNC(c); -MAKE_FFT_MEMCLEAR_FUNC(c); -MAKE_FFT_RUN_FUNC(c); -MAKE_FFT_CMUL_FUNC(c); -MAKE_FFT_CMULADD_FUNC(c); - -#if defined (HAVE_SSE) -MAKE_MIX_GAIN_FUNC(sse); -MAKE_SUM_FUNC(sse); 
-MAKE_BIQUAD_RUN_FUNC(sse); -MAKE_BIQUADN_RUN_FUNC(sse); -MAKE_DELAY_FUNC(sse); -MAKE_FFT_CMUL_FUNC(sse); -MAKE_FFT_CMULADD_FUNC(sse); -#endif -#if defined (HAVE_AVX) -MAKE_MIX_GAIN_FUNC(avx); -MAKE_SUM_FUNC(avx); -MAKE_FFT_CMUL_FUNC(avx); -MAKE_FFT_CMULADD_FUNC(avx); -#endif - #endif /* DSP_OPS_H */