diff --git a/src/modules/module-filter-chain/convolver.c b/src/modules/module-filter-chain/convolver.c index 235b7a8aa..da422e063 100644 --- a/src/modules/module-filter-chain/convolver.c +++ b/src/modules/module-filter-chain/convolver.c @@ -37,37 +37,6 @@ struct convolver1 { float scale; }; -static void *fft_alloc(int size) -{ - size_t nb_bytes = size * sizeof(float); -#define ALIGNMENT 64 - void *p, *p0 = malloc(nb_bytes + ALIGNMENT); - if (!p0) - return (void *)0; - p = (void *)(((size_t)p0 + ALIGNMENT) & (~((size_t)(ALIGNMENT - 1)))); - *((void **)p - 1) = p0; - return p; -} -static void fft_free(void *p) -{ - if (p) - free(*((void **)p - 1)); -} - -static inline void fft_cpx_clear(float *v, int size) -{ - dsp_ops_clear(dsp, v, size * 2); -} -static float *fft_cpx_alloc(int size) -{ - return fft_alloc(size * 2); -} - -static void fft_cpx_free(float *cpx) -{ - fft_free(cpx); -} - static int next_power_of_two(int val) { int r = 1; @@ -80,11 +49,11 @@ static void convolver1_reset(struct convolver1 *conv) { int i; for (i = 0; i < conv->segCount; i++) - fft_cpx_clear(conv->segments[i], conv->fftComplexSize); - dsp_ops_clear(dsp, conv->overlap, conv->blockSize); - dsp_ops_clear(dsp, conv->inputBuffer, conv->segSize); - fft_cpx_clear(conv->pre_mult, conv->fftComplexSize); - fft_cpx_clear(conv->conv, conv->fftComplexSize); + dsp_ops_fft_memclear(dsp, conv->segments[i], conv->fftComplexSize, false); + dsp_ops_fft_memclear(dsp, conv->overlap, conv->blockSize, true); + dsp_ops_fft_memclear(dsp, conv->inputBuffer, conv->segSize, true); + dsp_ops_fft_memclear(dsp, conv->pre_mult, conv->fftComplexSize, false); + dsp_ops_fft_memclear(dsp, conv->conv, conv->fftComplexSize, false); conv->inputBufferFill = 0; conv->current = 0; } @@ -94,22 +63,22 @@ static void convolver1_free(struct convolver1 *conv) int i; for (i = 0; i < conv->segCount; i++) { if (conv->segments) - fft_cpx_free(conv->segments[i]); + dsp_ops_fft_memfree(dsp, conv->segments[i]); if (conv->segmentsIr) - 
fft_cpx_free(conv->segmentsIr[i]); + dsp_ops_fft_memfree(dsp, conv->segmentsIr[i]); } if (conv->fft) dsp_ops_fft_free(dsp, conv->fft); if (conv->ifft) dsp_ops_fft_free(dsp, conv->ifft); if (conv->fft_buffer) - fft_free(conv->fft_buffer); + dsp_ops_fft_memfree(dsp, conv->fft_buffer); free(conv->segments); free(conv->segmentsIr); - fft_cpx_free(conv->pre_mult); - fft_cpx_free(conv->conv); - fft_free(conv->overlap); - fft_free(conv->inputBuffer); + dsp_ops_fft_memfree(dsp, conv->pre_mult); + dsp_ops_fft_memfree(dsp, conv->conv); + dsp_ops_fft_memfree(dsp, conv->overlap); + dsp_ops_fft_memfree(dsp, conv->inputBuffer); free(conv); } @@ -143,7 +112,7 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen) if (conv->ifft == NULL) goto error; - conv->fft_buffer = fft_alloc(conv->segSize); + conv->fft_buffer = dsp_ops_fft_memalloc(dsp, conv->segSize, true); if (conv->fft_buffer == NULL) goto error; @@ -156,21 +125,21 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen) int left = irlen - (i * conv->blockSize); int copy = SPA_MIN(conv->blockSize, left); - conv->segments[i] = fft_cpx_alloc(conv->fftComplexSize); - conv->segmentsIr[i] = fft_cpx_alloc(conv->fftComplexSize); + conv->segments[i] = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false); + conv->segmentsIr[i] = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false); if (conv->segments[i] == NULL || conv->segmentsIr[i] == NULL) goto error; dsp_ops_copy(dsp, conv->fft_buffer, &ir[i * conv->blockSize], copy); if (copy < conv->segSize) - dsp_ops_clear(dsp, conv->fft_buffer + copy, conv->segSize - copy); + dsp_ops_fft_memclear(dsp, conv->fft_buffer + copy, conv->segSize - copy, true); dsp_ops_fft_run(dsp, conv->fft, 1, conv->fft_buffer, conv->segmentsIr[i]); } - conv->pre_mult = fft_cpx_alloc(conv->fftComplexSize); - conv->conv = fft_cpx_alloc(conv->fftComplexSize); - conv->overlap = fft_alloc(conv->blockSize); - conv->inputBuffer = fft_alloc(conv->segSize); + 
conv->pre_mult = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false); + conv->conv = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false); + conv->overlap = dsp_ops_fft_memalloc(dsp, conv->blockSize, true); + conv->inputBuffer = dsp_ops_fft_memalloc(dsp, conv->segSize, true); if (conv->pre_mult == NULL || conv->conv == NULL || conv->overlap == NULL || conv->inputBuffer == NULL) goto error; @@ -188,7 +157,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou int i, processed = 0; if (conv == NULL || conv->segCount == 0) { - dsp_ops_clear(dsp, output, len); + dsp_ops_fft_memclear(dsp, output, len, true); return len; } @@ -198,7 +167,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou dsp_ops_copy(dsp, conv->inputBuffer + inputBufferPos, input + processed, processing); if (inputBufferPos == 0 && processing < conv->blockSize) - dsp_ops_clear(dsp, conv->inputBuffer + processing, conv->blockSize - processing); + dsp_ops_fft_memclear(dsp, conv->inputBuffer + processing, conv->blockSize - processing, true); dsp_ops_fft_run(dsp, conv->fft, 1, conv->inputBuffer, conv->segments[conv->current]); @@ -277,13 +246,13 @@ void convolver_reset(struct convolver *conv) convolver1_reset(conv->headConvolver); if (conv->tailConvolver0) { convolver1_reset(conv->tailConvolver0); - dsp_ops_clear(dsp, conv->tailOutput0, conv->tailBlockSize); - dsp_ops_clear(dsp, conv->tailPrecalculated0, conv->tailBlockSize); + dsp_ops_fft_memclear(dsp, conv->tailOutput0, conv->tailBlockSize, true); + dsp_ops_fft_memclear(dsp, conv->tailPrecalculated0, conv->tailBlockSize, true); } if (conv->tailConvolver) { convolver1_reset(conv->tailConvolver); - dsp_ops_clear(dsp, conv->tailOutput, conv->tailBlockSize); - dsp_ops_clear(dsp, conv->tailPrecalculated, conv->tailBlockSize); + dsp_ops_fft_memclear(dsp, conv->tailOutput, conv->tailBlockSize, true); + dsp_ops_fft_memclear(dsp, conv->tailPrecalculated, conv->tailBlockSize, true); } 
conv->tailInputFill = 0; conv->precalculatedPos = 0; @@ -324,8 +293,8 @@ struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tai if (irlen > conv->tailBlockSize) { int conv1IrLen = SPA_MIN(irlen - conv->tailBlockSize, conv->tailBlockSize); conv->tailConvolver0 = convolver1_new(conv->headBlockSize, ir + conv->tailBlockSize, conv1IrLen); - conv->tailOutput0 = fft_alloc(conv->tailBlockSize); - conv->tailPrecalculated0 = fft_alloc(conv->tailBlockSize); + conv->tailOutput0 = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true); + conv->tailPrecalculated0 = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true); if (conv->tailConvolver0 == NULL || conv->tailOutput0 == NULL || conv->tailPrecalculated0 == NULL) goto error; @@ -334,15 +303,15 @@ struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tai if (irlen > 2 * conv->tailBlockSize) { int tailIrLen = irlen - (2 * conv->tailBlockSize); conv->tailConvolver = convolver1_new(conv->tailBlockSize, ir + (2 * conv->tailBlockSize), tailIrLen); - conv->tailOutput = fft_alloc(conv->tailBlockSize); - conv->tailPrecalculated = fft_alloc(conv->tailBlockSize); + conv->tailOutput = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true); + conv->tailPrecalculated = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true); if (conv->tailConvolver == NULL || conv->tailOutput == NULL || conv->tailPrecalculated == NULL) goto error; } if (conv->tailConvolver0 || conv->tailConvolver) { - conv->tailInput = fft_alloc(conv->tailBlockSize); + conv->tailInput = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true); if (conv->tailInput == NULL) goto error; } @@ -363,11 +332,11 @@ void convolver_free(struct convolver *conv) convolver1_free(conv->tailConvolver0); if (conv->tailConvolver) convolver1_free(conv->tailConvolver); - fft_free(conv->tailOutput0); - fft_free(conv->tailPrecalculated0); - fft_free(conv->tailOutput); - fft_free(conv->tailPrecalculated); - fft_free(conv->tailInput); + 
dsp_ops_fft_memfree(dsp, conv->tailOutput0); + dsp_ops_fft_memfree(dsp, conv->tailPrecalculated0); + dsp_ops_fft_memfree(dsp, conv->tailOutput); + dsp_ops_fft_memfree(dsp, conv->tailPrecalculated); + dsp_ops_fft_memfree(dsp, conv->tailInput); free(conv); } diff --git a/src/modules/module-filter-chain/dsp-ops-c.c b/src/modules/module-filter-chain/dsp-ops-c.c index 16170cb30..892448ace 100644 --- a/src/modules/module-filter-chain/dsp-ops-c.c +++ b/src/modules/module-filter-chain/dsp-ops-c.c @@ -222,7 +222,7 @@ struct fft_info { }; #endif -void *dsp_fft_new_c(struct dsp_ops *ops, int32_t size, bool real) +void *dsp_fft_new_c(struct dsp_ops *ops, uint32_t size, bool real) { #ifdef HAVE_FFTW struct fft_info *info = calloc(1, sizeof(struct fft_info)); @@ -232,8 +232,8 @@ void *dsp_fft_new_c(struct dsp_ops *ops, int32_t size, bool real) if (info == NULL) return NULL; - rdata = fftwf_alloc_real (size * 2); - cdata = fftwf_alloc_complex (size + 1); + rdata = fftwf_alloc_real(size * 2); + cdata = fftwf_alloc_complex(size + 1); info->plan_r2c = fftwf_plan_dft_r2c_1d(size, rdata, cdata, FFTW_ESTIMATE); info->plan_c2r = fftwf_plan_dft_c2r_1d(size, cdata, rdata, FFTW_ESTIMATE); @@ -258,6 +258,39 @@ void dsp_fft_free_c(struct dsp_ops *ops, void *fft) pffft_destroy_setup(fft); #endif } +void *dsp_fft_memalloc_c(struct dsp_ops *ops, uint32_t size, bool real) +{ +#ifdef HAVE_FFTW + if (real) + return fftwf_alloc_real(size); + else { + return fftwf_alloc_complex(size); + } +#else + if (real) + return pffft_aligned_malloc(size * sizeof(float)); + else + return pffft_aligned_malloc(size * 2 * sizeof(float)); +#endif +} +void dsp_fft_memfree_c(struct dsp_ops *ops, void *data) +{ +#ifdef HAVE_FFTW + fftwf_free(data); +#else + pffft_aligned_free(data); +#endif +} + +void dsp_fft_memclear_c(struct dsp_ops *ops, void *data, uint32_t size, bool real) +{ +#ifdef HAVE_FFTW + dsp_ops_clear(ops, data, real ? size : size * 2); +#else + dsp_ops_clear(ops, data, real ? 
size : size * 2); +#endif +} + void dsp_fft_run_c(struct dsp_ops *ops, void *fft, int direction, const float * SPA_RESTRICT src, float * SPA_RESTRICT dst) { diff --git a/src/modules/module-filter-chain/dsp-ops.c b/src/modules/module-filter-chain/dsp-ops.c index 599642263..f5dcb28f8 100644 --- a/src/modules/module-filter-chain/dsp-ops.c +++ b/src/modules/module-filter-chain/dsp-ops.c @@ -32,6 +32,9 @@ static struct dsp_info dsp_table[] = .funcs.mult = dsp_mult_c, .funcs.fft_new = dsp_fft_new_c, .funcs.fft_free = dsp_fft_free_c, + .funcs.fft_memalloc = dsp_fft_memalloc_c, + .funcs.fft_memfree = dsp_fft_memfree_c, + .funcs.fft_memclear = dsp_fft_memclear_c, .funcs.fft_run = dsp_fft_run_c, .funcs.fft_cmul = dsp_fft_cmul_avx, .funcs.fft_cmuladd = dsp_fft_cmuladd_avx, @@ -50,6 +53,9 @@ static struct dsp_info dsp_table[] = .funcs.mult = dsp_mult_c, .funcs.fft_new = dsp_fft_new_c, .funcs.fft_free = dsp_fft_free_c, + .funcs.fft_memalloc = dsp_fft_memalloc_c, + .funcs.fft_memfree = dsp_fft_memfree_c, + .funcs.fft_memclear = dsp_fft_memclear_c, .funcs.fft_run = dsp_fft_run_c, .funcs.fft_cmul = dsp_fft_cmul_sse, .funcs.fft_cmuladd = dsp_fft_cmuladd_sse, @@ -67,6 +73,9 @@ static struct dsp_info dsp_table[] = .funcs.mult = dsp_mult_c, .funcs.fft_new = dsp_fft_new_c, .funcs.fft_free = dsp_fft_free_c, + .funcs.fft_memalloc = dsp_fft_memalloc_c, + .funcs.fft_memfree = dsp_fft_memfree_c, + .funcs.fft_memclear = dsp_fft_memclear_c, .funcs.fft_run = dsp_fft_run_c, .funcs.fft_cmul = dsp_fft_cmul_c, .funcs.fft_cmuladd = dsp_fft_cmuladd_c, diff --git a/src/modules/module-filter-chain/dsp-ops.h b/src/modules/module-filter-chain/dsp-ops.h index 433050333..5621d5baa 100644 --- a/src/modules/module-filter-chain/dsp-ops.h +++ b/src/modules/module-filter-chain/dsp-ops.h @@ -26,8 +26,11 @@ struct dsp_ops_funcs { float * dst, const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t n_samples); - void *(*fft_new) (struct dsp_ops *ops, int32_t size, bool real); + void *(*fft_new) 
(struct dsp_ops *ops, uint32_t size, bool real); void (*fft_free) (struct dsp_ops *ops, void *fft); + void *(*fft_memalloc) (struct dsp_ops *ops, uint32_t size, bool real); + void (*fft_memfree) (struct dsp_ops *ops, void *mem); + void (*fft_memclear) (struct dsp_ops *ops, void *mem, uint32_t size, bool real); void (*fft_run) (struct dsp_ops *ops, void *fft, int direction, const float * SPA_RESTRICT src, float * SPA_RESTRICT dst); void (*fft_cmul) (struct dsp_ops *ops, void *fft, @@ -77,6 +80,9 @@ int dsp_ops_benchmark(void); #define dsp_ops_fft_new(ops,...) (ops)->funcs.fft_new(ops, __VA_ARGS__) #define dsp_ops_fft_free(ops,...) (ops)->funcs.fft_free(ops, __VA_ARGS__) +#define dsp_ops_fft_memalloc(ops,...) (ops)->funcs.fft_memalloc(ops, __VA_ARGS__) +#define dsp_ops_fft_memfree(ops,...) (ops)->funcs.fft_memfree(ops, __VA_ARGS__) +#define dsp_ops_fft_memclear(ops,...) (ops)->funcs.fft_memclear(ops, __VA_ARGS__) #define dsp_ops_fft_run(ops,...) (ops)->funcs.fft_run(ops, __VA_ARGS__) #define dsp_ops_fft_cmul(ops,...) (ops)->funcs.fft_cmul(ops, __VA_ARGS__) #define dsp_ops_fft_cmuladd(ops,...) 
(ops)->funcs.fft_cmuladd(ops, __VA_ARGS__) @@ -109,9 +115,15 @@ void dsp_delay_##arch (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32 uint32_t delay, float *dst, const float *src, uint32_t n_samples) #define MAKE_FFT_NEW_FUNC(arch) \ -void *dsp_fft_new_##arch(struct dsp_ops *ops, int32_t size, bool real) +void *dsp_fft_new_##arch(struct dsp_ops *ops, uint32_t size, bool real) #define MAKE_FFT_FREE_FUNC(arch) \ void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft) +#define MAKE_FFT_MEMALLOC_FUNC(arch) \ +void *dsp_fft_memalloc_##arch(struct dsp_ops *ops, uint32_t size, bool real) +#define MAKE_FFT_MEMFREE_FUNC(arch) \ +void dsp_fft_memfree_##arch(struct dsp_ops *ops, void *mem) +#define MAKE_FFT_MEMCLEAR_FUNC(arch) \ +void dsp_fft_memclear_##arch(struct dsp_ops *ops, void *mem, uint32_t size, bool real) #define MAKE_FFT_RUN_FUNC(arch) \ void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \ const float * SPA_RESTRICT src, float * SPA_RESTRICT dst) @@ -138,6 +150,9 @@ MAKE_DELAY_FUNC(c); MAKE_FFT_NEW_FUNC(c); MAKE_FFT_FREE_FUNC(c); +MAKE_FFT_MEMALLOC_FUNC(c); +MAKE_FFT_MEMFREE_FUNC(c); +MAKE_FFT_MEMCLEAR_FUNC(c); MAKE_FFT_RUN_FUNC(c); MAKE_FFT_CMUL_FUNC(c); MAKE_FFT_CMULADD_FUNC(c);