filter-chain: move fft mem function to dsp_ops

This lets each FFT implementation provide its own versions of the memory allocate, free and clear functions.

One possible optimization would be to split the complex numbers into
separate real and imaginary arrays. This would speed up the multiply
operations.
This commit is contained in:
Wim Taymans 2024-11-05 16:12:28 +01:00
parent 79baeb8d18
commit c672327181
4 changed files with 98 additions and 72 deletions

View file

@ -37,37 +37,6 @@ struct convolver1 {
float scale; float scale;
}; };
/*
 * Allocate an uninitialized buffer of `size` floats whose start address is
 * a multiple of ALIGNMENT (64) bytes.
 *
 * The pointer returned by malloc() is saved in the pointer-sized slot right
 * in front of the aligned address; fft_free() reads it back from there, so
 * the result must be released with fft_free() and never with free().
 *
 * Returns NULL when `size` is negative, when the byte count would overflow,
 * or when malloc() fails.
 */
static void *fft_alloc(int size)
{
#define ALIGNMENT 64
	size_t nb_bytes;
	void *p, *p0;

	/* A negative count would wrap around when converted to size_t and
	 * yield an allocation smaller than the alignment padding itself. */
	if (size < 0)
		return NULL;
	/* Make sure nb_bytes + ALIGNMENT below cannot wrap. */
	if ((size_t)size > ((size_t)-1 - ALIGNMENT) / sizeof(float))
		return NULL;

	nb_bytes = (size_t)size * sizeof(float);
	p0 = malloc(nb_bytes + ALIGNMENT);
	if (!p0)
		return NULL;

	/* Step past the padding, then round down to the alignment boundary.
	 * The result is strictly greater than p0, leaving room for the slot. */
	p = (void *)(((size_t)p0 + ALIGNMENT) & (~((size_t)(ALIGNMENT - 1))));
	/* Remember the real allocation so it can be freed later. */
	*((void **)p - 1) = p0;
	return p;
}
/*
 * Release a buffer obtained from fft_alloc().
 *
 * The address originally returned by malloc() is read from the
 * pointer-sized slot located directly before `p` and handed to free().
 * A NULL `p` is ignored.
 */
static void fft_free(void *p)
{
	void **slot;

	if (p == NULL)
		return;

	slot = (void **)p - 1;
	free(*slot);
}
/*
 * Clear a buffer of `size` complex values.
 *
 * Twice `size` is passed as the count to dsp_ops_clear(), i.e. two floats
 * are cleared per complex value.
 */
static inline void fft_cpx_clear(float *v, int size)
{
	const int n_floats = size * 2;

	dsp_ops_clear(dsp, v, n_floats);
}
/*
 * Allocate a buffer for `size` complex values by requesting twice as many
 * floats from fft_alloc(). Returns whatever fft_alloc() returns (NULL on
 * failure); release it with fft_cpx_free().
 */
static float *fft_cpx_alloc(int size)
{
	float *cpx = fft_alloc(size * 2);

	return cpx;
}
/*
 * Release a complex buffer; this simply forwards the pointer to fft_free().
 */
static void fft_cpx_free(float *cpx)
{
	void *mem = cpx;

	fft_free(mem);
}
static int next_power_of_two(int val) static int next_power_of_two(int val)
{ {
int r = 1; int r = 1;
@ -80,11 +49,11 @@ static void convolver1_reset(struct convolver1 *conv)
{ {
int i; int i;
for (i = 0; i < conv->segCount; i++) for (i = 0; i < conv->segCount; i++)
fft_cpx_clear(conv->segments[i], conv->fftComplexSize); dsp_ops_fft_memclear(dsp, conv->segments[i], conv->fftComplexSize, false);
dsp_ops_clear(dsp, conv->overlap, conv->blockSize); dsp_ops_fft_memclear(dsp, conv->overlap, conv->blockSize, true);
dsp_ops_clear(dsp, conv->inputBuffer, conv->segSize); dsp_ops_fft_memclear(dsp, conv->inputBuffer, conv->segSize, true);
fft_cpx_clear(conv->pre_mult, conv->fftComplexSize); dsp_ops_fft_memclear(dsp, conv->pre_mult, conv->fftComplexSize, false);
fft_cpx_clear(conv->conv, conv->fftComplexSize); dsp_ops_fft_memclear(dsp, conv->conv, conv->fftComplexSize, false);
conv->inputBufferFill = 0; conv->inputBufferFill = 0;
conv->current = 0; conv->current = 0;
} }
@ -94,22 +63,22 @@ static void convolver1_free(struct convolver1 *conv)
int i; int i;
for (i = 0; i < conv->segCount; i++) { for (i = 0; i < conv->segCount; i++) {
if (conv->segments) if (conv->segments)
fft_cpx_free(conv->segments[i]); dsp_ops_fft_memfree(dsp, conv->segments[i]);
if (conv->segmentsIr) if (conv->segmentsIr)
fft_cpx_free(conv->segmentsIr[i]); dsp_ops_fft_memfree(dsp, conv->segmentsIr[i]);
} }
if (conv->fft) if (conv->fft)
dsp_ops_fft_free(dsp, conv->fft); dsp_ops_fft_free(dsp, conv->fft);
if (conv->ifft) if (conv->ifft)
dsp_ops_fft_free(dsp, conv->ifft); dsp_ops_fft_free(dsp, conv->ifft);
if (conv->fft_buffer) if (conv->fft_buffer)
fft_free(conv->fft_buffer); dsp_ops_fft_memfree(dsp, conv->fft_buffer);
free(conv->segments); free(conv->segments);
free(conv->segmentsIr); free(conv->segmentsIr);
fft_cpx_free(conv->pre_mult); dsp_ops_fft_memfree(dsp, conv->pre_mult);
fft_cpx_free(conv->conv); dsp_ops_fft_memfree(dsp, conv->conv);
fft_free(conv->overlap); dsp_ops_fft_memfree(dsp, conv->overlap);
fft_free(conv->inputBuffer); dsp_ops_fft_memfree(dsp, conv->inputBuffer);
free(conv); free(conv);
} }
@ -143,7 +112,7 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
if (conv->ifft == NULL) if (conv->ifft == NULL)
goto error; goto error;
conv->fft_buffer = fft_alloc(conv->segSize); conv->fft_buffer = dsp_ops_fft_memalloc(dsp, conv->segSize, true);
if (conv->fft_buffer == NULL) if (conv->fft_buffer == NULL)
goto error; goto error;
@ -156,21 +125,21 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
int left = irlen - (i * conv->blockSize); int left = irlen - (i * conv->blockSize);
int copy = SPA_MIN(conv->blockSize, left); int copy = SPA_MIN(conv->blockSize, left);
conv->segments[i] = fft_cpx_alloc(conv->fftComplexSize); conv->segments[i] = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false);
conv->segmentsIr[i] = fft_cpx_alloc(conv->fftComplexSize); conv->segmentsIr[i] = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false);
if (conv->segments[i] == NULL || conv->segmentsIr[i] == NULL) if (conv->segments[i] == NULL || conv->segmentsIr[i] == NULL)
goto error; goto error;
dsp_ops_copy(dsp, conv->fft_buffer, &ir[i * conv->blockSize], copy); dsp_ops_copy(dsp, conv->fft_buffer, &ir[i * conv->blockSize], copy);
if (copy < conv->segSize) if (copy < conv->segSize)
dsp_ops_clear(dsp, conv->fft_buffer + copy, conv->segSize - copy); dsp_ops_fft_memclear(dsp, conv->fft_buffer + copy, conv->segSize - copy, true);
dsp_ops_fft_run(dsp, conv->fft, 1, conv->fft_buffer, conv->segmentsIr[i]); dsp_ops_fft_run(dsp, conv->fft, 1, conv->fft_buffer, conv->segmentsIr[i]);
} }
conv->pre_mult = fft_cpx_alloc(conv->fftComplexSize); conv->pre_mult = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false);
conv->conv = fft_cpx_alloc(conv->fftComplexSize); conv->conv = dsp_ops_fft_memalloc(dsp, conv->fftComplexSize, false);
conv->overlap = fft_alloc(conv->blockSize); conv->overlap = dsp_ops_fft_memalloc(dsp, conv->blockSize, true);
conv->inputBuffer = fft_alloc(conv->segSize); conv->inputBuffer = dsp_ops_fft_memalloc(dsp, conv->segSize, true);
if (conv->pre_mult == NULL || conv->conv == NULL || conv->overlap == NULL || if (conv->pre_mult == NULL || conv->conv == NULL || conv->overlap == NULL ||
conv->inputBuffer == NULL) conv->inputBuffer == NULL)
goto error; goto error;
@ -188,7 +157,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
int i, processed = 0; int i, processed = 0;
if (conv == NULL || conv->segCount == 0) { if (conv == NULL || conv->segCount == 0) {
dsp_ops_clear(dsp, output, len); dsp_ops_fft_memclear(dsp, output, len, true);
return len; return len;
} }
@ -198,7 +167,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
dsp_ops_copy(dsp, conv->inputBuffer + inputBufferPos, input + processed, processing); dsp_ops_copy(dsp, conv->inputBuffer + inputBufferPos, input + processed, processing);
if (inputBufferPos == 0 && processing < conv->blockSize) if (inputBufferPos == 0 && processing < conv->blockSize)
dsp_ops_clear(dsp, conv->inputBuffer + processing, conv->blockSize - processing); dsp_ops_fft_memclear(dsp, conv->inputBuffer + processing, conv->blockSize - processing, true);
dsp_ops_fft_run(dsp, conv->fft, 1, conv->inputBuffer, conv->segments[conv->current]); dsp_ops_fft_run(dsp, conv->fft, 1, conv->inputBuffer, conv->segments[conv->current]);
@ -277,13 +246,13 @@ void convolver_reset(struct convolver *conv)
convolver1_reset(conv->headConvolver); convolver1_reset(conv->headConvolver);
if (conv->tailConvolver0) { if (conv->tailConvolver0) {
convolver1_reset(conv->tailConvolver0); convolver1_reset(conv->tailConvolver0);
dsp_ops_clear(dsp, conv->tailOutput0, conv->tailBlockSize); dsp_ops_fft_memclear(dsp, conv->tailOutput0, conv->tailBlockSize, true);
dsp_ops_clear(dsp, conv->tailPrecalculated0, conv->tailBlockSize); dsp_ops_fft_memclear(dsp, conv->tailPrecalculated0, conv->tailBlockSize, true);
} }
if (conv->tailConvolver) { if (conv->tailConvolver) {
convolver1_reset(conv->tailConvolver); convolver1_reset(conv->tailConvolver);
dsp_ops_clear(dsp, conv->tailOutput, conv->tailBlockSize); dsp_ops_fft_memclear(dsp, conv->tailOutput, conv->tailBlockSize, true);
dsp_ops_clear(dsp, conv->tailPrecalculated, conv->tailBlockSize); dsp_ops_fft_memclear(dsp, conv->tailPrecalculated, conv->tailBlockSize, true);
} }
conv->tailInputFill = 0; conv->tailInputFill = 0;
conv->precalculatedPos = 0; conv->precalculatedPos = 0;
@ -324,8 +293,8 @@ struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tai
if (irlen > conv->tailBlockSize) { if (irlen > conv->tailBlockSize) {
int conv1IrLen = SPA_MIN(irlen - conv->tailBlockSize, conv->tailBlockSize); int conv1IrLen = SPA_MIN(irlen - conv->tailBlockSize, conv->tailBlockSize);
conv->tailConvolver0 = convolver1_new(conv->headBlockSize, ir + conv->tailBlockSize, conv1IrLen); conv->tailConvolver0 = convolver1_new(conv->headBlockSize, ir + conv->tailBlockSize, conv1IrLen);
conv->tailOutput0 = fft_alloc(conv->tailBlockSize); conv->tailOutput0 = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true);
conv->tailPrecalculated0 = fft_alloc(conv->tailBlockSize); conv->tailPrecalculated0 = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true);
if (conv->tailConvolver0 == NULL || conv->tailOutput0 == NULL || if (conv->tailConvolver0 == NULL || conv->tailOutput0 == NULL ||
conv->tailPrecalculated0 == NULL) conv->tailPrecalculated0 == NULL)
goto error; goto error;
@ -334,15 +303,15 @@ struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tai
if (irlen > 2 * conv->tailBlockSize) { if (irlen > 2 * conv->tailBlockSize) {
int tailIrLen = irlen - (2 * conv->tailBlockSize); int tailIrLen = irlen - (2 * conv->tailBlockSize);
conv->tailConvolver = convolver1_new(conv->tailBlockSize, ir + (2 * conv->tailBlockSize), tailIrLen); conv->tailConvolver = convolver1_new(conv->tailBlockSize, ir + (2 * conv->tailBlockSize), tailIrLen);
conv->tailOutput = fft_alloc(conv->tailBlockSize); conv->tailOutput = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true);
conv->tailPrecalculated = fft_alloc(conv->tailBlockSize); conv->tailPrecalculated = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true);
if (conv->tailConvolver == NULL || conv->tailOutput == NULL || if (conv->tailConvolver == NULL || conv->tailOutput == NULL ||
conv->tailPrecalculated == NULL) conv->tailPrecalculated == NULL)
goto error; goto error;
} }
if (conv->tailConvolver0 || conv->tailConvolver) { if (conv->tailConvolver0 || conv->tailConvolver) {
conv->tailInput = fft_alloc(conv->tailBlockSize); conv->tailInput = dsp_ops_fft_memalloc(dsp, conv->tailBlockSize, true);
if (conv->tailInput == NULL) if (conv->tailInput == NULL)
goto error; goto error;
} }
@ -363,11 +332,11 @@ void convolver_free(struct convolver *conv)
convolver1_free(conv->tailConvolver0); convolver1_free(conv->tailConvolver0);
if (conv->tailConvolver) if (conv->tailConvolver)
convolver1_free(conv->tailConvolver); convolver1_free(conv->tailConvolver);
fft_free(conv->tailOutput0); dsp_ops_fft_memfree(dsp, conv->tailOutput0);
fft_free(conv->tailPrecalculated0); dsp_ops_fft_memfree(dsp, conv->tailPrecalculated0);
fft_free(conv->tailOutput); dsp_ops_fft_memfree(dsp, conv->tailOutput);
fft_free(conv->tailPrecalculated); dsp_ops_fft_memfree(dsp, conv->tailPrecalculated);
fft_free(conv->tailInput); dsp_ops_fft_memfree(dsp, conv->tailInput);
free(conv); free(conv);
} }

View file

@ -222,7 +222,7 @@ struct fft_info {
}; };
#endif #endif
void *dsp_fft_new_c(struct dsp_ops *ops, int32_t size, bool real) void *dsp_fft_new_c(struct dsp_ops *ops, uint32_t size, bool real)
{ {
#ifdef HAVE_FFTW #ifdef HAVE_FFTW
struct fft_info *info = calloc(1, sizeof(struct fft_info)); struct fft_info *info = calloc(1, sizeof(struct fft_info));
@ -258,6 +258,39 @@ void dsp_fft_free_c(struct dsp_ops *ops, void *fft)
pffft_destroy_setup(fft); pffft_destroy_setup(fft);
#endif #endif
} }
/*
 * Allocate a buffer suitable for the FFT backend selected at build time.
 *
 * `size` is the number of elements; `real` selects between real elements
 * (one float each) and complex elements (two floats each). With HAVE_FFTW
 * the FFTW allocators are used, otherwise pffft_aligned_malloc().
 *
 * Returns the pointer handed back by the backend allocator. The buffer
 * must be released with dsp_fft_memfree_c().
 */
void *dsp_fft_memalloc_c(struct dsp_ops *ops, uint32_t size, bool real)
{
#ifdef HAVE_FFTW
	if (real)
		return fftwf_alloc_real(size);
	else
		return fftwf_alloc_complex(size);
#else
	/* The allocation result has to be returned: without the return
	 * statements the caller receives an indeterminate pointer and the
	 * freshly allocated buffer is lost. */
	if (real)
		return pffft_aligned_malloc(size * sizeof(float));
	else
		/* multiply by sizeof first so the doubling is done in size_t */
		return pffft_aligned_malloc(size * sizeof(float) * 2);
#endif
}
/*
 * Release a buffer obtained from dsp_fft_memalloc_c(), using the
 * deallocator of the FFT backend selected at build time: fftwf_free()
 * with HAVE_FFTW, pffft_aligned_free() otherwise.
 */
void dsp_fft_memfree_c(struct dsp_ops *ops, void *data)
{
#if defined(HAVE_FFTW)
	fftwf_free(data);
#else
	pffft_aligned_free(data);
#endif
}
/*
 * Clear an FFT buffer of `size` elements.
 *
 * For real data `size` is passed to dsp_ops_clear() unchanged; for complex
 * data the count is doubled (two floats per element). The FFTW and
 * non-FFTW builds perform exactly the same call, so no backend-specific
 * branch is needed here.
 */
void dsp_fft_memclear_c(struct dsp_ops *ops, void *data, uint32_t size, bool real)
{
	uint32_t n_floats = size;

	if (!real)
		n_floats *= 2;

	dsp_ops_clear(ops, data, n_floats);
}
void dsp_fft_run_c(struct dsp_ops *ops, void *fft, int direction, void dsp_fft_run_c(struct dsp_ops *ops, void *fft, int direction,
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst) const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
{ {

View file

@ -32,6 +32,9 @@ static struct dsp_info dsp_table[] =
.funcs.mult = dsp_mult_c, .funcs.mult = dsp_mult_c,
.funcs.fft_new = dsp_fft_new_c, .funcs.fft_new = dsp_fft_new_c,
.funcs.fft_free = dsp_fft_free_c, .funcs.fft_free = dsp_fft_free_c,
.funcs.fft_memalloc = dsp_fft_memalloc_c,
.funcs.fft_memfree = dsp_fft_memfree_c,
.funcs.fft_memclear = dsp_fft_memclear_c,
.funcs.fft_run = dsp_fft_run_c, .funcs.fft_run = dsp_fft_run_c,
.funcs.fft_cmul = dsp_fft_cmul_avx, .funcs.fft_cmul = dsp_fft_cmul_avx,
.funcs.fft_cmuladd = dsp_fft_cmuladd_avx, .funcs.fft_cmuladd = dsp_fft_cmuladd_avx,
@ -50,6 +53,9 @@ static struct dsp_info dsp_table[] =
.funcs.mult = dsp_mult_c, .funcs.mult = dsp_mult_c,
.funcs.fft_new = dsp_fft_new_c, .funcs.fft_new = dsp_fft_new_c,
.funcs.fft_free = dsp_fft_free_c, .funcs.fft_free = dsp_fft_free_c,
.funcs.fft_memalloc = dsp_fft_memalloc_c,
.funcs.fft_memfree = dsp_fft_memfree_c,
.funcs.fft_memclear = dsp_fft_memclear_c,
.funcs.fft_run = dsp_fft_run_c, .funcs.fft_run = dsp_fft_run_c,
.funcs.fft_cmul = dsp_fft_cmul_sse, .funcs.fft_cmul = dsp_fft_cmul_sse,
.funcs.fft_cmuladd = dsp_fft_cmuladd_sse, .funcs.fft_cmuladd = dsp_fft_cmuladd_sse,
@ -67,6 +73,9 @@ static struct dsp_info dsp_table[] =
.funcs.mult = dsp_mult_c, .funcs.mult = dsp_mult_c,
.funcs.fft_new = dsp_fft_new_c, .funcs.fft_new = dsp_fft_new_c,
.funcs.fft_free = dsp_fft_free_c, .funcs.fft_free = dsp_fft_free_c,
.funcs.fft_memalloc = dsp_fft_memalloc_c,
.funcs.fft_memfree = dsp_fft_memfree_c,
.funcs.fft_memclear = dsp_fft_memclear_c,
.funcs.fft_run = dsp_fft_run_c, .funcs.fft_run = dsp_fft_run_c,
.funcs.fft_cmul = dsp_fft_cmul_c, .funcs.fft_cmul = dsp_fft_cmul_c,
.funcs.fft_cmuladd = dsp_fft_cmuladd_c, .funcs.fft_cmuladd = dsp_fft_cmuladd_c,

View file

@ -26,8 +26,11 @@ struct dsp_ops_funcs {
float * dst, const float * SPA_RESTRICT a, float * dst, const float * SPA_RESTRICT a,
const float * SPA_RESTRICT b, uint32_t n_samples); const float * SPA_RESTRICT b, uint32_t n_samples);
void *(*fft_new) (struct dsp_ops *ops, int32_t size, bool real); void *(*fft_new) (struct dsp_ops *ops, uint32_t size, bool real);
void (*fft_free) (struct dsp_ops *ops, void *fft); void (*fft_free) (struct dsp_ops *ops, void *fft);
void *(*fft_memalloc) (struct dsp_ops *ops, uint32_t size, bool real);
void (*fft_memfree) (struct dsp_ops *ops, void *mem);
void (*fft_memclear) (struct dsp_ops *ops, void *mem, uint32_t size, bool real);
void (*fft_run) (struct dsp_ops *ops, void *fft, int direction, void (*fft_run) (struct dsp_ops *ops, void *fft, int direction,
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst); const float * SPA_RESTRICT src, float * SPA_RESTRICT dst);
void (*fft_cmul) (struct dsp_ops *ops, void *fft, void (*fft_cmul) (struct dsp_ops *ops, void *fft,
@ -77,6 +80,9 @@ int dsp_ops_benchmark(void);
#define dsp_ops_fft_new(ops,...) (ops)->funcs.fft_new(ops, __VA_ARGS__) #define dsp_ops_fft_new(ops,...) (ops)->funcs.fft_new(ops, __VA_ARGS__)
#define dsp_ops_fft_free(ops,...) (ops)->funcs.fft_free(ops, __VA_ARGS__) #define dsp_ops_fft_free(ops,...) (ops)->funcs.fft_free(ops, __VA_ARGS__)
/* FFT buffer helpers: each macro calls the matching entry of ops->funcs,
 * passing `ops` itself as the first argument followed by the caller's
 * arguments. */
#define dsp_ops_fft_memalloc(ops,...) (ops)->funcs.fft_memalloc(ops, __VA_ARGS__)
#define dsp_ops_fft_memfree(ops,...) (ops)->funcs.fft_memfree(ops, __VA_ARGS__)
#define dsp_ops_fft_memclear(ops,...) (ops)->funcs.fft_memclear(ops, __VA_ARGS__)
#define dsp_ops_fft_run(ops,...) (ops)->funcs.fft_run(ops, __VA_ARGS__) #define dsp_ops_fft_run(ops,...) (ops)->funcs.fft_run(ops, __VA_ARGS__)
#define dsp_ops_fft_cmul(ops,...) (ops)->funcs.fft_cmul(ops, __VA_ARGS__) #define dsp_ops_fft_cmul(ops,...) (ops)->funcs.fft_cmul(ops, __VA_ARGS__)
#define dsp_ops_fft_cmuladd(ops,...) (ops)->funcs.fft_cmuladd(ops, __VA_ARGS__) #define dsp_ops_fft_cmuladd(ops,...) (ops)->funcs.fft_cmuladd(ops, __VA_ARGS__)
@ -109,9 +115,15 @@ void dsp_delay_##arch (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32
uint32_t delay, float *dst, const float *src, uint32_t n_samples) uint32_t delay, float *dst, const float *src, uint32_t n_samples)
#define MAKE_FFT_NEW_FUNC(arch) \ #define MAKE_FFT_NEW_FUNC(arch) \
void *dsp_fft_new_##arch(struct dsp_ops *ops, int32_t size, bool real) void *dsp_fft_new_##arch(struct dsp_ops *ops, uint32_t size, bool real)
#define MAKE_FFT_FREE_FUNC(arch) \ #define MAKE_FFT_FREE_FUNC(arch) \
void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft) void dsp_fft_free_##arch(struct dsp_ops *ops, void *fft)
/* Expands to the signature of the FFT buffer allocator for variant `arch`:
 * `size` elements, `real` selecting real or complex data. */
#define MAKE_FFT_MEMALLOC_FUNC(arch) \
void *dsp_fft_memalloc_##arch(struct dsp_ops *ops, uint32_t size, bool real)
/* Expands to the signature of the FFT buffer deallocator for variant `arch`. */
#define MAKE_FFT_MEMFREE_FUNC(arch) \
void dsp_fft_memfree_##arch(struct dsp_ops *ops, void *mem)
/* Expands to the signature of the FFT buffer clear function for variant
 * `arch`; `size` and `real` have the same meaning as for the allocator. */
#define MAKE_FFT_MEMCLEAR_FUNC(arch) \
void dsp_fft_memclear_##arch(struct dsp_ops *ops, void *mem, uint32_t size, bool real)
#define MAKE_FFT_RUN_FUNC(arch) \ #define MAKE_FFT_RUN_FUNC(arch) \
void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \ void dsp_fft_run_##arch(struct dsp_ops *ops, void *fft, int direction, \
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst) const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
@ -138,6 +150,9 @@ MAKE_DELAY_FUNC(c);
MAKE_FFT_NEW_FUNC(c); MAKE_FFT_NEW_FUNC(c);
MAKE_FFT_FREE_FUNC(c); MAKE_FFT_FREE_FUNC(c);
/* Prototypes of the FFT buffer alloc/free/clear functions for the `c` variant. */
MAKE_FFT_MEMALLOC_FUNC(c);
MAKE_FFT_MEMFREE_FUNC(c);
MAKE_FFT_MEMCLEAR_FUNC(c);
MAKE_FFT_RUN_FUNC(c); MAKE_FFT_RUN_FUNC(c);
MAKE_FFT_CMUL_FUNC(c); MAKE_FFT_CMUL_FUNC(c);
MAKE_FFT_CMULADD_FUNC(c); MAKE_FFT_CMULADD_FUNC(c);