filter-chain: use dsp sum/copy/clear functions

This commit is contained in:
Wim Taymans 2022-12-16 10:22:01 +01:00
parent 0f2f113bdc
commit 538b6ce35e
5 changed files with 27 additions and 74 deletions

View file

@ -731,7 +731,7 @@ static void * convolver_instantiate(const struct fc_descriptor * Descriptor,
impl->rate = SampleRate; impl->rate = SampleRate;
impl->conv = convolver_new(blocksize, tailsize, samples, n_samples); impl->conv = convolver_new(dsp_ops, blocksize, tailsize, samples, n_samples);
if (impl->conv == NULL) if (impl->conv == NULL)
goto error; goto error;

View file

@ -32,6 +32,8 @@
#include "pffft.h" #include "pffft.h"
static struct dsp_ops *dsp;
struct fft_cpx { struct fft_cpx {
float *v; float *v;
}; };
@ -61,16 +63,6 @@ struct convolver1 {
float scale; float scale;
}; };
static inline void fft_copy(void *dst, const void *src, int size)
{
memcpy(dst, src, size * sizeof(float));
}
static void fft_clear(void *data, int size)
{
memset(data, 0, size * sizeof(float));
}
static void *fft_alloc(int size) static void *fft_alloc(int size)
{ {
return pffft_aligned_malloc(size * sizeof(float)); return pffft_aligned_malloc(size * sizeof(float));
@ -80,9 +72,9 @@ static void fft_free(void *data)
pffft_aligned_free(data); pffft_aligned_free(data);
} }
static void fft_cpx_clear(struct fft_cpx *cpx, int size) static inline void fft_cpx_clear(struct fft_cpx *cpx, int size)
{ {
fft_clear(cpx->v, size * 2); dsp_ops_clear(dsp, cpx->v, size * 2);
} }
static void fft_cpx_init(struct fft_cpx *cpx, int size) static void fft_cpx_init(struct fft_cpx *cpx, int size)
{ {
@ -138,18 +130,13 @@ static inline void fft_convolve_accum(void *fft, struct fft_cpx *r,
pffft_zconvolve_accumulate(fft, a->v, b->v, c->v, r->v, scale); pffft_zconvolve_accumulate(fft, a->v, b->v, c->v, r->v, scale);
} }
static inline void fft_sum(float *r, const float *a, const float *b,int len)
{
pffft_sum(a, b, r, len);
}
static void convolver1_reset(struct convolver1 *conv) static void convolver1_reset(struct convolver1 *conv)
{ {
int i; int i;
for (i = 0; i < conv->segCount; i++) for (i = 0; i < conv->segCount; i++)
fft_cpx_clear(&conv->segments[i], conv->fftComplexSize); fft_cpx_clear(&conv->segments[i], conv->fftComplexSize);
fft_clear(conv->overlap, conv->blockSize); dsp_ops_clear(dsp, conv->overlap, conv->blockSize);
fft_clear(conv->inputBuffer, conv->segSize); dsp_ops_clear(dsp, conv->inputBuffer, conv->segSize);
fft_cpx_clear(&conv->pre_mult, conv->fftComplexSize); fft_cpx_clear(&conv->pre_mult, conv->fftComplexSize);
fft_cpx_clear(&conv->conv, conv->fftComplexSize); fft_cpx_clear(&conv->conv, conv->fftComplexSize);
conv->inputBufferFill = 0; conv->inputBufferFill = 0;
@ -200,9 +187,9 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
fft_cpx_init(&conv->segments[i], conv->fftComplexSize); fft_cpx_init(&conv->segments[i], conv->fftComplexSize);
fft_cpx_init(&conv->segmentsIr[i], conv->fftComplexSize); fft_cpx_init(&conv->segmentsIr[i], conv->fftComplexSize);
fft_copy(conv->fft_buffer, &ir[i * conv->blockSize], copy); dsp_ops_copy(dsp, conv->fft_buffer, &ir[i * conv->blockSize], copy);
if (copy < conv->segSize) if (copy < conv->segSize)
fft_clear(conv->fft_buffer + copy, conv->segSize - copy); dsp_ops_clear(dsp, conv->fft_buffer + copy, conv->segSize - copy);
fft_run(conv->fft, conv->fft_buffer, &conv->segmentsIr[i]); fft_run(conv->fft, conv->fft_buffer, &conv->segmentsIr[i]);
} }
@ -252,7 +239,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
int i, processed = 0; int i, processed = 0;
if (conv == NULL || conv->segCount == 0) { if (conv == NULL || conv->segCount == 0) {
fft_clear(output, len); dsp_ops_clear(dsp, output, len);
return len; return len;
} }
@ -260,9 +247,9 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
const int processing = SPA_MIN(len - processed, conv->blockSize - conv->inputBufferFill); const int processing = SPA_MIN(len - processed, conv->blockSize - conv->inputBufferFill);
const int inputBufferPos = conv->inputBufferFill; const int inputBufferPos = conv->inputBufferFill;
fft_copy(conv->inputBuffer + inputBufferPos, input + processed, processing); dsp_ops_copy(dsp, conv->inputBuffer + inputBufferPos, input + processed, processing);
if (inputBufferPos == 0 && processing < conv->blockSize) if (inputBufferPos == 0 && processing < conv->blockSize)
fft_clear(conv->inputBuffer + processing, conv->blockSize - processing); dsp_ops_clear(dsp, conv->inputBuffer + processing, conv->blockSize - processing);
fft_run(conv->fft, conv->inputBuffer, &conv->segments[conv->current]); fft_run(conv->fft, conv->inputBuffer, &conv->segments[conv->current]);
@ -301,14 +288,14 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
ifft_run(conv->ifft, &conv->conv, conv->fft_buffer); ifft_run(conv->ifft, &conv->conv, conv->fft_buffer);
fft_sum(output + processed, conv->fft_buffer + inputBufferPos, dsp_ops_sum(dsp, output + processed, conv->fft_buffer + inputBufferPos,
conv->overlap + inputBufferPos, processing); conv->overlap + inputBufferPos, processing);
conv->inputBufferFill += processing; conv->inputBufferFill += processing;
if (conv->inputBufferFill == conv->blockSize) { if (conv->inputBufferFill == conv->blockSize) {
conv->inputBufferFill = 0; conv->inputBufferFill = 0;
fft_copy(conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize); dsp_ops_copy(dsp, conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize);
conv->current = (conv->current > 0) ? (conv->current - 1) : (conv->segCount - 1); conv->current = (conv->current > 0) ? (conv->current - 1) : (conv->segCount - 1);
} }
@ -340,23 +327,25 @@ void convolver_reset(struct convolver *conv)
convolver1_reset(conv->headConvolver); convolver1_reset(conv->headConvolver);
if (conv->tailConvolver0) { if (conv->tailConvolver0) {
convolver1_reset(conv->tailConvolver0); convolver1_reset(conv->tailConvolver0);
fft_clear(conv->tailOutput0, conv->tailBlockSize); dsp_ops_clear(dsp, conv->tailOutput0, conv->tailBlockSize);
fft_clear(conv->tailPrecalculated0, conv->tailBlockSize); dsp_ops_clear(dsp, conv->tailPrecalculated0, conv->tailBlockSize);
} }
if (conv->tailConvolver) { if (conv->tailConvolver) {
convolver1_reset(conv->tailConvolver); convolver1_reset(conv->tailConvolver);
fft_clear(conv->tailOutput, conv->tailBlockSize); dsp_ops_clear(dsp, conv->tailOutput, conv->tailBlockSize);
fft_clear(conv->tailPrecalculated, conv->tailBlockSize); dsp_ops_clear(dsp, conv->tailPrecalculated, conv->tailBlockSize);
} }
conv->tailInputFill = 0; conv->tailInputFill = 0;
conv->precalculatedPos = 0; conv->precalculatedPos = 0;
} }
struct convolver *convolver_new(int head_block, int tail_block, const float *ir, int irlen) struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tail_block, const float *ir, int irlen)
{ {
struct convolver *conv; struct convolver *conv;
int head_ir_len; int head_ir_len;
dsp = dsp_ops;
if (head_block == 0 || tail_block == 0) if (head_block == 0 || tail_block == 0)
return NULL; return NULL;
@ -430,16 +419,16 @@ int convolver_run(struct convolver *conv, const float *input, float *output, int
int processing = SPA_MIN(remaining, conv->headBlockSize - (conv->tailInputFill % conv->headBlockSize)); int processing = SPA_MIN(remaining, conv->headBlockSize - (conv->tailInputFill % conv->headBlockSize));
if (conv->tailPrecalculated0) if (conv->tailPrecalculated0)
fft_sum(&output[processed], &output[processed], dsp_ops_sum(dsp, &output[processed], &output[processed],
&conv->tailPrecalculated0[conv->precalculatedPos], &conv->tailPrecalculated0[conv->precalculatedPos],
processing); processing);
if (conv->tailPrecalculated) if (conv->tailPrecalculated)
fft_sum(&output[processed], &output[processed], dsp_ops_sum(dsp, &output[processed], &output[processed],
&conv->tailPrecalculated[conv->precalculatedPos], &conv->tailPrecalculated[conv->precalculatedPos],
processing); processing);
conv->precalculatedPos += processing; conv->precalculatedPos += processing;
fft_copy(conv->tailInput + conv->tailInputFill, input + processed, processing); dsp_ops_copy(dsp, conv->tailInput + conv->tailInputFill, input + processed, processing);
conv->tailInputFill += processing; conv->tailInputFill += processing;
if (conv->tailPrecalculated0 && (conv->tailInputFill % conv->headBlockSize == 0)) { if (conv->tailPrecalculated0 && (conv->tailInputFill % conv->headBlockSize == 0)) {

View file

@ -25,7 +25,9 @@
#include <stdint.h> #include <stdint.h>
#include <stddef.h> #include <stddef.h>
struct convolver *convolver_new(int block, int tail, const float *ir, int irlen); #include "dsp-ops.h"
struct convolver *convolver_new(struct dsp_ops *dsp, int block, int tail, const float *ir, int irlen);
void convolver_free(struct convolver *conv); void convolver_free(struct convolver *conv);
void convolver_reset(struct convolver *conv); void convolver_reset(struct convolver *conv);

View file

@ -135,7 +135,6 @@ inline v4sf ld_ps1(const float *p)
#define zconvolve_accumulate_simd zconvolve_accumulate_altivec #define zconvolve_accumulate_simd zconvolve_accumulate_altivec
#define zconvolve_simd zconvolve_altivec #define zconvolve_simd zconvolve_altivec
#define transform_simd transform_altivec #define transform_simd transform_altivec
#define sum_simd sum_altivec
/* /*
SSE1 support macros SSE1 support macros
@ -162,7 +161,6 @@ typedef __m128 v4sf;
#define zconvolve_accumulate_simd zconvolve_accumulate_sse #define zconvolve_accumulate_simd zconvolve_accumulate_sse
#define zconvolve_simd zconvolve_sse #define zconvolve_simd zconvolve_sse
#define transform_simd transform_sse #define transform_simd transform_sse
#define sum_simd sum_sse
/* /*
ARM NEON support macros ARM NEON support macros
@ -196,7 +194,6 @@ typedef float32x4_t v4sf;
#define zconvolve_accumulate_simd zconvolve_accumulate_neon #define zconvolve_accumulate_simd zconvolve_accumulate_neon
#define zconvolve_simd zconvolve_neon #define zconvolve_simd zconvolve_neon
#define transform_simd transform_neon #define transform_simd transform_neon
#define sum_simd sum_neon
#else #else
#if !defined(PFFFT_SIMD_DISABLE) #if !defined(PFFFT_SIMD_DISABLE)
#warning "building with simd disabled !\n"; #warning "building with simd disabled !\n";
@ -221,7 +218,6 @@ typedef float v4sf;
#define zconvolve_accumulate_simd zconvolve_accumulate_c #define zconvolve_accumulate_simd zconvolve_accumulate_c
#define zconvolve_simd zconvolve_c #define zconvolve_simd zconvolve_c
#define transform_simd transform_c #define transform_simd transform_c
#define sum_simd sum_c
#endif #endif
// shortcuts for complex multiplications // shortcuts for complex multiplications
@ -1412,7 +1408,6 @@ struct funcs {
void (*zreorder)(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); void (*zreorder)(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *dft_c, float *dft_ab, float scaling); void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *dft_c, float *dft_ab, float scaling);
void (*zconvolve)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); void (*zconvolve)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
void (*sum)(const float *a, const float *b, float *ab, int len);
int (*simd_size)(void); int (*simd_size)(void);
void (*validate)(void); void (*validate)(void);
}; };
@ -2125,25 +2120,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a, const float *b,
} }
} }
static void sum_simd(const float *a, const float *b, float *ab, int len)
{
int i = 0;
if (VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)) {
const v4sf *RESTRICT va = (const v4sf *)a;
const v4sf *RESTRICT vb = (const v4sf *)b;
v4sf *RESTRICT vab = (v4sf *) ab;
const int end4 = len / SIMD_SZ;
for (; i < end4; i += 1)
vab[i] = VADD(va[i],vb[i]);
i *= 4;
}
for (; i < len; ++i)
ab[i] = a[i] + b[i];
}
#else // defined(PFFFT_SIMD_DISABLE) #else // defined(PFFFT_SIMD_DISABLE)
// standard routine using scalar floats, without SIMD stuff. // standard routine using scalar floats, without SIMD stuff.
@ -2293,13 +2269,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a,
ab[i + 1] = ai * scaling; ab[i + 1] = ai * scaling;
} }
} }
static void sum_simd(const float *a, const float *b, float *ab, int len)
{
int i;
for (i = 0; i < len; ++i)
ab[i] = VADD(a[i], b[i]);
}
#endif // defined(PFFFT_SIMD_DISABLE) #endif // defined(PFFFT_SIMD_DISABLE)
static int simd_size_simd(void) static int simd_size_simd(void)
@ -2313,7 +2282,6 @@ struct funcs pffft_funcs = {
.zreorder = zreorder_simd, .zreorder = zreorder_simd,
.zconvolve_accumulate = zconvolve_accumulate_simd, .zconvolve_accumulate = zconvolve_accumulate_simd,
.zconvolve = zconvolve_simd, .zconvolve = zconvolve_simd,
.sum = sum_simd,
.simd_size = simd_size_simd, .simd_size = simd_size_simd,
.validate = validate_pffft_simd, .validate = validate_pffft_simd,
}; };
@ -2393,11 +2361,6 @@ void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b,
return funcs->zconvolve(setup, dft_a, dft_b, dft_ab, scaling); return funcs->zconvolve(setup, dft_a, dft_b, dft_ab, scaling);
} }
void pffft_sum(const float *a, const float *b, float *ab, int len)
{
return funcs->sum(a, b, ab, len);
}
void pffft_select_cpu(int flags) void pffft_select_cpu(int flags)
{ {
funcs = &pffft_funcs_c; funcs = &pffft_funcs_c;

View file

@ -161,7 +161,6 @@ extern "C" {
void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
void pffft_sum(const float *a, const float *b, float *ab, int len);
/* /*
the float buffers must have the correct alignment (16-byte boundary the float buffers must have the correct alignment (16-byte boundary
on intel and powerpc). This function may be used to obtain such on intel and powerpc). This function may be used to obtain such