filter-chain: use dsp sum/copy/clear functions

This commit is contained in:
Wim Taymans 2022-12-16 10:22:01 +01:00
parent 0f2f113bdc
commit 538b6ce35e
5 changed files with 27 additions and 74 deletions

View file

@ -731,7 +731,7 @@ static void * convolver_instantiate(const struct fc_descriptor * Descriptor,
impl->rate = SampleRate;
impl->conv = convolver_new(blocksize, tailsize, samples, n_samples);
impl->conv = convolver_new(dsp_ops, blocksize, tailsize, samples, n_samples);
if (impl->conv == NULL)
goto error;

View file

@ -32,6 +32,8 @@
#include "pffft.h"
static struct dsp_ops *dsp;
struct fft_cpx {
float *v;
};
@ -61,16 +63,6 @@ struct convolver1 {
float scale;
};
static inline void fft_copy(void *dst, const void *src, int size)
{
memcpy(dst, src, size * sizeof(float));
}
static void fft_clear(void *data, int size)
{
memset(data, 0, size * sizeof(float));
}
static void *fft_alloc(int size)
{
return pffft_aligned_malloc(size * sizeof(float));
@ -80,9 +72,9 @@ static void fft_free(void *data)
pffft_aligned_free(data);
}
static void fft_cpx_clear(struct fft_cpx *cpx, int size)
static inline void fft_cpx_clear(struct fft_cpx *cpx, int size)
{
fft_clear(cpx->v, size * 2);
dsp_ops_clear(dsp, cpx->v, size * 2);
}
static void fft_cpx_init(struct fft_cpx *cpx, int size)
{
@ -138,18 +130,13 @@ static inline void fft_convolve_accum(void *fft, struct fft_cpx *r,
pffft_zconvolve_accumulate(fft, a->v, b->v, c->v, r->v, scale);
}
static inline void fft_sum(float *r, const float *a, const float *b,int len)
{
pffft_sum(a, b, r, len);
}
static void convolver1_reset(struct convolver1 *conv)
{
int i;
for (i = 0; i < conv->segCount; i++)
fft_cpx_clear(&conv->segments[i], conv->fftComplexSize);
fft_clear(conv->overlap, conv->blockSize);
fft_clear(conv->inputBuffer, conv->segSize);
dsp_ops_clear(dsp, conv->overlap, conv->blockSize);
dsp_ops_clear(dsp, conv->inputBuffer, conv->segSize);
fft_cpx_clear(&conv->pre_mult, conv->fftComplexSize);
fft_cpx_clear(&conv->conv, conv->fftComplexSize);
conv->inputBufferFill = 0;
@ -200,9 +187,9 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
fft_cpx_init(&conv->segments[i], conv->fftComplexSize);
fft_cpx_init(&conv->segmentsIr[i], conv->fftComplexSize);
fft_copy(conv->fft_buffer, &ir[i * conv->blockSize], copy);
dsp_ops_copy(dsp, conv->fft_buffer, &ir[i * conv->blockSize], copy);
if (copy < conv->segSize)
fft_clear(conv->fft_buffer + copy, conv->segSize - copy);
dsp_ops_clear(dsp, conv->fft_buffer + copy, conv->segSize - copy);
fft_run(conv->fft, conv->fft_buffer, &conv->segmentsIr[i]);
}
@ -252,7 +239,7 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
int i, processed = 0;
if (conv == NULL || conv->segCount == 0) {
fft_clear(output, len);
dsp_ops_clear(dsp, output, len);
return len;
}
@ -260,9 +247,9 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
const int processing = SPA_MIN(len - processed, conv->blockSize - conv->inputBufferFill);
const int inputBufferPos = conv->inputBufferFill;
fft_copy(conv->inputBuffer + inputBufferPos, input + processed, processing);
dsp_ops_copy(dsp, conv->inputBuffer + inputBufferPos, input + processed, processing);
if (inputBufferPos == 0 && processing < conv->blockSize)
fft_clear(conv->inputBuffer + processing, conv->blockSize - processing);
dsp_ops_clear(dsp, conv->inputBuffer + processing, conv->blockSize - processing);
fft_run(conv->fft, conv->inputBuffer, &conv->segments[conv->current]);
@ -301,14 +288,14 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
ifft_run(conv->ifft, &conv->conv, conv->fft_buffer);
fft_sum(output + processed, conv->fft_buffer + inputBufferPos,
dsp_ops_sum(dsp, output + processed, conv->fft_buffer + inputBufferPos,
conv->overlap + inputBufferPos, processing);
conv->inputBufferFill += processing;
if (conv->inputBufferFill == conv->blockSize) {
conv->inputBufferFill = 0;
fft_copy(conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize);
dsp_ops_copy(dsp, conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize);
conv->current = (conv->current > 0) ? (conv->current - 1) : (conv->segCount - 1);
}
@ -340,23 +327,25 @@ void convolver_reset(struct convolver *conv)
convolver1_reset(conv->headConvolver);
if (conv->tailConvolver0) {
convolver1_reset(conv->tailConvolver0);
fft_clear(conv->tailOutput0, conv->tailBlockSize);
fft_clear(conv->tailPrecalculated0, conv->tailBlockSize);
dsp_ops_clear(dsp, conv->tailOutput0, conv->tailBlockSize);
dsp_ops_clear(dsp, conv->tailPrecalculated0, conv->tailBlockSize);
}
if (conv->tailConvolver) {
convolver1_reset(conv->tailConvolver);
fft_clear(conv->tailOutput, conv->tailBlockSize);
fft_clear(conv->tailPrecalculated, conv->tailBlockSize);
dsp_ops_clear(dsp, conv->tailOutput, conv->tailBlockSize);
dsp_ops_clear(dsp, conv->tailPrecalculated, conv->tailBlockSize);
}
conv->tailInputFill = 0;
conv->precalculatedPos = 0;
}
struct convolver *convolver_new(int head_block, int tail_block, const float *ir, int irlen)
struct convolver *convolver_new(struct dsp_ops *dsp_ops, int head_block, int tail_block, const float *ir, int irlen)
{
struct convolver *conv;
int head_ir_len;
dsp = dsp_ops;
if (head_block == 0 || tail_block == 0)
return NULL;
@ -430,16 +419,16 @@ int convolver_run(struct convolver *conv, const float *input, float *output, int
int processing = SPA_MIN(remaining, conv->headBlockSize - (conv->tailInputFill % conv->headBlockSize));
if (conv->tailPrecalculated0)
fft_sum(&output[processed], &output[processed],
dsp_ops_sum(dsp, &output[processed], &output[processed],
&conv->tailPrecalculated0[conv->precalculatedPos],
processing);
if (conv->tailPrecalculated)
fft_sum(&output[processed], &output[processed],
dsp_ops_sum(dsp, &output[processed], &output[processed],
&conv->tailPrecalculated[conv->precalculatedPos],
processing);
conv->precalculatedPos += processing;
fft_copy(conv->tailInput + conv->tailInputFill, input + processed, processing);
dsp_ops_copy(dsp, conv->tailInput + conv->tailInputFill, input + processed, processing);
conv->tailInputFill += processing;
if (conv->tailPrecalculated0 && (conv->tailInputFill % conv->headBlockSize == 0)) {

View file

@ -25,7 +25,9 @@
#include <stdint.h>
#include <stddef.h>
struct convolver *convolver_new(int block, int tail, const float *ir, int irlen);
#include "dsp-ops.h"
struct convolver *convolver_new(struct dsp_ops *dsp, int block, int tail, const float *ir, int irlen);
void convolver_free(struct convolver *conv);
void convolver_reset(struct convolver *conv);

View file

@ -135,7 +135,6 @@ inline v4sf ld_ps1(const float *p)
#define zconvolve_accumulate_simd zconvolve_accumulate_altivec
#define zconvolve_simd zconvolve_altivec
#define transform_simd transform_altivec
#define sum_simd sum_altivec
/*
SSE1 support macros
@ -162,7 +161,6 @@ typedef __m128 v4sf;
#define zconvolve_accumulate_simd zconvolve_accumulate_sse
#define zconvolve_simd zconvolve_sse
#define transform_simd transform_sse
#define sum_simd sum_sse
/*
ARM NEON support macros
@ -196,7 +194,6 @@ typedef float32x4_t v4sf;
#define zconvolve_accumulate_simd zconvolve_accumulate_neon
#define zconvolve_simd zconvolve_neon
#define transform_simd transform_neon
#define sum_simd sum_neon
#else
#if !defined(PFFFT_SIMD_DISABLE)
#warning "building with simd disabled !\n";
@ -221,7 +218,6 @@ typedef float v4sf;
#define zconvolve_accumulate_simd zconvolve_accumulate_c
#define zconvolve_simd zconvolve_c
#define transform_simd transform_c
#define sum_simd sum_c
#endif
// shortcuts for complex multiplcations
@ -1412,7 +1408,6 @@ struct funcs {
void (*zreorder)(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *dft_c, float *dft_ab, float scaling);
void (*zconvolve)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
void (*sum)(const float *a, const float *b, float *ab, int len);
int (*simd_size)(void);
void (*validate)(void);
};
@ -2125,25 +2120,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a, const float *b,
}
}
static void sum_simd(const float *a, const float *b, float *ab, int len)
{
int i = 0;
if (VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)) {
const v4sf *RESTRICT va = (const v4sf *)a;
const v4sf *RESTRICT vb = (const v4sf *)b;
v4sf *RESTRICT vab = (v4sf *) ab;
const int end4 = len / SIMD_SZ;
for (; i < end4; i += 1)
vab[i] = VADD(va[i],vb[i]);
i *= 4;
}
for (; i < len; ++i)
ab[i] = a[i] + b[i];
}
#else // defined(PFFFT_SIMD_DISABLE)
// standard routine using scalar floats, without SIMD stuff.
@ -2293,13 +2269,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a,
ab[i + 1] = ai * scaling;
}
}
static void sum_simd(const float *a, const float *b, float *ab, int len)
{
int i;
for (i = 0; i < len; ++i)
ab[i] = VADD(a[i], b[i]);
}
#endif // defined(PFFFT_SIMD_DISABLE)
static int simd_size_simd(void)
@ -2313,7 +2282,6 @@ struct funcs pffft_funcs = {
.zreorder = zreorder_simd,
.zconvolve_accumulate = zconvolve_accumulate_simd,
.zconvolve = zconvolve_simd,
.sum = sum_simd,
.simd_size = simd_size_simd,
.validate = validate_pffft_simd,
};
@ -2393,11 +2361,6 @@ void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b,
return funcs->zconvolve(setup, dft_a, dft_b, dft_ab, scaling);
}
void pffft_sum(const float *a, const float *b, float *ab, int len)
{
return funcs->sum(a, b, ab, len);
}
void pffft_select_cpu(int flags)
{
funcs = &pffft_funcs_c;

View file

@ -161,7 +161,6 @@ extern "C" {
void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
void pffft_sum(const float *a, const float *b, float *ab, int len);
/*
the float buffers must have the correct alignment (16-byte boundary
on intel and powerpc). This function may be used to obtain such