mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-08 13:30:08 -05:00
filter-chain: optimize delay function
This commit is contained in:
parent
d8bd84183d
commit
5e87f1d4f4
5 changed files with 69 additions and 18 deletions
|
|
@ -1150,10 +1150,10 @@ static void *delay_instantiate(const struct fc_descriptor * Descriptor,
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
impl->rate = SampleRate;
|
impl->rate = SampleRate;
|
||||||
impl->buffer_samples = (uint32_t)(max_delay * impl->rate);
|
impl->buffer_samples = SPA_ROUND_UP_N((uint32_t)(max_delay * impl->rate), 64);
|
||||||
pw_log_info("max-delay:%f seconds rate:%lu samples:%d", max_delay, impl->rate, impl->buffer_samples);
|
pw_log_info("max-delay:%f seconds rate:%lu samples:%d", max_delay, impl->rate, impl->buffer_samples);
|
||||||
|
|
||||||
impl->buffer = calloc(impl->buffer_samples, sizeof(float));
|
impl->buffer = calloc(impl->buffer_samples * 2 + 64, sizeof(float));
|
||||||
if (impl->buffer == NULL) {
|
if (impl->buffer == NULL) {
|
||||||
delay_cleanup(impl);
|
delay_cleanup(impl);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
@ -1175,27 +1175,13 @@ static void delay_run(void * Instance, unsigned long SampleCount)
|
||||||
struct delay_impl *impl = Instance;
|
struct delay_impl *impl = Instance;
|
||||||
float *in = impl->port[1], *out = impl->port[0];
|
float *in = impl->port[1], *out = impl->port[0];
|
||||||
float delay = impl->port[2][0];
|
float delay = impl->port[2][0];
|
||||||
unsigned long n;
|
|
||||||
uint32_t r, w;
|
|
||||||
|
|
||||||
if (delay != impl->delay) {
|
if (delay != impl->delay) {
|
||||||
impl->delay_samples = SPA_CLAMP((uint32_t)(delay * impl->rate), 0u, impl->buffer_samples-1);
|
impl->delay_samples = SPA_CLAMP((uint32_t)(delay * impl->rate), 0u, impl->buffer_samples-1);
|
||||||
impl->delay = delay;
|
impl->delay = delay;
|
||||||
}
|
}
|
||||||
r = impl->ptr;
|
dsp_ops_delay(dsp_ops, impl->buffer, &impl->ptr, impl->buffer_samples,
|
||||||
w = impl->ptr + impl->delay_samples;
|
impl->delay_samples, out, in, SampleCount);
|
||||||
if (w >= impl->buffer_samples)
|
|
||||||
w -= impl->buffer_samples;
|
|
||||||
|
|
||||||
for (n = 0; n < SampleCount; n++) {
|
|
||||||
impl->buffer[w] = in[n];
|
|
||||||
out[n] = impl->buffer[r];
|
|
||||||
if (++r >= impl->buffer_samples)
|
|
||||||
r = 0;
|
|
||||||
if (++w >= impl->buffer_samples)
|
|
||||||
w = 0;
|
|
||||||
}
|
|
||||||
impl->ptr = r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct fc_port delay_ports[] = {
|
static struct fc_port delay_ports[] = {
|
||||||
|
|
|
||||||
|
|
@ -188,6 +188,27 @@ void dsp_linear_c(struct dsp_ops *ops, float * dst,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Run a sample delay line over a block of audio (portable C version).
 *
 * The ring is stored twice: the sample for write slot w goes to buffer[w]
 * and to buffer[w + n_buffer]. The delayed sample is then read at
 * buffer[w + n_buffer - delay], so the read index never needs to wrap.
 *
 * \param ops       DSP ops handle, only forwarded to dsp_copy_c()
 * \param buffer    delay storage; indices up to 2 * n_buffer - 1 are accessed
 * \param pos       in: current write slot, out: next write slot
 *                  (expected to be < n_buffer on entry; not checked here)
 * \param n_buffer  number of slots in the ring
 * \param delay     delay in samples (expected to be <= n_buffer; not checked here)
 * \param dst       output samples
 * \param src       input samples
 * \param n_samples number of samples to process
 *
 * With delay == 0 the input is copied to the output and neither the
 * buffer nor *pos is modified.
 */
void dsp_delay_c(struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer,
		uint32_t delay, float *dst, const float *src, uint32_t n_samples)
{
	if (delay == 0) {
		dsp_copy_c(ops, dst, src, n_samples);
	} else {
		uint32_t w, o, i;

		w = *pos;
		o = n_buffer - delay;

		for (i = 0; i < n_samples; i++) {
			buffer[w] = buffer[w + n_buffer] = src[i];
			dst[i] = buffer[w + o];
			/* wrap at n_buffer: valid write slots are 0 .. n_buffer - 1.
			 * Using '>' here would let w reach n_buffer and break the
			 * mirrored layout. */
			w = w + 1 >= n_buffer ? 0 : w + 1;
		}
		*pos = w;
	}
}
|
||||||
|
|
||||||
void *dsp_fft_new_c(struct dsp_ops *ops, int32_t size, bool real)
|
void *dsp_fft_new_c(struct dsp_ops *ops, int32_t size, bool real)
|
||||||
{
|
{
|
||||||
return pffft_new_setup(size, real ? PFFFT_REAL : PFFFT_COMPLEX);
|
return pffft_new_setup(size, real ? PFFFT_REAL : PFFFT_COMPLEX);
|
||||||
|
|
|
||||||
|
|
@ -500,3 +500,36 @@ void dsp_biquadn_run_sse(struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Push one sample through the delay line at write slot w and return the
 * next write slot. The sample is stored at buffer[w] and at
 * buffer[w + n_buffer]; the output is read from buffer[w + o]. */
static inline uint32_t delay_step_sse(float *buffer, uint32_t w, uint32_t n_buffer,
		uint32_t o, float *dst, const float *src)
{
	__m128 t = _mm_load_ss(src);
	_mm_store_ss(&buffer[w], t);
	_mm_store_ss(&buffer[w+n_buffer], t);
	t = _mm_load_ss(&buffer[w+o]);
	_mm_store_ss(dst, t);
	return w + 1 >= n_buffer ? 0 : w + 1;
}

/**
 * Run a sample delay line over a block of audio (SSE version).
 *
 * The ring is stored twice (buffer[w] and buffer[w + n_buffer]) so the
 * delayed samples can be read contiguously at buffer[w + n_buffer - delay]
 * without wrapping the read index.
 *
 * \param ops       DSP ops handle (unused here)
 * \param buffer    delay storage; indices up to 2 * n_buffer - 1 are accessed
 * \param pos       in: current write slot, out: next write slot
 *                  (expected to be < n_buffer on entry; not checked here)
 * \param n_buffer  number of slots in the ring
 * \param delay     delay in samples (expected to be <= n_buffer; not checked here)
 * \param dst       output samples
 * \param src       input samples
 * \param n_samples number of samples to process
 *
 * Four samples at a time are processed when both src and dst are 16-byte
 * aligned; the remainder is processed one sample at a time.
 */
void dsp_delay_sse(struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, uint32_t delay,
		float *dst, const float *src, uint32_t n_samples)
{
	__m128 t[1];
	uint32_t w = *pos;
	uint32_t o = n_buffer - delay;
	uint32_t n, i, unrolled;

	if (SPA_IS_ALIGNED(src, 16) &&
	    SPA_IS_ALIGNED(dst, 16))
		unrolled = n_samples & ~3;
	else
		unrolled = 0;

	for(n = 0; n < unrolled; n += 4) {
		if (w + 4 <= n_buffer) {
			/* all four write slots lie before the wrap point */
			t[0] = _mm_load_ps(&src[n]);
			_mm_storeu_ps(&buffer[w], t[0]);
			_mm_storeu_ps(&buffer[w+n_buffer], t[0]);
			t[0] = _mm_loadu_ps(&buffer[w+o]);
			_mm_store_ps(&dst[n], t[0]);
			w = w + 4 >= n_buffer ? 0 : w + 4;
		} else {
			/* the group would straddle the end of the ring (w is not
			 * 4-aligned, e.g. after a previous scalar tail): go sample
			 * by sample so the wrapped samples land in slots 0.. and
			 * none are dropped. */
			for (i = n; i < n + 4; i++)
				w = delay_step_sse(buffer, w, n_buffer, o, &dst[i], &src[i]);
		}
	}
	for(; n < n_samples; n++)
		w = delay_step_sse(buffer, w, n_buffer, o, &dst[n], &src[n]);
	*pos = w;
}
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,7 @@ static struct dsp_info dsp_table[] =
|
||||||
.funcs.fft_cmul = dsp_fft_cmul_c,
|
.funcs.fft_cmul = dsp_fft_cmul_c,
|
||||||
.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
|
.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
|
||||||
.funcs.biquadn_run = dsp_biquadn_run_sse,
|
.funcs.biquadn_run = dsp_biquadn_run_sse,
|
||||||
|
.funcs.delay = dsp_delay_sse,
|
||||||
},
|
},
|
||||||
#endif
|
#endif
|
||||||
#if defined (HAVE_SSE)
|
#if defined (HAVE_SSE)
|
||||||
|
|
@ -53,6 +54,7 @@ static struct dsp_info dsp_table[] =
|
||||||
.funcs.fft_cmul = dsp_fft_cmul_c,
|
.funcs.fft_cmul = dsp_fft_cmul_c,
|
||||||
.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
|
.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
|
||||||
.funcs.biquadn_run = dsp_biquadn_run_sse,
|
.funcs.biquadn_run = dsp_biquadn_run_sse,
|
||||||
|
.funcs.delay = dsp_delay_sse,
|
||||||
},
|
},
|
||||||
#endif
|
#endif
|
||||||
{ 0,
|
{ 0,
|
||||||
|
|
@ -69,6 +71,7 @@ static struct dsp_info dsp_table[] =
|
||||||
.funcs.fft_cmul = dsp_fft_cmul_c,
|
.funcs.fft_cmul = dsp_fft_cmul_c,
|
||||||
.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
|
.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
|
||||||
.funcs.biquadn_run = dsp_biquadn_run_c,
|
.funcs.biquadn_run = dsp_biquadn_run_c,
|
||||||
|
.funcs.delay = dsp_delay_c,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,8 @@ struct dsp_ops_funcs {
|
||||||
void (*biquadn_run) (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
|
void (*biquadn_run) (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride,
|
||||||
float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[],
|
float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[],
|
||||||
uint32_t n_src, uint32_t n_samples);
|
uint32_t n_src, uint32_t n_samples);
|
||||||
|
void (*delay) (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, uint32_t delay,
|
||||||
|
float *dst, const float *src, uint32_t n_samples);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct dsp_ops {
|
struct dsp_ops {
|
||||||
|
|
@ -71,6 +73,7 @@ int dsp_ops_benchmark(void);
|
||||||
#define dsp_ops_linear(ops,...) (ops)->funcs.linear(ops, __VA_ARGS__)
|
#define dsp_ops_linear(ops,...) (ops)->funcs.linear(ops, __VA_ARGS__)
|
||||||
#define dsp_ops_mult(ops,...) (ops)->funcs.mult(ops, __VA_ARGS__)
|
#define dsp_ops_mult(ops,...) (ops)->funcs.mult(ops, __VA_ARGS__)
|
||||||
#define dsp_ops_biquadn_run(ops,...) (ops)->funcs.biquadn_run(ops, __VA_ARGS__)
|
#define dsp_ops_biquadn_run(ops,...) (ops)->funcs.biquadn_run(ops, __VA_ARGS__)
|
||||||
|
#define dsp_ops_delay(ops,...) (ops)->funcs.delay(ops, __VA_ARGS__)
|
||||||
|
|
||||||
#define dsp_ops_fft_new(ops,...) (ops)->funcs.fft_new(ops, __VA_ARGS__)
|
#define dsp_ops_fft_new(ops,...) (ops)->funcs.fft_new(ops, __VA_ARGS__)
|
||||||
#define dsp_ops_fft_free(ops,...) (ops)->funcs.fft_free(ops, __VA_ARGS__)
|
#define dsp_ops_fft_free(ops,...) (ops)->funcs.fft_free(ops, __VA_ARGS__)
|
||||||
|
|
@ -101,6 +104,9 @@ void dsp_mult_##arch(struct dsp_ops *ops, void * SPA_RESTRICT dst, \
|
||||||
#define MAKE_BIQUADN_RUN_FUNC(arch) \
|
#define MAKE_BIQUADN_RUN_FUNC(arch) \
|
||||||
void dsp_biquadn_run_##arch (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, \
|
void dsp_biquadn_run_##arch (struct dsp_ops *ops, struct biquad *bq, uint32_t n_bq, uint32_t bq_stride, \
|
||||||
float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples)
|
float * SPA_RESTRICT out[], const float * SPA_RESTRICT in[], uint32_t n_src, uint32_t n_samples)
|
||||||
|
#define MAKE_DELAY_FUNC(arch) \
|
||||||
|
void dsp_delay_##arch (struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_buffer, \
|
||||||
|
uint32_t delay, float *dst, const float *src, uint32_t n_samples)
|
||||||
|
|
||||||
#define MAKE_FFT_NEW_FUNC(arch) \
|
#define MAKE_FFT_NEW_FUNC(arch) \
|
||||||
void *dsp_fft_new_##arch(struct dsp_ops *ops, int32_t size, bool real)
|
void *dsp_fft_new_##arch(struct dsp_ops *ops, int32_t size, bool real)
|
||||||
|
|
@ -128,6 +134,7 @@ MAKE_SUM_FUNC(c);
|
||||||
MAKE_LINEAR_FUNC(c);
|
MAKE_LINEAR_FUNC(c);
|
||||||
MAKE_MULT_FUNC(c);
|
MAKE_MULT_FUNC(c);
|
||||||
MAKE_BIQUADN_RUN_FUNC(c);
|
MAKE_BIQUADN_RUN_FUNC(c);
|
||||||
|
MAKE_DELAY_FUNC(c);
|
||||||
|
|
||||||
MAKE_FFT_NEW_FUNC(c);
|
MAKE_FFT_NEW_FUNC(c);
|
||||||
MAKE_FFT_FREE_FUNC(c);
|
MAKE_FFT_FREE_FUNC(c);
|
||||||
|
|
@ -140,6 +147,7 @@ MAKE_MIX_GAIN_FUNC(sse);
|
||||||
MAKE_SUM_FUNC(sse);
|
MAKE_SUM_FUNC(sse);
|
||||||
MAKE_BIQUAD_RUN_FUNC(sse);
|
MAKE_BIQUAD_RUN_FUNC(sse);
|
||||||
MAKE_BIQUADN_RUN_FUNC(sse);
|
MAKE_BIQUADN_RUN_FUNC(sse);
|
||||||
|
MAKE_DELAY_FUNC(sse);
|
||||||
#endif
|
#endif
|
||||||
#if defined (HAVE_AVX)
|
#if defined (HAVE_AVX)
|
||||||
MAKE_SUM_FUNC(avx);
|
MAKE_SUM_FUNC(avx);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue