delay: improve delay performance

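Use a wrap-around delay ring buffer: the buffer is doubled in size and each
sample is written twice, one buffer length apart. Reads can then run
contiguously, which avoids some modulo arithmetic and is more efficient.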

Also handle the delay convolver case better: the taps are reversed once at
init time, so the taps and the delay buffer are both read with increasing
indices and without extra overhead.
This commit is contained in:
Wim Taymans 2024-10-15 12:14:57 +02:00
parent 668055e612
commit a57f2f25b6
4 changed files with 31 additions and 14 deletions

View file

@ -775,16 +775,20 @@ int channelmix_init(struct channelmix *mix)
mix->delay = (uint32_t)(mix->rear_delay * mix->freq / 1000.0f);
mix->func_name = info->name;
spa_log_debug(mix->log, "selected %s delay:%d options:%08x", info->name, mix->delay,
mix->options);
if (mix->hilbert_taps > 0) {
mix->n_taps = SPA_CLAMP(mix->hilbert_taps, 15u, 255u) | 1;
blackman_window(mix->taps, mix->n_taps);
hilbert_generate(mix->taps, mix->n_taps);
reverse_taps(mix->taps, mix->n_taps);
} else {
mix->n_taps = 1;
mix->taps[0] = 1.0f;
}
if (mix->delay + mix->n_taps > BUFFER_SIZE)
mix->delay = BUFFER_SIZE - mix->n_taps;
spa_log_debug(mix->log, "selected %s delay:%d options:%08x", info->name, mix->delay,
mix->options);
return make_matrix(mix);
}

View file

@ -60,7 +60,7 @@ struct channelmix {
uint32_t hilbert_taps; /* to phase shift, 0 disabled */
struct lr4 lr4[SPA_AUDIO_MAX_CHANNELS];
float buffer[2][BUFFER_SIZE];
float buffer[2][BUFFER_SIZE*2];
uint32_t pos[2];
uint32_t delay;
float taps[MAX_TAPS];

View file

@ -14,14 +14,15 @@ static inline void delay_run(float *buffer, uint32_t *pos,
float *dst, const float *src, const float vol, uint32_t n_samples)
{
uint32_t i;
uint32_t p = *pos;
uint32_t w = *pos;
uint32_t o = n_buffer - delay;
for (i = 0; i < n_samples; i++) {
buffer[p] = src[i];
dst[i] = buffer[(p - delay) & (n_buffer-1)] * vol;
p = (p + 1) & (n_buffer-1);
buffer[w] = buffer[w + n_buffer] = src[i];
dst[i] = buffer[w + o] * vol;
w = w + 1 >= n_buffer ? 0 : w + 1;
}
*pos = p;
*pos = w;
}
static inline void delay_convolve_run(float *buffer, uint32_t *pos,
@ -30,19 +31,24 @@ static inline void delay_convolve_run(float *buffer, uint32_t *pos,
float *dst, const float *src, const float vol, uint32_t n_samples)
{
uint32_t i, j;
uint32_t p = *pos;
uint32_t w = *pos;
uint32_t o = n_buffer - delay - n_taps-1;
if (n_taps == 1) {
delay_run(buffer, pos, n_buffer, delay, dst, src, vol, n_samples);
return;
}
for (i = 0; i < n_samples; i++) {
float sum = 0.0f;
buffer[p] = src[i];
buffer[w] = buffer[w + n_buffer] = src[i];
for (j = 0; j < n_taps; j++)
sum += (taps[j] * buffer[((p - delay) - j) & (n_buffer-1)]);
sum += taps[j] * buffer[w+o+j];
dst[i] = sum * vol;
p = (p + 1) & (n_buffer-1);
w = w + 1 >= n_buffer ? 0 : w + 1;
}
*pos = p;
*pos = w;
}
#ifdef __cplusplus

View file

@ -43,6 +43,13 @@ static inline int hilbert_generate(float *taps, int n_taps)
return 0;
}
/* Reverse the order of the first n_taps coefficients of taps, in place.
 * Nothing is touched when n_taps is less than 2. */
static inline void reverse_taps(float *taps, int n_taps)
{
	/* Only the first half needs visiting; for an odd length the middle
	 * coefficient is its own mirror and stays where it is. */
	int n_pairs = n_taps / 2;
	int head = 0;

	while (head < n_pairs) {
		int tail = n_taps - 1 - head;	/* mirror position of head */

		SPA_SWAP(taps[head], taps[tail]);
		head++;
	}
}
#ifdef __cplusplus
} /* extern "C" */
#endif