delay: improve delay performance

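Use a wrap-around delay ring buffer: the buffer is doubled in size and every sample is written to both halves, so a read at a fixed offset from the write position never has to wrap. This avoids the modulo arithmetic on each access and lets reads proceed linearly. Also handle the delay convolver case better by reversing the taps, so that the taps and the delay buffer are both read in ascending order without extra overhead.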
2026-06-24 13:36:20 -04:00 · 2024-10-15 12:14:57 +02:00 · 2024-10-15 12:14:57 +02:00 · a57f2f25b6
commit a57f2f25b6
parent 668055e612
4 changed files with 31 additions and 14 deletions
--- a/spa/plugins/audioconvert/channelmix-ops.c
+++ b/spa/plugins/audioconvert/channelmix-ops.c
@ -775,16 +775,20 @@ int channelmix_init(struct channelmix *mix)
 	mix->delay = (uint32_t)(mix->rear_delay * mix->freq / 1000.0f);
 	mix->func_name = info->name;

-	spa_log_debug(mix->log, "selected %s delay:%d options:%08x", info->name, mix->delay,
-			mix->options);
-
 	if (mix->hilbert_taps > 0) {
 		mix->n_taps = SPA_CLAMP(mix->hilbert_taps, 15u, 255u) | 1;
 		blackman_window(mix->taps, mix->n_taps);
 		hilbert_generate(mix->taps, mix->n_taps);
+		reverse_taps(mix->taps, mix->n_taps);
 	} else {
 		mix->n_taps = 1;
 		mix->taps[0] = 1.0f;
 	}
+	if (mix->delay + mix->n_taps > BUFFER_SIZE)
+		mix->delay = BUFFER_SIZE - mix->n_taps;
+
+	spa_log_debug(mix->log, "selected %s delay:%d options:%08x", info->name, mix->delay,
+			mix->options);
+
 	return make_matrix(mix);
 }
--- a/spa/plugins/audioconvert/channelmix-ops.h
+++ b/spa/plugins/audioconvert/channelmix-ops.h
@ -60,7 +60,7 @@ struct channelmix {
 	uint32_t hilbert_taps;				/* to phase shift, 0 disabled */
 	struct lr4 lr4[SPA_AUDIO_MAX_CHANNELS];

-	float buffer[2][BUFFER_SIZE];
+	float buffer[2][BUFFER_SIZE*2];
 	uint32_t pos[2];
 	uint32_t delay;
 	float taps[MAX_TAPS];
--- a/spa/plugins/audioconvert/delay.h
+++ b/spa/plugins/audioconvert/delay.h
@ -14,14 +14,15 @@ static inline void delay_run(float *buffer, uint32_t *pos,
 		float *dst, const float *src, const float vol, uint32_t n_samples)
 {
 	uint32_t i;
-	uint32_t p = *pos;
+	uint32_t w = *pos;
+	uint32_t o = n_buffer - delay;

 	for (i = 0; i < n_samples; i++) {
-		buffer[p] = src[i];
-		dst[i] = buffer[(p - delay) & (n_buffer-1)] * vol;
-		p = (p + 1) & (n_buffer-1);
+		buffer[w] = buffer[w + n_buffer] = src[i];
+		dst[i] = buffer[w + o] * vol;
+		w = w + 1 >= n_buffer ? 0 : w + 1;
 	}
-	*pos = p;
+	*pos = w;
 }

 static inline void delay_convolve_run(float *buffer, uint32_t *pos,
@ -30,19 +31,24 @@ static inline void delay_convolve_run(float *buffer, uint32_t *pos,
 		float *dst, const float *src, const float vol, uint32_t n_samples)
 {
 	uint32_t i, j;
-	uint32_t p = *pos;
+	uint32_t w = *pos;
+	uint32_t o = n_buffer - delay - n_taps-1;

+	if (n_taps == 1) {
+		delay_run(buffer, pos, n_buffer, delay, dst, src, vol, n_samples);
+		return;
+	}
 	for (i = 0; i < n_samples; i++) {
 		float sum = 0.0f;

-		buffer[p] = src[i];
+		buffer[w] = buffer[w + n_buffer] = src[i];
 		for (j = 0; j < n_taps; j++)
-			sum += (taps[j] * buffer[((p - delay) - j) & (n_buffer-1)]);
+			sum += taps[j] * buffer[w+o+j];
 		dst[i] = sum * vol;

-		p = (p + 1) & (n_buffer-1);
+		w = w + 1 >= n_buffer ? 0 : w + 1;
 	}
-	*pos = p;
+	*pos = w;
 }

 #ifdef __cplusplus
--- a/spa/plugins/audioconvert/hilbert.h
+++ b/spa/plugins/audioconvert/hilbert.h
@ -43,6 +43,13 @@ static inline int hilbert_generate(float *taps, int n_taps)
 	return 0;
 }

+static inline void reverse_taps(float *taps, int n_taps)	/* reverse the order of taps[0..n_taps-1] in place */
+{
+	int i;
+	for (i = 0; i < n_taps/2; i++)	/* walk the first half only; with odd n_taps the middle element is left untouched */
+		SPA_SWAP(taps[i], taps[n_taps-1-i]);	/* pair element i with its mirror from the end (SPA_SWAP presumably exchanges the two values) */
+}
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif