From 9cae4ce7e79d377f43e8d01a23985bf71fa37e28 Mon Sep 17 00:00:00 2001
From: Wim Taymans <wtaymans@redhat.com>
Date: Tue, 21 Apr 2026 16:24:38 +0200
Subject: [PATCH] filter-chain: add convolver2

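Add support for convolving one input with several impulse responses at
once, producing one output per impulse response. This is more efficient
than running N separate convolvers because the FFT of the input is
computed only once and shared by all N outputs.

Add the convolver2 filter, a convolver that can have multiple outputs.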
---
 spa/plugins/filter-graph/convolver.c      | 260 ++++++-----
 spa/plugins/filter-graph/convolver.h      |  10 +
 spa/plugins/filter-graph/plugin_builtin.c | 520 +++++++++++++++-------
 src/modules/module-filter-chain.c         |  45 +-
 4 files changed, 580 insertions(+), 255 deletions(-)

diff --git a/spa/plugins/filter-graph/convolver.c b/spa/plugins/filter-graph/convolver.c
index b8adadcec..0233193f8 100644
--- a/spa/plugins/filter-graph/convolver.c
+++ b/spa/plugins/filter-graph/convolver.c
@@ -14,28 +14,31 @@
 
 #include <spa/utils/defs.h>
 
+struct ir {
+	float **segments;	/* n_segments frequency-domain blocks of this impulse response */
+	float *time_buffer[2];	/* inverse FFT results, selected by the partition's time_idx */
+	float *precalc[2];	/* results kept here when partition_run() gets no output array */
+};
+
 struct partition {
 	struct convolver *conv;
 
 	int block_size;
 	int time_size;
 	int freq_size;
-
-	int n_segments;
-	float **segments;
-	float **segments_ir;
-	int current;
-
-	float *time_buffer[2];
-
 	void *fft;
 	void *ifft;
-
-	float *pre_mult;
 	float *freq;
-	float *precalc[2];
+
+	int n_segments;		/* number of block_size chunks covering this partition's IR slice */
+	int current;		/* segments[] slot that receives the next input FFT */
+	float **segments;	/* FFTs of the recent input blocks, shared by every ir[] entry */
 
 	int block_fill;
+	int n_ir;		/* number of entries in ir */
+	struct ir *ir;		/* per impulse response data */
+	int time_idx;		/* time_buffer[] slot for the next inverse FFT, flipped per full block */
+	int precalc_idx;	/* precalc[] slot used when partition_run() has no output array */
 
 	float scale;
 };
@@ -74,68 +77,74 @@ static void partition_reset(struct spa_fga_dsp *dsp, struct partition *part)
 	int i;
 	for (i = 0; i < part->n_segments; i++)
 		spa_fga_dsp_fft_memclear(dsp, part->segments[i], part->freq_size, false);
-	spa_fga_dsp_fft_memclear(dsp, part->time_buffer[0], part->time_size, true);
-	spa_fga_dsp_fft_memclear(dsp, part->time_buffer[1], part->time_size, true);
-	spa_fga_dsp_fft_memclear(dsp, part->pre_mult, part->freq_size, false);
+	for (i = 0; i < part->n_ir; i++) {
+		struct ir *r = &part->ir[i];
+		spa_fga_dsp_fft_memclear(dsp, r->time_buffer[0], part->time_size, true);
+		spa_fga_dsp_fft_memclear(dsp, r->time_buffer[1], part->time_size, true);
+		spa_fga_dsp_fft_memclear(dsp, r->precalc[0], part->block_size, true);
+		spa_fga_dsp_fft_memclear(dsp, r->precalc[1], part->block_size, true);
+	}
 	spa_fga_dsp_fft_memclear(dsp, part->freq, part->freq_size, false);
-	spa_fga_dsp_fft_memclear(dsp, part->precalc[0], part->block_size, true);
-	spa_fga_dsp_fft_memclear(dsp, part->precalc[1], part->block_size, true);
 	part->block_fill = 0;
+	part->time_idx = 0;
+	part->precalc_idx = 0;
 	part->current = 0;
 }
 
 static void partition_free(struct spa_fga_dsp *dsp, struct partition *part)
 {
-	int i;
+	int i, j;
 	for (i = 0; i < part->n_segments; i++) {
 		if (part->segments)
 			spa_fga_dsp_fft_memfree(dsp, part->segments[i]);
-		if (part->segments_ir)
-			spa_fga_dsp_fft_memfree(dsp, part->segments_ir[i]);
+	}
+	for (i = 0; i < part->n_ir; i++) {
+		struct ir *r = &part->ir[i];
+		for (j = 0; j < part->n_segments; j++) {
+			if (r->segments)
+				spa_fga_dsp_fft_memfree(dsp, r->segments[j]);
+		}
+		if (r->time_buffer[0])
+			spa_fga_dsp_fft_memfree(dsp, r->time_buffer[0]);
+		if (r->time_buffer[1])
+			spa_fga_dsp_fft_memfree(dsp, r->time_buffer[1]);
+		if (r->precalc[0])
+			spa_fga_dsp_fft_memfree(dsp, r->precalc[0]);
+		if (r->precalc[1])
+			spa_fga_dsp_fft_memfree(dsp, r->precalc[1]);
+		free(r->segments);
 	}
 	if (part->fft)
 		spa_fga_dsp_fft_free(dsp, part->fft);
 	if (part->ifft)
 		spa_fga_dsp_fft_free(dsp, part->ifft);
-	if (part->time_buffer[0])
-		spa_fga_dsp_fft_memfree(dsp, part->time_buffer[0]);
-	if (part->time_buffer[1])
-		spa_fga_dsp_fft_memfree(dsp, part->time_buffer[1]);
 	free(part->segments);
-	free(part->segments_ir);
-	spa_fga_dsp_fft_memfree(dsp, part->pre_mult);
+	free(part->ir);
 	spa_fga_dsp_fft_memfree(dsp, part->freq);
-	spa_fga_dsp_fft_memfree(dsp, part->precalc[0]);
-	spa_fga_dsp_fft_memfree(dsp, part->precalc[1]);
 	free(part);
 }
 
-static struct partition *partition_new(struct convolver *conv, int block, const float *ir, int irlen)
+static struct partition *partition_new(struct convolver *conv, int block,
+		const struct convolver_ir ir[], int n_ir, int iroffset, int irlen)
 {
 	struct partition *part;
 	struct spa_fga_dsp *dsp = conv->dsp;
-	int i;
+	int i, j;
 
 	if (block == 0)
 		return NULL;
 
-	while (irlen > 0 && fabs(ir[irlen-1]) < 0.000001f)
-		irlen--;
-
 	part = calloc(1, sizeof(*part));
 	if (part == NULL)
 		return NULL;
 
 	part->conv = conv;
 
-	if (irlen == 0)
-		return part;
-
-
 	part->block_size = next_power_of_two(block);
 	part->time_size = 2 * part->block_size;
 	part->n_segments = (irlen + part->block_size-1) / part->block_size;
 	part->freq_size = (part->time_size / 2) + 1;
+	part->n_ir = n_ir;
 
 	part->fft = spa_fga_dsp_fft_new(dsp, part->time_size, true);
 	if (part->fft == NULL)
@@ -144,38 +153,44 @@ static struct partition *partition_new(struct convolver *conv, int block, const
 	if (part->ifft == NULL)
 		goto error;
 
-	part->time_buffer[0] = spa_fga_dsp_fft_memalloc(dsp, part->time_size, true);
-	part->time_buffer[1] = spa_fga_dsp_fft_memalloc(dsp, part->time_size, true);
-	if (part->time_buffer[0] == NULL || part->time_buffer[1] == NULL)
-		goto error;
-
 	part->segments = calloc(part->n_segments, sizeof(float*));
-	part->segments_ir = calloc(part->n_segments, sizeof(float*));
-	if (part->segments == NULL || part->segments_ir == NULL)
+	part->freq = spa_fga_dsp_fft_memalloc(dsp, part->freq_size, false);
+	part->ir = calloc(part->n_ir, sizeof(struct ir));
+	if (part->segments == NULL || part->freq == NULL || part->ir == NULL)
 		goto error;
 
 	for (i = 0; i < part->n_segments; i++) {
-		int left = irlen - (i * part->block_size);
-		int copy = SPA_MIN(part->block_size, left);
-
 		part->segments[i] = spa_fga_dsp_fft_memalloc(dsp, part->freq_size, false);
-		part->segments_ir[i] = spa_fga_dsp_fft_memalloc(dsp, part->freq_size, false);
-		if (part->segments[i] == NULL || part->segments_ir[i] == NULL)
+		if (part->segments[i] == NULL)
+			goto error;
+	}
+
+	for (i = 0; i < part->n_ir; i++) {
+		struct ir *r = &part->ir[i];
+
+		r->segments = calloc(part->n_segments, sizeof(float*));
+		r->time_buffer[0] = spa_fga_dsp_fft_memalloc(dsp, part->time_size, true);
+		r->time_buffer[1] = spa_fga_dsp_fft_memalloc(dsp, part->time_size, true);
+		r->precalc[0] = spa_fga_dsp_fft_memalloc(dsp, part->block_size, true);
+		r->precalc[1] = spa_fga_dsp_fft_memalloc(dsp, part->block_size, true);
+		if (r->segments == NULL || r->time_buffer[0] == NULL || r->time_buffer[1] == NULL ||
+		    r->precalc[0] == NULL || r->precalc[1] == NULL)
 			goto error;
 
-		spa_fga_dsp_copy(dsp, part->time_buffer[0], &ir[i * part->block_size], copy);
-		if (copy < part->time_size)
-			spa_fga_dsp_fft_memclear(dsp, part->time_buffer[0] + copy, part->time_size - copy, true);
+		for (j = 0; j < part->n_segments; j++) {
+			int left = ir[i].len - iroffset - (j * part->block_size);
+			int copy = SPA_CLAMP(left, 0, part->block_size);
 
-	        spa_fga_dsp_fft_run(dsp, part->fft, 1, part->time_buffer[0], part->segments_ir[i]);
+			r->segments[j] = spa_fga_dsp_fft_memalloc(dsp, part->freq_size, false);
+			if (r->segments[j] == NULL)
+				goto error;
+
+			spa_fga_dsp_copy(dsp, r->time_buffer[0], &ir[i].ir[iroffset + (j * part->block_size)], copy);
+			spa_fga_dsp_fft_memclear(dsp, r->time_buffer[0] + copy, part->time_size - copy, true);
+
+		        spa_fga_dsp_fft_run(dsp, part->fft, 1, r->time_buffer[0], r->segments[j]);
+		}
 	}
-	part->pre_mult = spa_fga_dsp_fft_memalloc(dsp, part->freq_size, false);
-	part->freq = spa_fga_dsp_fft_memalloc(dsp, part->freq_size, false);
-	part->precalc[0] = spa_fga_dsp_fft_memalloc(dsp, part->block_size, true);
-	part->precalc[1] = spa_fga_dsp_fft_memalloc(dsp, part->block_size, true);
-	if (part->pre_mult == NULL || part->freq == NULL ||
-	    part->precalc[0] == NULL ||  part->precalc[1] == NULL)
-		goto error;
 	part->scale = 1.0f / part->time_size;
 	partition_reset(dsp, part);
 
@@ -185,41 +200,50 @@ error:
 	return NULL;
 }
 
-static int partition_run(struct spa_fga_dsp *dsp, struct partition *part, const float *input, float *output, int len)
+static int partition_run(struct spa_fga_dsp *dsp, struct partition *part, const float *input,
+		float *output[], int offset, int len)
 {
-	int i;
+	int i, j;
 	int block_fill = part->block_fill;
-	int current = part->current;
+	int idx = part->time_idx, pc_idx = part->precalc_idx;
+	float *dst;
 
-	spa_fga_dsp_fft_run(dsp, part->fft, 1, input, part->segments[current]);
+	spa_fga_dsp_fft_run(dsp, part->fft, 1, input, part->segments[part->current]);
 
-	spa_fga_dsp_fft_cmul(dsp, part->fft,
-			part->freq,
-			part->segments[current],
-			part->segments_ir[0],
-			part->freq_size, part->scale);
+	for (i = 0; i < part->n_ir; i++) {
+		struct ir *r = &part->ir[i];
+		int current = part->current;
 
-	for (i = 1; i < part->n_segments; i++) {
-		if (++current == part->n_segments)
-			current = 0;
-
-		spa_fga_dsp_fft_cmuladd(dsp, part->fft,
-				part->freq,
+		spa_fga_dsp_fft_cmul(dsp, part->fft,
 				part->freq,
 				part->segments[current],
-				part->segments_ir[i],
+				r->segments[0],
 				part->freq_size, part->scale);
-	}
-	spa_fga_dsp_fft_run(dsp, part->ifft, -1, part->freq, part->time_buffer[0]);
 
-	spa_fga_dsp_sum(dsp, output, part->time_buffer[0] + block_fill,
-			part->time_buffer[1] + part->block_size + block_fill, len);
+		for (j = 1; j < part->n_segments; j++) {
+			if (++current == part->n_segments)
+				current = 0;
+
+			spa_fga_dsp_fft_cmuladd(dsp, part->fft,
+					part->freq,
+					part->freq,
+					part->segments[current],
+					r->segments[j],
+					part->freq_size, part->scale);
+		}
+		spa_fga_dsp_fft_run(dsp, part->ifft, -1, part->freq, r->time_buffer[idx]);
+
+		dst = output ? output[i]: r->precalc[pc_idx];
+		if (dst)
+			spa_fga_dsp_sum(dsp, dst + offset, r->time_buffer[idx] + block_fill,
+					r->time_buffer[idx^1] + part->block_size + block_fill, len);
+	}
 
 	block_fill += len;
 	if (block_fill == part->block_size) {
 		block_fill = 0;
 
-		SPA_SWAP(part->time_buffer[0], part->time_buffer[1]);
+		part->time_idx = idx ^ 1;
 
 		if (part->current == 0)
 			part->current = part->n_segments;
@@ -240,7 +264,7 @@ static void *do_background_process(void *data)
 		if (!conv->running)
 			break;
 
-		partition_run(conv->dsp, part, conv->delay[1], part->time_buffer[1], part->block_size);
+		partition_run(conv->dsp, part, conv->delay[1], NULL, 0, part->block_size);
 
 		sem_post(&conv->sem_finish);
 	}
@@ -260,10 +284,12 @@ void convolver_reset(struct convolver *conv)
 	conv->delay_fill = 0;
 }
 
-struct convolver *convolver_new(struct spa_fga_dsp *dsp, int head_block, int tail_block, const float *ir, int irlen)
+struct convolver *convolver_new_many(struct spa_fga_dsp *dsp, int head_block, int tail_block,
+		const struct convolver_ir ir[], int n_ir)
 {
 	struct convolver *conv;
-	int head_ir_len, min_size, max_size, ir_consumed = 0;
+	int i, irlen, head_ir_len, min_size, max_size, ir_consumed = 0;
+	struct convolver_ir tmp[n_ir];
 
 	if (head_block == 0 || tail_block == 0)
 		return NULL;
@@ -272,27 +298,40 @@ struct convolver *convolver_new(struct spa_fga_dsp *dsp, int head_block, int tai
 	if (head_block > tail_block)
 		SPA_SWAP(head_block, tail_block);
 
-	while (irlen > 0 && fabs(ir[irlen-1]) < 0.000001f)
-		irlen--;
-
 	conv = calloc(1, sizeof(*conv));
 	if (conv == NULL)
 		return NULL;
 
+	irlen = 0;
+	for (i = 0; i < n_ir; i++) {
+		int l = ir[i].len;
+		while (l > 0 && fabs(ir[i].ir[l-1]) < 0.000001f)
+			l--;
+		tmp[i].ir = ir[i].ir;
+		tmp[i].len = l;
+		irlen = SPA_MAX(irlen, l);
+	}
+
 	conv->dsp = dsp;
+	conv->min_size = next_power_of_two(head_block);
+	conv->max_size = next_power_of_two(tail_block);
+
+	conv->delay[0] = spa_fga_dsp_fft_memalloc(dsp, 2 * conv->max_size, true);
+	conv->delay[1] = spa_fga_dsp_fft_memalloc(dsp, 2 * conv->max_size, true);
+	if (conv->delay[0] == NULL || conv->delay[1] == NULL)
+		goto error;
 
 	if (irlen == 0)
 		return conv;
 
-	conv->min_size = next_power_of_two(head_block);
-	conv->max_size = next_power_of_two(tail_block);
-
 	min_size = conv->min_size;
 	max_size = conv->max_size;
 
 	while (ir_consumed < irlen) {
 		head_ir_len = SPA_MIN(irlen - ir_consumed, 2 * max_size);
-		conv->partition[conv->n_partition] = partition_new(conv, min_size, ir + ir_consumed, head_ir_len);
+
+		conv->partition[conv->n_partition] = partition_new(conv, min_size,
+				tmp, n_ir, ir_consumed, head_ir_len);
 		if (conv->partition[conv->n_partition] == NULL)
 			goto error;
 		conv->n_partition++;
@@ -304,11 +343,6 @@ struct convolver *convolver_new(struct spa_fga_dsp *dsp, int head_block, int tai
 			max_size = irlen;
 	}
 
-	conv->delay[0] = spa_fga_dsp_fft_memalloc(dsp, 2 * conv->max_size, true);
-	conv->delay[1] = spa_fga_dsp_fft_memalloc(dsp, 2 * conv->max_size, true);
-	if (conv->delay[0] == NULL || conv->delay[1] == NULL)
-		goto error;
-
 	convolver_reset(conv);
 
 	conv->threaded = false;
@@ -334,6 +368,12 @@ error:
 	return NULL;
 }
 
+struct convolver *convolver_new(struct spa_fga_dsp *dsp, int head_block, int tail_block, const float *ir, int irlen)
+{
+	/* single impulse response: forward it as a one-element array */
+	return convolver_new_many(dsp, head_block, tail_block, &(const struct convolver_ir){ .ir = ir, .len = irlen }, 1);
+}
+
 void convolver_free(struct convolver *conv)
 {
 	struct spa_fga_dsp *dsp = conv->dsp;
@@ -354,13 +394,16 @@ void convolver_free(struct convolver *conv)
 	free(conv);
 }
 
-int convolver_run(struct convolver *conv, const float *input, float *output, int length)
+int convolver_run_many(struct convolver *conv, const float *input, float *output[], int length)
 {
-	int processed = 0, i;
+	int processed = 0, i, j;
 	struct spa_fga_dsp *dsp = conv->dsp;
 
+	if (conv->n_partition == 0)
+		return length;
+
 	while (processed < length) {
-		int remaining = length - processed;
+		int remaining = length - processed, pc_idx;
 		float *delay = conv->delay[0];
 		int delay_fill = conv->delay_fill;
 		struct partition *part = conv->partition[0];
@@ -371,20 +414,23 @@ int convolver_run(struct convolver *conv, const float *input, float *output, int
 		spa_memcpy(delay + delay_fill, input + processed, processing * sizeof(float));
 		conv->delay_fill += processing;
 
-		partition_run(dsp, part, delay + delay_fill - block_fill, &output[processed], processing);
+		partition_run(dsp, part, delay + delay_fill - block_fill, output, processed, processing);
 
 		for (i = 1; i < conv->n_partition; i++) {
 			part = conv->partition[i];
+			pc_idx = part->precalc_idx;
 			block_size = part->block_size;
 			block_fill = delay_fill % block_size;
 
-			spa_fga_dsp_sum(dsp, &output[processed], &output[processed],
-					&part->precalc[0][block_fill], processing);
-
+			for (j = 0; j < part->n_ir; j++) {
+				struct ir *r = &part->ir[j];
+				spa_fga_dsp_sum(dsp, &output[j][processed], &output[j][processed],
+						&r->precalc[pc_idx][block_fill], processing);
+			}
 			if (block_fill + processing == block_size) {
-				SPA_SWAP(part->precalc[0], part->precalc[1]);
+				part->precalc_idx = pc_idx ^ 1;
 				partition_run(dsp, part, delay + conv->delay_fill - block_size,
-						part->precalc[0], block_size);
+						NULL, 0, block_size);
 			}
 		}
 		if (conv->delay_fill == conv->max_size) {
@@ -396,3 +442,9 @@ int convolver_run(struct convolver *conv, const float *input, float *output, int
 	}
 	return 0;
 }
+
+int convolver_run(struct convolver *conv, const float *input, float *output, int length)
+{
+	/* single output: its address serves as a one-element output array */
+	return convolver_run_many(conv, input, &output, length);
+}
diff --git a/spa/plugins/filter-graph/convolver.h b/spa/plugins/filter-graph/convolver.h
index ad6139a35..45bc50fde 100644
--- a/spa/plugins/filter-graph/convolver.h
+++ b/spa/plugins/filter-graph/convolver.h
@@ -7,8 +7,18 @@
 
 #include "audio-dsp.h"
 
+struct convolver_ir {
+	const float *ir;	/* impulse response samples */
+	int len;		/* number of samples in ir */
+};
+
 struct convolver *convolver_new(struct spa_fga_dsp *dsp, int block, int tail, const float *ir, int irlen);
 void convolver_free(struct convolver *conv);
 
 void convolver_reset(struct convolver *conv);
 int convolver_run(struct convolver *conv, const float *input, float *output, int length);
+
+/* Multi-output variants: one convolver for n_ir impulse responses, output[i] is the result for ir[i]. */
+struct convolver *convolver_new_many(struct spa_fga_dsp *dsp, int block, int tail,
+		const struct convolver_ir *ir, int n_ir);
+int convolver_run_many(struct convolver *conv, const float *input, float **output, int length);
diff --git a/spa/plugins/filter-graph/plugin_builtin.c b/spa/plugins/filter-graph/plugin_builtin.c
index b4882010c..628c7aec5 100644
--- a/spa/plugins/filter-graph/plugin_builtin.c
+++ b/spa/plugins/filter-graph/plugin_builtin.c
@@ -706,14 +706,14 @@ static const struct spa_fga_descriptor bq_raw_desc = {
 	.cleanup = builtin_cleanup,
 };
 
-/** convolve */
+/** convolver */
 struct convolver_impl {
 	struct plugin *plugin;
 
 	struct spa_log *log;
 	struct spa_fga_dsp *dsp;
 	unsigned long rate;
-	float *port[3];
+	float *port[10];
 	float latency;
 
 	struct convolver *conv;
@@ -796,51 +796,76 @@ static int finfo_open(const char *filename, struct finfo *info, int rate)
 	return 0;
 }
 
-static float *finfo_read_samples(struct plugin *pl, struct finfo *info, float gain, int delay,
-		int offset, int length, int channel, long unsigned *rate, int *n_samples, int *latency)
+struct impulse {
+	float gain;			/* factor applied to every loaded sample */
+	float delay;			/* leading delay in seconds, converted to frames with the file rate */
+	char *filenames[MAX_RATES];	/* candidates; read_closest() picks the rate nearest the target */
+	int offset;			/* frames to skip before reading */
+	int length;			/* frames to read, <= 0 selects the file's default length */
+	int channel;			/* channel to extract, taken modulo the file's channel count */
+	int resample_quality;		/* passed to resample_buffer() when the rates differ */
+	int latency;			/* filled in by finfo_read_samples() */
+	unsigned long rate;		/* rate of the loaded samples, filled in by finfo_read_samples() */
+	float *samples;			/* loaded samples, released by impulse_clear() */
+	int n_samples;			/* number of entries in samples */
+};
+/* Initial values for a struct impulse that reads channel ch; fields not named here are zero. */
+#define IMPULSE_INIT(ch) (struct impulse) { .gain = 1.0f, .channel = (ch), .resample_quality = RESAMPLE_DEFAULT_QUALITY }
+
+/* Release the filenames and samples held by ir, then zero the whole struct. */
+static void impulse_clear(struct impulse *ir)
+{
+	char **name = ir->filenames, **end = name + MAX_RATES;
+	while (name < end)
+		free(*name++);	/* free(NULL) is a no-op, so empty slots need no check */
+	free(ir->samples);
+	spa_zero(*ir);
+}
+
+static int finfo_read_samples(struct plugin *pl, struct finfo *info, struct impulse *ir)
 {
 	float *samples, v;
-	int i, n, h;
+	int i, n, h, delay = (int)(ir->delay * info->rate);
 
-	if (length <= 0)
-		length = info->def_frames;
+	if (ir->length <= 0)
+		ir->length = info->def_frames;
 	else
-		length = SPA_MIN(length, info->max_frames);
+		ir->length = SPA_MIN(ir->length, info->max_frames);
 
-	length -= SPA_MIN(offset, length);
+	ir->length -= SPA_MIN(ir->offset, ir->length);
 
-	n = delay + length;
+	n = delay + ir->length;
 	if (n == 0)
-		return NULL;
+		return -EINVAL;
 
 	samples = calloc(n * info->channels, sizeof(float));
 	if (samples == NULL)
-		return NULL;
+		return -errno;
 
-	channel = channel % info->channels;
+	ir->channel = ir->channel % info->channels;
 
 	switch (info->type) {
 	case TYPE_SNDFILE:
 #ifdef HAVE_SNDFILE
-		if (offset > 0)
-			sf_seek(info->fs, offset, SEEK_SET);
-		sf_readf_float(info->fs, samples + (delay * info->channels), length);
+		if (ir->offset > 0)
+			sf_seek(info->fs, ir->offset, SEEK_SET);
+		sf_readf_float(info->fs, samples + (delay * info->channels), ir->length);
 		for (i = 0; i < n; i++)
-			samples[i] = samples[info->channels * i + channel] * gain;
+			samples[i] = samples[info->channels * i + ir->channel] * ir->gain;
 #endif
 		break;
 	case TYPE_HILBERT:
-		gain *= 2 / (float)M_PI;
-		h = length / 2;
+		ir->gain *= 2 / (float)M_PI;
+		h = ir->length / 2;
 		for (i = 1; i < h; i += 2) {
-			v = (gain / i) * (0.43f + 0.57f * cosf(i * (float)M_PI / h));
+			v = (ir->gain / i) * (0.43f + 0.57f * cosf(i * (float)M_PI / h));
 			samples[delay + h + i] = -v;
 			samples[delay + h - i] =  v;
 		}
-		spa_log_info(pl->log, "created hilbert function length %d", length);
+		spa_log_info(pl->log, "created hilbert function length %d", ir->length);
 		break;
 	case TYPE_DIRAC:
-		samples[delay] = gain;
+		samples[delay] = ir->gain;
 		spa_log_info(pl->log, "created dirac function");
 		break;
 	case TYPE_IR:
@@ -848,22 +873,23 @@ static float *finfo_read_samples(struct plugin *pl, struct finfo *info, float ga
 		struct spa_json it[1];
 		float v;
 		if (spa_json_begin_array_relax(&it[0], info->filename+4, strlen(info->filename+4)) <= 0)
-			return NULL;
+			return -EINVAL;
 		if (spa_json_get_int(&it[0], &h) <= 0)
-			return NULL;
+			return -EINVAL;
 		info->rate = h;
 		i = 0;
 		while (spa_json_get_float(&it[0], &v) > 0) {
-			samples[delay + i] = v * gain;
+			samples[delay + i] = v * ir->gain;
 			i++;
 		}
 		break;
 	}
 	}
-	*n_samples = n;
-	*rate = info->rate;
-	*latency = (int) (n * info->latency);
-	return samples;
+	ir->samples = samples;
+	ir->n_samples = n;
+	ir->latency = (int) (n * info->latency);
+	ir->rate = info->rate;
+	return 0;
 }
 
 
@@ -875,49 +901,44 @@ static void finfo_close(struct finfo *info)
 #endif
 }
 
-static float *read_closest(struct plugin *pl, char **filenames, float gain, float delay_sec, int offset,
-		int length, int channel, long unsigned *rate, int *n_samples, int *latency)
+static int read_closest(struct plugin *pl, struct impulse *ir, int rate)
 {
 	struct finfo finfo[MAX_RATES];
-	int res, diff = INT_MAX;
+	int res = -ENOENT, diff = INT_MAX;	/* -ENOENT stands when no filename is usable */
 	uint32_t best = SPA_ID_INVALID, i;
-	float *samples = NULL;
 
 	spa_zero(finfo);
 
-	for (i = 0; i < MAX_RATES && filenames[i] && filenames[i][0]; i++) {
-		res = finfo_open(filenames[i], &finfo[i], *rate);
-		if (res < 0)
+	for (i = 0; i < MAX_RATES && ir->filenames[i] && ir->filenames[i][0]; i++) {
+		if ((res = finfo_open(ir->filenames[i], &finfo[i], rate)) < 0)
 			continue;
 
-		if (labs((long)finfo[i].rate - (long)*rate) < diff) {
+		if (labs((long)finfo[i].rate - (long)rate) < diff) {
 			best = i;
-			diff = labs((long)finfo[i].rate - (long)*rate);
+			diff = labs((long)finfo[i].rate - (long)rate);
 			spa_log_debug(pl->log, "new closest match: %d", finfo[i].rate);
 		}
 	}
 	if (best != SPA_ID_INVALID) {
-		spa_log_info(pl->log, "loading best rate:%u %s", finfo[best].rate, filenames[best]);
-		samples = finfo_read_samples(pl, &finfo[best], gain,
-				(int) (delay_sec * finfo[best].rate), offset, length,
-				channel, rate, n_samples, latency);
+		spa_log_info(pl->log, "loading best rate:%u %s", finfo[best].rate, ir->filenames[best]);
+		res = finfo_read_samples(pl, &finfo[best], ir);
 	} else {
 		char buf[PATH_MAX];
 		spa_log_error(pl->log, "Can't open any sample file (CWD %s):",
 				getcwd(buf, sizeof(buf)));
 
-		for (i = 0; i < MAX_RATES && filenames[i] && filenames[i][0]; i++) {
-			res = finfo_open(filenames[i], &finfo[i], *rate);
+		for (i = 0; i < MAX_RATES && ir->filenames[i] && ir->filenames[i][0]; i++) {
+			res = finfo_open(ir->filenames[i], &finfo[i], rate);
 			if (res < 0)
-				spa_log_error(pl->log, " failed file %s: %s", filenames[i], finfo[i].error);
+				spa_log_error(pl->log, " failed file %s: %s", ir->filenames[i], finfo[i].error);
 			else
-				spa_log_warn(pl->log, " unexpectedly opened file %s", filenames[i]);
+				spa_log_warn(pl->log, " unexpectedly opened file %s", ir->filenames[i]);
 		}
 	}
 	for (i = 0; i < MAX_RATES; i++)
 		finfo_close(&finfo[i]);
 
-	return samples;
+	return (best == SPA_ID_INVALID && res >= 0) ? -ENOENT : res;	/* nothing read: never report success */
 }
 
 static float *resample_buffer(struct plugin *pl, float *samples, int *n_samples,
@@ -997,22 +1018,119 @@ error:
 #endif
 }
 
+/* Parse one impulse description from obj into ir, then load its samples at SampleRate.
+ * Returns the read_closest() result, or a negative errno after clearing ir. */
+static int convolver_read_impulse(struct plugin *pl, struct spa_json *obj,
+		unsigned long SampleRate, struct impulse *ir)
+{
+	int len, res = -EINVAL;
+	uint32_t i = 0;
+	char key[256];
+	const char *val;
+	struct spa_json it;
+
+	while ((len = spa_json_object_next(obj, key, sizeof(key), &val)) > 0) {
+		if (spa_streq(key, "gain")) {
+			if (spa_json_parse_float(val, len, &ir->gain) <= 0) {
+				spa_log_error(pl->log, "convolver:gain requires a number");
+				goto error;
+			}
+		}
+		else if (spa_streq(key, "delay")) {
+			int delay_i;
+			if (spa_json_parse_int(val, len, &delay_i) > 0) {
+				ir->delay = delay_i / (float)SampleRate;	/* integer: samples at SampleRate */
+			} else if (spa_json_parse_float(val, len, &ir->delay) <= 0) {
+				spa_log_error(pl->log, "convolver:delay requires a number");
+				goto error;
+			}
+		}
+		else if (spa_streq(key, "filename")) {
+			if (spa_json_is_array(val, len)) {
+				spa_json_enter(obj, &it);
+				while ((len = spa_json_next(&it, &val)) > 0 &&
+					i < SPA_N_ELEMENTS(ir->filenames)) {
+						ir->filenames[i] = malloc(len+1);
+						if (ir->filenames[i] == NULL)
+							goto error_errno;
+						spa_json_parse_stringn(val, len, ir->filenames[i], len+1);
+						i++;
+				}
+			}
+			else {
+				ir->filenames[0] = malloc(len+1);
+				if (ir->filenames[0] == NULL)
+					goto error_errno;
+				spa_json_parse_stringn(val, len, ir->filenames[0], len+1);
+			}
+		}
+		else if (spa_streq(key, "offset")) {
+			if (spa_json_parse_int(val, len, &ir->offset) <= 0) {
+				spa_log_error(pl->log, "convolver:offset requires a number");
+				goto error;
+			}
+		}
+		else if (spa_streq(key, "length")) {
+			if (spa_json_parse_int(val, len, &ir->length) <= 0) {
+				spa_log_error(pl->log, "convolver:length requires a number");
+				goto error;
+			}
+		}
+		else if (spa_streq(key, "channel")) {
+			if (spa_json_parse_int(val, len, &ir->channel) <= 0) {
+				spa_log_error(pl->log, "convolver:channel requires a number");
+				goto error;
+			}
+		}
+		else if (spa_streq(key, "resample_quality")) {
+			if (spa_json_parse_int(val, len, &ir->resample_quality) <= 0) {
+				spa_log_error(pl->log, "convolver:resample_quality requires a number");
+				goto error;
+			}
+		}
+		else {
+			spa_log_warn(pl->log, "convolver: ignoring config key: '%s'", key);
+		}
+	}
+	if (ir->filenames[0] == NULL) {
+		spa_log_error(pl->log, "convolver:filename was not given");
+		goto error;
+	}
+
+	if (ir->delay < 0.0f)
+		ir->delay = 0.0f;
+	if (ir->offset < 0)
+		ir->offset = 0;
+
+	if ((res = read_closest(pl, ir, SampleRate)) < 0)
+		goto error;
+	if (SampleRate != ir->rate)
+		ir->samples = resample_buffer(pl, ir->samples, &ir->n_samples,
+				ir->rate, SampleRate, ir->resample_quality);
+	if (ir->samples == NULL)
+		goto error_errno;
+
+	return res;
+
+error_errno:
+	res = errno != 0 ? -errno : -ENOENT;	/* errno may be 0 here; never report success */
+error:
+	impulse_clear(ir);
+	return res;
+}
+
 static void * convolver_instantiate(const struct spa_fga_plugin *plugin, const struct spa_fga_descriptor * Descriptor,
 		unsigned long SampleRate, int index, const char *config)
 {
 	struct plugin *pl = SPA_CONTAINER_OF(plugin, struct plugin, plugin);
-	struct convolver_impl *impl;
-	float *samples;
-	int offset = 0, length = 0, channel = index, n_samples = 0, len;
-	uint32_t i = 0;
+	struct convolver_impl *impl = NULL;
+	int res, len;
 	struct spa_json it[2];
 	const char *val;
 	char key[256];
-	char *filenames[MAX_RATES] = { 0 };
 	int blocksize = 0, tailsize = 0;
-	int resample_quality = RESAMPLE_DEFAULT_QUALITY, def_latency;
-	float gain = 1.0f, delay = 0.0f, latency = -1.0f;
-	unsigned long rate;
+	float latency = -1.0f;
+	struct impulse ir = IMPULSE_INIT(index);
 
 	errno = EINVAL;
 	if (config == NULL) {
@@ -1038,107 +1156,26 @@ static void * convolver_instantiate(const struct spa_fga_plugin *plugin, const s
 				return NULL;
 			}
 		}
-		else if (spa_streq(key, "gain")) {
-			if (spa_json_parse_float(val, len, &gain) <= 0) {
-				spa_log_error(pl->log, "convolver:gain requires a number");
-				return NULL;
-			}
-		}
-		else if (spa_streq(key, "delay")) {
-			int delay_i;
-			if (spa_json_parse_int(val, len, &delay_i) > 0) {
-				delay = delay_i / (float)SampleRate;
-			} else if (spa_json_parse_float(val, len, &delay) <= 0) {
-				spa_log_error(pl->log, "convolver:delay requires a number");
-				return NULL;
-			}
-		}
-		else if (spa_streq(key, "filename")) {
-			if (spa_json_is_array(val, len)) {
-				spa_json_enter(&it[0], &it[1]);
-				while ((len = spa_json_next(&it[1], &val)) > 0 &&
-					i < SPA_N_ELEMENTS(filenames)) {
-						filenames[i] = malloc(len+1);
-						if (filenames[i] == NULL)
-							return NULL;
-						spa_json_parse_stringn(val, len, filenames[i], len+1);
-						i++;
-				}
-			}
-			else {
-				filenames[0] = malloc(len+1);
-				if (filenames[0] == NULL)
-					return NULL;
-				spa_json_parse_stringn(val, len, filenames[0], len+1);
-			}
-		}
-		else if (spa_streq(key, "offset")) {
-			if (spa_json_parse_int(val, len, &offset) <= 0) {
-				spa_log_error(pl->log, "convolver:offset requires a number");
-				return NULL;
-			}
-		}
-		else if (spa_streq(key, "length")) {
-			if (spa_json_parse_int(val, len, &length) <= 0) {
-				spa_log_error(pl->log, "convolver:length requires a number");
-				return NULL;
-			}
-		}
-		else if (spa_streq(key, "channel")) {
-			if (spa_json_parse_int(val, len, &channel) <= 0) {
-				spa_log_error(pl->log, "convolver:channel requires a number");
-				return NULL;
-			}
-		}
-		else if (spa_streq(key, "resample_quality")) {
-			if (spa_json_parse_int(val, len, &resample_quality) <= 0) {
-				spa_log_error(pl->log, "convolver:resample_quality requires a number");
-				return NULL;
-			}
-		}
 		else if (spa_streq(key, "latency")) {
 			if (spa_json_parse_float(val, len, &latency) <= 0) {
 				spa_log_error(pl->log, "convolver:latency requires a number");
 				return NULL;
 			}
 		}
-		else {
-			spa_log_warn(pl->log, "convolver: ignoring config key: '%s'", key);
-		}
 	}
-	if (filenames[0] == NULL) {
-		spa_log_error(pl->log, "convolver:filename was not given");
-		return NULL;
-	}
-
-	if (delay < 0.0f)
-		delay = 0.0f;
-	if (offset < 0)
-		offset = 0;
-
-	rate = SampleRate;
-	samples = read_closest(pl, filenames, gain, delay, offset,
-			length, channel, &rate, &n_samples, &def_latency);
-	if (samples != NULL && rate != SampleRate)
-		samples = resample_buffer(pl, samples, &n_samples,
-				rate, SampleRate, resample_quality);
-
-	for (i = 0; i < MAX_RATES; i++)
-		if (filenames[i])
-			free(filenames[i]);
-
-	if (samples == NULL) {
-		errno = ENOENT;
+	spa_json_begin_object(&it[0], config, strlen(config));
+	if ((res = convolver_read_impulse(pl, &it[0], SampleRate, &ir)) < 0) {
+		errno = -res;
 		return NULL;
 	}
 
 	if (blocksize <= 0)
-		blocksize = SPA_CLAMP(n_samples, 64, 256);
+		blocksize = SPA_CLAMP(ir.n_samples, 64, 256);
 	if (tailsize <= 0)
 		tailsize = SPA_CLAMP(4096, blocksize, 32768);
 
-	spa_log_info(pl->log, "using n_samples:%u %d:%d blocksize delay:%f def-latency:%d", n_samples,
-			blocksize, tailsize, delay, def_latency);
+	spa_log_info(pl->log, "using n_samples:%u %d:%d blocksize delay:%f def-latency:%d", ir.n_samples,
+			blocksize, tailsize, ir.delay, ir.latency);
 
 	impl = calloc(1, sizeof(*impl));
 	if (impl == NULL)
@@ -1148,21 +1185,20 @@ static void * convolver_instantiate(const struct spa_fga_plugin *plugin, const s
 	impl->log = pl->log;
 	impl->dsp = pl->dsp;
 	impl->rate = SampleRate;
-
-	impl->conv = convolver_new(impl->dsp, blocksize, tailsize, samples, n_samples);
-	if (impl->conv == NULL)
-		goto error;
-
 	if (latency < 0.0f)
-		impl->latency = def_latency;
+		impl->latency = ir.latency;
 	else
 		impl->latency = latency * impl->rate;
 
-	free(samples);
+	impl->conv = convolver_new(impl->dsp, blocksize, tailsize, ir.samples, ir.n_samples);
+	if (impl->conv == NULL)
+		goto error;
+
+	impulse_clear(&ir);
 
 	return impl;
 error:
-	free(samples);
+	impulse_clear(&ir);
 	free(impl);
 	return NULL;
 }
@@ -1235,6 +1271,188 @@ static const struct spa_fga_descriptor convolve_desc = {
 	.cleanup = convolver_cleanup,
 };
 
+/** convolver2: convolve one input with up to 8 impulse responses, one per output.
+ * Reads blocksize, tailsize, latency and the impulses array from the JSON config.
+ * Returns the new instance, or NULL with errno set on failure. */
+static void * convolver2_instantiate(const struct spa_fga_plugin *plugin, const struct spa_fga_descriptor * Descriptor,
+		unsigned long SampleRate, int index, const char *config)
+{
+	struct plugin *pl = SPA_CONTAINER_OF(plugin, struct plugin, plugin);
+	struct convolver_impl *impl = NULL;
+	int i, len, res, max_samples = 0, n_ir = 0;
+	struct spa_json it[3];
+	const char *val;
+	char key[256];
+	int blocksize = 0, tailsize = 0;
+	float latency = -1.0f;
+	struct impulse ir[8];
+	struct convolver_ir cir[SPA_N_ELEMENTS(ir)];
+
+	if (config == NULL) {
+		spa_log_error(pl->log, "convolver2: requires a config section");
+		goto invalid;
+	}
+	if (spa_json_begin_object(&it[0], config, strlen(config)) <= 0) {
+		spa_log_error(pl->log, "convolver2:config must be an object");
+		goto invalid;
+	}
+
+	while ((len = spa_json_object_next(&it[0], key, sizeof(key), &val)) > 0) {
+		int ok = 1;	/* <= 0 when a numeric value failed to parse */
+		if (spa_streq(key, "blocksize"))
+			ok = spa_json_parse_int(val, len, &blocksize);
+		else if (spa_streq(key, "tailsize"))
+			ok = spa_json_parse_int(val, len, &tailsize);
+		else if (spa_streq(key, "latency"))
+			ok = spa_json_parse_float(val, len, &latency);
+		else if (spa_streq(key, "impulses")) {
+			if (!spa_json_is_array(val, len)) {
+				spa_log_error(pl->log, "convolver2:impulses require an array");
+				goto invalid;
+			}
+			spa_json_enter(&it[0], &it[1]);
+			while (spa_json_enter_object(&it[1], &it[2]) > 0) {
+				if (n_ir >= (int)SPA_N_ELEMENTS(ir)) {	/* ir[] and cir[] are full */
+					spa_log_error(pl->log, "convolver2:too many impulses");
+					goto invalid;
+				}
+				ir[n_ir] = IMPULSE_INIT(n_ir);
+				if ((res = convolver_read_impulse(pl, &it[2], SampleRate, &ir[n_ir])) < 0) {
+					errno = -res;
+					goto error;
+				}
+				max_samples = SPA_MAX(max_samples, ir[n_ir].n_samples);
+				cir[n_ir].ir = ir[n_ir].samples;
+				cir[n_ir].len = ir[n_ir].n_samples;
+				n_ir++;
+			}
+		}
+		else
+			spa_log_warn(pl->log, "convolver2: ignoring config key: '%s'", key);
+
+		if (ok <= 0) {
+			spa_log_error(pl->log, "convolver2:%s requires a number", key);
+			goto invalid;
+		}
+	}
+	if (n_ir == 0) {
+		spa_log_error(pl->log, "convolver2:no impulses given");
+		goto invalid;
+	}
+
+	if (blocksize <= 0)
+		blocksize = SPA_CLAMP(max_samples, 64, 256);
+	if (tailsize <= 0)
+		tailsize = SPA_CLAMP(4096, blocksize, 32768);
+
+	spa_log_info(pl->log, "using max_samples:%d %d:%d blocksize", max_samples, blocksize, tailsize);
+
+	impl = calloc(1, sizeof(*impl));
+	if (impl == NULL)
+		goto error;
+
+	impl->plugin = pl;
+	impl->log = pl->log;
+	impl->dsp = pl->dsp;
+	impl->rate = SampleRate;
+	if (latency < 0.0f)
+		impl->latency = ir[0].latency;
+	else
+		impl->latency = latency * impl->rate;
+
+	impl->conv = convolver_new_many(impl->dsp, blocksize, tailsize, cir, n_ir);
+	if (impl->conv == NULL)
+		goto error;
+
+	/* IRs are freed after setup as in convolver_instantiate(); assumes convolver_new_many() copies them */
+	for (i = 0; i < n_ir; i++)
+		impulse_clear(&ir[i]);
+	return impl;
+invalid:
+	errno = EINVAL;
+error:
+	for (i = 0; i < n_ir; i++)
+		impulse_clear(&ir[i]);
+	free(impl);
+	return NULL;
+}
+
+static void convolver2_run(void * Instance, unsigned long SampleCount)
+{
+	struct convolver_impl *self = Instance;
+	if (self->port[0])	/* "In" is connected: fill the outputs, which start at port 2 */
+		convolver_run_many(self->conv, self->port[0], &self->port[2], SampleCount);
+	if (self->port[1])	/* the "latency" control output may be unconnected (NULL) */
+		*self->port[1] = self->latency;
+}
+
+static void convolver2_activate(void * Instance)
+{
+	struct convolver_impl *self = Instance;
+	if (self->port[1])	/* publish the latency on the control output when it is connected */
+		*self->port[1] = self->latency;
+}
+
+static struct spa_fga_port convolver2_ports[] = {	/* convolver2_run() uses port[0], port[1] and &port[2]; presumably filled by .index */
+	{ .index = 0,	/* audio input, read as port[0] by convolver2_run() */
+	  .name = "In",
+	  .flags = SPA_FGA_PORT_INPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 1,	/* control output; run and activate store impl->latency in it */
+	  .name = "latency",
+	  .hint = SPA_FGA_HINT_LATENCY,
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_CONTROL,
+	},
+	{ .index = 2,	/* first audio output; convolver2_run() passes &port[2] as the output array */
+	  .name = "Out 1",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 3,
+	  .name = "Out 2",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 4,
+	  .name = "Out 3",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 5,
+	  .name = "Out 4",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 6,
+	  .name = "Out 5",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 7,
+	  .name = "Out 6",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 8,
+	  .name = "Out 7",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+	{ .index = 9,	/* eighth and last audio output, matching the 8 slots of ir[] in convolver2_instantiate() */
+	  .name = "Out 8",
+	  .flags = SPA_FGA_PORT_OUTPUT | SPA_FGA_PORT_AUDIO,
+	},
+};
+
+
+static const struct spa_fga_descriptor convolver2_desc = {
+	.name = "convolver2",	/* selected with `label = convolver2` in a filter-chain node */
+	.flags = SPA_FGA_DESCRIPTOR_SUPPORTS_NULL_DATA,	/* convolver2_run() NULL-checks port[0] and port[1] */
+
+	.n_ports = SPA_N_ELEMENTS(convolver2_ports),
+	.ports = convolver2_ports,
+
+	.instantiate = convolver2_instantiate,
+	.connect_port = convolver_connect_port,	/* NOTE(review): confirm impl->port[] has room for all 10 ports */
+	.activate = convolver2_activate,
+	.deactivate = convolver_deactivate,
+	.run = convolver2_run,
+	.cleanup = convolver_cleanup,	/* same cleanup callback as convolve_desc */
+};
+
 /** delay */
 struct delay_impl {
 	struct plugin *plugin;
@@ -3339,6 +3557,8 @@ static const struct spa_fga_descriptor * builtin_descriptor(unsigned long Index)
 		return &busy_desc;
 	case 32:
 		return &null_desc;
+	case 33:
+		return &convolver2_desc;
 	}
 	return NULL;
 }
diff --git a/src/modules/module-filter-chain.c b/src/modules/module-filter-chain.c
index 42f222949..7168ab9ba 100644
--- a/src/modules/module-filter-chain.c
+++ b/src/modules/module-filter-chain.c
@@ -368,6 +368,11 @@ extern struct spa_handle_factory spa_filter_graph_factory;
  * The convolver has an input port "In" and an output port "Out". It requires a config
  * section in the node declaration in this format:
  *
+ * When multiple impulses are applied to one input, use convolver2 instead. It is more
+ * efficient because the FFT of the input is only computed once for all outputs.
+ *
+ * Check the documentation for Convolver2 for the parameter meanings.
+ *
  *\code{.unparsed}
  * filter.graph = {
  *     nodes = [
@@ -392,12 +397,50 @@ extern struct spa_handle_factory spa_filter_graph_factory;
  *     }
  *     ...
  * }
+ *\endcode
+ *
+ * ### Convolver2
+ * The convolver2 can be used to apply one or more impulse responses to a signal.
+ * It is usually used for reverbs or virtual surround. The convolver2 is implemented
+ * with a fast FFT implementation.
+ *
+ * The convolver2 has an input port "In" and 8 output ports "Out 1" to "Out 8". It
+ * requires a config section in the node declaration in this format:
+ *
+ *\code{.unparsed}
+ * filter.graph = {
+ *     nodes = [
+ *         {
+ *             type   = builtin
+ *             name   = ...
+ *             label  = convolver2
+ *             config = {
+ *                 blocksize = ...
+ *                 tailsize = ...
+ *                 impulses = [
+ *                     {
+ *                         gain = ...
+ *                         delay = ...
+ *                         filename = ...
+ *                         offset = ...
+ *                         length = ...
+ *                         channel = ...
+ *                         resample_quality = ...
+ *                     }
+ *                     ...
+ *                 ]
+ *                 latency = ...
+ *             }
+ *             ...
+ *         }
+ *
  *\endcode
  *
  * - `blocksize` specifies the size of the blocks to use in the FFT. It is a value
  *               between 64 and 256. When not specified, this value is
  *               computed automatically from the number of samples in the file.
  * - `tailsize` specifies the size of the tail blocks to use in the FFT.
+ * - `impulses` An array of up to 8 objects, one per output "Out 1" to "Out 8", each holding the IR options below.
  * - `gain`     the overall gain to apply to the IR file. Default 1.0
  * - `delay`    The extra delay to add to the IR. A float number will be interpreted as seconds,
  *              and integer as samples. Using the delay in seconds is independent of the graph
@@ -420,7 +463,7 @@ extern struct spa_handle_factory spa_filter_graph_factory;
  * - `resample_quality` The resample quality in case the IR does not match the graph
  *                      samplerate.
  * - `latency`  The extra latency in seconds to report. When left unspecified (or < 0.0)
- *              the default IR latency will be used, the the filename argument.
+ *              the default IR latency will be used, depending on the filename argument.
  *
  * ### Delay
  *