diff --git a/spa/plugins/filter-graph/audio-dsp-avx2.c b/spa/plugins/filter-graph/audio-dsp-avx2.c
index 76c7b17d5..346b26ab3 100644
--- a/spa/plugins/filter-graph/audio-dsp-avx2.c
+++ b/spa/plugins/filter-graph/audio-dsp-avx2.c
@@ -140,10 +140,10 @@ static void dsp_add_n_gain_avx2(void *obj, float *dst,
 
 		for (i = 1; i < n_src; i++) {
 			g = _mm256_set1_ps(gain[i]);
-			in[0] = _mm256_add_ps(in[0], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+ 0])));
-			in[1] = _mm256_add_ps(in[1], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+ 8])));
-			in[2] = _mm256_add_ps(in[2], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+16])));
-			in[3] = _mm256_add_ps(in[3], _mm256_mul_ps(g, _mm256_load_ps(&s[i][n+24])));
+			in[0] = _mm256_fmadd_ps(g, _mm256_load_ps(&s[i][n+ 0]), in[0]);
+			in[1] = _mm256_fmadd_ps(g, _mm256_load_ps(&s[i][n+ 8]), in[1]);
+			in[2] = _mm256_fmadd_ps(g, _mm256_load_ps(&s[i][n+16]), in[2]);
+			in[3] = _mm256_fmadd_ps(g, _mm256_load_ps(&s[i][n+24]), in[3]);
 		}
 		_mm256_store_ps(&d[n+ 0], in[0]);
 		_mm256_store_ps(&d[n+ 8], in[1]);
@@ -237,13 +237,12 @@ void dsp_sum_avx2(void *obj, float *r, const float *a, const float *b, uint32_t
 
 inline static __m256 _mm256_mul_pz(__m256 ab, __m256 cd)
 {
-	__m256 aa, bb, dc, x0, x1;
+	__m256 aa, bb, dc, x1;
 	aa = _mm256_moveldup_ps(ab);
 	bb = _mm256_movehdup_ps(ab);
-	x0 = _mm256_mul_ps(aa, cd);
 	dc = _mm256_shuffle_ps(cd, cd, _MM_SHUFFLE(2,3,0,1));
 	x1 = _mm256_mul_ps(bb, dc);
-	return _mm256_addsub_ps(x0, x1);
+	return _mm256_fmaddsub_ps(aa, cd, x1);
 }
 
 void dsp_fft_cmul_avx2(void *obj, void *fft,
@@ -308,12 +307,10 @@ void dsp_fft_cmuladd_avx2(void *obj, void *fft,
 		bb[1] = _mm256_load_ps(&b[2*i+8]);	/* br2 bi2 br3 bi3 */
 		dd[0] = _mm256_mul_pz(aa[0], bb[0]);
 		dd[1] = _mm256_mul_pz(aa[1], bb[1]);
-		dd[0] = _mm256_mul_ps(dd[0], s);
-		dd[1] = _mm256_mul_ps(dd[1], s);
 		t[0] = _mm256_load_ps(&src[2*i]);
 		t[1] = _mm256_load_ps(&src[2*i+8]);
-		t[0] = _mm256_add_ps(t[0], dd[0]);
-		t[1] = _mm256_add_ps(t[1], dd[1]);
+		t[0] = _mm256_fmadd_ps(dd[0], s, t[0]);
+		t[1] = _mm256_fmadd_ps(dd[1], s, t[1]);
 		_mm256_store_ps(&dst[2*i], t[0]);
 		_mm256_store_ps(&dst[2*i+8], t[1]);
 	}
diff --git a/spa/plugins/filter-graph/audio-dsp.c b/spa/plugins/filter-graph/audio-dsp.c
index 133b53db5..d0c4ef008 100644
--- a/spa/plugins/filter-graph/audio-dsp.c
+++ b/spa/plugins/filter-graph/audio-dsp.c
@@ -24,7 +24,7 @@ struct dsp_info {
 static const struct dsp_info dsp_table[] =
 {
 #if defined (HAVE_AVX2)
-	{ SPA_CPU_FLAG_AVX2,
+	{ SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3,
 		.funcs.clear = dsp_clear_c,
 		.funcs.copy = dsp_copy_c,
 		.funcs.mix_gain = dsp_mix_gain_avx2,
diff --git a/spa/plugins/filter-graph/convolver.c b/spa/plugins/filter-graph/convolver.c
index a077c6ec1..788b118e3 100644
--- a/spa/plugins/filter-graph/convolver.c
+++ b/spa/plugins/filter-graph/convolver.c
@@ -171,7 +171,10 @@ static int convolver1_run(struct spa_fga_dsp *dsp, struct convolver1 *conv, cons
 
 		if (conv->segCount > 1) {
 			if (inputBufferFill == 0) {
-				int indexAudio = (conv->current + 1) % conv->segCount;
+				int indexAudio = conv->current;
+
+				if (++indexAudio == conv->segCount)
+					indexAudio = 0;
 
 				spa_fga_dsp_fft_cmul(dsp, conv->fft, conv->pre_mult,
 						conv->segmentsIr[1],
@@ -179,7 +182,8 @@ static int convolver1_run(struct spa_fga_dsp *dsp, struct convolver1 *conv, cons
 						conv->fftComplexSize, conv->scale);
 
 				for (i = 2; i < conv->segCount; i++) {
-					indexAudio = (conv->current + i) % conv->segCount;
+					if (++indexAudio == conv->segCount)
+						indexAudio = 0;
 
 					spa_fga_dsp_fft_cmuladd(dsp, conv->fft,
 							conv->pre_mult,
@@ -214,9 +218,10 @@ static int convolver1_run(struct spa_fga_dsp *dsp, struct convolver1 *conv, cons
 
 			SPA_SWAP(conv->fft_buffer[0], conv->fft_buffer[1]);
 
-			conv->current = (conv->current > 0) ? (conv->current - 1) : (conv->segCount - 1);
+			if (conv->current == 0)
+				conv->current = conv->segCount;
+			conv->current--;
 		}
-
 		processed += processing;
 	}
 	conv->inputBufferFill = inputBufferFill;
diff --git a/spa/plugins/filter-graph/meson.build b/spa/plugins/filter-graph/meson.build
index 94ee0bd25..20b90f4c4 100644
--- a/spa/plugins/filter-graph/meson.build
+++ b/spa/plugins/filter-graph/meson.build
@@ -18,16 +18,16 @@ if have_sse
   simd_cargs += ['-DHAVE_SSE']
   simd_dependencies += filter_graph_sse
 endif
-if have_avx2
-  filter_graph_avx2 = static_library('filter_graph_avx2',
+if have_avx2 and have_fma
+  filter_graph_avx2_fma = static_library('filter_graph_avx2_fma',
     ['audio-dsp-avx2.c' ],
     include_directories : [configinc],
-    c_args : [avx2_args, fma_args,'-O3', '-DHAVE_AVX2'],
+    c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'],
     dependencies : [ spa_dep ],
     install : false
     )
-  simd_cargs += ['-DHAVE_AVX2']
-  simd_dependencies += filter_graph_avx2
+  simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA']
+  simd_dependencies += filter_graph_avx2_fma
 endif
 if have_neon
   filter_graph_neon = static_library('filter_graph_neon',