audiomixer: optimize avx mixer some more

Add avx mixer to test and benchmark. Rework and unroll the avx mixer some more. The SSE one is 10 times faster than the C one; the AVX one is 20 times faster. The SSE2 function is 5 times faster than the C one.
2026-07-10 00:07:03 -04:00 · 2022-07-10 23:13:24 +02:00 · 2022-07-10 23:13:24 +02:00 · 8fe83e5304
commit 8fe83e5304
parent 23984f8790
5 changed files with 77 additions and 58 deletions
--- a/spa/plugins/audiomixer/mix-ops-sse2.c
+++ b/spa/plugins/audiomixer/mix-ops-sse2.c
@@ -48,18 +48,17 @@ mix_f64_sse2(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_REST
 		__m128d in[4];
 		const double **s = (const double **)src;
 		double *d = dst;
-		bool aligned = true;

-		if (SPA_UNLIKELY(!SPA_IS_ALIGNED(dst, 16)))
-			aligned = false;
-		else {
-			for (i = 0; i < n_src && aligned; i++) {
-				if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 16)))
-					aligned = false;
+		if (SPA_LIKELY(SPA_IS_ALIGNED(dst, 16))) {
+			unrolled = n_samples & ~15;
+			for (i = 0; i < n_src; i++) {
+				if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 16))) {
+					unrolled = 0;
+					break;
+				}
 			}
-		}
-
-		unrolled = aligned ? n_samples & ~7 : 0;
+		} else
+			unrolled = 0;

 		for (n = 0; n < unrolled; n += 8) {
 			in[0] = _mm_load_pd(&s[0][n+0]);