audiomixer: optimize avx mixer some more

Add the AVX mixer to the test and benchmark. Rework and unroll the AVX mixer some more. The SSE one is 10 times faster than the C one; the AVX one is 20 times faster. The SSE2 function is 5 times faster than the C one.
2026-07-10 11:03:57 -04:00 · 2022-07-10 23:13:24 +02:00 · 2022-07-10 23:13:24 +02:00 · 8fe83e5304
commit 8fe83e5304
parent 23984f8790
5 changed files with 77 additions and 58 deletions
--- a/spa/plugins/audiomixer/mix-ops-sse.c
+++ b/spa/plugins/audiomixer/mix-ops-sse.c
@@ -48,18 +48,17 @@ mix_f32_sse(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTR
 		__m128 in[4];
 		const float **s = (const float **)src;
 		float *d = dst;
-		bool aligned = true;

-		if (SPA_UNLIKELY(!SPA_IS_ALIGNED(dst, 16)))
-			aligned = false;
-		else {
-			for (i = 0; i < n_src && aligned; i++) {
-				if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 16)))
-					aligned = false;
+		if (SPA_LIKELY(SPA_IS_ALIGNED(dst, 16))) {
+			unrolled = n_samples & ~15;
+			for (i = 0; i < n_src; i++) {
+				if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 16))) {
+					unrolled = 0;
+					break;
+				}
 			}
-		}
-
-		unrolled = aligned ? n_samples & ~15 : 0;
+		} else
+			unrolled = 0;

 		for (n = 0; n < unrolled; n += 16) {
 			in[0] = _mm_load_ps(&s[0][n+ 0]);