audiomixer: optimize avx mixer some more

Add the AVX mixer to the test and benchmark.
Rework and unroll the AVX mixer some more.
The SSE one is 10 times faster than the C one; the AVX one is 20 times
faster. The SSE2 function is 5 times faster than the C one.
This commit is contained in:
Wim Taymans 2022-07-10 23:13:24 +02:00
parent 23984f8790
commit 8fe83e5304
5 changed files with 77 additions and 58 deletions

View file

@ -74,8 +74,8 @@ static void run_test1(const char *name, const char *impl, mix_func_t func, int n
mix.n_channels = 1;
for (j = 0; j < n_src; j++)
ip[j] = SPA_PTR_ALIGN(&samp_in[j * n_samples * 4], 16, void);
op = SPA_PTR_ALIGN(samp_out, 16, void);
ip[j] = SPA_PTR_ALIGN(&samp_in[j * n_samples * 4], 32, void);
op = SPA_PTR_ALIGN(samp_out, 32, void);
clock_gettime(CLOCK_MONOTONIC, &ts);
t1 = SPA_TIMESPEC_TO_NSEC(&ts);
@ -163,6 +163,11 @@ static void test_f32(void)
run_test("test_f32", "sse", mix_f32_sse);
}
#endif
#if defined (HAVE_AVX)
if (cpu_flags & SPA_CPU_FLAG_AVX) {
run_test("test_f32", "avx", mix_f32_avx);
}
#endif
}
static void test_f64(void)