mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-10 13:30:05 -05:00
audiomixer: optimize avx mixer some more
Add the AVX mixer to the test and benchmark. Rework and unroll the AVX mixer some more. The SSE one is 10 times faster than the C one; the AVX one is 20 times faster. The SSE2 function is 5 times faster than the C one.
This commit is contained in:
parent
23984f8790
commit
8fe83e5304
5 changed files with 77 additions and 58 deletions
|
|
@@ -48,18 +48,17 @@ mix_f32_sse(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTR
|
|||
__m128 in[4];
|
||||
const float **s = (const float **)src;
|
||||
float *d = dst;
|
||||
bool aligned = true;
|
||||
|
||||
if (SPA_UNLIKELY(!SPA_IS_ALIGNED(dst, 16)))
|
||||
aligned = false;
|
||||
else {
|
||||
for (i = 0; i < n_src && aligned; i++) {
|
||||
if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 16)))
|
||||
aligned = false;
|
||||
if (SPA_LIKELY(SPA_IS_ALIGNED(dst, 16))) {
|
||||
unrolled = n_samples & ~15;
|
||||
for (i = 0; i < n_src; i++) {
|
||||
if (SPA_UNLIKELY(!SPA_IS_ALIGNED(src[i], 16))) {
|
||||
unrolled = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unrolled = aligned ? n_samples & ~15 : 0;
|
||||
} else
|
||||
unrolled = 0;
|
||||
|
||||
for (n = 0; n < unrolled; n += 16) {
|
||||
in[0] = _mm_load_ps(&s[0][n+ 0]);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue