From abe87f8008440fe0c69e41d25d7f324a4d5c2622 Mon Sep 17 00:00:00 2001
From: Wim Taymans
Date: Thu, 17 Mar 2022 10:00:23 +0100
Subject: [PATCH] audioconvert: use unaligned loads

We collect 4 channels from the source. When the number of channels is not a
multiple of 4, we need to do unaligned reads.

See #2221
---
 spa/plugins/audioconvert/fmt-ops-sse2.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/spa/plugins/audioconvert/fmt-ops-sse2.c b/spa/plugins/audioconvert/fmt-ops-sse2.c
index a51352653..81524d2fd 100644
--- a/spa/plugins/audioconvert/fmt-ops-sse2.c
+++ b/spa/plugins/audioconvert/fmt-ops-sse2.c
@@ -769,10 +769,10 @@ conv_deinterleave_32_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void *
 		unrolled = 0;
 
 	for(n = 0; n < unrolled; n += 4) {
-		out[0] = _mm_load_ps(&s[0 * n_channels]);
-		out[1] = _mm_load_ps(&s[1 * n_channels]);
-		out[2] = _mm_load_ps(&s[2 * n_channels]);
-		out[3] = _mm_load_ps(&s[3 * n_channels]);
+		out[0] = _mm_loadu_ps(&s[0 * n_channels]);
+		out[1] = _mm_loadu_ps(&s[1 * n_channels]);
+		out[2] = _mm_loadu_ps(&s[2 * n_channels]);
+		out[3] = _mm_loadu_ps(&s[3 * n_channels]);
 
 		_MM_TRANSPOSE4_PS(out[0], out[1], out[2], out[3]);
 
@@ -851,10 +851,10 @@ conv_deinterleave_32s_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void
 		unrolled = 0;
 
 	for(n = 0; n < unrolled; n += 4) {
-		out[0] = _mm_load_ps(&s[0 * n_channels]);
-		out[1] = _mm_load_ps(&s[1 * n_channels]);
-		out[2] = _mm_load_ps(&s[2 * n_channels]);
-		out[3] = _mm_load_ps(&s[3 * n_channels]);
+		out[0] = _mm_loadu_ps(&s[0 * n_channels]);
+		out[1] = _mm_loadu_ps(&s[1 * n_channels]);
+		out[2] = _mm_loadu_ps(&s[2 * n_channels]);
+		out[3] = _mm_loadu_ps(&s[3 * n_channels]);
 
 		_MM_TRANSPOSE4_PS(out[0], out[1], out[2], out[3]);
 