From fdc1391b19e167afe1276820db13c2ba4fc56413 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Sun, 15 Oct 2023 22:09:23 +0200 Subject: [PATCH] audioconvert: avoid unaligned reads using memcpy See #3572 --- spa/plugins/audioconvert/fmt-ops-sse2.c | 63 ++++++++++++++----------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/spa/plugins/audioconvert/fmt-ops-sse2.c b/spa/plugins/audioconvert/fmt-ops-sse2.c index fed352e35..878776aae 100644 --- a/spa/plugins/audioconvert/fmt-ops-sse2.c +++ b/spa/plugins/audioconvert/fmt-ops-sse2.c @@ -114,6 +114,13 @@ conv_s16_to_f32d_2_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const v } } +/* spa_read_unaligned(ptr, type): copy sizeof(type) bytes starting at ptr into a local of that type with memcpy and yield the local, so ptr does not have to be aligned for type. Written as a statement expression with __typeof__, so it relies on those compiler extensions; ptr is expanded once. */ #define spa_read_unaligned(ptr, type) \ +__extension__ ({ \ + __typeof__(type) _val; \ + memcpy(&_val, (ptr), sizeof(_val)); \ + _val; \ +}) + void conv_s24_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples) @@ -134,10 +141,10 @@ conv_s24_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA for(n = 0; n < unrolled; n += 4) { in = _mm_setr_epi32( - *((uint32_t*)&s[0 * n_channels]), - *((uint32_t*)&s[1 * n_channels]), - *((uint32_t*)&s[2 * n_channels]), - *((uint32_t*)&s[3 * n_channels])); + spa_read_unaligned(&s[0 * n_channels], uint32_t), + spa_read_unaligned(&s[1 * n_channels], uint32_t), + spa_read_unaligned(&s[2 * n_channels], uint32_t), + spa_read_unaligned(&s[3 * n_channels], uint32_t)); in = _mm_slli_epi32(in, 8); in = _mm_srai_epi32(in, 8); out = _mm_cvtepi32_ps(in); @@ -175,15 +182,15 @@ conv_s24_to_f32d_2s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA for(n = 0; n < unrolled; n += 4) { in[0] = _mm_setr_epi32( - *((uint32_t*)&s[0 + 0*n_channels]), - *((uint32_t*)&s[0 + 1*n_channels]), - *((uint32_t*)&s[0 + 2*n_channels]), - *((uint32_t*)&s[0 + 3*n_channels])); + spa_read_unaligned(&s[0 + 0*n_channels], uint32_t), + spa_read_unaligned(&s[0 + 1*n_channels], uint32_t), + spa_read_unaligned(&s[0 + 
2*n_channels], uint32_t), + spa_read_unaligned(&s[0 + 3*n_channels], uint32_t)); in[1] = _mm_setr_epi32( - *((uint32_t*)&s[1 + 0*n_channels]), - *((uint32_t*)&s[1 + 1*n_channels]), - *((uint32_t*)&s[1 + 2*n_channels]), - *((uint32_t*)&s[1 + 3*n_channels])); + spa_read_unaligned(&s[1 + 0*n_channels], uint32_t), + spa_read_unaligned(&s[1 + 1*n_channels], uint32_t), + spa_read_unaligned(&s[1 + 2*n_channels], uint32_t), + spa_read_unaligned(&s[1 + 3*n_channels], uint32_t)); in[0] = _mm_slli_epi32(in[0], 8); in[1] = _mm_slli_epi32(in[1], 8); @@ -236,25 +243,25 @@ conv_s24_to_f32d_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA for(n = 0; n < unrolled; n += 4) { in[0] = _mm_setr_epi32( - *((uint32_t*)&s[0 + 0*n_channels]), - *((uint32_t*)&s[0 + 1*n_channels]), - *((uint32_t*)&s[0 + 2*n_channels]), - *((uint32_t*)&s[0 + 3*n_channels])); + spa_read_unaligned(&s[0 + 0*n_channels], uint32_t), + spa_read_unaligned(&s[0 + 1*n_channels], uint32_t), + spa_read_unaligned(&s[0 + 2*n_channels], uint32_t), + spa_read_unaligned(&s[0 + 3*n_channels], uint32_t)); in[1] = _mm_setr_epi32( - *((uint32_t*)&s[1 + 0*n_channels]), - *((uint32_t*)&s[1 + 1*n_channels]), - *((uint32_t*)&s[1 + 2*n_channels]), - *((uint32_t*)&s[1 + 3*n_channels])); + spa_read_unaligned(&s[1 + 0*n_channels], uint32_t), + spa_read_unaligned(&s[1 + 1*n_channels], uint32_t), + spa_read_unaligned(&s[1 + 2*n_channels], uint32_t), + spa_read_unaligned(&s[1 + 3*n_channels], uint32_t)); in[2] = _mm_setr_epi32( - *((uint32_t*)&s[2 + 0*n_channels]), - *((uint32_t*)&s[2 + 1*n_channels]), - *((uint32_t*)&s[2 + 2*n_channels]), - *((uint32_t*)&s[2 + 3*n_channels])); + spa_read_unaligned(&s[2 + 0*n_channels], uint32_t), + spa_read_unaligned(&s[2 + 1*n_channels], uint32_t), + spa_read_unaligned(&s[2 + 2*n_channels], uint32_t), + spa_read_unaligned(&s[2 + 3*n_channels], uint32_t)); in[3] = _mm_setr_epi32( - *((uint32_t*)&s[3 + 0*n_channels]), - *((uint32_t*)&s[3 + 1*n_channels]), - *((uint32_t*)&s[3 + 
2*n_channels]), - *((uint32_t*)&s[3 + 3*n_channels])); + spa_read_unaligned(&s[3 + 0*n_channels], uint32_t), + spa_read_unaligned(&s[3 + 1*n_channels], uint32_t), + spa_read_unaligned(&s[3 + 2*n_channels], uint32_t), + spa_read_unaligned(&s[3 + 3*n_channels], uint32_t)); in[0] = _mm_slli_epi32(in[0], 8); in[1] = _mm_slli_epi32(in[1], 8);