audioconvert: avoid unaligned reads using memcpy

See #3572
This commit is contained in:
Wim Taymans 2023-10-15 22:09:23 +02:00
parent cc109843e5
commit fdc1391b19

View file

@ -114,6 +114,13 @@ conv_s16_to_f32d_2_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const v
}
}
/*
 * spa_read_unaligned(ptr, type)
 *
 * Evaluates to a value of the given type whose bytes are copied from the
 * memory at ptr. The copy is done with memcpy() into a local temporary
 * instead of dereferencing a cast pointer, so ptr does not have to be
 * aligned for the type.
 *
 * Relies on the GNU statement-expression and __typeof__ extensions;
 * __extension__ marks their use as intentional.
 */
#define spa_read_unaligned(ptr, type) \
__extension__ ({ \
	__typeof__(type) _unaligned_val; \
	memcpy(&_unaligned_val, (ptr), sizeof(_unaligned_val)); \
	_unaligned_val; \
})
void
conv_s24_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
uint32_t n_channels, uint32_t n_samples)
@ -134,10 +141,10 @@ conv_s24_to_f32d_1s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
for(n = 0; n < unrolled; n += 4) {
in = _mm_setr_epi32(
*((uint32_t*)&s[0 * n_channels]),
*((uint32_t*)&s[1 * n_channels]),
*((uint32_t*)&s[2 * n_channels]),
*((uint32_t*)&s[3 * n_channels]));
spa_read_unaligned(&s[0 * n_channels], uint32_t),
spa_read_unaligned(&s[1 * n_channels], uint32_t),
spa_read_unaligned(&s[2 * n_channels], uint32_t),
spa_read_unaligned(&s[3 * n_channels], uint32_t));
in = _mm_slli_epi32(in, 8);
in = _mm_srai_epi32(in, 8);
out = _mm_cvtepi32_ps(in);
@ -175,15 +182,15 @@ conv_s24_to_f32d_2s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
for(n = 0; n < unrolled; n += 4) {
in[0] = _mm_setr_epi32(
*((uint32_t*)&s[0 + 0*n_channels]),
*((uint32_t*)&s[0 + 1*n_channels]),
*((uint32_t*)&s[0 + 2*n_channels]),
*((uint32_t*)&s[0 + 3*n_channels]));
spa_read_unaligned(&s[0 + 0*n_channels], uint32_t),
spa_read_unaligned(&s[0 + 1*n_channels], uint32_t),
spa_read_unaligned(&s[0 + 2*n_channels], uint32_t),
spa_read_unaligned(&s[0 + 3*n_channels], uint32_t));
in[1] = _mm_setr_epi32(
*((uint32_t*)&s[1 + 0*n_channels]),
*((uint32_t*)&s[1 + 1*n_channels]),
*((uint32_t*)&s[1 + 2*n_channels]),
*((uint32_t*)&s[1 + 3*n_channels]));
spa_read_unaligned(&s[1 + 0*n_channels], uint32_t),
spa_read_unaligned(&s[1 + 1*n_channels], uint32_t),
spa_read_unaligned(&s[1 + 2*n_channels], uint32_t),
spa_read_unaligned(&s[1 + 3*n_channels], uint32_t));
in[0] = _mm_slli_epi32(in[0], 8);
in[1] = _mm_slli_epi32(in[1], 8);
@ -236,25 +243,25 @@ conv_s24_to_f32d_4s_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA
for(n = 0; n < unrolled; n += 4) {
in[0] = _mm_setr_epi32(
*((uint32_t*)&s[0 + 0*n_channels]),
*((uint32_t*)&s[0 + 1*n_channels]),
*((uint32_t*)&s[0 + 2*n_channels]),
*((uint32_t*)&s[0 + 3*n_channels]));
spa_read_unaligned(&s[0 + 0*n_channels], uint32_t),
spa_read_unaligned(&s[0 + 1*n_channels], uint32_t),
spa_read_unaligned(&s[0 + 2*n_channels], uint32_t),
spa_read_unaligned(&s[0 + 3*n_channels], uint32_t));
in[1] = _mm_setr_epi32(
*((uint32_t*)&s[1 + 0*n_channels]),
*((uint32_t*)&s[1 + 1*n_channels]),
*((uint32_t*)&s[1 + 2*n_channels]),
*((uint32_t*)&s[1 + 3*n_channels]));
spa_read_unaligned(&s[1 + 0*n_channels], uint32_t),
spa_read_unaligned(&s[1 + 1*n_channels], uint32_t),
spa_read_unaligned(&s[1 + 2*n_channels], uint32_t),
spa_read_unaligned(&s[1 + 3*n_channels], uint32_t));
in[2] = _mm_setr_epi32(
*((uint32_t*)&s[2 + 0*n_channels]),
*((uint32_t*)&s[2 + 1*n_channels]),
*((uint32_t*)&s[2 + 2*n_channels]),
*((uint32_t*)&s[2 + 3*n_channels]));
spa_read_unaligned(&s[2 + 0*n_channels], uint32_t),
spa_read_unaligned(&s[2 + 1*n_channels], uint32_t),
spa_read_unaligned(&s[2 + 2*n_channels], uint32_t),
spa_read_unaligned(&s[2 + 3*n_channels], uint32_t));
in[3] = _mm_setr_epi32(
*((uint32_t*)&s[3 + 0*n_channels]),
*((uint32_t*)&s[3 + 1*n_channels]),
*((uint32_t*)&s[3 + 2*n_channels]),
*((uint32_t*)&s[3 + 3*n_channels]));
spa_read_unaligned(&s[3 + 0*n_channels], uint32_t),
spa_read_unaligned(&s[3 + 1*n_channels], uint32_t),
spa_read_unaligned(&s[3 + 2*n_channels], uint32_t),
spa_read_unaligned(&s[3 + 3*n_channels], uint32_t));
in[0] = _mm_slli_epi32(in[0], 8);
in[1] = _mm_slli_epi32(in[1], 8);