diff --git a/spa/plugins/audioconvert/fmt-ops-sse.c b/spa/plugins/audioconvert/fmt-ops-sse.c
index 0bf94c814..a1c8c41f0 100644
--- a/spa/plugins/audioconvert/fmt-ops-sse.c
+++ b/spa/plugins/audioconvert/fmt-ops-sse.c
@@ -111,8 +111,12 @@ conv_s24_to_f32d_1_sse(void *data, int n_dst, void *dst[n_dst], const void *src,
 	const uint8_t *s = src;
 	float **d = (float **) dst;
 	float *d0 = d[0];
-	int n, n_samples, unrolled;
+	int n = 0, n_samples, unrolled;
 	__m128i in;
+	union {
+		__m128i in;
+		uint8_t b[16];
+	} b;
 	__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
 
 	n_samples = n_bytes / (3 * n_dst);
@@ -120,11 +124,12 @@ conv_s24_to_f32d_1_sse(void *data, int n_dst, void *dst[n_dst], const void *src,
 	unrolled = n_samples / 4;
 	n_samples = n_samples & 3;
 
-	for(n = 0; unrolled--; n += 4) {
-		in = _mm_set_epi32(READ24(&s[9*n_dst]),
-				READ24(&s[6*n_dst]),
-				READ24(&s[3*n_dst]),
-				READ24(s));
+	for(; unrolled--; n += 4) {
+		memcpy(&b.b[1], &s[0], 3);	/* b.b[0], [4], [8], [12] are left unset on purpose */
+		memcpy(&b.b[5], &s[3*n_dst], 3);	/* same 3*n_dst stride as the READ24() calls replaced here */
+		memcpy(&b.b[9], &s[6*n_dst], 3);
+		memcpy(&b.b[13], &s[9*n_dst], 3);
+		in = _mm_srai_epi32(b.in, 8);	/* arithmetic shift: drops the unset low byte of each lane */
 		out = _mm_cvtepi32_ps(in);
 		out = _mm_mul_ps(out, factor);
 		_mm_storeu_ps(&d0[n], out);
diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c
index 7023fa6fa..ea823631c 100644
--- a/spa/plugins/audioconvert/fmt-ops.c
+++ b/spa/plugins/audioconvert/fmt-ops.c
@@ -46,27 +46,21 @@
 #define S32_MAX	2147483647
 #define S32_SCALE	2147483647
 
-union s24_data {
-	struct {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-		uint16_t lo;
-		int16_t hi;
-#else
-		int16_t hi;
-		uint16_t lo;
-#endif
-	} s24;
-	int32_t value;
-};
 
 static inline int32_t read_s24(const void *src)
 {
-	union s24_data d;
-	const uint8_t *s = src;
-	d.s24.hi = ((int8_t*)s)[2];
-	d.s24.lo = *((uint16_t*)s);
-	return d.value;
+	union {
+		int8_t v[4];
+		int32_t value;
+	} d;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	memcpy(&d.v[1], src, 3);
+#else
+	memcpy(&d.v[0], src, 3);
+#endif
+	return d.value >> 8;	/* discards the one byte memcpy left unset; NOTE(review): relies on >> of a negative int32_t being arithmetic */
 }
+
 #define READ24(s) read_s24(s)
 
 #if defined (__SSE__)