diff --git a/spa/plugins/audioconvert/fmt-ops-sse.c b/spa/plugins/audioconvert/fmt-ops-sse.c
index 0feac0b4b..0bf94c814 100644
--- a/spa/plugins/audioconvert/fmt-ops-sse.c
+++ b/spa/plugins/audioconvert/fmt-ops-sse.c
@@ -105,6 +105,61 @@ conv_s16_to_f32d_sse(void *data, int n_dst, void *dst[n_dst], int n_src, const v
 		conv_s16_to_f32d_1_sse(data, n_dst, &dst[i], &s[i], n_bytes);
 }
 
+#if defined (__SSE2__)
+/* Convert one channel of interleaved packed 24-bit samples to float.
+ *
+ * src points at the channel's first 3-byte sample inside frames of n_dst
+ * channels; n_bytes / (3 * n_dst) samples are scaled by 1 / S24_SCALE and
+ * written to dst[0]. _mm_set_epi32() and _mm_cvtepi32_ps() are SSE2
+ * intrinsics, hence the guard. */
+static void
+conv_s24_to_f32d_1_sse(void *data, int n_dst, void *dst[n_dst], const void *src, int n_bytes)
+{
+	const uint8_t *s = src;
+	float **d = (float **) dst;
+	float *d0 = d[0];
+	int n, n_samples, unrolled;
+	__m128i in;
+	__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
+
+	n_samples = n_bytes / (3 * n_dst);
+
+	unrolled = n_samples / 4;
+	n_samples = n_samples & 3;
+
+	/* four samples per iteration; samples of one channel are 3 * n_dst bytes apart */
+	for(n = 0; unrolled--; n += 4) {
+		in = _mm_set_epi32(READ24(&s[9*n_dst]),
+				READ24(&s[6*n_dst]),
+				READ24(&s[3*n_dst]),
+				READ24(s));
+		out = _mm_cvtepi32_ps(in);
+		out = _mm_mul_ps(out, factor);
+		_mm_storeu_ps(&d0[n], out);
+		s += 12 * n_dst;
+	}
+	/* tail of 0-3 samples: the unused upper lanes come from factor because
+	 * out is still uninitialized when the unrolled loop did not run */
+	for(; n_samples--; n++) {
+		out = _mm_cvtsi32_ss(factor, READ24(s));
+		out = _mm_mul_ss(out, factor);
+		_mm_store_ss(&d0[n], out);
+		s += 3 * n_dst;
+	}
+}
+
+/* Deinterleave the packed 24-bit samples in src[0] into n_dst float buffers. */
+static void
+conv_s24_to_f32d_sse(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_bytes)
+{
+	const uint8_t *s = src[0];
+	int i = 0;
+
+	for(; i < n_dst; i++)
+		conv_s24_to_f32d_1_sse(data, n_dst, &dst[i], &s[3*i], n_bytes);
+}
+#endif
+
 static void
 conv_f32d_to_s32_1_sse(void *data, void *dst, int n_src, const void *src[n_src], int n_bytes)
 {
diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c
index fb8a65a81..7023fa6fa 100644
--- a/spa/plugins/audioconvert/fmt-ops.c
+++ b/spa/plugins/audioconvert/fmt-ops.c
@@ -46,6 +46,21 @@
 #define S32_MAX 2147483647
 #define S32_SCALE 2147483647
 
+/* Read one packed 24-bit sample in native byte order and sign-extend it.
+ * The bytes are read one at a time so src needs no particular alignment. */
+static inline int32_t read_s24(const void *src)
+{
+	const uint8_t *s = src;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	uint32_t v = ((uint32_t)s[2] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[0];
+#else
+	uint32_t v = ((uint32_t)s[0] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[2];
+#endif
+	/* bit 23 is the sign bit: flip it and subtract its weight to sign-extend */
+	return (int32_t)(v ^ 0x800000) - 0x800000;
+}
+#define READ24(s) read_s24(s)
+
 #if defined (__SSE__)
 #include "fmt-ops-sse.c"
 #endif
@@ -192,13 +207,8 @@ conv_s32d_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void
 	}
 }
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define READ24(s) (((uint32_t)s[2] << 16) | ((uint32_t)s[1] << 8) | ((uint32_t)s[0]))
-#else
-#define READ24(s) (((uint32_t)s[0] << 16) | ((uint32_t)s[1] << 8) | ((uint32_t)s[2]))
-#endif
 
-#define S24_TO_F32(v) ((((int32_t)v)<<8) * (1.0f / S32_SCALE))
+#define S24_TO_F32(v) (((int32_t)(v)) * (1.0f / S24_SCALE))
 
 static void
 conv_s24_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_bytes)
@@ -711,6 +721,9 @@ static const struct conv_info {
 
 	{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 },
 	{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32 },
+#if defined (__SSE2__)
+	{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE, conv_s24_to_f32d_sse },
+#endif
 	{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32d },
 	{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32, 0, conv_s24d_to_f32 },
 