fmt: improve s24 to float conversion

2025-12-18 08:56:45 -05:00 · 2018-11-08 09:56:58 +01:00 · 2018-11-08 09:56:58 +01:00 · 3267667d94
commit 3267667d94
parent 9825932b88
2 changed files with 70 additions and 6 deletions
--- a/spa/plugins/audioconvert/fmt-ops-sse.c
+++ b/spa/plugins/audioconvert/fmt-ops-sse.c
@ -105,6 +105,49 @@ conv_s16_to_f32d_sse(void *data, int n_dst, void *dst[n_dst], int n_src, const v
 		conv_s16_to_f32d_1_sse(data, n_dst, &dst[i], &s[i], n_bytes);
 }

+/*
+ * Extract one channel from interleaved, packed signed 24-bit samples and
+ * write it as floats to a planar buffer.
+ *
+ * data:    not used by this function.
+ * n_dst:   number of interleaved channels in src; consecutive samples of the
+ *          extracted channel are 3 * n_dst bytes apart.
+ * dst:     dst[0] is the float output buffer (written with unaligned stores).
+ * src:     address of the first 3-byte sample of the channel to extract.
+ * n_bytes: source byte count; n_bytes / (3 * n_dst) samples are converted.
+ */
+static void
+conv_s24_to_f32d_1_sse(void *data, int n_dst, void *dst[n_dst], const void *src, int n_bytes)
+{
+	const uint8_t *s = src;
+	float **d = (float **) dst;
+	float *d0 = d[0];
+	int n, n_samples, unrolled;
+	__m128i in;
+	__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
+
+	n_samples = n_bytes / (3 * n_dst);
+
+	/* Four samples per iteration first, then the 0..3 leftovers one by one. */
+	unrolled = n_samples / 4;
+	n_samples = n_samples & 3;
+
+	for(n = 0; unrolled--; n += 4) {
+		/* _mm_set_epi32 takes its arguments from the highest lane down. */
+		in = _mm_set_epi32(READ24(&s[9*n_dst]),
+				   READ24(&s[6*n_dst]),
+				   READ24(&s[3*n_dst]),
+				   READ24(s));
+		out = _mm_cvtepi32_ps(in);
+		out = _mm_mul_ps(out, factor);
+		_mm_storeu_ps(&d0[n], out);
+		s += 12 * n_dst;
+	}
+	for(; n_samples--; n++) {
+		/* Take the upper lanes from 'factor' rather than 'out': 'out' has
+		 * no value yet when fewer than four samples were supplied, and
+		 * only lane 0 is stored below. */
+		out = _mm_cvtsi32_ss(factor, READ24(s));
+		out = _mm_mul_ss(out, factor);
+		_mm_store_ss(&d0[n], out);
+		s += 3 * n_dst;
+	}
+}
+
+/*
+ * Deinterleave packed signed 24-bit samples from src[0] into n_dst planar
+ * float buffers by running the single-channel converter once per channel.
+ */
+static void
+conv_s24_to_f32d_sse(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_bytes)
+{
+	const int8_t *base = src[0];
+	int ch;
+
+	for (ch = 0; ch < n_dst; ch++) {
+		/* Channel ch starts 3 * ch bytes into the first interleaved frame. */
+		const int8_t *chan_src = base + 3 * ch;
+
+		conv_s24_to_f32d_1_sse(data, n_dst, dst + ch, chan_src, n_bytes);
+	}
+}
+
 static void
 conv_f32d_to_s32_1_sse(void *data, void *dst, int n_src, const void *src[n_src], int n_bytes)
 {
--- a/spa/plugins/audioconvert/fmt-ops.c
+++ b/spa/plugins/audioconvert/fmt-ops.c
@ -46,6 +46,29 @@
 #define S32_MAX		2147483647
 #define S32_SCALE	2147483647

+/*
+ * Overlay of a signed 32-bit integer with two 16-bit halves.  The member
+ * order inside the struct follows the host byte order, so that 'lo' is meant
+ * to alias the low 16 bits of 'value' and the signed 'hi' the high 16 bits.
+ */
+union s24_data {
+	struct {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+		uint16_t lo;
+		int16_t hi;
+#else
+		int16_t hi;
+		uint16_t lo;
+#endif
+	} s24;
+	int32_t value;
+};
+
+/*
+ * Read one packed signed 24-bit sample, stored in host byte order at src,
+ * and return it sign-extended to 32 bits.
+ *
+ * The three bytes are loaded individually.  Packed samples sit at a 3-byte
+ * stride, so a 16-bit load through a cast pointer would be misaligned for
+ * every other sample, and which byte carries the sign depends on the host
+ * byte order.
+ */
+static inline int32_t read_s24(const void *src)
+{
+	const uint8_t *s = src;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	const uint8_t msb = s[2], mid = s[1], lsb = s[0];
+#else
+	const uint8_t msb = s[0], mid = s[1], lsb = s[2];
+#endif
+	/* Sign-extend the top byte via int8_t, then assemble in unsigned
+	 * arithmetic so that no negative value is ever left-shifted. */
+	return (int32_t)(((uint32_t)(int8_t)msb << 16) |
+			 ((uint32_t)mid << 8) |
+			 (uint32_t)lsb);
+}
+#define READ24(s) read_s24(s)
+
 #if defined (__SSE__)
 #include "fmt-ops-sse.c"
 #endif
@ -192,13 +215,8 @@ conv_s32d_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void
 	}
 }

-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define READ24(s) (((uint32_t)s[2] << 16) | ((uint32_t)s[1] << 8) | ((uint32_t)s[0]))
-#else
-#define READ24(s) (((uint32_t)s[0] << 16) | ((uint32_t)s[1] << 8) | ((uint32_t)s[2]))
-#endif

-#define S24_TO_F32(v)	((((int32_t)v)<<8) * (1.0f / S32_SCALE))
+/* Cast v to int32_t and scale it to float by multiplying with 1.0f / S24_SCALE.
+ * NOTE(review): presumably v must already be sign-extended (e.g. a READ24()
+ * result) — confirm at the call sites. */
+#define S24_TO_F32(v)	(((int32_t)(v)) * (1.0f / S24_SCALE))

 static void
 conv_s24_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_bytes)
@ -711,6 +729,9 @@ static const struct conv_info {

 	{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 },
 	{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32 },
+#if defined (__SSE2__)
+	{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE, conv_s24_to_f32d_sse },
+#endif
 	{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32d },
 	{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32, 0, conv_s24d_to_f32 },