audioconvert: improve format conversion

Generate the dither noise as a value between -0.5 and 0.5 and add it to the scaled samples. To do this, we first need to do the scaling and only then CLAMP to the target depth. This optimizes to the same code but lets us avoid underflows and overflows when adding the dither noise. Add more dithering methods. Expose a dither.method property on audioconvert. Disable dither when the target depth is > 16.
2025-11-07 13:30:09 -05:00 · 2022-06-29 14:10:15 +02:00 · 2022-06-29 14:10:15 +02:00 · 938f2b123e
commit 938f2b123e
parent d23b96b033
6 changed files with 408 additions and 171 deletions
--- a/spa/plugins/audioconvert/fmt-ops-avx2.c
+++ b/spa/plugins/audioconvert/fmt-ops-avx2.c
@ -550,7 +550,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m128 in[1];
 	__m128i out[4];
 	__m128 scale = _mm_set1_ps(S32_SCALE);
-	__m128 int_min = _mm_set1_ps(S32_MIN);
+	__m128 int_max = _mm_set1_ps(S32_MAX);

 	if (SPA_IS_ALIGNED(s0, 16))
 		unrolled = n_samples & ~3;
@ -559,7 +559,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R

 	for(n = 0; n < unrolled; n += 4) {
 		in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		out[1] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(0, 3, 2, 1));
 		out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2));
@ -574,7 +574,7 @@ conv_f32d_to_s32_1s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	for(; n < n_samples; n++) {
 		in[0] = _mm_load_ss(&s0[n]);
 		in[0] = _mm_mul_ss(in[0], scale);
-		in[0] = _mm_min_ss(in[0], int_min);
+		in[0] = _mm_min_ss(in[0], int_max);
 		*d = _mm_cvtss_si32(in[0]);
 		d += n_channels;
 	}
@ -590,7 +590,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m256 in[2];
 	__m256i out[2], t[2];
 	__m256 scale = _mm256_set1_ps(S32_SCALE);
-	__m256 int_min = _mm256_set1_ps(S32_MIN);
+	__m256 int_max = _mm256_set1_ps(S32_MAX);

 	if (SPA_IS_ALIGNED(s0, 32) &&
 	    SPA_IS_ALIGNED(s1, 32))
@ -602,8 +602,8 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm256_mul_ps(_mm256_load_ps(&s0[n]), scale);
 		in[1] = _mm256_mul_ps(_mm256_load_ps(&s1[n]), scale);

-		in[0] = _mm256_min_ps(in[0], int_min);
-		in[1] = _mm256_min_ps(in[1], int_min);
+		in[0] = _mm256_min_ps(in[0], int_max);
+		in[1] = _mm256_min_ps(in[1], int_max);

 		out[0] = _mm256_cvtps_epi32(in[0]);	/* a0 a1 a2 a3 a4 a5 a6 a7 */
 		out[1] = _mm256_cvtps_epi32(in[1]);	/* b0 b1 b2 b3 b4 b5 b6 b7 */
@ -636,7 +636,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		__m128 in[2];
 		__m128i out[2];
 		__m128 scale = _mm_set1_ps(S32_SCALE);
-		__m128 int_min = _mm_set1_ps(S32_MIN);
+		__m128 int_max = _mm_set1_ps(S32_MAX);

 		in[0] = _mm_load_ss(&s0[n]);
 		in[1] = _mm_load_ss(&s1[n]);
@ -644,7 +644,7 @@ conv_f32d_to_s32_2s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm_unpacklo_ps(in[0], in[1]);

 		in[0] = _mm_mul_ps(in[0], scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		_mm_storel_epi64((__m128i*)d, out[0]);
 		d += n_channels;
@ -661,7 +661,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 	__m256 in[4];
 	__m256i out[4], t[4];
 	__m256 scale = _mm256_set1_ps(S32_SCALE);
-	__m256 int_min = _mm256_set1_ps(S32_MIN);
+	__m256 int_max = _mm256_set1_ps(S32_MAX);

 	if (SPA_IS_ALIGNED(s0, 32) &&
 	    SPA_IS_ALIGNED(s1, 32) &&
@ -677,10 +677,10 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[2] = _mm256_mul_ps(_mm256_load_ps(&s2[n]), scale);
 		in[3] = _mm256_mul_ps(_mm256_load_ps(&s3[n]), scale);

-		in[0] = _mm256_min_ps(in[0], int_min);
-		in[1] = _mm256_min_ps(in[1], int_min);
-		in[2] = _mm256_min_ps(in[2], int_min);
-		in[3] = _mm256_min_ps(in[3], int_min);
+		in[0] = _mm256_min_ps(in[0], int_max);
+		in[1] = _mm256_min_ps(in[1], int_max);
+		in[2] = _mm256_min_ps(in[2], int_max);
+		in[3] = _mm256_min_ps(in[3], int_max);

 		out[0] = _mm256_cvtps_epi32(in[0]); /* a0 a1 a2 a3 a4 a5 a6 a7 */
 		out[1] = _mm256_cvtps_epi32(in[1]); /* b0 b1 b2 b3 b4 b5 b6 b7 */
@ -711,7 +711,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		__m128 in[4];
 		__m128i out[4];
 		__m128 scale = _mm_set1_ps(S32_SCALE);
-		__m128 int_min = _mm_set1_ps(S32_MIN);
+		__m128 int_max = _mm_set1_ps(S32_MAX);

 		in[0] = _mm_load_ss(&s0[n]);
 		in[1] = _mm_load_ss(&s1[n]);
@ -723,7 +723,7 @@ conv_f32d_to_s32_4s_avx2(void *data, void * SPA_RESTRICT dst, const void * SPA_R
 		in[0] = _mm_unpacklo_ps(in[0], in[1]);

 		in[0] = _mm_mul_ps(in[0], scale);
-		in[0] = _mm_min_ps(in[0], int_min);
+		in[0] = _mm_min_ps(in[0], int_max);
 		out[0] = _mm_cvtps_epi32(in[0]);
 		_mm_storeu_si128((__m128i*)d, out[0]);
 		d += n_channels;