From 9b37142ef64d23671f7c70c0f7f6ee6da14e19ba Mon Sep 17 00:00:00 2001
From: Wim Taymans <wtaymans@redhat.com>
Date: Wed, 29 Jun 2022 17:31:32 +0200
Subject: [PATCH] audioconvert: implement noise shaping

To compare the results:

  pw-cat -r test.wav --format=u8 -P '{ dither.method=none }'

vs:

  pw-cat -r test.wav --format=u8 -P '{ dither.method=shaped5 }'
---
 spa/plugins/audioconvert/fmt-ops-c.c | 192 +++++++++++++++++++++++++++
 spa/plugins/audioconvert/fmt-ops.c   |   7 +
 spa/plugins/audioconvert/fmt-ops.h   |  23 +++-
 3 files changed, 216 insertions(+), 6 deletions(-)

diff --git a/spa/plugins/audioconvert/fmt-ops-c.c b/spa/plugins/audioconvert/fmt-ops-c.c
index d69b1e563..9191fec68 100644
--- a/spa/plugins/audioconvert/fmt-ops-c.c
+++ b/spa/plugins/audioconvert/fmt-ops-c.c
@@ -753,6 +753,26 @@ static inline void update_dither_c(struct convert *conv, uint32_t n_samples)
 		dither[n] = lcnoise(state) * scale;
 }
 
+#define SHAPER5(type,s,scale,offs,sh,min,max,d)			\
+({								\
+	type t;							\
+	float v = s * scale + offs +				\
+		- sh->e[idx] * 2.033f				\
+		+ sh->e[(idx - 1) & NS_MASK] * 2.165f		\
+		- sh->e[(idx - 2) & NS_MASK] * 1.959f		\
+		+ sh->e[(idx - 3) & NS_MASK] * 1.590f		\
+		- sh->e[(idx - 4) & NS_MASK] * 0.6149f;		\
+	t = (type)SPA_CLAMP(v + d, min, max);			\
+	idx = (idx + 1) & NS_MASK;				\
+	sh->e[idx] = t - v;					\
+	t;							\
+})
+
+#define F32_TO_U8_SH(s,sh,d)	SHAPER5(uint8_t, s, U8_SCALE, U8_OFFS, sh, U8_MIN, U8_MAX, d)
+#define F32_TO_S8_SH(s,sh,d)	SHAPER5(int8_t, s, S8_SCALE, 0, sh, S8_MIN, S8_MAX, d)
+#define F32_TO_S16_SH(s,sh,d)	SHAPER5(int16_t, s, S16_SCALE, 0, sh, S16_MIN, S16_MAX, d)
+#define F32_TO_S16S_SH(s,sh,d)	bswap_16(F32_TO_S16_SH(s,sh,d))
+
 void
 conv_f32d_to_u8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -789,6 +809,30 @@ conv_f32d_to_u8d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 	}
 }
 
+void
+conv_f32d_to_u8d_shaped_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{	/* planar f32 -> planar u8 with dither and 5-tap noise shaping */
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));	/* refresh the dither values; every channel reuses them */
+
+	for (i = 0; i < n_channels; i++) {
+		const float *s = src[i];
+		uint8_t *d = dst[i];
+		struct shaper *sh = &conv->shaper[i];	/* per-channel error history */
+		uint32_t idx = sh->idx;	/* F32_TO_U8_SH reads and advances this local by name */
+
+		for (j = 0; j < n_samples;) {	/* j is advanced by the inner loop */
+			chunk = SPA_MIN(n_samples - j, dither_size);	/* NOTE(review): stays 0 and the loop never ends if dither_size is 0 -- confirm callers rule that out */
+			for (k = 0; k < chunk; k++, j++)
+				d[j] = F32_TO_U8_SH(s[j], sh, dither[k]);
+		}
+		sh->idx = idx;	/* keep the ring position for the next call */
+	}
+}
+
 void
 conv_f32_to_u8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -851,6 +895,31 @@ conv_f32d_to_u8_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 	}
 }
 
+void
+conv_f32d_to_u8_shaped_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{	/* planar f32 -> interleaved u8 with dither and 5-tap noise shaping */
+	uint8_t *d0 = dst[0];	/* single interleaved output buffer */
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));	/* refresh the dither values; every channel reuses them */
+
+	for (i = 0; i < n_channels; i++) {
+		const float *s = src[i];
+		uint8_t *d = &d0[i];	/* first sample of channel i in the interleaved frame */
+		struct shaper *sh = &conv->shaper[i];	/* per-channel error history */
+		uint32_t idx = sh->idx;	/* F32_TO_U8_SH reads and advances this local by name */
+
+		for (j = 0; j < n_samples;) {	/* j is advanced by the inner loop */
+			chunk = SPA_MIN(n_samples - j, dither_size);	/* NOTE(review): stays 0 and the loop never ends if dither_size is 0 -- confirm callers rule that out */
+			for (k = 0; k < chunk; k++, j++)
+				d[j * n_channels] = F32_TO_U8_SH(s[j], sh, dither[k]);	/* stride of one frame */
+		}
+		sh->idx = idx;	/* keep the ring position for the next call */
+	}
+}
+
 void
 conv_f32d_to_s8d_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -887,6 +956,30 @@ conv_f32d_to_s8d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 	}
 }
 
+void
+conv_f32d_to_s8d_shaped_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{	/* planar f32 -> planar s8 with dither and 5-tap noise shaping */
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));	/* refresh the dither values; every channel reuses them */
+
+	for (i = 0; i < n_channels; i++) {
+		const float *s = src[i];
+		int8_t *d = dst[i];
+		struct shaper *sh = &conv->shaper[i];	/* per-channel error history */
+		uint32_t idx = sh->idx;	/* F32_TO_S8_SH reads and advances this local by name */
+
+		for (j = 0; j < n_samples;) {	/* j is advanced by the inner loop */
+			chunk = SPA_MIN(n_samples - j, dither_size);	/* NOTE(review): stays 0 and the loop never ends if dither_size is 0 -- confirm callers rule that out */
+			for (k = 0; k < chunk; k++, j++)
+				d[j] = F32_TO_S8_SH(s[j], sh, dither[k]);
+		}
+		sh->idx = idx;	/* keep the ring position for the next call */
+	}
+}
+
 void
 conv_f32_to_s8_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -949,6 +1042,31 @@ conv_f32d_to_s8_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 	}
 }
 
+void
+conv_f32d_to_s8_shaped_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{	/* planar f32 -> interleaved s8 with dither and 5-tap noise shaping */
+	int8_t *d0 = dst[0];	/* single interleaved output buffer */
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));	/* refresh the dither values; every channel reuses them */
+
+	for (i = 0; i < n_channels; i++) {
+		const float *s = src[i];
+		int8_t *d = &d0[i];	/* first sample of channel i in the interleaved frame */
+		struct shaper *sh = &conv->shaper[i];	/* per-channel error history */
+		uint32_t idx = sh->idx;	/* F32_TO_S8_SH reads and advances this local by name */
+
+		for (j = 0; j < n_samples;) {	/* j is advanced by the inner loop */
+			chunk = SPA_MIN(n_samples - j, dither_size);	/* NOTE(review): stays 0 and the loop never ends if dither_size is 0 -- confirm callers rule that out */
+			for (k = 0; k < chunk; k++, j++)
+				d[j * n_channels] = F32_TO_S8_SH(s[j], sh, dither[k]);	/* stride of one frame */
+		}
+		sh->idx = idx;	/* keep the ring position for the next call */
+	}
+}
+
 void
 conv_f32d_to_alaw_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -1040,6 +1158,31 @@ conv_f32d_to_s16d_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], cons
 	}
 }
 
+
+void
+conv_f32d_to_s16d_shaped_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{	/* planar f32 -> planar s16 with dither and 5-tap noise shaping */
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));	/* refresh the dither values; every channel reuses them */
+
+	for (i = 0; i < n_channels; i++) {
+		const float *s = src[i];
+		int16_t *d = dst[i];
+		struct shaper *sh = &conv->shaper[i];	/* per-channel error history */
+		uint32_t idx = sh->idx;	/* F32_TO_S16_SH reads and advances this local by name */
+
+		for (j = 0; j < n_samples;) {	/* j is advanced by the inner loop */
+			chunk = SPA_MIN(n_samples - j, dither_size);	/* NOTE(review): stays 0 and the loop never ends if dither_size is 0 -- confirm callers rule that out */
+			for (k = 0; k < chunk; k++, j++)
+				d[j] = F32_TO_S16_SH(s[j], sh, dither[k]);
+		}
+		sh->idx = idx;	/* keep the ring position for the next call */
+	}
+}
+
 void
 conv_f32_to_s16_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -1102,6 +1245,31 @@ conv_f32d_to_s16_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], const
 	}
 }
 
+void
+conv_f32d_to_s16_shaped_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{	/* planar f32 -> interleaved s16 with dither and 5-tap noise shaping */
+	int16_t *d0 = dst[0];	/* single interleaved output buffer */
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));	/* refresh the dither values; every channel reuses them */
+
+	for (i = 0; i < n_channels; i++) {
+		const float *s = src[i];
+		int16_t *d = &d0[i];	/* first sample of channel i in the interleaved frame */
+		struct shaper *sh = &conv->shaper[i];	/* per-channel error history */
+		uint32_t idx = sh->idx;	/* F32_TO_S16_SH reads and advances this local by name */
+
+		for (j = 0; j < n_samples;) {	/* j is advanced by the inner loop */
+			chunk = SPA_MIN(n_samples - j, dither_size);	/* NOTE(review): stays 0 and the loop never ends if dither_size is 0 -- confirm callers rule that out */
+			for (k = 0; k < chunk; k++, j++)
+				d[j * n_channels] = F32_TO_S16_SH(s[j], sh, dither[k]);	/* stride of one frame */
+		}
+		sh->idx = idx;	/* keep the ring position for the next call */
+	}
+}
+
 void
 conv_f32d_to_s16s_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -1136,6 +1304,30 @@ conv_f32d_to_s16s_dither_c(struct convert *conv, void * SPA_RESTRICT dst[], cons
 	}
 }
 
+void
+conv_f32d_to_s16s_shaped_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{	/* planar f32 -> interleaved byte-swapped s16 with dither and 5-tap noise shaping */
+	int16_t *d0 = dst[0];	/* single interleaved output buffer */
+	uint32_t i, j, k, chunk, n_channels = conv->n_channels, dither_size = conv->dither_size;
+	float *dither = conv->dither;
+
+	update_dither_c(conv, SPA_MIN(n_samples, dither_size));	/* refresh the dither values; every channel reuses them */
+
+	for (i = 0; i < n_channels; i++) {
+		const float *s = src[i];
+		int16_t *d = &d0[i];	/* first sample of channel i in the interleaved frame */
+		struct shaper *sh = &conv->shaper[i];	/* per-channel error history */
+		uint32_t idx = sh->idx;	/* F32_TO_S16S_SH reads and advances this local by name */
+
+		for (j = 0; j < n_samples;) {	/* j is advanced by the inner loop */
+			chunk = SPA_MIN(n_samples - j, dither_size);	/* NOTE(review): stays 0 and the loop never ends if dither_size is 0 -- confirm callers rule that out */
+			for (k = 0; k < chunk; k++, j++)
+				d[j * n_channels] = F32_TO_S16S_SH(s[j], sh, dither[k]);	/* shaped in native order, then bswap_16 */
+		}
+		sh->idx = idx;	/* keep the ring position for the next call */
+	}
+}
 void
 conv_f32_to_u32_c(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c
index 7ad743f57..c2849729f 100644
--- a/spa/plugins/audioconvert/fmt-ops.c
+++ b/spa/plugins/audioconvert/fmt-ops.c
@@ -170,16 +170,20 @@ static struct conv_info conv_table[] =
 
 	/* from f32 */
 	MAKE(F32, U8, 0, conv_f32_to_u8_c),
+	MAKE(F32P, U8P, 0, conv_f32d_to_u8d_shaped_c, 0, CONV_SHAPE),
 	MAKE(F32P, U8P, 0, conv_f32d_to_u8d_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, U8P, 0, conv_f32d_to_u8d_c),
 	MAKE(F32, U8P, 0, conv_f32_to_u8d_c),
+	MAKE(F32P, U8, 0, conv_f32d_to_u8_shaped_c, 0, CONV_SHAPE),
 	MAKE(F32P, U8, 0, conv_f32d_to_u8_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, U8, 0, conv_f32d_to_u8_c),
 
 	MAKE(F32, S8, 0, conv_f32_to_s8_c),
+	MAKE(F32P, S8P, 0, conv_f32d_to_s8d_shaped_c, 0, CONV_SHAPE),
 	MAKE(F32P, S8P, 0, conv_f32d_to_s8d_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, S8P, 0, conv_f32d_to_s8d_c),
 	MAKE(F32, S8P, 0, conv_f32_to_s8d_c),
+	MAKE(F32P, S8, 0, conv_f32d_to_s8_shaped_c, 0, CONV_SHAPE),
 	MAKE(F32P, S8, 0, conv_f32d_to_s8_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, S8, 0, conv_f32d_to_s8_c),
 
@@ -194,6 +198,7 @@ static struct conv_info conv_table[] =
 #endif
 	MAKE(F32, S16, 0, conv_f32_to_s16_c),
 
+	MAKE(F32P, S16P, 0, conv_f32d_to_s16d_shaped_c, 0, CONV_SHAPE),
 	MAKE(F32P, S16P, 0, conv_f32d_to_s16d_dither_c, 0, CONV_DITHER),
 #if defined (HAVE_SSE2)
 	MAKE(F32P, S16P, 0, conv_f32d_to_s16d_sse2, SPA_CPU_FLAG_SSE2),
@@ -202,6 +207,7 @@ static struct conv_info conv_table[] =
 
 	MAKE(F32, S16P, 0, conv_f32_to_s16d_c),
 
+	MAKE(F32P, S16, 0, conv_f32d_to_s16_shaped_c, 0, CONV_SHAPE),
 	MAKE(F32P, S16, 0, conv_f32d_to_s16_dither_c, 0, CONV_DITHER),
 #if defined (HAVE_NEON)
 	MAKE(F32P, S16, 0, conv_f32d_to_s16_neon, SPA_CPU_FLAG_NEON),
@@ -217,6 +223,7 @@ static struct conv_info conv_table[] =
 #endif
 	MAKE(F32P, S16, 0, conv_f32d_to_s16_c),
 
+	MAKE(F32P, S16_OE, 0, conv_f32d_to_s16s_shaped_c, 0, CONV_SHAPE),
 	MAKE(F32P, S16_OE, 0, conv_f32d_to_s16s_dither_c, 0, CONV_DITHER),
 	MAKE(F32P, S16_OE, 0, conv_f32d_to_s16s_c),
 
diff --git a/spa/plugins/audioconvert/fmt-ops.h b/spa/plugins/audioconvert/fmt-ops.h
index 0cb990ded..4878395c3 100644
--- a/spa/plugins/audioconvert/fmt-ops.h
+++ b/spa/plugins/audioconvert/fmt-ops.h
@@ -41,7 +41,7 @@
 #define U8_MAX			255u
 #define U8_SCALE		127.5f
 #define U8_OFFS			128.f
-#define U8_TO_F32(v)		((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0)
+#define U8_TO_F32(v)		((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0f)
 #define F32_TO_U8(v)		(uint8_t)SPA_CLAMP((v) * U8_SCALE + U8_OFFS, U8_MIN, U8_MAX)
 #define F32_TO_U8_D(v,d)	(uint8_t)SPA_CLAMP((v) * U8_SCALE + U8_OFFS + (d), U8_MIN, U8_MAX)
 
@@ -196,7 +196,14 @@ static inline void write_s24s(void *dst, int32_t val)
 #endif
 }
 
-#define MAX_NS	64
+#define NS_MAX	8	/* error ring size; must be a power of two for NS_MASK to wrap correctly */
+#define NS_MASK	(NS_MAX-1)	/* wraps ring indices with a bitwise AND */
+
+struct shaper {
+	float e[NS_MAX];	/* ring of past quantization errors */
+	uint32_t idx;	/* slot of the most recent error in e[] */
+	float r;	/* not referenced by the converters in this patch */
+};
 
 struct convert {
 	uint32_t quantize;
@@ -220,10 +227,7 @@ struct convert {
 	uint32_t random[16 + FMT_OPS_MAX_ALIGN/4];
 	float *dither;
 	uint32_t dither_size;
-
-	float ns_data[MAX_NS];
-	uint32_t ns_idx;
-	uint32_t ns_size;
+	struct shaper shaper[64];
 
 	void (*process) (struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 			uint32_t n_samples);
@@ -315,28 +319,35 @@ DEFINE_FUNCTION(f64s_to_f32d, c);
 DEFINE_FUNCTION(f64d_to_f32, c);
 DEFINE_FUNCTION(f32d_to_u8d, c);
 DEFINE_FUNCTION(f32d_to_u8d_dither, c);
+DEFINE_FUNCTION(f32d_to_u8d_shaped, c);
 DEFINE_FUNCTION(f32_to_u8, c);
 DEFINE_FUNCTION(f32_to_u8d, c);
 DEFINE_FUNCTION(f32d_to_u8, c);
 DEFINE_FUNCTION(f32d_to_u8_dither, c);
+DEFINE_FUNCTION(f32d_to_u8_shaped, c);
 DEFINE_FUNCTION(f32d_to_s8d, c);
 DEFINE_FUNCTION(f32d_to_s8d_dither, c);
+DEFINE_FUNCTION(f32d_to_s8d_shaped, c);
 DEFINE_FUNCTION(f32_to_s8, c);
 DEFINE_FUNCTION(f32_to_s8d, c);
 DEFINE_FUNCTION(f32d_to_s8, c);
 DEFINE_FUNCTION(f32d_to_s8_dither, c);
+DEFINE_FUNCTION(f32d_to_s8_shaped, c);
 DEFINE_FUNCTION(f32d_to_alaw, c);
 DEFINE_FUNCTION(f32d_to_ulaw, c);
 DEFINE_FUNCTION(f32_to_u16, c);
 DEFINE_FUNCTION(f32d_to_u16, c);
 DEFINE_FUNCTION(f32d_to_s16d, c);
 DEFINE_FUNCTION(f32d_to_s16d_dither, c);
+DEFINE_FUNCTION(f32d_to_s16d_shaped, c);
 DEFINE_FUNCTION(f32_to_s16, c);
 DEFINE_FUNCTION(f32_to_s16d, c);
 DEFINE_FUNCTION(f32d_to_s16, c);
 DEFINE_FUNCTION(f32d_to_s16_dither, c);
+DEFINE_FUNCTION(f32d_to_s16_shaped, c);
 DEFINE_FUNCTION(f32d_to_s16s, c);
 DEFINE_FUNCTION(f32d_to_s16s_dither, c);
+DEFINE_FUNCTION(f32d_to_s16s_shaped, c);
 DEFINE_FUNCTION(f32_to_u32, c);
 DEFINE_FUNCTION(f32d_to_u32, c);
 DEFINE_FUNCTION(f32d_to_s32d, c);