channelmix: Convert matrices to float arrays

Convert matrix_orig and matrix to float arrays and use variable-size 2D arrays to access the elements of the matrices. This removes the need to store pointers to the matrix rows.
2026-07-10 11:03:57 -04:00 · 2026-05-12 14:47:57 +02:00 · 2026-05-12 14:47:57 +02:00 · a7994882b9
commit a7994882b9
parent 7cfcb46fbf
6 changed files with 161 additions and 134 deletions
--- a/spa/plugins/audioconvert/channelmix-ops-sse.c
+++ b/spa/plugins/audioconvert/channelmix-ops-sse.c
@@ -144,11 +144,12 @@ static inline void sub_sse(float *d, const float *s0, const float *s1, uint32_t
 void channelmix_copy_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t i, n_dst = mix->dst_chan;
 	float **d = (float **)dst;
 	const float **s = (const float **)src;
 	for (i = 0; i < n_dst; i++)
-		vol_sse(d[i], s[i], mix->matrix[i][i], n_samples);
+		vol_sse(d[i], s[i], matrix[i][i], n_samples);
 }

 static void lr4_process_sse(struct lr4 *lr4, float *dst, const float *src, const float vol, int samples)
@@ -346,6 +347,7 @@ void
 channelmix_f32_n_m_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		   const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	float **d = (float **) dst;
 	const float **s = (const float **) src;
 	uint32_t i, j, n_dst = mix->dst_chan, n_src = mix->src_chan;
@@ -357,9 +359,9 @@ channelmix_f32_n_m_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		uint32_t n_j = 0;

 		for (j = 0; j < n_src; j++) {
-			if (mix->matrix[i][j] == 0.0f)
+			if (matrix[i][j] == 0.0f)
 				continue;
-			mj[n_j] = mix->matrix[i][j];
+			mj[n_j] = matrix[i][j];
 			sj[n_j++] = s[j];
 		}
 		if (n_j == 0) {
@@ -377,13 +379,14 @@ void
 channelmix_f32_2_3p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		   const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t i, n, unrolled, n_dst = mix->dst_chan;
 	float **d = (float **)dst;
 	const float **s = (const float **)src;
-	const float v0 = mix->matrix[0][0];
-	const float v1 = mix->matrix[1][1];
-	const float v2 = (mix->matrix[2][0] + mix->matrix[2][1]) * 0.5f;
-	const float v3 = (mix->matrix[3][0] + mix->matrix[3][1]) * 0.5f;
+	const float v0 = matrix[0][0];
+	const float v1 = matrix[1][1];
+	const float v2 = (matrix[2][0] + matrix[2][1]) * 0.5f;
+	const float v3 = (matrix[3][0] + matrix[3][1]) * 0.5f;

 	if (SPA_FLAG_IS_SET(mix->flags, CHANNELMIX_FLAG_ZERO)) {
 		for (i = 0; i < n_dst; i++)
@@ -395,8 +398,8 @@ channelmix_f32_2_3p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 			vol_sse(d[1], s[1], v1, n_samples);
 			avg_sse(d[2], s[0], s[1], n_samples);
 		} else {
-			const __m128 mv0 = _mm_set1_ps(mix->matrix[0][0]);
-			const __m128 mv1 = _mm_set1_ps(mix->matrix[1][1]);
+			const __m128 mv0 = _mm_set1_ps(matrix[0][0]);
+			const __m128 mv1 = _mm_set1_ps(matrix[1][1]);
 			const __m128 mw = _mm_set1_ps(mix->widen);
 			const __m128 mh = _mm_set1_ps(0.5f);
 			__m128 t0[1], t1[1], w[1], c[1];
@@ -437,11 +440,12 @@ void
 channelmix_f32_2_5p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		   const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t i, n_dst = mix->dst_chan;
 	float **d = (float **)dst;
 	const float **s = (const float **)src;
-	const float v4 = mix->matrix[4][0];
-	const float v5 = mix->matrix[5][1];
+	const float v4 = matrix[4][0];
+	const float v5 = matrix[5][1];

 	if (SPA_FLAG_IS_SET(mix->flags, CHANNELMIX_FLAG_ZERO)) {
 		for (i = 0; i < n_dst; i++)
@@ -468,13 +472,14 @@ void
 channelmix_f32_2_7p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		   const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t i, n_dst = mix->dst_chan;
 	float **d = (float **)dst;
 	const float **s = (const float **)src;
-	const float v4 = mix->matrix[4][0];
-	const float v5 = mix->matrix[5][1];
-	const float v6 = mix->matrix[6][0];
-	const float v7 = mix->matrix[7][1];
+	const float v4 = matrix[4][0];
+	const float v5 = matrix[5][1];
+	const float v6 = matrix[6][0];
+	const float v7 = matrix[7][1];

 	if (SPA_FLAG_IS_SET(mix->flags, CHANNELMIX_FLAG_ZERO)) {
 		for (i = 0; i < n_dst; i++)
@@ -504,12 +509,13 @@ void
 channelmix_f32_3p1_2_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		   const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	float **d = (float **) dst;
 	const float **s = (const float **) src;
-	const float m0 = mix->matrix[0][0];
-	const float m1 = mix->matrix[1][1];
-	const float m2 = (mix->matrix[0][2] + mix->matrix[1][2]) * 0.5f;
-	const float m3 = (mix->matrix[0][3] + mix->matrix[1][3]) * 0.5f;
+	const float m0 = matrix[0][0];
+	const float m1 = matrix[1][1];
+	const float m2 = (matrix[0][2] + matrix[1][2]) * 0.5f;
+	const float m3 = (matrix[0][3] + matrix[1][3]) * 0.5f;

 	if (m0 == 0.0f && m1 == 0.0f && m2 == 0.0f && m3 == 0.0f) {
 		clear_sse(d[0], n_samples);
@@ -554,15 +560,16 @@ void
 channelmix_f32_5p1_2_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t n, unrolled;
 	float **d = (float **) dst;
 	const float **s = (const float **) src;
-	const float m00 = mix->matrix[0][0];
-	const float m11 = mix->matrix[1][1];
-	const __m128 clev = _mm_set1_ps((mix->matrix[0][2] + mix->matrix[1][2]) * 0.5f);
-	const __m128 llev = _mm_set1_ps((mix->matrix[0][3] + mix->matrix[1][3]) * 0.5f);
-	const __m128 slev0 = _mm_set1_ps(mix->matrix[0][4]);
-	const __m128 slev1 = _mm_set1_ps(mix->matrix[1][5]);
+	const float m00 = matrix[0][0];
+	const float m11 = matrix[1][1];
+	const __m128 clev = _mm_set1_ps((matrix[0][2] + matrix[1][2]) * 0.5f);
+	const __m128 llev = _mm_set1_ps((matrix[0][3] + matrix[1][3]) * 0.5f);
+	const __m128 slev0 = _mm_set1_ps(matrix[0][4]);
+	const __m128 slev1 = _mm_set1_ps(matrix[1][5]);
 	__m128 in, ctr;

 	if (SPA_IS_ALIGNED(s[0], 16) &&
@@ -616,6 +623,7 @@ void
 channelmix_f32_5p1_3p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t i, n, unrolled, n_dst = mix->dst_chan;
 	float **d = (float **) dst;
 	const float **s = (const float **) src;
@@ -639,10 +647,10 @@ channelmix_f32_5p1_3p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 			clear_sse(d[i], n_samples);
 	}
 	else {
-		const __m128 v0 = _mm_set1_ps(mix->matrix[0][0]);
-		const __m128 v1 = _mm_set1_ps(mix->matrix[1][1]);
-		const __m128 slev0 = _mm_set1_ps(mix->matrix[0][4]);
-		const __m128 slev1 = _mm_set1_ps(mix->matrix[1][5]);
+		const __m128 v0 = _mm_set1_ps(matrix[0][0]);
+		const __m128 v1 = _mm_set1_ps(matrix[1][1]);
+		const __m128 slev0 = _mm_set1_ps(matrix[0][4]);
+		const __m128 slev1 = _mm_set1_ps(matrix[1][5]);

 		for(n = 0; n < unrolled; n += 4) {
 			_mm_store_ps(&d[0][n], _mm_add_ps(
@@ -662,8 +670,8 @@ channelmix_f32_5p1_3p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 					_mm_mul_ss(_mm_load_ss(&s[1][n]), v1),
 					_mm_mul_ss(_mm_load_ss(&s[5][n]), slev1)));
 		}
-		vol_sse(d[2], s[2], mix->matrix[2][2], n_samples);
-		vol_sse(d[3], s[3], mix->matrix[3][3], n_samples);
+		vol_sse(d[2], s[2], matrix[2][2], n_samples);
+		vol_sse(d[3], s[3], matrix[3][3], n_samples);
 	}
 }

@@ -672,11 +680,12 @@ void
 channelmix_f32_5p1_4_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t i, n_dst = mix->dst_chan;
 	float **d = (float **) dst;
 	const float **s = (const float **) src;
-	const float v4 = mix->matrix[2][4];
-	const float v5 = mix->matrix[3][5];
+	const float v4 = matrix[2][4];
+	const float v5 = matrix[3][5];

 	if (SPA_FLAG_IS_SET(mix->flags, CHANNELMIX_FLAG_ZERO)) {
 		for (i = 0; i < n_dst; i++)
@@ -695,17 +704,18 @@ void
 channelmix_f32_7p1_2_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
 		const void * SPA_RESTRICT src[], uint32_t n_samples)
 {
+	CHANNELMIX_DEF_MATRIX(matrix, mix, matrix);
 	uint32_t n, unrolled;
 	float **d = (float **) dst;
 	const float **s = (const float **) src;
-	const __m128 v0 = _mm_set1_ps(mix->matrix[0][0]);
-	const __m128 v1 = _mm_set1_ps(mix->matrix[1][1]);
-	const __m128 clev = _mm_set1_ps((mix->matrix[0][2] + mix->matrix[1][2]) * 0.5f);
-	const __m128 llev = _mm_set1_ps((mix->matrix[0][3] + mix->matrix[1][3]) * 0.5f);
-	const __m128 slev0 = _mm_set1_ps(mix->matrix[0][4]);
-	const __m128 slev1 = _mm_set1_ps(mix->matrix[1][5]);
-	const __m128 rlev0 = _mm_set1_ps(mix->matrix[0][6]);
-	const __m128 rlev1 = _mm_set1_ps(mix->matrix[1][7]);
+	const __m128 v0 = _mm_set1_ps(matrix[0][0]);
+	const __m128 v1 = _mm_set1_ps(matrix[1][1]);
+	const __m128 clev = _mm_set1_ps((matrix[0][2] + matrix[1][2]) * 0.5f);
+	const __m128 llev = _mm_set1_ps((matrix[0][3] + matrix[1][3]) * 0.5f);
+	const __m128 slev0 = _mm_set1_ps(matrix[0][4]);
+	const __m128 slev1 = _mm_set1_ps(matrix[1][5]);
+	const __m128 rlev0 = _mm_set1_ps(matrix[0][6]);
+	const __m128 rlev1 = _mm_set1_ps(matrix[1][7]);
 	__m128 in, ctr;

 	if (SPA_IS_ALIGNED(s[0], 16) &&