From edb3c27aa4ee87860e8f89c69603f0ea35a0abb1 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Mon, 27 Apr 2026 15:59:38 +0200 Subject: [PATCH] channelmix: add SEE 7p1 to stereo downmix --- spa/plugins/audioconvert/channelmix-ops-sse.c | 68 +++++++++++++++++++ spa/plugins/audioconvert/channelmix-ops.c | 3 + spa/plugins/audioconvert/channelmix-ops.h | 1 + 3 files changed, 72 insertions(+) diff --git a/spa/plugins/audioconvert/channelmix-ops-sse.c b/spa/plugins/audioconvert/channelmix-ops-sse.c index 1058959c7..f99fb1f3a 100644 --- a/spa/plugins/audioconvert/channelmix-ops-sse.c +++ b/spa/plugins/audioconvert/channelmix-ops-sse.c @@ -689,3 +689,71 @@ channelmix_f32_5p1_4_sse(struct channelmix *mix, void * SPA_RESTRICT dst[], vol_sse(d[3], s[5], v5, n_samples); } } + +/* FL+FR+FC+LFE+SL+SR+RL+RR -> FL+FR */ +void +channelmix_f32_7p1_2_sse(struct channelmix *mix, void * SPA_RESTRICT dst[], + const void * SPA_RESTRICT src[], uint32_t n_samples) +{ + uint32_t n, unrolled; + float **d = (float **) dst; + const float **s = (const float **) src; + const __m128 v0 = _mm_set1_ps(mix->matrix[0][0]); + const __m128 v1 = _mm_set1_ps(mix->matrix[1][1]); + const __m128 clev = _mm_set1_ps((mix->matrix[0][2] + mix->matrix[1][2]) * 0.5f); + const __m128 llev = _mm_set1_ps((mix->matrix[0][3] + mix->matrix[1][3]) * 0.5f); + const __m128 slev0 = _mm_set1_ps(mix->matrix[0][4]); + const __m128 slev1 = _mm_set1_ps(mix->matrix[1][5]); + const __m128 rlev0 = _mm_set1_ps(mix->matrix[0][6]); + const __m128 rlev1 = _mm_set1_ps(mix->matrix[1][7]); + __m128 in, ctr; + + if (SPA_IS_ALIGNED(s[0], 16) && + SPA_IS_ALIGNED(s[1], 16) && + SPA_IS_ALIGNED(s[2], 16) && + SPA_IS_ALIGNED(s[3], 16) && + SPA_IS_ALIGNED(s[4], 16) && + SPA_IS_ALIGNED(s[5], 16) && + SPA_IS_ALIGNED(s[6], 16) && + SPA_IS_ALIGNED(s[7], 16) && + SPA_IS_ALIGNED(d[0], 16) && + SPA_IS_ALIGNED(d[1], 16)) + unrolled = n_samples & ~3; + else + unrolled = 0; + + if (SPA_FLAG_IS_SET(mix->flags, CHANNELMIX_FLAG_ZERO)) { + clear_sse(d[0], n_samples); + clear_sse(d[1], n_samples); + } + else { + for (n = 0; n < unrolled; n += 4) { + ctr = _mm_add_ps(_mm_mul_ps(_mm_load_ps(&s[2][n]), clev), + _mm_mul_ps(_mm_load_ps(&s[3][n]), llev)); + in = _mm_mul_ps(_mm_load_ps(&s[4][n]), slev0); + in = _mm_add_ps(in, _mm_mul_ps(_mm_load_ps(&s[6][n]), rlev0)); + in = _mm_add_ps(in, ctr); + in = _mm_add_ps(in, _mm_mul_ps(_mm_load_ps(&s[0][n]), v0)); + _mm_store_ps(&d[0][n], in); + in = _mm_mul_ps(_mm_load_ps(&s[5][n]), slev1); + in = _mm_add_ps(in, _mm_mul_ps(_mm_load_ps(&s[7][n]), rlev1)); + in = _mm_add_ps(in, ctr); + in = _mm_add_ps(in, _mm_mul_ps(_mm_load_ps(&s[1][n]), v1)); + _mm_store_ps(&d[1][n], in); + } + for (; n < n_samples; n++) { + ctr = _mm_add_ss(_mm_mul_ss(_mm_load_ss(&s[2][n]), clev), + _mm_mul_ss(_mm_load_ss(&s[3][n]), llev)); + in = _mm_mul_ss(_mm_load_ss(&s[4][n]), slev0); + in = _mm_add_ss(in, _mm_mul_ss(_mm_load_ss(&s[6][n]), rlev0)); + in = _mm_add_ss(in, ctr); + in = _mm_add_ss(in, _mm_mul_ss(_mm_load_ss(&s[0][n]), v0)); + _mm_store_ss(&d[0][n], in); + in = _mm_mul_ss(_mm_load_ss(&s[5][n]), slev1); + in = _mm_add_ss(in, _mm_mul_ss(_mm_load_ss(&s[7][n]), rlev1)); + in = _mm_add_ss(in, ctr); + in = _mm_add_ss(in, _mm_mul_ss(_mm_load_ss(&s[1][n]), v1)); + _mm_store_ss(&d[1][n], in); + } + } +} diff --git a/spa/plugins/audioconvert/channelmix-ops.c b/spa/plugins/audioconvert/channelmix-ops.c index 574d0dd1b..8ae172fbd 100644 --- a/spa/plugins/audioconvert/channelmix-ops.c +++ b/spa/plugins/audioconvert/channelmix-ops.c @@ -85,6 +85,9 @@ static const struct channelmix_info { #endif MAKE(6, MASK_5_1, 4, MASK_3_1, channelmix_f32_5p1_3p1_c), +#if defined (HAVE_SSE) + MAKE(8, MASK_7_1, 2, MASK_STEREO, channelmix_f32_7p1_2_sse, SPA_CPU_FLAG_SSE), +#endif MAKE(8, MASK_7_1, 2, MASK_STEREO, channelmix_f32_7p1_2_c), MAKE(8, MASK_7_1, 4, MASK_QUAD, channelmix_f32_7p1_4_c), MAKE(8, MASK_7_1, 4, MASK_3_1, channelmix_f32_7p1_3p1_c), diff --git a/spa/plugins/audioconvert/channelmix-ops.h b/spa/plugins/audioconvert/channelmix-ops.h index c66eaddd3..f18e76153 100644 --- a/spa/plugins/audioconvert/channelmix-ops.h +++ b/spa/plugins/audioconvert/channelmix-ops.h @@ -152,6 +152,7 @@ DEFINE_FUNCTION(f32_3p1_2, sse); DEFINE_FUNCTION(f32_5p1_2, sse); DEFINE_FUNCTION(f32_5p1_3p1, sse); DEFINE_FUNCTION(f32_5p1_4, sse); +DEFINE_FUNCTION(f32_7p1_2, sse); DEFINE_FUNCTION(f32_7p1_4, sse); #endif