From 4867edb9479aa5fdf0ac191b53bb216ec8ca2479 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 2 Sep 2020 11:15:23 +0200 Subject: [PATCH] channelmix: read mix coefficients correctly Fix 5p1 to stereo mixdown by reading the coefficients correctly. Align 5p1 to quad SSE and C implementation Fixes #272 --- spa/plugins/audioconvert/channelmix-ops-c.c | 12 +++++----- spa/plugins/audioconvert/channelmix-ops-sse.c | 22 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/spa/plugins/audioconvert/channelmix-ops-c.c b/spa/plugins/audioconvert/channelmix-ops-c.c index 9f7e93ec2..b9eed6f6f 100644 --- a/spa/plugins/audioconvert/channelmix-ops-c.c +++ b/spa/plugins/audioconvert/channelmix-ops-c.c @@ -295,10 +295,10 @@ channelmix_f32_5p1_2_c(struct channelmix *mix, uint32_t n_dst, void * SPA_RESTRI const float **s = (const float **) src; const float v0 = mix->matrix[0][0]; const float v1 = mix->matrix[1][1]; - const float clev = mix->matrix[2][0]; - const float llev = mix->matrix[3][0]; - const float slev0 = mix->matrix[4][0]; - const float slev1 = mix->matrix[4][1]; + const float clev = (mix->matrix[0][2] + mix->matrix[1][2]) * 0.5f; + const float llev = (mix->matrix[0][3] + mix->matrix[1][3]) * 0.5f; + const float slev0 = mix->matrix[0][4]; + const float slev1 = mix->matrix[1][5]; if (mix->zero) { memset(d[0], 0, n_samples * sizeof(float)); @@ -350,8 +350,8 @@ channelmix_f32_5p1_4_c(struct channelmix *mix, uint32_t n_dst, void * SPA_RESTRI uint32_t i, n; float **d = (float **) dst; const float **s = (const float **) src; - const float clev = mix->matrix[2][0]; - const float llev = mix->matrix[3][0]; + const float clev = mix->matrix[0][2]; + const float llev = mix->matrix[0][3]; const float v0 = mix->matrix[0][0]; const float v1 = mix->matrix[1][1]; const float v4 = mix->matrix[2][4]; diff --git a/spa/plugins/audioconvert/channelmix-ops-sse.c b/spa/plugins/audioconvert/channelmix-ops-sse.c index 1fde89d67..84c2e3aa4 100644 --- a/spa/plugins/audioconvert/channelmix-ops-sse.c +++ b/spa/plugins/audioconvert/channelmix-ops-sse.c @@ -145,10 +145,10 @@ channelmix_f32_5p1_2_sse(struct channelmix *mix, uint32_t n_dst, void * SPA_REST const float **s = (const float **) src; const __m128 v0 = _mm_set1_ps(mix->matrix[0][0]); const __m128 v1 = _mm_set1_ps(mix->matrix[1][1]); - const __m128 clev = _mm_set1_ps(mix->matrix[2][0]); - const __m128 llev = _mm_set1_ps(mix->matrix[3][0]); - const __m128 slev0 = _mm_set1_ps(mix->matrix[4][0]); - const __m128 slev1 = _mm_set1_ps(mix->matrix[4][1]); + const __m128 clev = _mm_set1_ps((mix->matrix[0][2] + mix->matrix[1][2]) * 0.5f); + const __m128 llev = _mm_set1_ps((mix->matrix[0][3] + mix->matrix[1][3]) * 0.5f); + const __m128 slev0 = _mm_set1_ps(mix->matrix[0][4]); + const __m128 slev1 = _mm_set1_ps(mix->matrix[1][5]); __m128 in, ctr; const float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3], *sSL = s[4], *sSR = s[5]; float *dFL = d[0], *dFR = d[1]; @@ -313,10 +313,12 @@ channelmix_f32_5p1_4_sse(struct channelmix *mix, uint32_t n_dst, void * SPA_REST uint32_t i, n, unrolled; float **d = (float **) dst; const float **s = (const float **) src; - const __m128 clev = _mm_set1_ps(mix->matrix[2][2]); - const __m128 llev = _mm_set1_ps(mix->matrix[3][3]); + const __m128 clev = _mm_set1_ps(mix->matrix[0][2]); + const __m128 llev = _mm_set1_ps(mix->matrix[0][3]); const __m128 v0 = _mm_set1_ps(mix->matrix[0][0]); const __m128 v1 = _mm_set1_ps(mix->matrix[1][1]); + const __m128 v4 = _mm_set1_ps(mix->matrix[2][4]); + const __m128 v5 = _mm_set1_ps(mix->matrix[3][5]); __m128 ctr; const float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3], *sSL = s[4], *sSR = s[5]; float *dFL = d[0], *dFR = d[1], *dRL = d[2], *dRR = d[3]; @@ -363,16 +365,16 @@ channelmix_f32_5p1_4_sse(struct channelmix *mix, uint32_t n_dst, void * SPA_REST ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_load_ps(&sLFE[n]), llev)); _mm_store_ps(&dFL[n], _mm_mul_ps(_mm_add_ps(_mm_load_ps(&sFL[n]), ctr), v0)); _mm_store_ps(&dFR[n], _mm_mul_ps(_mm_add_ps(_mm_load_ps(&sFR[n]), ctr), v1)); - _mm_store_ps(&dRL[n], _mm_mul_ps(_mm_load_ps(&sSL[n]), v0)); - _mm_store_ps(&dRR[n], _mm_mul_ps(_mm_load_ps(&sSR[n]), v1)); + _mm_store_ps(&dRL[n], _mm_mul_ps(_mm_load_ps(&sSL[n]), v4)); + _mm_store_ps(&dRR[n], _mm_mul_ps(_mm_load_ps(&sSR[n]), v5)); } for(; n < n_samples; n++) { ctr = _mm_mul_ss(_mm_load_ss(&sFC[n]), clev); ctr = _mm_add_ss(ctr, _mm_mul_ss(_mm_load_ss(&sLFE[n]), llev)); _mm_store_ss(&dFL[n], _mm_mul_ss(_mm_add_ss(_mm_load_ss(&sFL[n]), ctr), v0)); _mm_store_ss(&dFR[n], _mm_mul_ss(_mm_add_ss(_mm_load_ss(&sFR[n]), ctr), v1)); - _mm_store_ss(&dRL[n], _mm_mul_ss(_mm_load_ss(&sSL[n]), v0)); - _mm_store_ss(&dRR[n], _mm_mul_ss(_mm_load_ss(&sSR[n]), v1)); + _mm_store_ss(&dRL[n], _mm_mul_ss(_mm_load_ss(&sSL[n]), v4)); + _mm_store_ss(&dRR[n], _mm_mul_ss(_mm_load_ss(&sSR[n]), v5)); } } }