diff --git a/spa/plugins/audioconvert/biquad.c b/spa/plugins/audioconvert/biquad.c index 9e8bae5f8..27c3fee59 100644 --- a/spa/plugins/audioconvert/biquad.c +++ b/spa/plugins/audioconvert/biquad.c @@ -101,6 +101,9 @@ void biquad_set(struct biquad *bq, enum biquad_type type, double freq) { switch (type) { + case BQ_NONE: + set_coefficient(bq, 1, 0, 0, 1, 0, 0); /* presumably b0 = a0 = 1 and the rest 0, i.e. pass-through; confirm against set_coefficient() */ + break; case BQ_LOWPASS: biquad_lowpass(bq, freq); break; diff --git a/spa/plugins/audioconvert/biquad.h b/spa/plugins/audioconvert/biquad.h index 8b7ecccaa..519a3275b 100644 --- a/spa/plugins/audioconvert/biquad.h +++ b/spa/plugins/audioconvert/biquad.h @@ -25,6 +25,7 @@ struct biquad { /* The type of the biquad filters */ enum biquad_type { + BQ_NONE, BQ_LOWPASS, BQ_HIGHPASS, }; diff --git a/spa/plugins/audioconvert/channelmix-ops-c.c b/spa/plugins/audioconvert/channelmix-ops-c.c index 47b08f83e..2e4d25a20 100644 --- a/spa/plugins/audioconvert/channelmix-ops-c.c +++ b/spa/plugins/audioconvert/channelmix-ops-c.c @@ -2,6 +2,8 @@ /* SPDX-FileCopyrightText: Copyright © 2018 Wim Taymans */ /* SPDX-License-Identifier: MIT */ +#include <float.h> + #include "channelmix-ops.h" static inline void clear_c(float *d, uint32_t n_samples) @@ -63,6 +65,43 @@ channelmix_copy_c(struct channelmix *mix, void * SPA_RESTRICT dst[], vol_c(d[i], s[i], mix->matrix[i][i], n_samples); } +static void lr4_process_c(struct lr4 *lr4, float *dst, const float *src, const float vol, int samples) +{ /* Feed src through the lr4->bq biquad twice in series, scale by vol and write dst; the filter state lives in *lr4. */ + float x1 = lr4->x1; + float x2 = lr4->x2; + float y1 = lr4->y1; + float y2 = lr4->y2; + float b0 = lr4->bq.b0; + float b1 = lr4->bq.b1; + float b2 = lr4->bq.b2; + float a1 = lr4->bq.a1; + float a2 = lr4->bq.a2; + float x, y, z; + int i; + + if (vol == 0.0f || !lr4->active) { /* bypass the filter and hand dst/src/vol to vol_c() */ + vol_c(dst, src, vol, samples); + return; + } + + for (i = 0; i < samples; i++) { + x = src[i]; + y = b0 * x + x1; /* first stage output */ + x1 = b1 * x - a1 * y + x2; + x2 = b2 * x - a2 * y; + z = b0 * y + y1; /* second stage output */ + y1 = b1 * y - a1 * z + y2; + y2 = b2 * y - a2 * z; + dst[i] = z * vol; + } +#define F(x)
(-FLT_MIN < (x) && (x) < FLT_MIN ? 0.0f : (x)) + lr4->x1 = F(x1); /* F() stores 0 for values smaller in magnitude than FLT_MIN */ + lr4->x2 = F(x2); + lr4->y1 = F(y1); + lr4->y2 = F(y2); +#undef F +} + #define _M(ch) (1UL << SPA_AUDIO_CHANNEL_ ## ch) void @@ -100,10 +139,10 @@ channelmix_f32_n_m_c(struct channelmix *mix, void * SPA_RESTRICT dst[], if (n_j == 0) { clear_c(di, n_samples); } else if (n_j == 1) { - lr4_process(&mix->lr4[i], di, sj[0], mj[0], n_samples); + lr4_process_c(&mix->lr4[i], di, sj[0], mj[0], n_samples); } else { conv_c(di, sj, mj, n_j, n_samples); - lr4_process(&mix->lr4[i], di, di, 1.0f, n_samples); + lr4_process_c(&mix->lr4[i], di, di, 1.0f, n_samples); } } } @@ -239,8 +278,8 @@ channelmix_f32_2_3p1_c(struct channelmix *mix, void * SPA_RESTRICT dst[], d[2][n] = c * 0.5f; } } - lr4_process(&mix->lr4[3], d[3], d[2], v3, n_samples); - lr4_process(&mix->lr4[2], d[2], d[2], v2, n_samples); + lr4_process_c(&mix->lr4[3], d[3], d[2], v3, n_samples); + lr4_process_c(&mix->lr4[2], d[2], d[2], v2, n_samples); } } diff --git a/spa/plugins/audioconvert/channelmix-ops-sse.c b/spa/plugins/audioconvert/channelmix-ops-sse.c index 36840286d..7a300b45f 100644 --- a/spa/plugins/audioconvert/channelmix-ops-sse.c +++ b/spa/plugins/audioconvert/channelmix-ops-sse.c @@ -5,6 +5,7 @@ #include "channelmix-ops.h" #include <xmmintrin.h> +#include <float.h> static inline void clear_sse(float *d, uint32_t n_samples) { @@ -149,6 +150,113 @@ void channelmix_copy_sse(struct channelmix *mix, void * SPA_RESTRICT dst[], vol_sse(d[i], s[i], mix->matrix[i][i], n_samples); } +static void lr4_process_sse(struct lr4 *lr4, float *dst, const float *src, const float vol, int samples) +{ /* SSE version of the two-stage filter for one channel: lane 0 of each stage is its output, lanes 1 and 2 are its two state updates. */ + __m128 x, y, z; + __m128 b012; + __m128 a12; + __m128 x12, y12, v; + int i; + + if (vol == 0.0f || !lr4->active) { /* bypass the filter and hand dst/src/vol to vol_sse() */ + vol_sse(dst, src, vol, samples); + return; + } + + b012 = _mm_setr_ps(lr4->bq.b0, lr4->bq.b1, lr4->bq.b2, 0.0f); /* b0 b1 b2 0 */ + a12 = _mm_setr_ps(0.0f, lr4->bq.a1, lr4->bq.a2, 0.0f); /* 0 a1 a2 0 */ + x12 = _mm_setr_ps(lr4->x1, lr4->x2, 0.0f, 0.0f); /* x1 x2 0 0 */ +
y12 = _mm_setr_ps(lr4->y1, lr4->y2, 0.0f, 0.0f); /* y1 y2 0 0 */ + v = _mm_setr_ps(vol, vol, 0.0f, 0.0f); + + for (i = 0; i < samples; i++) { + x = _mm_load1_ps(&src[i]); /* x x x x */ + + z = _mm_mul_ps(x, b012); /* b0*x b1*x b2*x 0 */ + z = _mm_add_ps(z, x12); /* b0*x+x1 b1*x+x2 b2*x 0 */ + y = _mm_shuffle_ps(z, z, _MM_SHUFFLE(0,0,0,0)); /* b0*x+x1 b0*x+x1 b0*x+x1 b0*x+x1 = y*/ + x = _mm_mul_ps(y, a12); /* 0 a1*y a2*y 0 */ + x = _mm_sub_ps(z, x); /* y x1 x2 0 */ + x12 = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3,3,2,1)); /* x1 x2 0 0*/ + + z = _mm_mul_ps(y, b012); /* b0*y b1*y b2*y 0 */ + z = _mm_add_ps(z, y12); /* b0*y+y1 b1*y+y2 b2*y 0 */ + x = _mm_shuffle_ps(z, z, _MM_SHUFFLE(0,0,0,0)); /* broadcast b0*y+y1 = second stage output, called out below */ + y = _mm_mul_ps(x, a12); /* 0 a1*out a2*out 0 */ + y = _mm_sub_ps(z, y); /* out y1 y2 0 */ + y12 = _mm_shuffle_ps(y, y, _MM_SHUFFLE(3,3,2,1)); /* y1 y2 0 0 */ + + x = _mm_mul_ps(x, v); /* lane 0 = out * vol */ + _mm_store_ss(&dst[i], x); + } +#define F(x) (-FLT_MIN < (x) && (x) < FLT_MIN ? 0.0f : (x)) + lr4->x1 = F(x12[0]); /* [] on __m128 is a GCC/Clang vector extension */ + lr4->x2 = F(x12[1]); + lr4->y1 = F(y12[0]); + lr4->y2 = F(y12[1]); +#undef F +} + +static void lr4_process_2_sse(struct lr4 *lr40, struct lr4 *lr41, float *dst0, float *dst1, + const float *src0, const float *src1, const float vol0, const float vol1, uint32_t samples) +{ /* Filters two channels at once: lane 0 uses lr40/src0/dst0/vol0, lane 1 uses lr41/src1/dst1/vol1. There is no vol == 0 / !active shortcut in this function. */ + __m128 x, y, z; + __m128 b0, b1, b2; + __m128 a1, a2; + __m128 x1, x2; + __m128 y1, y2, v; + uint32_t i; + + b0 = _mm_setr_ps(lr40->bq.b0, lr41->bq.b0, 0.0f, 0.0f); + b1 = _mm_setr_ps(lr40->bq.b1, lr41->bq.b1, 0.0f, 0.0f); + b2 = _mm_setr_ps(lr40->bq.b2, lr41->bq.b2, 0.0f, 0.0f); + a1 = _mm_setr_ps(lr40->bq.a1, lr41->bq.a1, 0.0f, 0.0f); + a2 = _mm_setr_ps(lr40->bq.a2, lr41->bq.a2, 0.0f, 0.0f); + x1 = _mm_setr_ps(lr40->x1, lr41->x1, 0.0f, 0.0f); + x2 = _mm_setr_ps(lr40->x2, lr41->x2, 0.0f, 0.0f); + y1 = _mm_setr_ps(lr40->y1, lr41->y1, 0.0f, 0.0f); + y2 = _mm_setr_ps(lr40->y2, lr41->y2, 0.0f, 0.0f); + v = _mm_setr_ps(vol0, vol1, 0.0f, 0.0f); + + for (i = 0; i < samples; i++) { + x = _mm_setr_ps(src0[i], src1[i], 0.0f, 0.0f); + + y = _mm_mul_ps(x, b0); /* y = x * b0 */ + y = _mm_add_ps(y, x1); /* y = x * b0 + x1*/ + z =
_mm_mul_ps(y, a1); /* z = a1 * y */ + x1 = _mm_mul_ps(x, b1); /* x1 = x * b1 */ + x1 = _mm_add_ps(x1, x2); /* x1 = x * b1 + x2*/ + x1 = _mm_sub_ps(x1, z); /* x1 = x * b1 + x2 - a1 * y*/ + z = _mm_mul_ps(y, a2); /* z = a2 * y */ + x2 = _mm_mul_ps(x, b2); /* x2 = x * b2 */ + x2 = _mm_sub_ps(x2, z); /* x2 = x * b2 - a2 * y*/ + + x = _mm_mul_ps(y, b0); /* x = y * b0 (x now holds the second stage) */ + x = _mm_add_ps(x, y1); /* x = y * b0 + y1 = second stage output */ + z = _mm_mul_ps(x, a1); /* z = a1 * x */ + y1 = _mm_mul_ps(y, b1); /* y1 = y * b1 */ + y1 = _mm_add_ps(y1, y2); /* y1 = y * b1 + y2 */ + y1 = _mm_sub_ps(y1, z); /* y1 = y * b1 + y2 - a1 * x */ + z = _mm_mul_ps(x, a2); /* z = a2 * x */ + y2 = _mm_mul_ps(y, b2); /* y2 = y * b2 */ + y2 = _mm_sub_ps(y2, z); /* y2 = y * b2 - a2 * x */ + + x = _mm_mul_ps(x, v); /* lane 0 *= vol0, lane 1 *= vol1 */ + dst0[i] = x[0]; /* [] on __m128 is a GCC/Clang vector extension */ + dst1[i] = x[1]; + } +#define F(x) (-FLT_MIN < (x) && (x) < FLT_MIN ? 0.0f : (x)) + lr40->x1 = F(x1[0]); + lr40->x2 = F(x2[0]); + lr40->y1 = F(y1[0]); + lr40->y2 = F(y2[0]); + lr41->x1 = F(x1[1]); + lr41->x2 = F(x2[1]); + lr41->y1 = F(y1[1]); + lr41->y2 = F(y2[1]); +#undef F +} + void channelmix_f32_n_m_sse(struct channelmix *mix, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_samples) @@ -172,13 +280,10 @@ channelmix_f32_n_m_sse(struct channelmix *mix, void * SPA_RESTRICT dst[], if (n_j == 0) { clear_sse(di, n_samples); } else if (n_j == 1) { - if (mix->lr4[i].active) - lr4_process(&mix->lr4[i], di, sj[0], mj[0], n_samples); - else - vol_sse(di, sj[0], mj[0], n_samples); + lr4_process_sse(&mix->lr4[i], di, sj[0], mj[0], n_samples); } else { conv_sse(di, sj, mj, n_j, n_samples); - lr4_process(&mix->lr4[i], di, di, 1.0f, n_samples); + lr4_process_sse(&mix->lr4[i], di, di, 1.0f, n_samples); } } } @@ -239,8 +344,7 @@ channelmix_f32_2_3p1_sse(struct channelmix *mix, void * SPA_RESTRICT dst[], _mm_store_ss(&d[2][n], _mm_mul_ss(c[0], mh)); } } - lr4_process(&mix->lr4[3], d[3], d[2], v3, n_samples); - lr4_process(&mix->lr4[2], d[2], d[2], v2, n_samples); +
lr4_process_2_sse(&mix->lr4[3], &mix->lr4[2], d[3], d[2], d[2], d[2], v3, v2, n_samples); } } diff --git a/spa/plugins/audioconvert/channelmix-ops.c b/spa/plugins/audioconvert/channelmix-ops.c index 553ac7306..015d285ed 100644 --- a/spa/plugins/audioconvert/channelmix-ops.c +++ b/spa/plugins/audioconvert/channelmix-ops.c @@ -673,7 +673,7 @@ done: spa_log_info(mix->log, "channel %d is FC cutoff:%f", ic, mix->fc_cutoff); lr4_set(&mix->lr4[ic], BQ_LOWPASS, mix->fc_cutoff / mix->freq); } else { - mix->lr4[ic].active = false; + lr4_set(&mix->lr4[ic], BQ_NONE, mix->fc_cutoff / mix->freq); } ic++; } diff --git a/spa/plugins/audioconvert/crossover.c b/spa/plugins/audioconvert/crossover.c index 177cf6106..ba3eaf023 100644 --- a/spa/plugins/audioconvert/crossover.c +++ b/spa/plugins/audioconvert/crossover.c @@ -15,50 +15,5 @@ void lr4_set(struct lr4 *lr4, enum biquad_type type, float freq) lr4->x2 = 0; lr4->y1 = 0; lr4->y2 = 0; - lr4->z1 = 0; - lr4->z2 = 0; - lr4->active = true; -} - -void lr4_process(struct lr4 *lr4, float *dst, const float *src, const float vol, int samples) -{ - float x1 = lr4->x1; - float x2 = lr4->x2; - float y1 = lr4->y1; - float y2 = lr4->y2; - float b0 = lr4->bq.b0; - float b1 = lr4->bq.b1; - float b2 = lr4->bq.b2; - float a1 = lr4->bq.a1; - float a2 = lr4->bq.a2; - float x, y, z; - int i; - - if (vol == 0.0f) { - memset(dst, 0, samples * sizeof(float)); - return; - } else if (!lr4->active) { - if (src != dst || vol != 1.0f) { - for (i = 0; i < samples; i++) - dst[i] = src[i] * vol; - } - return; - } - - for (i = 0; i < samples; i++) { - x = src[i]; - y = b0 * x + x1; - x1 = b1 * x - a1 * y + x2; - x2 = b2 * x - a2 * y; - z = b0 * y + y1; - y1 = b1 * y - a1 * z + y2; - y2 = b2 * y - a2 * z; - dst[i] = z * vol; - } -#define F(x) (-FLT_MIN < (x) && (x) < FLT_MIN ?
0.0f : (x)) - lr4->x1 = F(x1); - lr4->x2 = F(x2); - lr4->y1 = F(y1); - lr4->y2 = F(y2); -#undef F + lr4->active = type != BQ_NONE; /* BQ_NONE leaves the filter marked inactive */ } diff --git a/spa/plugins/audioconvert/crossover.h b/spa/plugins/audioconvert/crossover.h index b6f458baf..34bf80f80 100644 --- a/spa/plugins/audioconvert/crossover.h +++ b/spa/plugins/audioconvert/crossover.h @@ -20,7 +20,6 @@ struct lr4 { struct biquad bq; float x1, x2; float y1, y2; - float z1, z2; bool active; };