From 6e9e02b42021944027fa2b13d1d83f4b9fcb5040 Mon Sep 17 00:00:00 2001
From: Wim Taymans
Date: Wed, 7 Sep 2022 16:00:31 +0200
Subject: [PATCH] audioconvert: refactor peaks resampler

Use common code in macro and generate arch specific version.
Compile with -Ofast to optimize some fmaxf calls.
---
Review note: in MAKE_PEAKS, `chunk` is SPA_MIN(end, *in_len), i.e. an
absolute end index into the input, and the removed loops left `i == chunk`
after scanning. The position is therefore set with `i = chunk`; adding
`chunk` to a non-zero `i` can overshoot `end`, so the `i == end` test is
missed and a later `chunk - i` wraps around.

NOTE(review): '-Ofast' is passed to every source of this static library,
not only resample-peaks-sse.c - confirm fast-math is acceptable for them.

 spa/plugins/audioconvert/meson.build          |  2 +-
 .../audioconvert/resample-native-avx.c        |  4 +-
 spa/plugins/audioconvert/resample-native-c.c  |  4 +-
 .../audioconvert/resample-native-neon.c       |  4 +-
 .../audioconvert/resample-native-sse.c        |  4 +-
 .../audioconvert/resample-native-ssse3.c      |  4 +-
 spa/plugins/audioconvert/resample-peaks-c.c   | 50 ++------------
 .../audioconvert/resample-peaks-impl.h        | 60 +++++++++++++++--
 spa/plugins/audioconvert/resample-peaks-sse.c | 67 +++++--------------
 9 files changed, 89 insertions(+), 110 deletions(-)

diff --git a/spa/plugins/audioconvert/meson.build b/spa/plugins/audioconvert/meson.build
index d84699242..ab0581e76 100644
--- a/spa/plugins/audioconvert/meson.build
+++ b/spa/plugins/audioconvert/meson.build
@@ -27,7 +27,7 @@ if have_sse
       'resample-peaks-sse.c',
       'volume-ops-sse.c',
       'channelmix-ops-sse.c' ],
-    c_args : [sse_args, '-O3', '-DHAVE_SSE'],
+    c_args : [sse_args, '-Ofast', '-DHAVE_SSE'],
     dependencies : [ spa_dep ],
     install : false
     )
diff --git a/spa/plugins/audioconvert/resample-native-avx.c b/spa/plugins/audioconvert/resample-native-avx.c
index b23c0b729..136d6cb2d 100644
--- a/spa/plugins/audioconvert/resample-native-avx.c
+++ b/spa/plugins/audioconvert/resample-native-avx.c
@@ -27,7 +27,7 @@
 #include
 #include
 
-static void inner_product_avx(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
 		const float * SPA_RESTRICT taps, uint32_t n_taps)
 {
 	__m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty;
@@ -56,7 +56,7 @@ static void inner_product_avx(float *d, const float * SPA_RESTRICT s,
 	_mm_store_ss(d, sx[0]);
 }
 
-static void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
 	const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
 	uint32_t n_taps)
 {
diff --git a/spa/plugins/audioconvert/resample-native-c.c b/spa/plugins/audioconvert/resample-native-c.c
index 3fe50b769..ce6c57d92 100644
--- a/spa/plugins/audioconvert/resample-native-c.c
+++ b/spa/plugins/audioconvert/resample-native-c.c
@@ -24,7 +24,7 @@
 
 #include "resample-native-impl.h"
 
-static void inner_product_c(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_c(float *d, const float * SPA_RESTRICT s,
 		const float * SPA_RESTRICT taps, uint32_t n_taps)
 {
 	float sum = 0.0f;
@@ -40,7 +40,7 @@ static void inner_product_c(float *d, const float * SPA_RESTRICT s,
 	*d = sum;
 }
 
-static void inner_product_ip_c(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_ip_c(float *d, const float * SPA_RESTRICT s,
 	const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
 	uint32_t n_taps)
 {
diff --git a/spa/plugins/audioconvert/resample-native-neon.c b/spa/plugins/audioconvert/resample-native-neon.c
index afe68914a..079152afd 100644
--- a/spa/plugins/audioconvert/resample-native-neon.c
+++ b/spa/plugins/audioconvert/resample-native-neon.c
@@ -26,7 +26,7 @@
 
 #include
 
-static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_neon(float *d, const float * SPA_RESTRICT s,
 		const float * SPA_RESTRICT taps, uint32_t n_taps)
 {
 	unsigned int remainder = n_taps % 16;
@@ -137,7 +137,7 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
 #endif
 }
 
-static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
 	const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
 	uint32_t n_taps)
 {
diff --git a/spa/plugins/audioconvert/resample-native-sse.c b/spa/plugins/audioconvert/resample-native-sse.c
index d0ebe39ec..fcdb32c08 100644
--- a/spa/plugins/audioconvert/resample-native-sse.c
+++ b/spa/plugins/audioconvert/resample-native-sse.c
@@ -26,7 +26,7 @@
 
 #include
 
-static void inner_product_sse(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_sse(float *d, const float * SPA_RESTRICT s,
 		const float * SPA_RESTRICT taps, uint32_t n_taps)
 {
 	__m128 sum = _mm_setzero_ps();
@@ -68,7 +68,7 @@ static void inner_product_sse(float *d, const float * SPA_RESTRICT s,
 	_mm_store_ss(d, sum);
 }
 
-static void inner_product_ip_sse(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_ip_sse(float *d, const float * SPA_RESTRICT s,
 	const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
 	uint32_t n_taps)
 {
diff --git a/spa/plugins/audioconvert/resample-native-ssse3.c b/spa/plugins/audioconvert/resample-native-ssse3.c
index c39bc610a..ac3675f03 100644
--- a/spa/plugins/audioconvert/resample-native-ssse3.c
+++ b/spa/plugins/audioconvert/resample-native-ssse3.c
@@ -26,7 +26,7 @@
 
 #include
 
-static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
 		const float * SPA_RESTRICT taps, uint32_t n_taps)
 {
 	__m128 sum = _mm_setzero_ps();
@@ -97,7 +97,7 @@ static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
 	_mm_store_ss(d, sum);
 }
 
-static void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
+static inline void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
 	const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
 	uint32_t n_taps)
 {
diff --git a/spa/plugins/audioconvert/resample-peaks-c.c b/spa/plugins/audioconvert/resample-peaks-c.c
index 3d27016e5..161e06fe0 100644
--- a/spa/plugins/audioconvert/resample-peaks-c.c
+++ b/spa/plugins/audioconvert/resample-peaks-c.c
@@ -26,48 +26,12 @@
 
 #include "resample-peaks-impl.h"
 
-void resample_peaks_process_c(struct resample *r,
-		const void * SPA_RESTRICT src[], uint32_t *in_len,
-		void * SPA_RESTRICT dst[], uint32_t *out_len)
+static inline float find_abs_max_c(const float *s, uint32_t n_samples, float m)
 {
-	struct peaks_data *pd = r->data;
-	uint32_t c, i, o, end, chunk, o_count, i_count;
-
-	if (SPA_UNLIKELY(r->channels == 0))
-		return;
-
-	for (c = 0; c < r->channels; c++) {
-		const float *s = src[c];
-		float *d = dst[c], m = pd->max_f[c];
-
-		o_count = pd->o_count;
-		i_count = pd->i_count;
-		o = i = 0;
-
-		while (i < *in_len && o < *out_len) {
-			end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
-			end = end > i_count ? end - i_count : 0;
-			chunk = SPA_MIN(end, *in_len);
-
-			for (; i < chunk; i++)
-				m = SPA_MAX(fabsf(s[i]), m);
-
-			if (i == end) {
-				d[o++] = m;
-				m = 0.0f;
-				o_count++;
-			}
-		}
-		pd->max_f[c] = m;
-	}
-
-	*out_len = o;
-	*in_len = i;
-	pd->o_count = o_count;
-	pd->i_count = i_count + i;
-
-	while (pd->i_count >= r->i_rate) {
-		pd->i_count -= r->i_rate;
-		pd->o_count -= r->o_rate;
-	}
+	uint32_t n;
+	for (n = 0; n < n_samples; n++)
+		m = fmaxf(fabsf(s[n]), m);
+	return m;
 }
+
+MAKE_PEAKS(c);
diff --git a/spa/plugins/audioconvert/resample-peaks-impl.h b/spa/plugins/audioconvert/resample-peaks-impl.h
index 7a39af078..9d9d55cff 100644
--- a/spa/plugins/audioconvert/resample-peaks-impl.h
+++ b/spa/plugins/audioconvert/resample-peaks-impl.h
@@ -34,11 +34,59 @@ struct peaks_data {
 	float max_f[];
 };
 
-void resample_peaks_process_c(struct resample *r,
-		const void * SPA_RESTRICT src[], uint32_t *in_len,
-		void * SPA_RESTRICT dst[], uint32_t *out_len);
+#define DEFINE_PEAKS(arch)							\
+void resample_peaks_process_##arch(struct resample *r,				\
+	const void * SPA_RESTRICT src[], uint32_t *in_len,			\
+	void * SPA_RESTRICT dst[], uint32_t *out_len)
+
+#define MAKE_PEAKS(arch)							\
+DEFINE_PEAKS(arch)								\
+{										\
+	struct peaks_data *pd = r->data;					\
+	uint32_t c, i, o, end, chunk, i_count, o_count;				\
+										\
+	if (SPA_UNLIKELY(r->channels == 0))					\
+		return;								\
+										\
+	for (c = 0; c < r->channels; c++) {					\
+		const float *s = src[c];					\
+		float *d = dst[c], m = pd->max_f[c];				\
+										\
+		o_count = pd->o_count;						\
+		i_count = pd->i_count;						\
+		o = i = 0;							\
+										\
+		while (i < *in_len && o < *out_len) {				\
+			end = ((uint64_t) (o_count + 1)				\
+				* r->i_rate) / r->o_rate;			\
+			end = end > i_count ? end - i_count : 0;		\
+			chunk = SPA_MIN(end, *in_len);				\
+										\
+			m = find_abs_max_##arch(&s[i], chunk - i, m);		\
+			/* chunk is an absolute end index, not a length */	\
+			i = chunk;						\
+										\
+			if (i == end) {						\
+				d[o++] = m;					\
+				m = 0.0f;					\
+				o_count++;					\
+			}							\
+		}								\
+		pd->max_f[c] = m;						\
+	}									\
+	*out_len = o;								\
+	*in_len = i;								\
+	pd->o_count = o_count;							\
+	pd->i_count = i_count + i;						\
+										\
+	while (pd->i_count >= r->i_rate) {					\
+		pd->i_count -= r->i_rate;					\
+		pd->o_count -= r->o_rate;					\
+	}									\
+}
+
+
+DEFINE_PEAKS(c);
 #if defined (HAVE_SSE)
-void resample_peaks_process_sse(struct resample *r,
-		const void * SPA_RESTRICT src[], uint32_t *in_len,
-		void * SPA_RESTRICT dst[], uint32_t *out_len);
+DEFINE_PEAKS(sse);
 #endif
diff --git a/spa/plugins/audioconvert/resample-peaks-sse.c b/spa/plugins/audioconvert/resample-peaks-sse.c
index 13886efdb..f13919832 100644
--- a/spa/plugins/audioconvert/resample-peaks-sse.c
+++ b/spa/plugins/audioconvert/resample-peaks-sse.c
@@ -37,60 +37,27 @@ static inline float hmax_ps(__m128 val)
 	return _mm_cvtss_f32(val);
 }
 
-void resample_peaks_process_sse(struct resample *r,
-		const void * SPA_RESTRICT src[], uint32_t *in_len,
-		void * SPA_RESTRICT dst[], uint32_t *out_len)
+static inline float find_abs_max_sse(const float *s, uint32_t n_samples, float m)
 {
-	struct peaks_data *pd = r->data;
-	uint32_t c, i, o, end, chunk, unrolled, i_count, o_count;
-	__m128 in, max, mask = _mm_andnot_ps(_mm_set_ps1(-0.0f),
-			_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
+	__m128 in, max;
+	uint32_t n, unrolled;
+	const __m128 mask = _mm_andnot_ps(
+			_mm_set_ps1(-0.0f),
+			_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
 
-	if (r->channels == 0)
-		return;
+	max = _mm_set1_ps(m);
 
-	for (c = 0; c < r->channels; c++) {
-		const float *s = src[c];
-		float *d = dst[c], m = pd->max_f[c];
+	unrolled = n_samples & ~3;
 
-		o_count = pd->o_count;
-		i_count = pd->i_count;
-		o = i = 0;
-
-		max = _mm_set1_ps(m);
-
-		while (i < *in_len && o < *out_len) {
-			end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
-			end = end > i_count ? end - i_count : 0;
-			chunk = SPA_MIN(end, *in_len);
-
-			unrolled = chunk - ((chunk - i) & 3);
-
-			for (; i < unrolled; i+=4) {
-				in = _mm_loadu_ps(&s[i]);
-				in = _mm_and_ps(mask, in);
-				max = _mm_max_ps(in, max);
-			}
-			for (; i < chunk; i++)
-				m = SPA_MAX(fabsf(s[i]), m);
-
-			if (i == end) {
-				d[o++] = SPA_MAX(hmax_ps(max), m);
-				m = 0.0f;
-				max = _mm_set1_ps(m);
-				o_count++;
-			}
-		}
-		pd->max_f[c] = SPA_MAX(hmax_ps(max), m);
+	for (n = 0; n < unrolled; n+=4) {
+		in = _mm_loadu_ps(&s[n]);
+		in = _mm_and_ps(mask, in);
+		max = _mm_max_ps(in, max);
 	}
+	for (; n < n_samples; n++)
+		m = fmaxf(fabsf(s[n]), m);
 
-	*out_len = o;
-	*in_len = i;
-	pd->o_count = o_count;
-	pd->i_count = i_count + i;
-
-	while (pd->i_count >= r->i_rate) {
-		pd->i_count -= r->i_rate;
-		pd->o_count -= r->o_rate;
-	}
+	return fmaxf(hmax_ps(max), m);
 }
+
+MAKE_PEAKS(sse);