From 0a4ef3d2c0fecd9bd6130ae5b58fd16e8672d8e0 Mon Sep 17 00:00:00 2001
From: Wim Taymans
Date: Wed, 19 Dec 2018 16:47:20 +0100
Subject: [PATCH] resample: add SSE peaks resampler

---
Review notes (not part of the commit message):
 - hmax_ps(): the shuffle took lane 1 from the unreduced input, so the
   result never included lane 3; both shuffle operands are now t.
 - impl_peaks_process_sse(): the bound of the 4-wide loop was derived from
   chunk alone; when a window starts at an index that is not a multiple of
   4 the loop could step past chunk, reading samples that belong to the
   next window and leaving i > end. The bound now uses chunk - i.
 - The header names of the angle-bracket #include lines were missing from
   this copy. <math.h> (fabsf) and <xmmintrin.h> (the _mm_* intrinsics) are
   restored from the names used; <spa/support/cpu.h> in resample.h is an
   assumption based on the new SPA_CPU_FLAG_SSE / spa_cpu_get_flags() uses
   and needs to be confirmed.
 - Not changed: the calloc() size in impl_peaks_init() multiplies
   sizeof(struct peaks_data) by the size of the float array; a sum looks
   intended - confirm.
 - The index hash of resample-peaks-sse.h predates these changes.

 spa/plugins/audioconvert/resample-peaks-sse.h | 100 +++++++++++++++++++
 spa/plugins/audioconvert/resample-peaks.h     |  28 +++++--
 spa/plugins/audioconvert/resample.c           |  12 ++-
 spa/plugins/audioconvert/resample.h           |   3 +
 4 files changed, 133 insertions(+), 10 deletions(-)
 create mode 100644 spa/plugins/audioconvert/resample-peaks-sse.h

diff --git a/spa/plugins/audioconvert/resample-peaks-sse.h b/spa/plugins/audioconvert/resample-peaks-sse.h
new file mode 100644
index 000000000..acc80244e
--- /dev/null
+++ b/spa/plugins/audioconvert/resample-peaks-sse.h
@@ -0,0 +1,100 @@
+/* Spa
+ *
+ * Copyright © 2018 Wim Taymans
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <math.h>
+
+#include <xmmintrin.h>
+
+/* Return the largest of the four float lanes of val. */
+static inline float hmax_ps(__m128 val)
+{
+	/* t = { val[2], val[3], val[2], val[3] } */
+	__m128 t = _mm_movehl_ps(val, val);
+	/* t[0] = max(val[0], val[2]), t[1] = max(val[1], val[3]) */
+	t = _mm_max_ps(t, val);
+	/* Broadcast t[1]; taking lane 1 from val here would drop val[3]. */
+	val = _mm_shuffle_ps(t, t, 0x55);
+	val = _mm_max_ss(t, val);
+	return _mm_cvtss_f32(val);
+}
+
+/* Peak detector for one channel, processing four samples per step with SSE.
+ *
+ * src holds *in_len float samples and dst has room for *out_len floats. Each
+ * value written to dst is the largest absolute sample of one window; a window
+ * ends at input position ((o_count + 1) * i_rate) / o_rate. On return *in_len
+ * is the number of samples consumed and *out_len the number of values written.
+ * The maximum of a window that is not complete yet is kept in
+ * pd->max_f[channel] for the next call.
+ */
+static void impl_peaks_process_sse(struct resample *r, int channel,
+			void *src, uint32_t *in_len, void *dst, uint32_t *out_len)
+{
+	struct peaks_data *pd = r->data;
+	float *s = src, *d = dst, m;
+	int i, o, end, chunk, unrolled;
+	/* -0.0f has only the sign bit set: andnot with it clears the sign */
+	__m128 in, max, mask = _mm_set_ps1(-0.0f);
+
+	o = i = 0;
+
+	m = pd->max_f[channel];
+	max = _mm_set1_ps(m);
+
+	while (i < *in_len && o < *out_len) {
+		end = ((uint64_t) (pd->o_count + 1) * r->i_rate) / r->o_rate;
+		end = end > pd->i_count ? end - pd->i_count : 0;
+		chunk = SPA_MIN(end, *in_len);
+
+		/* Largest index reachable from i in whole steps of 4 without
+		 * passing chunk; i is not a multiple of 4 in general. */
+		unrolled = chunk - ((chunk - i) & 3);
+
+		for (; i < unrolled; i+=4) {
+			in = _mm_loadu_ps(&s[i]);
+			in = _mm_andnot_ps(mask, in);
+			max = _mm_max_ps(max, in);
+		}
+		for (; i < chunk; i++)
+			m = SPA_MAX(fabsf(s[i]), m);
+
+		if (i == end) {
+			d[o++] = SPA_MAX(hmax_ps(max), m);
+			m = 0.0f;
+			max = _mm_set1_ps(m);
+			pd->o_count++;
+		}
+	}
+	pd->max_f[channel] = SPA_MAX(hmax_ps(max), m);
+
+	*out_len = o;
+	*in_len = i;
+	pd->i_count += i;
+
+	/* Keep the counters bounded: drop i_rate inputs and o_rate outputs. */
+	while (pd->i_count >= r->i_rate) {
+		pd->i_count -= r->i_rate;
+		pd->o_count -= r->o_rate;
+	}
+}
diff --git a/spa/plugins/audioconvert/resample-peaks.h b/spa/plugins/audioconvert/resample-peaks.h
index 6120a34c7..396ea1407 100644
--- a/spa/plugins/audioconvert/resample-peaks.h
+++ b/spa/plugins/audioconvert/resample-peaks.h
@@ -30,6 +30,10 @@ struct peaks_data {
 	float max_f[0];
 };
 
+#if defined (__SSE__)
+#include "resample-peaks-sse.h"
+#endif
+
 static void impl_peaks_free(struct resample *r)
 {
 	if (r->data)
@@ -45,28 +49,28 @@ static void impl_peaks_process(struct resample *r, int channel,
 			void *src, uint32_t *in_len, void *dst, uint32_t *out_len)
 {
 	struct peaks_data *pd = r->data;
-	float *s = src, *d = dst;
+	float *s = src, *d = dst, m;
 	int i, o, end, chunk;
 
 	o = i = 0;
+	m = pd->max_f[channel];
 
 	while (i < *in_len && o < *out_len) {
 		end = ((uint64_t) (pd->o_count + 1) * r->i_rate) / r->o_rate;
 		end = end > pd->i_count ? end - pd->i_count : 0;
 		chunk = SPA_MIN(end, *in_len);
 
-		for (; i < chunk; i++) {
-			float n = fabsf(s[i]);
-			if (n > pd->max_f[channel])
-				pd->max_f[channel] = n;
-		}
+		for (; i < chunk; i++)
+			m = SPA_MAX(fabsf(s[i]), m);
 
 		if (i == end) {
-			d[o++] = pd->max_f[channel];
-			pd->max_f[channel] = 0.0f;
+			d[o++] = m;
+			m = 0.0f;
 			pd->o_count++;
 		}
 	}
+	pd->max_f[channel] = m;
+
 	*out_len = o;
 	*in_len = i;
 	pd->i_count += i;
@@ -89,7 +93,13 @@ static int impl_peaks_init(struct resample *r)
 
 	r->free = impl_peaks_free;
 	r->update_rate = impl_peaks_update_rate;
-	r->process = impl_peaks_process;
+#if defined (__SSE__)
+	if (r->cpu_flags & SPA_CPU_FLAG_SSE)
+		r->process = impl_peaks_process_sse;
+	else
+#endif
+		r->process = impl_peaks_process;
+
 	r->reset = impl_peaks_reset;
 	d = r->data = calloc(1, sizeof(struct peaks_data) * sizeof(float) * r->channels);
 	if (r->data == NULL)
diff --git a/spa/plugins/audioconvert/resample.c b/spa/plugins/audioconvert/resample.c
index 7fb9c5737..d8be27479 100644
--- a/spa/plugins/audioconvert/resample.c
+++ b/spa/plugins/audioconvert/resample.c
@@ -92,6 +92,7 @@ struct impl {
 	struct spa_node node;
 
 	struct spa_log *log;
+	struct spa_cpu *cpu;
 
 	struct props props;
 
@@ -925,10 +926,19 @@ impl_init(const struct spa_handle_factory *factory,
 	this = (struct impl *) handle;
 
 	for (i = 0; i < n_support; i++) {
-		if (support[i].type == SPA_TYPE_INTERFACE_Log)
+		switch (support[i].type) {
+		case SPA_TYPE_INTERFACE_Log:
 			this->log = support[i].data;
+			break;
+		case SPA_TYPE_INTERFACE_CPU:
+			this->cpu = support[i].data;
+			break;
+		}
 	}
 
+	if (this->cpu)
+		this->resample.cpu_flags = spa_cpu_get_flags(this->cpu);
+
 	if (info != NULL && (str = spa_dict_lookup(info, "resample.peaks")) != NULL)
 		this->monitor = atoi(str);
 
diff --git a/spa/plugins/audioconvert/resample.h b/spa/plugins/audioconvert/resample.h
index eb27542b7..0333b031a 100644
--- a/spa/plugins/audioconvert/resample.h
+++ b/spa/plugins/audioconvert/resample.h
@@ -22,7 +22,10 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include <spa/support/cpu.h>
+
 struct resample {
+	uint32_t cpu_flags;
 	uint32_t channels;
 	uint32_t i_rate;
 	uint32_t o_rate;