diff --git a/spa/plugins/audiomixer/meson.build b/spa/plugins/audiomixer/meson.build index c6aa5f2b3..4312566f8 100644 --- a/spa/plugins/audiomixer/meson.build +++ b/spa/plugins/audiomixer/meson.build @@ -35,6 +35,16 @@ if have_sse2 simd_cargs += ['-DHAVE_SSE2'] simd_dependencies += audiomixer_sse2 endif +if have_avx and have_fma + audiomixer_avx = static_library('audiomixer_avx', + ['mix-ops-avx.c'], + c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'], + include_directories : [spa_inc], + install : false + ) + simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA'] + simd_dependencies += audiomixer_avx +endif audiomixerlib = shared_library('spa-audiomixer', audiomixer_sources, diff --git a/spa/plugins/audiomixer/mix-ops-avx.c b/spa/plugins/audiomixer/mix-ops-avx.c new file mode 100644 index 000000000..7b995d67d --- /dev/null +++ b/spa/plugins/audiomixer/mix-ops-avx.c @@ -0,0 +1,135 @@ +/* Spa + * + * Copyright © 2019 Wim Taymans + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include + +#include "mix-ops.h" + +#include + +static inline void mix_4(float * dst, + const float * SPA_RESTRICT src0, + const float * SPA_RESTRICT src1, + const float * SPA_RESTRICT src2, + uint32_t n_samples) +{ + uint32_t n, unrolled; + + if (SPA_IS_ALIGNED(src0, 32) && + SPA_IS_ALIGNED(src1, 32) && + SPA_IS_ALIGNED(src2, 32) && + SPA_IS_ALIGNED(dst, 32)) + unrolled = n_samples & ~15; + else + unrolled = 0; + + for (n = 0; n < unrolled; n += 16) { + __m256 in1[4], in2[4]; + + in1[0] = _mm256_load_ps(&dst[n + 0]); + in2[0] = _mm256_load_ps(&dst[n + 8]); + in1[1] = _mm256_load_ps(&src0[n + 0]); + in2[1] = _mm256_load_ps(&src0[n + 8]); + in1[2] = _mm256_load_ps(&src1[n + 0]); + in2[2] = _mm256_load_ps(&src1[n + 8]); + in1[3] = _mm256_load_ps(&src2[n + 0]); + in2[3] = _mm256_load_ps(&src2[n + 8]); + + in1[0] = _mm256_add_ps(in1[0], in1[1]); + in2[0] = _mm256_add_ps(in2[0], in2[1]); + in1[2] = _mm256_add_ps(in1[2], in1[3]); + in2[2] = _mm256_add_ps(in2[2], in2[3]); + in1[0] = _mm256_add_ps(in1[0], in1[2]); + in2[0] = _mm256_add_ps(in2[0], in2[2]); + + _mm256_store_ps(&dst[n + 0], in1[0]); + _mm256_store_ps(&dst[n + 8], in2[0]); + } + for (; n < n_samples; n++) { + __m128 in[4]; + in[0] = _mm_load_ss(&dst[n]), + in[1] = _mm_load_ss(&src0[n]), + in[2] = _mm_load_ss(&src1[n]), + in[3] = _mm_load_ss(&src2[n]), + in[0] = _mm_add_ss(in[0], in[1]); + in[2] = _mm_add_ss(in[2], in[3]); + in[0] = _mm_add_ss(in[0], in[2]); + _mm_store_ss(&dst[n], in[0]); + } +} + + +static inline void mix_2(float * dst, const float * SPA_RESTRICT src, uint32_t n_samples) +{ + uint32_t n, unrolled; + + if (SPA_IS_ALIGNED(src, 32) && + SPA_IS_ALIGNED(dst, 32)) + unrolled = n_samples & ~15; + else + unrolled = 0; + + for (n = 0; n < unrolled; n += 16) { + __m256 in1[2], in2[2]; + + in1[0] = _mm256_load_ps(&dst[n + 0]); + in1[1] = _mm256_load_ps(&dst[n + 8]); + in2[0] = _mm256_load_ps(&src[n + 0]); + in2[1] = _mm256_load_ps(&src[n + 8]); + + in1[0] = _mm256_add_ps(in1[0], in2[0]); + in1[1] = _mm256_add_ps(in1[1], in2[1]); + + _mm256_store_ps(&dst[n + 0], in1[0]); + _mm256_store_ps(&dst[n + 8], in1[1]); + } + for (; n < n_samples; n++) { + __m128 in1[0], in2[0]; + in1[0] = _mm_load_ss(&dst[n]), + in2[0] = _mm_load_ss(&src[n]), + in1[0] = _mm_add_ss(in1[0], in2[0]); + _mm_store_ss(&dst[n], in1[0]); + } +} + +void +mix_f32_avx(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], + uint32_t n_src, uint32_t n_samples) +{ + uint32_t i; + + if (n_src == 0) + memset(dst, 0, n_samples * sizeof(float)); + else if (dst != src[0]) + memcpy(dst, src[0], n_samples * sizeof(float)); + + for (i = 1; i + 2 < n_src; i += 3) + mix_4(dst, src[i], src[i + 1], src[i + 2], n_samples); + for (; i < n_src; i++) + mix_2(dst, src[i], n_samples); +} diff --git a/spa/plugins/audiomixer/mix-ops.c b/spa/plugins/audiomixer/mix-ops.c index 6f6acbafe..3d1c0f8e5 100644 --- a/spa/plugins/audiomixer/mix-ops.c +++ b/spa/plugins/audiomixer/mix-ops.c @@ -46,6 +46,10 @@ struct mix_info { static struct mix_info mix_table[] = { /* f32 */ +#if defined(HAVE_AVX) + { SPA_AUDIO_FORMAT_F32, 1, SPA_CPU_FLAG_AVX, 4, mix_f32_avx }, + { SPA_AUDIO_FORMAT_F32P, 1, SPA_CPU_FLAG_AVX, 4, mix_f32_avx }, +#endif #if defined (HAVE_SSE) { SPA_AUDIO_FORMAT_F32, 1, SPA_CPU_FLAG_SSE, 4, mix_f32_sse }, { SPA_AUDIO_FORMAT_F32P, 1, SPA_CPU_FLAG_SSE, 4, mix_f32_sse }, diff --git a/spa/plugins/audiomixer/mix-ops.h b/spa/plugins/audiomixer/mix-ops.h index 692fae3bd..0c878eeca 100644 --- a/spa/plugins/audiomixer/mix-ops.h +++ b/spa/plugins/audiomixer/mix-ops.h @@ -59,3 +59,6 @@ DEFINE_FUNCTION(f32, sse); #if defined(HAVE_SSE2) DEFINE_FUNCTION(f64, sse2); #endif +#if defined(HAVE_AVX) +DEFINE_FUNCTION(f32, avx); +#endif