diff --git a/spa/plugins/audioconvert/channelmix-ops-avx.c b/spa/plugins/audioconvert/channelmix-ops-avx.c
new file mode 100644
index 000000000..08d8e2b00
--- /dev/null
+++ b/spa/plugins/audioconvert/channelmix-ops-avx.c
@@ -0,0 +1,78 @@
+/* Spa */
+/* SPDX-FileCopyrightText: Copyright © 2018 Wim Taymans */
+/* SPDX-License-Identifier: MIT */
+
+#include "channelmix-ops.h"
+
+#include <string.h>
+#include <xmmintrin.h>
+#include <immintrin.h>
+
+/* Zero n_samples floats starting at d. */
+static inline void clear_avx(float *d, uint32_t n_samples)
+{
+	memset(d, 0, n_samples * sizeof(float));
+}
+
+/* Copy n_samples floats from s to d. */
+static inline void copy_avx(float *d, const float *s, uint32_t n_samples)
+{
+	spa_memcpy(d, s, n_samples * sizeof(float));
+}
+
+/*
+ * Write s[n] * vol to d[n] for every n below n_samples.
+ *
+ * vol == 0 becomes a clear and vol == 1 a plain copy. Otherwise blocks of
+ * 32 samples go through the AVX loop, but only when both d and s are
+ * 32-byte aligned, since that loop uses aligned loads and stores. Whatever
+ * is left (everything, for unaligned buffers) is scaled one sample at a time.
+ */
+static inline void vol_avx(float *d, const float *s, float vol, uint32_t n_samples)
+{
+	uint32_t n, unrolled;
+	if (vol == 0.0f) {
+		clear_avx(d, n_samples);
+	} else if (vol == 1.0f) {
+		copy_avx(d, s, n_samples);
+	} else {
+		__m256 t[4];
+		const __m256 v = _mm256_set1_ps(vol);
+		/* gain for the scalar tail, set up once instead of per sample */
+		const __m128 v1 = _mm_set1_ps(vol);
+
+		if (SPA_IS_ALIGNED(d, 32) &&
+		    SPA_IS_ALIGNED(s, 32))
+			unrolled = n_samples & ~31;
+		else
+			unrolled = 0;
+
+		for(n = 0; n < unrolled; n += 32) {
+			t[0] = _mm256_load_ps(&s[n]);
+			t[1] = _mm256_load_ps(&s[n+8]);
+			t[2] = _mm256_load_ps(&s[n+16]);
+			t[3] = _mm256_load_ps(&s[n+24]);
+			_mm256_store_ps(&d[n], _mm256_mul_ps(t[0], v));
+			_mm256_store_ps(&d[n+8], _mm256_mul_ps(t[1], v));
+			_mm256_store_ps(&d[n+16], _mm256_mul_ps(t[2], v));
+			_mm256_store_ps(&d[n+24], _mm256_mul_ps(t[3], v));
+		}
+		/* n continues from where the AVX loop stopped */
+		for(; n < n_samples; n++)
+			_mm_store_ss(&d[n], _mm_mul_ss(_mm_load_ss(&s[n]), v1));
+	}
+}
+
+/*
+ * Per-channel gain: channel i of src is scaled by mix->matrix[i][i] and
+ * written to channel i of dst, for each of the mix->dst_chan channels.
+ */
+void channelmix_copy_avx(struct channelmix *mix, void * SPA_RESTRICT dst[],
+		const void * SPA_RESTRICT src[], uint32_t n_samples)
+{
+	uint32_t i, n_dst = mix->dst_chan;
+	float **d = (float **)dst;
+	const float **s = (const float **)src;
+	for (i = 0; i < n_dst; i++)
+		vol_avx(d[i], s[i], mix->matrix[i][i], n_samples);
+}
diff --git a/spa/plugins/audioconvert/channelmix-ops.c b/spa/plugins/audioconvert/channelmix-ops.c
index c8c01eb81..921270450 100644
--- a/spa/plugins/audioconvert/channelmix-ops.c
+++ b/spa/plugins/audioconvert/channelmix-ops.c
@@ -36,6 +36,11 @@ static const struct channelmix_info {
 	uint32_t cpu_flags;
 } channelmix_table[] =
 {
+#if defined (HAVE_AVX)
+	MAKE(2, MASK_MONO, 2, MASK_MONO, channelmix_copy_avx, SPA_CPU_FLAG_AVX),
+	MAKE(2, MASK_STEREO, 2, MASK_STEREO, channelmix_copy_avx, SPA_CPU_FLAG_AVX),
+	MAKE(EQ, 0, EQ, 0, channelmix_copy_avx, SPA_CPU_FLAG_AVX),
+#endif
 #if defined (HAVE_SSE)
 	MAKE(2, MASK_MONO, 2, MASK_MONO, channelmix_copy_sse, SPA_CPU_FLAG_SSE),
 	MAKE(2, MASK_STEREO, 2, MASK_STEREO, channelmix_copy_sse, SPA_CPU_FLAG_SSE),
diff --git a/spa/plugins/audioconvert/channelmix-ops.h b/spa/plugins/audioconvert/channelmix-ops.h
index 155079cd2..947cc6f34 100644
--- a/spa/plugins/audioconvert/channelmix-ops.h
+++ b/spa/plugins/audioconvert/channelmix-ops.h
@@ -140,4 +140,8 @@ DEFINE_FUNCTION(f32_5p1_4, sse);
 DEFINE_FUNCTION(f32_7p1_4, sse);
 #endif
 
+#if defined (HAVE_AVX)
+DEFINE_FUNCTION(copy, avx);
+#endif
+
 #undef DEFINE_FUNCTION
diff --git a/spa/plugins/audioconvert/meson.build b/spa/plugins/audioconvert/meson.build
index 559db4308..71d5d56cd 100644
--- a/spa/plugins/audioconvert/meson.build
+++ b/spa/plugins/audioconvert/meson.build
@@ -72,6 +72,16 @@ if have_sse41
   simd_cargs += ['-DHAVE_SSE41']
   simd_dependencies += audioconvert_sse41
 endif
+if have_avx
+  audioconvert_avx = static_library('audioconvert_avx',
+    ['channelmix-ops-avx.c'],
+    c_args : [avx_args, '-O3', '-DHAVE_AVX', simd_cargs],
+    dependencies : [ spa_dep ],
+    install : false
+    )
+  simd_cargs += ['-DHAVE_AVX']
+  simd_dependencies += audioconvert_avx
+endif
 if have_avx2 and have_fma
   audioconvert_avx2_fma = static_library('audioconvert_avx2_fma',
     ['resample-native-avx2.c'],