spa: use the right AVX2 flags

Our AVX optimizations are really AVX2, so rename the files and functions
and use the right HAVE_AVX2 define and CPU flags to compile and select
the right functions.

Fixes #5072
This commit is contained in:
Wim Taymans 2026-01-13 12:03:09 +01:00
parent 5871f88b81
commit 8eb60b132c
15 changed files with 66 additions and 66 deletions

View file

@ -156,17 +156,17 @@ int main(int argc, char *argv[])
}
}
#endif
#if defined (HAVE_AVX) && defined(HAVE_FMA)
if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3)) {
#if defined (HAVE_AVX2) && defined(HAVE_FMA)
if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3)) {
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
spa_zero(r);
r.channels = 2;
r.cpu_flags = SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3;
r.cpu_flags = SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3;
r.i_rate = in_rates[i];
r.o_rate = out_rates[i];
r.quality = RESAMPLE_DEFAULT_QUALITY;
resample_native_init(&r);
run_test("native", "avx", &r);
run_test("native", "avx2", &r);
resample_free(&r);
}
}

View file

@ -72,15 +72,15 @@ if have_sse41
simd_cargs += ['-DHAVE_SSE41']
simd_dependencies += audioconvert_sse41
endif
if have_avx and have_fma
audioconvert_avx = static_library('audioconvert_avx',
['resample-native-avx.c'],
c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'],
if have_avx2 and have_fma
audioconvert_avx2_fma = static_library('audioconvert_avx2_fma',
['resample-native-avx2.c'],
c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'],
dependencies : [ spa_dep ],
install : false
)
simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA']
simd_dependencies += audioconvert_avx
simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA']
simd_dependencies += audioconvert_avx2_fma
endif
if have_avx2
audioconvert_avx2 = static_library('audioconvert_avx2',

View file

@ -7,7 +7,7 @@
#include <assert.h>
#include <immintrin.h>
static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
static inline void inner_product_avx2(float *d, const float * SPA_RESTRICT s,
const float * SPA_RESTRICT taps, uint32_t n_taps)
{
__m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty;
@ -36,7 +36,7 @@ static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
_mm_store_ss(d, sx[0]);
}
static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
static inline void inner_product_ip_avx2(float *d, const float * SPA_RESTRICT s,
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
uint32_t n_taps)
{
@ -70,5 +70,5 @@ static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
_mm_store_ss(d, sx[0]);
}
MAKE_RESAMPLER_FULL(avx);
MAKE_RESAMPLER_INTER(avx);
MAKE_RESAMPLER_FULL(avx2);
MAKE_RESAMPLER_INTER(avx2);

View file

@ -160,7 +160,7 @@ DEFINE_RESAMPLER(inter,sse);
DEFINE_RESAMPLER(full,ssse3);
DEFINE_RESAMPLER(inter,ssse3);
#endif
#if defined (HAVE_AVX) && defined(HAVE_FMA)
DEFINE_RESAMPLER(full,avx);
DEFINE_RESAMPLER(inter,avx);
#if defined (HAVE_AVX2) && defined(HAVE_FMA)
DEFINE_RESAMPLER(full,avx2);
DEFINE_RESAMPLER(inter,avx2);
#endif

View file

@ -95,8 +95,8 @@ static struct resample_info resample_table[] =
#if defined (HAVE_NEON)
MAKE(F32, copy_c, full_neon, inter_neon, SPA_CPU_FLAG_NEON),
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA)
MAKE(F32, copy_c, full_avx, inter_avx, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3),
#if defined(HAVE_AVX2) && defined(HAVE_FMA)
MAKE(F32, copy_c, full_avx2, inter_avx2, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3),
#endif
#if defined (HAVE_SSSE3)
MAKE(F32, copy_c, full_ssse3, inter_ssse3, SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED),