diff --git a/spa/include/spa/support/cpu.h b/spa/include/spa/support/cpu.h
index c69338855..2faf42154 100644
--- a/spa/include/spa/support/cpu.h
+++ b/spa/include/spa/support/cpu.h
@@ -62,6 +62,7 @@ struct spa_cpu { struct spa_interface iface; };
 #define SPA_CPU_FLAG_BMI2 (1<<18) /**< Bit Manipulation Instruction Set 2 */
 #define SPA_CPU_FLAG_AVX512 (1<<19) /**< AVX-512 */
 #define SPA_CPU_FLAG_SLOW_UNALIGNED (1<<20) /**< unaligned loads/stores are slow */
+#define SPA_CPU_FLAG_SLOW_GATHER (1<<21) /**< gather operations are slow; code paths that avoid them may be selected instead */
 
 /* PPC specific */
 #define SPA_CPU_FLAG_ALTIVEC (1<<0) /**< standard */
diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c
index 3fc2c5f0a..057f3294a 100644
--- a/spa/plugins/audioconvert/fmt-ops.c
+++ b/spa/plugins/audioconvert/fmt-ops.c
@@ -108,6 +108,9 @@ static struct conv_info conv_table[] =
 	MAKE(U32, F32, 0, conv_u32_to_f32_c),
 	MAKE(U32, F32P, 0, conv_u32_to_f32d_c),
 
+#if defined (HAVE_SSE2) /* S32 -> F32P SSE2 entry flagged SSE2 | SLOW_GATHER, listed ahead of the AVX2 entry; presumably picked first on slow-gather CPUs -- TODO confirm against the conv_table lookup */
+	MAKE(S32, F32P, 0, conv_s32_to_f32d_sse2, SPA_CPU_FLAG_SSE2 | SPA_CPU_FLAG_SLOW_GATHER),
+#endif
 #if defined (HAVE_AVX2)
 	MAKE(S32, F32P, 0, conv_s32_to_f32d_avx2, SPA_CPU_FLAG_AVX2),
 #endif
@@ -129,6 +132,9 @@ static struct conv_info conv_table[] =
 	MAKE(S24, F32, 0, conv_s24_to_f32_c),
 	MAKE(S24P, F32P, 0, conv_s24d_to_f32d_c),
 
+#if defined (HAVE_SSE2) /* S24 -> F32P SSE2 entry flagged SSE2 | SLOW_GATHER, listed ahead of the AVX2 entry; presumably picked first on slow-gather CPUs -- TODO confirm against the conv_table lookup */
+	MAKE(S24, F32P, 0, conv_s24_to_f32d_sse2, SPA_CPU_FLAG_SSE2 | SPA_CPU_FLAG_SLOW_GATHER),
+#endif
 #if defined (HAVE_AVX2)
 	MAKE(S24, F32P, 0, conv_s24_to_f32d_avx2, SPA_CPU_FLAG_AVX2),
 #endif
diff --git a/spa/plugins/support/cpu-x86.c b/spa/plugins/support/cpu-x86.c
index c1c53855d..0fb866671 100644
--- a/spa/plugins/support/cpu-x86.c
+++ b/spa/plugins/support/cpu-x86.c
@@ -78,6 +78,8 @@ x86_init(struct impl *impl)
 		if ((ebx & AVX512_BITS) == AVX512_BITS)
 			flags |= SPA_CPU_FLAG_AVX512;
 	}
+	if (max_level < 0x16) /* NOTE(review): marks gather as slow whenever max_level (presumably the highest basic CPUID leaf) is below 0x16; the rationale for this cutoff is not visible here -- confirm */
+		flags |= SPA_CPU_FLAG_SLOW_GATHER;
 
 	/* Check cpuid level of extended features. */
 	__cpuid (0x80000000, ext_level, ebx, ecx, edx);