cpu: add SLOW_GATHER flag

Intel Skylake (level 0x16) is the first model with fast gather
instructions. Mark older models with the SLOW_GATHER flag.

When SLOW_GATHER is set, prefer the SSE2 version of the format
conversion, which does not use gather. This makes the conversion much
faster on my Ivy Bridge.
This commit is contained in:
Wim Taymans 2026-03-19 16:21:35 +01:00
parent 5ade045654
commit 3b422e31a2
3 changed files with 9 additions and 0 deletions

View file

@ -108,6 +108,9 @@ static struct conv_info conv_table[] =
MAKE(U32, F32, 0, conv_u32_to_f32_c),
MAKE(U32, F32P, 0, conv_u32_to_f32d_c),
#if defined (HAVE_SSE2)
MAKE(S32, F32P, 0, conv_s32_to_f32d_sse2, SPA_CPU_FLAG_SSE2 | SPA_CPU_FLAG_SLOW_GATHER),
#endif
#if defined (HAVE_AVX2)
MAKE(S32, F32P, 0, conv_s32_to_f32d_avx2, SPA_CPU_FLAG_AVX2),
#endif
@ -129,6 +132,9 @@ static struct conv_info conv_table[] =
MAKE(S24, F32, 0, conv_s24_to_f32_c),
MAKE(S24P, F32P, 0, conv_s24d_to_f32d_c),
#if defined (HAVE_SSE2)
MAKE(S24, F32P, 0, conv_s24_to_f32d_sse2, SPA_CPU_FLAG_SSE2 | SPA_CPU_FLAG_SLOW_GATHER),
#endif
#if defined (HAVE_AVX2)
MAKE(S24, F32P, 0, conv_s24_to_f32d_avx2, SPA_CPU_FLAG_AVX2),
#endif