mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2026-03-20 05:33:55 -04:00
cpu: add SLOW_GATHER flag
Intel Skylake (maximum CPUID level 0x16) is the first model with fast gather opcodes. Mark CPUs that report a lower maximum CPUID level with the SLOW_GATHER flag. When SLOW_GATHER is set, prefer the SSE2 version of the format conversion, which does not use gather. This makes the conversion much faster on my Ivy Bridge.
This commit is contained in:
parent
5ade045654
commit
3b422e31a2
3 changed files with 9 additions and 0 deletions
|
|
@ -62,6 +62,7 @@ struct spa_cpu { struct spa_interface iface; };
|
|||
#define SPA_CPU_FLAG_BMI2 (1<<18) /**< Bit Manipulation Instruction Set 2 */
|
||||
#define SPA_CPU_FLAG_AVX512 (1<<19) /**< AVX-512 */
|
||||
#define SPA_CPU_FLAG_SLOW_UNALIGNED (1<<20) /**< unaligned loads/stores are slow */
|
||||
#define SPA_CPU_FLAG_SLOW_GATHER (1<<21) /**< gather instructions are slow */
|
||||
|
||||
/* PPC specific */
|
||||
#define SPA_CPU_FLAG_ALTIVEC (1<<0) /**< standard */
|
||||
|
|
|
|||
|
|
@ -108,6 +108,9 @@ static struct conv_info conv_table[] =
|
|||
MAKE(U32, F32, 0, conv_u32_to_f32_c),
|
||||
MAKE(U32, F32P, 0, conv_u32_to_f32d_c),
|
||||
|
||||
#if defined (HAVE_SSE2)
|
||||
MAKE(S32, F32P, 0, conv_s32_to_f32d_sse2, SPA_CPU_FLAG_SSE2 | SPA_CPU_FLAG_SLOW_GATHER),
|
||||
#endif
|
||||
#if defined (HAVE_AVX2)
|
||||
MAKE(S32, F32P, 0, conv_s32_to_f32d_avx2, SPA_CPU_FLAG_AVX2),
|
||||
#endif
|
||||
|
|
@ -129,6 +132,9 @@ static struct conv_info conv_table[] =
|
|||
|
||||
MAKE(S24, F32, 0, conv_s24_to_f32_c),
|
||||
MAKE(S24P, F32P, 0, conv_s24d_to_f32d_c),
|
||||
#if defined (HAVE_SSE2)
|
||||
MAKE(S24, F32P, 0, conv_s24_to_f32d_sse2, SPA_CPU_FLAG_SSE2 | SPA_CPU_FLAG_SLOW_GATHER),
|
||||
#endif
|
||||
#if defined (HAVE_AVX2)
|
||||
MAKE(S24, F32P, 0, conv_s24_to_f32d_avx2, SPA_CPU_FLAG_AVX2),
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -78,6 +78,8 @@ x86_init(struct impl *impl)
|
|||
if ((ebx & AVX512_BITS) == AVX512_BITS)
|
||||
flags |= SPA_CPU_FLAG_AVX512;
|
||||
}
|
||||
if (max_level < 0x16)
|
||||
flags |= SPA_CPU_FLAG_SLOW_GATHER;
|
||||
|
||||
/* Check cpuid level of extended features. */
|
||||
__cpuid (0x80000000, ext_level, ebx, ecx, edx);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue