mirror of
				https://gitlab.freedesktop.org/pipewire/pipewire.git
				synced 2025-11-02 09:01:50 -05:00 
			
		
		
		
	Check if we can run SSE instructions before executing the denormals SSE code. Fixes #1775
		
			
				
	
	
		
			203 lines
		
	
	
	
		
			5.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			203 lines
		
	
	
	
		
			5.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* Spa
 | 
						|
 *
 | 
						|
 * Copyright © 2018 Wim Taymans
 | 
						|
 *
 | 
						|
 * Permission is hereby granted, free of charge, to any person obtaining a
 | 
						|
 * copy of this software and associated documentation files (the "Software"),
 | 
						|
 * to deal in the Software without restriction, including without limitation
 | 
						|
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 | 
						|
 * and/or sell copies of the Software, and to permit persons to whom the
 | 
						|
 * Software is furnished to do so, subject to the following conditions:
 | 
						|
 *
 | 
						|
 * The above copyright notice and this permission notice (including the next
 | 
						|
 * paragraph) shall be included in all copies or substantial portions of the
 | 
						|
 * Software.
 | 
						|
 *
 | 
						|
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
						|
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
						|
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 | 
						|
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
						|
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | 
						|
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 | 
						|
 * DEALINGS IN THE SOFTWARE.
 | 
						|
 */
 | 
						|
 | 
						|
#include <cpuid.h>
 | 
						|
 | 
						|
static int
 | 
						|
x86_init(struct impl *impl)
 | 
						|
{
 | 
						|
	uint32_t flags;
 | 
						|
 | 
						|
	unsigned int vendor;
 | 
						|
	unsigned int model, family;
 | 
						|
	unsigned int max_level, ext_level, has_osxsave;
 | 
						|
	unsigned int eax, ebx, ecx, edx;
 | 
						|
 | 
						|
 | 
						|
	max_level = __get_cpuid_max(0, &vendor);
 | 
						|
	if (max_level < 1)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	__cpuid(1, eax, ebx, ecx, edx);
 | 
						|
 | 
						|
	model = (eax >> 4) & 0x0f;
 | 
						|
	family = (eax >> 8) & 0x0f;
 | 
						|
 | 
						|
	if (vendor == signature_INTEL_ebx ||
 | 
						|
	    vendor == signature_AMD_ebx) {
 | 
						|
		unsigned int extended_model, extended_family;
 | 
						|
 | 
						|
		extended_model = (eax >> 12) & 0xf0;
 | 
						|
		extended_family = (eax >> 20) & 0xff;
 | 
						|
		if (family == 0x0f) {
 | 
						|
			family += extended_family;
 | 
						|
			model += extended_model;
 | 
						|
		} else if (family == 0x06)
 | 
						|
			model += extended_model;
 | 
						|
	}
 | 
						|
 | 
						|
	flags = 0;
 | 
						|
	if (ecx & bit_SSE3)
 | 
						|
		flags |= SPA_CPU_FLAG_SSE3;
 | 
						|
	if (ecx & bit_SSSE3)
 | 
						|
		flags |= SPA_CPU_FLAG_SSSE3;
 | 
						|
	if (ecx & bit_SSE4_1)
 | 
						|
		flags |= SPA_CPU_FLAG_SSE41;
 | 
						|
	if (ecx & bit_SSE4_2)
 | 
						|
		flags |= SPA_CPU_FLAG_SSE42;
 | 
						|
	if (ecx & bit_AVX)
 | 
						|
		flags |= SPA_CPU_FLAG_AVX;
 | 
						|
	has_osxsave = ecx & bit_OSXSAVE;
 | 
						|
	if (ecx & bit_FMA)
 | 
						|
		flags |= SPA_CPU_FLAG_FMA3;
 | 
						|
 | 
						|
	if (edx & bit_CMOV)
 | 
						|
		flags |= SPA_CPU_FLAG_CMOV;
 | 
						|
	if (edx & bit_MMX)
 | 
						|
		flags |= SPA_CPU_FLAG_MMX;
 | 
						|
	if (edx & bit_MMXEXT)
 | 
						|
		flags |= SPA_CPU_FLAG_MMXEXT;
 | 
						|
	if (edx & bit_SSE)
 | 
						|
		flags |= SPA_CPU_FLAG_SSE;
 | 
						|
	if (edx & bit_SSE2)
 | 
						|
		flags |= SPA_CPU_FLAG_SSE2;
 | 
						|
 | 
						|
 | 
						|
	if (max_level >= 7) {
 | 
						|
		__cpuid_count(7, 0, eax, ebx, ecx, edx);
 | 
						|
 | 
						|
		if (ebx & bit_BMI)
 | 
						|
			flags |= SPA_CPU_FLAG_BMI1;
 | 
						|
		if (ebx & bit_AVX2)
 | 
						|
			flags |= SPA_CPU_FLAG_AVX2;
 | 
						|
		if (ebx & bit_BMI2)
 | 
						|
			flags |= SPA_CPU_FLAG_BMI2;
 | 
						|
#define AVX512_BITS (bit_AVX512F | bit_AVX512DQ | bit_AVX512CD | bit_AVX512BW | bit_AVX512VL)
 | 
						|
		if ((ebx & AVX512_BITS) == AVX512_BITS)
 | 
						|
			flags |= SPA_CPU_FLAG_AVX512;
 | 
						|
	}
 | 
						|
 | 
						|
	/* Check cpuid level of extended features.  */
 | 
						|
	__cpuid (0x80000000, ext_level, ebx, ecx, edx);
 | 
						|
 | 
						|
	if (ext_level >= 0x80000001) {
 | 
						|
		__cpuid (0x80000001, eax, ebx, ecx, edx);
 | 
						|
 | 
						|
		if (edx & bit_3DNOW)
 | 
						|
			flags |= SPA_CPU_FLAG_3DNOW;
 | 
						|
		if (edx & bit_3DNOWP)
 | 
						|
			flags |= SPA_CPU_FLAG_3DNOWEXT;
 | 
						|
		if (edx & bit_MMX)
 | 
						|
			flags |= SPA_CPU_FLAG_MMX;
 | 
						|
		if (edx & bit_MMXEXT)
 | 
						|
			flags |= SPA_CPU_FLAG_MMXEXT;
 | 
						|
		if (ecx & bit_FMA4)
 | 
						|
			flags |= SPA_CPU_FLAG_FMA4;
 | 
						|
		if (ecx & bit_XOP)
 | 
						|
			flags |= SPA_CPU_FLAG_XOP;
 | 
						|
	}
 | 
						|
 | 
						|
	/* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
 | 
						|
#define XCR_XFEATURE_ENABLED_MASK	0x0
 | 
						|
#define XSTATE_FP			0x1
 | 
						|
#define XSTATE_SSE			0x2
 | 
						|
#define XSTATE_YMM			0x4
 | 
						|
#define XSTATE_OPMASK			0x20
 | 
						|
#define XSTATE_ZMM			0x40
 | 
						|
#define XSTATE_HI_ZMM			0x80
 | 
						|
 | 
						|
#define XCR_AVX_ENABLED_MASK \
 | 
						|
	(XSTATE_SSE | XSTATE_YMM)
 | 
						|
#define XCR_AVX512F_ENABLED_MASK \
 | 
						|
	(XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
 | 
						|
 | 
						|
	if (has_osxsave)
 | 
						|
		asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
 | 
						|
			: "=a" (eax), "=d" (edx)
 | 
						|
			: "c" (XCR_XFEATURE_ENABLED_MASK));
 | 
						|
	else
 | 
						|
		eax = 0;
 | 
						|
 | 
						|
	/* Check if AVX registers are supported.  */
 | 
						|
	if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK) {
 | 
						|
		flags &= ~(SPA_CPU_FLAG_AVX |
 | 
						|
				SPA_CPU_FLAG_AVX2 |
 | 
						|
				SPA_CPU_FLAG_FMA3 |
 | 
						|
				SPA_CPU_FLAG_FMA4 |
 | 
						|
				SPA_CPU_FLAG_XOP);
 | 
						|
	}
 | 
						|
 | 
						|
	/* Check if AVX512F registers are supported.  */
 | 
						|
	if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK) {
 | 
						|
		flags &= ~SPA_CPU_FLAG_AVX512;
 | 
						|
	}
 | 
						|
 | 
						|
	if (flags & SPA_CPU_FLAG_AVX512)
 | 
						|
		impl->max_align = 64;
 | 
						|
	else if (flags & (SPA_CPU_FLAG_AVX2 |
 | 
						|
	    SPA_CPU_FLAG_AVX |
 | 
						|
	    SPA_CPU_FLAG_XOP |
 | 
						|
	    SPA_CPU_FLAG_FMA4 |
 | 
						|
	    SPA_CPU_FLAG_FMA3))
 | 
						|
		impl->max_align = 32;
 | 
						|
	else if (flags & (SPA_CPU_FLAG_AESNI |
 | 
						|
	    SPA_CPU_FLAG_SSE42 |
 | 
						|
	    SPA_CPU_FLAG_SSE41 |
 | 
						|
	    SPA_CPU_FLAG_SSSE3 |
 | 
						|
	    SPA_CPU_FLAG_SSE3 |
 | 
						|
	    SPA_CPU_FLAG_SSE2 |
 | 
						|
	    SPA_CPU_FLAG_SSE))
 | 
						|
		impl->max_align = 16;
 | 
						|
	else
 | 
						|
		impl->max_align = 8;
 | 
						|
 | 
						|
	impl->flags = flags;
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
#if defined(HAVE_SSE)
 | 
						|
#include <xmmintrin.h>
 | 
						|
#endif
 | 
						|
 | 
						|
static int x86_zero_denormals(void *object, bool enable)
 | 
						|
{
 | 
						|
#if defined(HAVE_SSE)
 | 
						|
	struct impl *impl = object;
 | 
						|
	if (impl->flags & SPA_CPU_FLAG_SSE) {
 | 
						|
		unsigned int mxcsr;
 | 
						|
		mxcsr = _mm_getcsr();
 | 
						|
		if (enable)
 | 
						|
			mxcsr |= 0x8040;
 | 
						|
		else
 | 
						|
			mxcsr &= ~0x8040;
 | 
						|
		_mm_setcsr(mxcsr);
 | 
						|
		spa_log_debug(impl->log, "%p: zero-denormals:%s",
 | 
						|
				impl, enable ? "on" : "off");
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
#else
 | 
						|
	return -ENOTSUP;
 | 
						|
#endif
 | 
						|
}
 |