mirror of
				https://gitlab.freedesktop.org/pipewire/pipewire.git
				synced 2025-11-03 09:01:54 -05:00 
			
		
		
		
	cpu: add zero_denormals method
Add a method to enable/disable the denormals flush-to-zero and denormals-as-zero CPU options. Add a config option to make it possible to disable this again. Fixes high CPU usage when dealing with denormals, which can happen in many DSP functions. Fixes #1681
This commit is contained in:
		
							parent
							
								
									036371c48f
								
							
						
					
					
						commit
						85d5c8cd6c
					
				
					 6 changed files with 70 additions and 3 deletions
				
			
		| 
						 | 
					@ -113,7 +113,7 @@ struct spa_cpu { struct spa_interface iface; };
 | 
				
			||||||
struct spa_cpu_methods {
 | 
					struct spa_cpu_methods {
 | 
				
			||||||
	/** the version of the methods. This can be used to expand this
 | 
						/** the version of the methods. This can be used to expand this
 | 
				
			||||||
	  structure in the future */
 | 
						  structure in the future */
 | 
				
			||||||
#define SPA_VERSION_CPU_METHODS	1
 | 
					#define SPA_VERSION_CPU_METHODS	2
 | 
				
			||||||
	uint32_t version;
 | 
						uint32_t version;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/** get CPU flags */
 | 
						/** get CPU flags */
 | 
				
			||||||
| 
						 | 
					@ -130,6 +130,10 @@ struct spa_cpu_methods {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* check if running in a VM. Since:1 */
 | 
						/* check if running in a VM. Since:1 */
 | 
				
			||||||
	uint32_t (*get_vm_type) (void *object);
 | 
						uint32_t (*get_vm_type) (void *object);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* denormals will be handled as zero, either with FTZ or DAZ.
 | 
				
			||||||
 | 
						 * Since:2 */
 | 
				
			||||||
 | 
						int (*zero_denormals) (void *object, bool enable);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define spa_cpu_method(o,method,version,...)				\
 | 
					#define spa_cpu_method(o,method,version,...)				\
 | 
				
			||||||
| 
						 | 
					@ -146,10 +150,12 @@ struct spa_cpu_methods {
 | 
				
			||||||
#define spa_cpu_get_count(c)		spa_cpu_method(c, get_count, 0)
 | 
					#define spa_cpu_get_count(c)		spa_cpu_method(c, get_count, 0)
 | 
				
			||||||
#define spa_cpu_get_max_align(c)	spa_cpu_method(c, get_max_align, 0)
 | 
					#define spa_cpu_get_max_align(c)	spa_cpu_method(c, get_max_align, 0)
 | 
				
			||||||
#define spa_cpu_get_vm_type(c)		spa_cpu_method(c, get_vm_type, 1)
 | 
					#define spa_cpu_get_vm_type(c)		spa_cpu_method(c, get_vm_type, 1)
 | 
				
			||||||
 | 
					#define spa_cpu_zero_denormals(c,e)	spa_cpu_method(c, zero_denormals, 2, e)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/** keys can be given when initializing the cpu handle */
 | 
					/** keys can be given when initializing the cpu handle */
 | 
				
			||||||
#define SPA_KEY_CPU_FORCE		"cpu.force"		/**< force cpu flags */
 | 
					#define SPA_KEY_CPU_FORCE		"cpu.force"		/**< force cpu flags */
 | 
				
			||||||
#define SPA_KEY_CPU_VM_TYPE		"cpu.vm.type"		/**< force a VM type */
 | 
					#define SPA_KEY_CPU_VM_TYPE		"cpu.vm.type"		/**< force a VM type */
 | 
				
			||||||
 | 
					#define SPA_KEY_CPU_ZERO_DENORMALS	"cpu.zero.denormals"	/**< zero denormals */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * \}
 | 
					 * \}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -123,3 +123,40 @@ arm_init(struct impl *impl)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Turn zeroing of denormals on or off for the calling thread's
					 * floating-point control register.
					 *
					 * \param object unused by this implementation
					 * \param enable true sets bit 24 (0x1000000) of the control register,
					 *               false clears it
					 * \return always 0
					 *
					 * NOTE(review): bit 24 is presumably the FZ (flush-to-zero) control of
					 * FPCR/FPSCR -- confirm against the Arm architecture reference.
					 */
					static int arm_zero_denormals(void *object, bool enable)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if defined(__aarch64__)
 | 
				
			||||||
 | 
						/* AArch64 path: read FPCR into a 64-bit scratch value, modify
						 * bit 24, write it back with msr and then issue an isb. */
						uint64_t cw;
 | 
				
			||||||
 | 
						if (enable)
 | 
				
			||||||
 | 
							__asm__ __volatile__(
 | 
				
			||||||
 | 
								"mrs	%0, fpcr		\n"
 | 
				
			||||||
 | 
								"orr	%0, %0, #0x1000000	\n"
 | 
				
			||||||
 | 
								"msr	fpcr, %0		\n"
 | 
				
			||||||
 | 
								"isb				\n"
 | 
				
			||||||
 | 
								: "=r"(cw)::"memory");
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							__asm__ __volatile__(
 | 
				
			||||||
 | 
								"mrs	%0, fpcr		\n"
 | 
				
			||||||
 | 
								"and	%0, %0, #~0x1000000	\n"
 | 
				
			||||||
 | 
								"msr	fpcr, %0		\n"
 | 
				
			||||||
 | 
								"isb				\n"
 | 
				
			||||||
 | 
								: "=r"(cw)::"memory");
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						/* Non-AArch64 path: same read-modify-write of bit 24, but on
						 * FPSCR via vmrs/vmsr with a 32-bit scratch value; no isb is
						 * issued here. */
						uint32_t cw;
 | 
				
			||||||
 | 
						if (enable)
 | 
				
			||||||
 | 
							__asm__ __volatile__(
 | 
				
			||||||
 | 
								"vmrs	%0, fpscr		\n"
 | 
				
			||||||
 | 
								"orr	%0, %0, #0x1000000	\n"
 | 
				
			||||||
 | 
								"vmsr	fpscr, %0		\n"
 | 
				
			||||||
 | 
								: "=r"(cw)::"memory");
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							__asm__ __volatile__(
 | 
				
			||||||
 | 
								"vmrs	%0, fpscr		\n"
 | 
				
			||||||
 | 
								"and	%0, %0, #~0x1000000	\n"
 | 
				
			||||||
 | 
								"vmsr	fpscr, %0		\n"
 | 
				
			||||||
 | 
								: "=r"(cw)::"memory");
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						/* No failure path: the register write is not checked. */
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -176,3 +176,15 @@ x86_init(struct impl *impl)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Return the set of MXCSR bits this CPU allows software to write.
					 *
					 * Follows the Intel SDM procedure: FXSAVE into a zeroed, 16-byte
					 * aligned 512-byte area and read MXCSR_MASK at byte offset 28. A mask
					 * of 0 means the processor predates MXCSR_MASK and the default
					 * 0xFFBF applies (bit 6, DAZ, is reserved).
					 */
					static unsigned int x86_mxcsr_writable_mask(void)
					{
						/* 128 * 4 bytes = the 512-byte FXSAVE image */
						struct { unsigned int word[128]; } area __attribute__((aligned(16))) = { { 0 } };

						__asm__ __volatile__("fxsave %0" : "+m"(area));

						/* word[7] is bytes 28..31 of the image: MXCSR_MASK */
						return area.word[7] != 0 ? area.word[7] : 0xffbfu;
					}

					/**
					 * Enable or disable treating denormals as zero in SSE arithmetic.
					 *
					 * Sets or clears FTZ (flush-to-zero, MXCSR bit 15) and DAZ
					 * (denormals-are-zero, MXCSR bit 6). DAZ is only set when the CPU
					 * reports it as writable: writing a reserved MXCSR bit raises a
					 * general-protection fault on processors that lack DAZ.
					 *
					 * \param object unused by this implementation
					 * \param enable true to zero denormals, false to restore IEEE handling
					 * \return always 0
					 */
					static int x86_zero_denormals(void *object, bool enable)
					{
						/* FTZ | DAZ, restricted to the bits this CPU implements */
						const unsigned int supported = 0x8040u & x86_mxcsr_writable_mask();
						unsigned int mxcsr = __builtin_ia32_stmxcsr();

						(void)object;

						if (enable)
							mxcsr |= supported;
						else
							mxcsr &= ~0x8040u;	/* clearing an already-zero reserved bit is harmless */

						__builtin_ia32_ldmxcsr(mxcsr);
						return 0;
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -78,11 +78,14 @@ static char *read_file(const char *name, char *buffer, size_t len)
 | 
				
			||||||
# if defined (__i386__) || defined (__x86_64__)
 | 
					# if defined (__i386__) || defined (__x86_64__)
 | 
				
			||||||
#include "cpu-x86.c"
 | 
					#include "cpu-x86.c"
 | 
				
			||||||
#define init(t)	x86_init(t)
 | 
					#define init(t)	x86_init(t)
 | 
				
			||||||
 | 
					#define impl_cpu_zero_denormals x86_zero_denormals
 | 
				
			||||||
# elif defined (__arm__) || defined (__aarch64__)
 | 
					# elif defined (__arm__) || defined (__aarch64__)
 | 
				
			||||||
#include "cpu-arm.c"
 | 
					#include "cpu-arm.c"
 | 
				
			||||||
#define init(t)	arm_init(t)
 | 
					#define init(t)	arm_init(t)
 | 
				
			||||||
 | 
					#define impl_cpu_zero_denormals arm_zero_denormals
 | 
				
			||||||
# else
 | 
					# else
 | 
				
			||||||
#define init(t)
 | 
					#define init(t)
 | 
				
			||||||
 | 
					#define impl_cpu_zero_denormals NULL
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static uint32_t
 | 
					static uint32_t
 | 
				
			||||||
| 
						 | 
					@ -197,6 +200,7 @@ static const struct spa_cpu_methods impl_cpu = {
 | 
				
			||||||
	.get_count = impl_cpu_get_count,
 | 
						.get_count = impl_cpu_get_count,
 | 
				
			||||||
	.get_max_align = impl_cpu_get_max_align,
 | 
						.get_max_align = impl_cpu_get_max_align,
 | 
				
			||||||
	.get_vm_type = impl_cpu_get_vm_type,
 | 
						.get_vm_type = impl_cpu_get_vm_type,
 | 
				
			||||||
 | 
						.zero_denormals = impl_cpu_zero_denormals,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int impl_get_interface(struct spa_handle *handle, const char *type, void **interface)
 | 
					static int impl_get_interface(struct spa_handle *handle, const char *type, void **interface)
 | 
				
			||||||
| 
						 | 
					@ -265,6 +269,8 @@ impl_init(const struct spa_handle_factory *factory,
 | 
				
			||||||
			this->flags = atoi(str);
 | 
								this->flags = atoi(str);
 | 
				
			||||||
		if ((str = spa_dict_lookup(info, SPA_KEY_CPU_VM_TYPE)) != NULL)
 | 
							if ((str = spa_dict_lookup(info, SPA_KEY_CPU_VM_TYPE)) != NULL)
 | 
				
			||||||
			this->vm_type = atoi(str);
 | 
								this->vm_type = atoi(str);
 | 
				
			||||||
 | 
							if ((str = spa_dict_lookup(info, SPA_KEY_CPU_ZERO_DENORMALS)) != NULL)
 | 
				
			||||||
 | 
								impl_cpu_zero_denormals(this, spa_atob(str));
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spa_log_debug(this->log, "%p: count:%d align:%d flags:%08x",
 | 
						spa_log_debug(this->log, "%p: count:%d align:%d flags:%08x",
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,6 +15,7 @@ context.properties = {
 | 
				
			||||||
    #mem.mlock-all                         = false
 | 
					    #mem.mlock-all                         = false
 | 
				
			||||||
    #clock.power-of-two-quantum            = true
 | 
					    #clock.power-of-two-quantum            = true
 | 
				
			||||||
    #log.level                             = 2
 | 
					    #log.level                             = 2
 | 
				
			||||||
 | 
					    #cpu.zero.denormals                    = true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    core.daemon = true              # listening for socket connections
 | 
					    core.daemon = true              # listening for socket connections
 | 
				
			||||||
    core.name   = pipewire-0        # core name and socket name
 | 
					    core.name   = pipewire-0        # core name and socket name
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -395,9 +395,14 @@ struct pw_context *pw_context_new(struct pw_loop *main_loop,
 | 
				
			||||||
			pw_properties_update_string(properties, str, strlen(str));
 | 
								pw_properties_update_string(properties, str, strlen(str));
 | 
				
			||||||
		pw_properties_set(properties, "vm.overrides", NULL);
 | 
							pw_properties_set(properties, "vm.overrides", NULL);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (pw_properties_get(properties, PW_KEY_CPU_MAX_ALIGN) == NULL && cpu != NULL)
 | 
						if (cpu != NULL) {
 | 
				
			||||||
 | 
							if (pw_properties_get(properties, PW_KEY_CPU_MAX_ALIGN) == NULL)
 | 
				
			||||||
			pw_properties_setf(properties, PW_KEY_CPU_MAX_ALIGN,
 | 
								pw_properties_setf(properties, PW_KEY_CPU_MAX_ALIGN,
 | 
				
			||||||
				"%u", spa_cpu_get_max_align(cpu));
 | 
									"%u", spa_cpu_get_max_align(cpu));
 | 
				
			||||||
 | 
							if ((str = pw_properties_get(properties, SPA_KEY_CPU_ZERO_DENORMALS)) == NULL)
 | 
				
			||||||
 | 
								str = "true";
 | 
				
			||||||
 | 
							spa_cpu_zero_denormals(cpu, spa_atob(str));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (getenv("PIPEWIRE_DEBUG") == NULL &&
 | 
						if (getenv("PIPEWIRE_DEBUG") == NULL &&
 | 
				
			||||||
	    (str = pw_properties_get(properties, "log.level")) != NULL)
 | 
						    (str = pw_properties_get(properties, "log.level")) != NULL)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue