cpu: add zero_denormals method

Add a method to enable/disable the denormals flush-to-zero and
denormals-as-zero CPU options.

Add a `cpu.zero.denormals` config option so that this behavior, which is enabled by default, can be turned off again.

Fixes high CPU usage when dealing with denormals, which can happen
in many DSP functions.

Fixes #1681
This commit is contained in:
Wim Taymans 2021-10-11 14:54:48 +02:00
parent 036371c48f
commit 85d5c8cd6c
6 changed files with 70 additions and 3 deletions

View file

@ -113,7 +113,7 @@ struct spa_cpu { struct spa_interface iface; };
struct spa_cpu_methods { struct spa_cpu_methods {
/** the version of the methods. This can be used to expand this /** the version of the methods. This can be used to expand this
structure in the future */ structure in the future */
#define SPA_VERSION_CPU_METHODS 1 #define SPA_VERSION_CPU_METHODS 2
uint32_t version; uint32_t version;
/** get CPU flags */ /** get CPU flags */
@ -130,6 +130,10 @@ struct spa_cpu_methods {
/* check if running in a VM. Since:1 */ /* check if running in a VM. Since:1 */
uint32_t (*get_vm_type) (void *object); uint32_t (*get_vm_type) (void *object);
/* denormals will be handled as zero, either with FTZ or DAZ.
* Since:2 */
int (*zero_denormals) (void *object, bool enable);
}; };
#define spa_cpu_method(o,method,version,...) \ #define spa_cpu_method(o,method,version,...) \
@ -146,10 +150,12 @@ struct spa_cpu_methods {
#define spa_cpu_get_count(c) spa_cpu_method(c, get_count, 0) #define spa_cpu_get_count(c) spa_cpu_method(c, get_count, 0)
#define spa_cpu_get_max_align(c) spa_cpu_method(c, get_max_align, 0) #define spa_cpu_get_max_align(c) spa_cpu_method(c, get_max_align, 0)
#define spa_cpu_get_vm_type(c) spa_cpu_method(c, get_vm_type, 1) #define spa_cpu_get_vm_type(c) spa_cpu_method(c, get_vm_type, 1)
#define spa_cpu_zero_denormals(c,e) spa_cpu_method(c, zero_denormals, 2, e)
/** keys can be given when initializing the cpu handle */ /** keys can be given when initializing the cpu handle */
#define SPA_KEY_CPU_FORCE "cpu.force" /**< force cpu flags */ #define SPA_KEY_CPU_FORCE "cpu.force" /**< force cpu flags */
#define SPA_KEY_CPU_VM_TYPE "cpu.vm.type" /**< force a VM type */ #define SPA_KEY_CPU_VM_TYPE "cpu.vm.type" /**< force a VM type */
#define SPA_KEY_CPU_ZERO_DENORMALS "cpu.zero.denormals" /**< zero denormals */
/** /**
* \} * \}

View file

@ -123,3 +123,40 @@ arm_init(struct impl *impl)
return 0; return 0;
} }
/**
 * Enable or disable flush-to-zero handling of denormals on ARM.
 *
 * Sets or clears bit 24 (0x1000000, the FZ bit) of the floating-point
 * control register: FPCR on AArch64, FPSCR on 32-bit ARM with a hardware
 * VFP unit. The register is read, modified and written back inside a single
 * asm statement.
 *
 * On 32-bit ARM builds without hardware VFP (soft-float), no such register
 * can be accessed and the VFP instructions would not assemble, so the
 * function does nothing there.
 *
 * \param object the cpu implementation handle (unused here)
 * \param enable true to flush denormals to zero, false to restore normal
 *               denormal handling
 * \return 0 always
 */
static int arm_zero_denormals(void *object, bool enable)
{
	(void)object;
#if defined(__aarch64__)
	uint64_t cw;
	if (enable)
		__asm__ __volatile__(
			"mrs	%0, fpcr			\n"
			"orr	%0, %0, #0x1000000		\n"
			"msr	fpcr, %0			\n"
			"isb					\n"
			: "=r"(cw)::"memory");
	else
		__asm__ __volatile__(
			"mrs	%0, fpcr			\n"
			"and	%0, %0, #~0x1000000		\n"
			"msr	fpcr, %0			\n"
			"isb					\n"
			: "=r"(cw)::"memory");
#elif defined(__VFP_FP__) && !defined(__SOFTFP__)
	/* vmrs/vmsr only exist when a hardware VFP unit is targeted */
	uint32_t cw;
	if (enable)
		__asm__ __volatile__(
			"vmrs	%0, fpscr			\n"
			"orr	%0, %0, #0x1000000		\n"
			"vmsr	fpscr, %0			\n"
			: "=r"(cw)::"memory");
	else
		__asm__ __volatile__(
			"vmrs	%0, fpscr			\n"
			"and	%0, %0, #~0x1000000		\n"
			"vmsr	fpscr, %0			\n"
			: "=r"(cw)::"memory");
#else
	/* no accessible FP control register on this target: nothing to do */
	(void)enable;
#endif
	return 0;
}

View file

@ -176,3 +176,15 @@ x86_init(struct impl *impl)
return 0; return 0;
} }
/**
 * Enable or disable treating denormals as zero on x86.
 *
 * Sets or clears both the FTZ (flush-to-zero, bit 15, 0x8000) and DAZ
 * (denormals-are-zero, bit 6, 0x0040) bits of the SSE MXCSR register of the
 * calling thread.
 *
 * MXCSR and the builtins that access it are only available when the
 * compiler targets SSE, so on builds without SSE the function does nothing.
 *
 * \param object the cpu implementation handle (unused here)
 * \param enable true to zero denormals, false to restore normal denormal
 *               handling
 * \return 0 always
 */
static int x86_zero_denormals(void *object, bool enable)
{
	(void)object;
#if defined(__SSE__)
	/* FTZ (0x8000) | DAZ (0x0040) */
	const unsigned int ftz_daz = 0x8040;
	unsigned int mxcsr = __builtin_ia32_stmxcsr();

	if (enable)
		mxcsr |= ftz_daz;
	else
		mxcsr &= ~ftz_daz;
	__builtin_ia32_ldmxcsr(mxcsr);
#else
	/* no MXCSR without SSE: nothing to configure */
	(void)enable;
#endif
	return 0;
}

View file

@ -78,11 +78,14 @@ static char *read_file(const char *name, char *buffer, size_t len)
# if defined (__i386__) || defined (__x86_64__) # if defined (__i386__) || defined (__x86_64__)
#include "cpu-x86.c" #include "cpu-x86.c"
#define init(t) x86_init(t) #define init(t) x86_init(t)
#define impl_cpu_zero_denormals x86_zero_denormals
# elif defined (__arm__) || defined (__aarch64__) # elif defined (__arm__) || defined (__aarch64__)
#include "cpu-arm.c" #include "cpu-arm.c"
#define init(t) arm_init(t) #define init(t) arm_init(t)
#define impl_cpu_zero_denormals arm_zero_denormals
# else # else
#define init(t) #define init(t)
#define impl_cpu_zero_denormals NULL
#endif #endif
static uint32_t static uint32_t
@ -197,6 +200,7 @@ static const struct spa_cpu_methods impl_cpu = {
.get_count = impl_cpu_get_count, .get_count = impl_cpu_get_count,
.get_max_align = impl_cpu_get_max_align, .get_max_align = impl_cpu_get_max_align,
.get_vm_type = impl_cpu_get_vm_type, .get_vm_type = impl_cpu_get_vm_type,
.zero_denormals = impl_cpu_zero_denormals,
}; };
static int impl_get_interface(struct spa_handle *handle, const char *type, void **interface) static int impl_get_interface(struct spa_handle *handle, const char *type, void **interface)
@ -265,6 +269,8 @@ impl_init(const struct spa_handle_factory *factory,
this->flags = atoi(str); this->flags = atoi(str);
if ((str = spa_dict_lookup(info, SPA_KEY_CPU_VM_TYPE)) != NULL) if ((str = spa_dict_lookup(info, SPA_KEY_CPU_VM_TYPE)) != NULL)
this->vm_type = atoi(str); this->vm_type = atoi(str);
if ((str = spa_dict_lookup(info, SPA_KEY_CPU_ZERO_DENORMALS)) != NULL)
impl_cpu_zero_denormals(this, spa_atob(str));
} }
spa_log_debug(this->log, "%p: count:%d align:%d flags:%08x", spa_log_debug(this->log, "%p: count:%d align:%d flags:%08x",

View file

@ -15,6 +15,7 @@ context.properties = {
#mem.mlock-all = false #mem.mlock-all = false
#clock.power-of-two-quantum = true #clock.power-of-two-quantum = true
#log.level = 2 #log.level = 2
#cpu.zero.denormals = true
core.daemon = true # listening for socket connections core.daemon = true # listening for socket connections
core.name = pipewire-0 # core name and socket name core.name = pipewire-0 # core name and socket name

View file

@ -395,9 +395,14 @@ struct pw_context *pw_context_new(struct pw_loop *main_loop,
pw_properties_update_string(properties, str, strlen(str)); pw_properties_update_string(properties, str, strlen(str));
pw_properties_set(properties, "vm.overrides", NULL); pw_properties_set(properties, "vm.overrides", NULL);
} }
if (pw_properties_get(properties, PW_KEY_CPU_MAX_ALIGN) == NULL && cpu != NULL) if (cpu != NULL) {
pw_properties_setf(properties, PW_KEY_CPU_MAX_ALIGN, if (pw_properties_get(properties, PW_KEY_CPU_MAX_ALIGN) == NULL)
pw_properties_setf(properties, PW_KEY_CPU_MAX_ALIGN,
"%u", spa_cpu_get_max_align(cpu)); "%u", spa_cpu_get_max_align(cpu));
if ((str = pw_properties_get(properties, SPA_KEY_CPU_ZERO_DENORMALS)) == NULL)
str = "true";
spa_cpu_zero_denormals(cpu, spa_atob(str));
}
if (getenv("PIPEWIRE_DEBUG") == NULL && if (getenv("PIPEWIRE_DEBUG") == NULL &&
(str = pw_properties_get(properties, "log.level")) != NULL) (str = pw_properties_get(properties, "log.level")) != NULL)