cpu: add zero_denormals method

Add a method to enable/disable the denormals flush-to-zero and
denormals-as-zero CPU options.

Add a config option to make it possible to disable this again.

Fixes high CPU usage when dealing with denormals, which can happen
in many DSP functions.

Fixes #1681
This commit is contained in:
Wim Taymans 2021-10-11 14:54:48 +02:00
parent 036371c48f
commit 85d5c8cd6c
6 changed files with 70 additions and 3 deletions

View file

@ -113,7 +113,7 @@ struct spa_cpu { struct spa_interface iface; };
struct spa_cpu_methods {
/** the version of the methods. This can be used to expand this
structure in the future */
#define SPA_VERSION_CPU_METHODS 1
#define SPA_VERSION_CPU_METHODS 2
uint32_t version;
/** get CPU flags */
@ -130,6 +130,10 @@ struct spa_cpu_methods {
/* check if running in a VM. Since:1 */
uint32_t (*get_vm_type) (void *object);
/* enable or disable handling of denormals as zero, using FTZ and/or DAZ
 * where the CPU provides them. Since:2 */
int (*zero_denormals) (void *object, bool enable);
};
#define spa_cpu_method(o,method,version,...) \
@ -146,10 +150,12 @@ struct spa_cpu_methods {
/* Convenience wrappers: each one expands to spa_cpu_method() with the method
 * name and the version number passed for that method. */
#define spa_cpu_get_count(c) spa_cpu_method(c, get_count, 0)
#define spa_cpu_get_max_align(c) spa_cpu_method(c, get_max_align, 0)
#define spa_cpu_get_vm_type(c) spa_cpu_method(c, get_vm_type, 1)
/* e is the bool `enable` argument of the zero_denormals method (Since:2) */
#define spa_cpu_zero_denormals(c,e) spa_cpu_method(c, zero_denormals, 2, e)
/** keys can be given when initializing the cpu handle */
#define SPA_KEY_CPU_FORCE "cpu.force" /**< force cpu flags */
#define SPA_KEY_CPU_VM_TYPE "cpu.vm.type" /**< force a VM type */
#define SPA_KEY_CPU_ZERO_DENORMALS "cpu.zero.denormals" /**< zero denormals */
/**
* \}

View file

@ -123,3 +123,40 @@ arm_init(struct impl *impl)
return 0;
}
/**
 * Enable or disable flush-to-zero handling of denormals on ARM.
 *
 * Reads the floating-point control register (FPCR on AArch64, FPSCR on
 * 32-bit ARM with a hardware VFP), sets or clears the FZ bit and writes
 * the register back.
 *
 * \param object unused
 * \param enable true to set the FZ bit, false to clear it
 * \return 0; builds without a hardware FPU have nothing to configure and
 *         also return 0.
 */
static int arm_zero_denormals(void *object, bool enable)
{
	(void)object;
#if defined(__aarch64__)
	/* FZ (flush-to-zero) is bit 24 of FPCR */
	const uint64_t fz_bit = UINT64_C(0x1000000);
	uint64_t cw;

	__asm__ __volatile__(
		"mrs	%0, fpcr			\n"
		: "=r"(cw) : : "memory");
	if (enable)
		cw |= fz_bit;
	else
		cw &= ~fz_bit;
	/* isb makes the new control value take effect before later code */
	__asm__ __volatile__(
		"msr	fpcr, %0			\n"
		"isb					\n"
		: : "r"(cw) : "memory");
#elif defined(__VFP_FP__) && !defined(__SOFTFP__)
	/* vmrs/vmsr only assemble when a hardware VFP is targeted, so this
	 * branch must not be compiled for soft-float builds. */
	const uint32_t fz_bit = UINT32_C(0x1000000);
	uint32_t cw;

	__asm__ __volatile__(
		"vmrs	%0, fpscr			\n"
		: "=r"(cw) : : "memory");
	if (enable)
		cw |= fz_bit;
	else
		cw &= ~fz_bit;
	__asm__ __volatile__(
		"vmsr	fpscr, %0			\n"
		: : "r"(cw) : "memory");
#else
	/* no hardware floating-point control register to program */
	(void)enable;
#endif
	return 0;
}

View file

@ -176,3 +176,15 @@ x86_init(struct impl *impl)
return 0;
}
/**
 * Enable or disable zeroing of denormals on x86 through MXCSR.
 *
 * Sets or clears both FTZ (flush-to-zero, 0x8000) and DAZ
 * (denormals-are-zero, 0x0040) in the SSE control/status register.
 *
 * \param object unused
 * \param enable true to set FTZ and DAZ, false to clear them
 * \return 0; when the build does not target SSE there is no MXCSR to
 *         program and the call is a no-op that also returns 0.
 */
static int x86_zero_denormals(void *object, bool enable)
{
	(void)object;
#if defined(__SSE__)
	/* The MXCSR builtins are only available when SSE is enabled for the
	 * build, hence the guard.
	 * NOTE(review): some early SSE CPUs do not implement DAZ; if such
	 * targets matter, confirm whether MXCSR_MASK should be checked first. */
	const unsigned int ftz_daz = 0x8040;
	unsigned int mxcsr = __builtin_ia32_stmxcsr();

	if (enable)
		mxcsr |= ftz_daz;
	else
		mxcsr &= ~ftz_daz;
	__builtin_ia32_ldmxcsr(mxcsr);
#else
	(void)enable;
#endif
	return 0;
}

View file

@ -78,11 +78,14 @@ static char *read_file(const char *name, char *buffer, size_t len)
/* Select the architecture-specific backend: include its source file and map
 * init() and impl_cpu_zero_denormals onto the backend's functions. */
# if defined (__i386__) || defined (__x86_64__)
#include "cpu-x86.c"
#define init(t) x86_init(t)
#define impl_cpu_zero_denormals x86_zero_denormals
# elif defined (__arm__) || defined (__aarch64__)
#include "cpu-arm.c"
#define init(t) arm_init(t)
#define impl_cpu_zero_denormals arm_zero_denormals
# else
/* No backend for this architecture: init() expands to nothing and the
 * zero_denormals entry of the method table is NULL.
 * NOTE(review): impl_init() calls impl_cpu_zero_denormals(...) directly; with
 * this NULL definition that call does not look compilable on such
 * architectures -- confirm and guard the call site. */
#define init(t)
#define impl_cpu_zero_denormals NULL
#endif
static uint32_t
@ -197,6 +200,7 @@ static const struct spa_cpu_methods impl_cpu = {
.get_count = impl_cpu_get_count,
.get_max_align = impl_cpu_get_max_align,
.get_vm_type = impl_cpu_get_vm_type,
.zero_denormals = impl_cpu_zero_denormals,
};
static int impl_get_interface(struct spa_handle *handle, const char *type, void **interface)
@ -265,6 +269,8 @@ impl_init(const struct spa_handle_factory *factory,
this->flags = atoi(str);
if ((str = spa_dict_lookup(info, SPA_KEY_CPU_VM_TYPE)) != NULL)
this->vm_type = atoi(str);
if ((str = spa_dict_lookup(info, SPA_KEY_CPU_ZERO_DENORMALS)) != NULL)
impl_cpu_zero_denormals(this, spa_atob(str));
}
spa_log_debug(this->log, "%p: count:%d align:%d flags:%08x",