audioconvert: make separate noise functions

So that we can reuse the optimized versions in the unoptimized noise
functions.
Do the allocation a little differently so that we can align everything
from the start.
This commit is contained in:
Wim Taymans 2022-09-29 20:34:15 +02:00
parent 14028c67cd
commit a145c42ec4
4 changed files with 165 additions and 98 deletions

View file

@ -230,38 +230,57 @@ lcnoise(uint32_t *state)
return (int32_t)(*state);
}
static inline void update_noise_c(struct convert *conv, uint32_t n_samples)
/**
 * "None" noise generator: zero-fills the first n_samples floats of noise.
 *
 * conv is not read; the parameter is there so the signature is the same
 * as the other conv_noise_*_c generators.
 */
void conv_noise_none_c(struct convert *conv, float *noise, uint32_t n_samples)
{
	const size_t n_bytes = n_samples * sizeof(float);

	memset(noise, 0, n_bytes);
}
/**
 * Rectangular noise generator (NOISE_METHOD_RECTANGULAR).
 *
 * Writes n_samples values to noise. Each value is one lcnoise() draw taken
 * from the generator state in conv->random[0], multiplied by conv->scale.
 *
 * The destination is the noise argument supplied by the caller; it must not
 * be shadowed by a local that points at conv->noise.
 */
void conv_noise_rect_c(struct convert *conv, float *noise, uint32_t n_samples)
{
	uint32_t n;
	uint32_t *state = &conv->random[0];
	const float scale = conv->scale;

	for (n = 0; n < n_samples; n++)
		noise[n] = lcnoise(state) * scale;
}
/*
 * Triangular noise generator (NOISE_METHOD_TRIANGULAR).
 *
 * Writes n_samples values to noise. Each value is the difference of two
 * lcnoise() draws taken from the state in conv->random[0], multiplied by
 * conv->scale.
 */
void conv_noise_tri_c(struct convert *conv, float *noise, uint32_t n_samples)
{
uint32_t n;
const float scale = conv->scale;
uint32_t *state = &conv->random[0];
for (n = 0; n < n_samples; n++)
/* Two draws per sample, both through the same state pointer. C does not
 * specify which operand of '-' is evaluated first.
 * NOTE(review): the subtraction is done on lcnoise()'s int32_t return
 * values and can leave the int32_t range - confirm wrap-around is intended. */
noise[n] = (lcnoise(state) - lcnoise(state)) * scale;
}
/**
 * Triangular-HF noise generator (NOISE_METHOD_TRIANGULAR_HF).
 *
 * Writes n_samples values to noise. Each value is the difference between the
 * current lcnoise() draw and the previous one, multiplied by conv->scale.
 * The last draw is loaded from and stored back to conv->prev[0], so the
 * sequence continues across calls.
 *
 * This function does not look at conv->noise_method; every method has its
 * own conv_noise_*_c generator.
 */
void conv_noise_tri_hf_c(struct convert *conv, float *noise, uint32_t n_samples)
{
	uint32_t n;
	const float scale = conv->scale;
	uint32_t *state = &conv->random[0];
	int32_t *prev = &conv->prev[0], old, new;

	old = *prev;
	for (n = 0; n < n_samples; n++) {
		new = lcnoise(state);
		/* NOTE(review): new - old is an int32_t subtraction and can leave
		 * the int32_t range - confirm wrap-around is intended. */
		noise[n] = (new - old) * scale;
		old = new;
	}
	*prev = old;
}
/**
 * Pattern generator (NOISE_METHOD_PATTERN).
 *
 * Keeps a running counter in conv->prev[0] and increments it once per
 * sample. A sample is conv->scale while bit 10 of the counter is clear and
 * 0 while it is set. The counter is written back after the last sample.
 */
void conv_noise_pattern_c(struct convert *conv, float *noise, uint32_t n_samples)
{
	const float scale = conv->scale;
	int32_t *prev = &conv->prev[0];
	int32_t count = *prev;
	uint32_t i;

	for (i = 0; i < n_samples; i++, count++) {
		const int32_t bit = (count >> 10) & 1;

		noise[i] = scale * (1 - bit);
	}
	*prev = count;
}
#define MAKE_D_noise(dname,dtype,func) \
@ -271,7 +290,7 @@ void conv_f32d_to_ ##dname## d_noise_c(struct convert *conv, \
{ \
uint32_t i, j, k, chunk, n_channels = conv->n_channels, noise_size = conv->noise_size; \
float *noise = conv->noise; \
update_noise_c(conv, SPA_MIN(n_samples, noise_size)); \
convert_update_noise(conv, noise, SPA_MIN(n_samples, noise_size)); \
for (i = 0; i < n_channels; i++) { \
const float *s = src[i]; \
dtype *d = dst[i]; \
@ -292,7 +311,7 @@ void conv_f32d_to_ ##dname## _noise_c(struct convert *conv, \
dtype *d = dst[0]; \
uint32_t i, j, k, chunk, n_channels = conv->n_channels, noise_size = conv->noise_size; \
float *noise = conv->noise; \
update_noise_c(conv, SPA_MIN(n_samples, noise_size)); \
convert_update_noise(conv, noise, SPA_MIN(n_samples, noise_size)); \
for (j = 0; j < n_samples;) { \
chunk = SPA_MIN(n_samples - j, noise_size); \
for (k = 0; k < chunk; k++, j++) { \
@ -342,9 +361,10 @@ void conv_f32d_to_ ##dname## d_shaped_c(struct convert *conv, \
uint32_t n_samples) \
{ \
uint32_t i, j, k, chunk, n_channels = conv->n_channels, noise_size = conv->noise_size; \
const float *noise = conv->noise, *ns = conv->ns; \
float *noise = conv->noise; \
const float *ns = conv->ns; \
uint32_t n, n_ns = conv->n_ns; \
update_noise_c(conv, SPA_MIN(n_samples, noise_size)); \
convert_update_noise(conv, noise, SPA_MIN(n_samples, noise_size)); \
for (i = 0; i < n_channels; i++) { \
const float *s = src[i]; \
dtype *d = dst[i]; \
@ -366,9 +386,10 @@ void conv_f32d_to_ ##dname## _shaped_c(struct convert *conv, \
{ \
dtype *d0 = dst[0]; \
uint32_t i, j, k, chunk, n_channels = conv->n_channels, noise_size = conv->noise_size; \
const float *noise = conv->noise, *ns = conv->ns; \
float *noise = conv->noise; \
const float *ns = conv->ns; \
uint32_t n, n_ns = conv->n_ns; \
update_noise_c(conv, SPA_MIN(n_samples, noise_size)); \
convert_update_noise(conv, noise, SPA_MIN(n_samples, noise_size)); \
for (i = 0; i < n_channels; i++) { \
const float *s = src[i]; \
dtype *d = &d0[i]; \

View file

@ -576,11 +576,10 @@ conv_f32d_to_s32_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi
i; \
})
static inline void update_noise_rect_sse2(struct convert *conv, uint32_t n_samples)
void conv_noise_rect_sse2(struct convert *conv, float *noise, uint32_t n_samples)
{
uint32_t n;
const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
const uint32_t *r = conv->random;
__m128 scale = _mm_set1_ps(conv->scale);
__m128i in[1];
__m128 out[1];
@ -593,11 +592,10 @@ static inline void update_noise_rect_sse2(struct convert *conv, uint32_t n_sampl
}
}
static inline void update_noise_tri_sse2(struct convert *conv, uint32_t n_samples)
void conv_noise_tri_sse2(struct convert *conv, float *noise, uint32_t n_samples)
{
uint32_t n;
const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
const uint32_t *r = conv->random;
__m128 scale = _mm_set1_ps(conv->scale);
__m128i in[1];
__m128 out[1];
@ -610,12 +608,11 @@ static inline void update_noise_tri_sse2(struct convert *conv, uint32_t n_sample
}
}
static inline void update_noise_tri_hf_sse2(struct convert *conv, uint32_t n_samples)
void conv_noise_tri_hf_sse2(struct convert *conv, float *noise, uint32_t n_samples)
{
uint32_t n;
int32_t *p = SPA_PTR_ALIGN(conv->prev, 16, int32_t);
const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
int32_t *p = conv->prev;
const uint32_t *r = conv->random;
__m128 scale = _mm_set1_ps(conv->scale);
__m128i in[1], old[1], new[1];
__m128 out[1];
@ -632,42 +629,11 @@ static inline void update_noise_tri_hf_sse2(struct convert *conv, uint32_t n_sam
_mm_store_si128((__m128i*)p, old[0]);
}
/**
 * Pattern generator writing into the 16-byte aligned part of conv->noise.
 *
 * A running counter is read from the 16-byte aligned part of conv->prev and
 * incremented once per sample. A sample is conv->scale while bit 10 of the
 * counter is clear and 0 while it is set. The counter is stored back at the
 * end.
 */
static inline void update_noise_pattern_sse2(struct convert *conv, uint32_t n_samples)
{
	int32_t *counter_slot = SPA_PTR_ALIGN(conv->prev, 16, int32_t);
	float *out = SPA_PTR_ALIGN(conv->noise, 16, float);
	int32_t counter = *counter_slot;
	uint32_t i;

	for (i = 0; i < n_samples; i++, counter++) {
		const int32_t bit = (counter >> 10) & 1;

		out[i] = conv->scale * (1 - bit);
	}
	*counter_slot = counter;
}
static inline void update_noise_sse2(struct convert *conv, uint32_t n_samples)
{
switch (conv->noise_method) {
case DITHER_METHOD_RECTANGULAR:
update_noise_rect_sse2(conv, n_samples);
break;
case DITHER_METHOD_TRIANGULAR:
update_noise_tri_sse2(conv, n_samples);
break;
case DITHER_METHOD_TRIANGULAR_HF:
update_noise_tri_hf_sse2(conv, n_samples);
break;
case NOISE_METHOD_PATTERN:
update_noise_pattern_sse2(conv, n_samples);
break;
}
}
static void
conv_f32d_to_s32_1s_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
uint32_t n_channels, uint32_t n_samples)
float *noise, uint32_t n_channels, uint32_t n_samples)
{
const float *s = src;
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
int32_t *d = dst;
uint32_t n, unrolled;
__m128 in[1];
@ -713,14 +679,16 @@ conv_f32d_to_s32_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst[], con
{
int32_t *d = dst[0];
uint32_t i, k, chunk, n_channels = conv->n_channels;
float *noise = conv->noise;
update_noise_sse2(conv, SPA_MIN(n_samples, conv->noise_size));
convert_update_noise(conv, noise, SPA_MIN(n_samples, conv->noise_size));
for(i = 0; i < n_channels; i++) {
const float *s = src[i];
for(k = 0; k < n_samples; k += chunk) {
chunk = SPA_MIN(n_samples - k, conv->noise_size);
conv_f32d_to_s32_1s_noise_sse2(conv, &d[i + k*n_channels], &s[k], n_channels, chunk);
conv_f32d_to_s32_1s_noise_sse2(conv, &d[i + k*n_channels],
&s[k], noise, n_channels, chunk);
}
}
}
@ -1298,11 +1266,10 @@ conv_f32d_to_s16_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi
static void
conv_f32d_to_s16_1s_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
uint32_t n_channels, uint32_t n_samples)
const float *noise, uint32_t n_channels, uint32_t n_samples)
{
const float *s0 = src;
int16_t *d = dst;
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
uint32_t n, unrolled;
__m128 in[2];
__m128i out[2];
@ -1349,25 +1316,26 @@ conv_f32d_to_s16_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst[], con
{
int16_t *d = dst[0];
uint32_t i, k, chunk, n_channels = conv->n_channels;
float *noise = conv->noise;
update_noise_sse2(conv, SPA_MIN(n_samples, conv->noise_size));
convert_update_noise(conv, noise, SPA_MIN(n_samples, conv->noise_size));
for(i = 0; i < n_channels; i++) {
const float *s = src[i];
for(k = 0; k < n_samples; k += chunk) {
chunk = SPA_MIN(n_samples - k, conv->noise_size);
conv_f32d_to_s16_1s_noise_sse2(conv, &d[i + k*n_channels], &s[k], n_channels, chunk);
conv_f32d_to_s16_1s_noise_sse2(conv, &d[i + k*n_channels],
&s[k], noise, n_channels, chunk);
}
}
}
static void
conv_f32_to_s16_1_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
uint32_t n_samples)
const float *noise, uint32_t n_samples)
{
const float *s = src;
int16_t *d = dst;
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
uint32_t n, unrolled;
__m128 in[2];
__m128i out[2];
@ -1403,15 +1371,16 @@ conv_f32d_to_s16d_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst[], co
uint32_t n_samples)
{
uint32_t i, k, chunk, n_channels = conv->n_channels;
float *noise = conv->noise;
update_noise_sse2(conv, SPA_MIN(n_samples, conv->noise_size));
convert_update_noise(conv, noise, SPA_MIN(n_samples, conv->noise_size));
for(i = 0; i < n_channels; i++) {
const float *s = src[i];
int16_t *d = dst[i];
for(k = 0; k < n_samples; k += chunk) {
chunk = SPA_MIN(n_samples - k, conv->noise_size);
conv_f32_to_s16_1_noise_sse2(conv, &d[k], &s[k], chunk);
conv_f32_to_s16_1_noise_sse2(conv, &d[k], &s[k], noise, chunk);
}
}
}

View file

@ -32,7 +32,8 @@
#include "fmt-ops.h"
#define DITHER_SIZE (1<<10)
#define NOISE_SIZE (1<<10)
#define RANDOM_SIZE (16)
typedef void (*convert_func_t) (struct convert *conv, void * SPA_RESTRICT dst[],
const void * SPA_RESTRICT src[], uint32_t n_samples);
@ -359,7 +360,6 @@ static const struct conv_info *find_conv_info(uint32_t src_fmt, uint32_t dst_fmt
uint32_t n_channels, uint32_t cpu_flags, uint32_t conv_flags)
{
size_t i;
for (i = 0; i < SPA_N_ELEMENTS(conv_table); i++) {
if (conv_table[i].src_fmt == src_fmt &&
conv_table[i].dst_fmt == dst_fmt &&
@ -371,11 +371,52 @@ static const struct conv_info *find_conv_info(uint32_t src_fmt, uint32_t dst_fmt
return NULL;
}
/** Signature shared by all noise generators: fill noise[0..n_samples) for conv. */
typedef void (*noise_func_t) (struct convert *conv, float * noise, uint32_t n_samples);

/** One entry of the noise generator lookup table. */
struct noise_info {
	uint32_t method;	/* NOISE_METHOD_* value implemented by this entry */
	noise_func_t noise;	/* generator function */
	const char *name;	/* function name, stringified by MAKE */
	uint32_t cpu_flags;	/* flags checked with MATCH_CPU_FLAGS; 0 for the C versions */
};

#define MAKE(method,func,...) \
	{ NOISE_METHOD_ ##method, func, #func , __VA_ARGS__ }

/*
 * find_noise_info() returns the first matching entry, so the SSE2 versions
 * are listed before the plain C versions of the same method.
 * The C entries pass cpu_flags 0 explicitly rather than an empty __VA_ARGS__.
 */
static const struct noise_info noise_table[] =
{
#if defined (HAVE_SSE2)
	MAKE(RECTANGULAR, conv_noise_rect_sse2, SPA_CPU_FLAG_SSE2),
	MAKE(TRIANGULAR, conv_noise_tri_sse2, SPA_CPU_FLAG_SSE2),
	MAKE(TRIANGULAR_HF, conv_noise_tri_hf_sse2, SPA_CPU_FLAG_SSE2),
#endif
	MAKE(NONE, conv_noise_none_c, 0),
	MAKE(RECTANGULAR, conv_noise_rect_c, 0),
	MAKE(TRIANGULAR, conv_noise_tri_c, 0),
	MAKE(TRIANGULAR_HF, conv_noise_tri_hf_c, 0),
	MAKE(PATTERN, conv_noise_pattern_c, 0),
};
#undef MAKE
static const struct noise_info *find_noise_info(uint32_t method,
uint32_t cpu_flags)
{
size_t i;
for (i = 0; i < SPA_N_ELEMENTS(noise_table); i++) {
if (noise_table[i].method == method &&
MATCH_CPU_FLAGS(noise_table[i].cpu_flags, cpu_flags))
return &noise_table[i];
}
return NULL;
}
/**
 * Release the memory owned by conv and clear the pointers into it.
 *
 * convert_init() makes one allocation, conv->data, and derives conv->noise,
 * conv->random and conv->prev from it (aligned / offset pointers). Only
 * conv->data may therefore be passed to free(); the derived pointers are
 * just reset so they do not dangle.
 */
static void impl_convert_free(struct convert *conv)
{
	conv->process = NULL;
	conv->noise = NULL;
	conv->random = NULL;
	conv->prev = NULL;
	free(conv->data);
	conv->data = NULL;
}
static bool need_dither(uint32_t format)
@ -449,7 +490,8 @@ int convert_init(struct convert *conv)
{
const struct conv_info *info;
const struct dither_info *dinfo;
uint32_t i, conv_flags;
const struct noise_info *ninfo;
uint32_t i, conv_flags, data_size[3];
conv->scale = 1.0f / (float)(INT32_MAX);
@ -494,17 +536,31 @@ int convert_init(struct convert *conv)
if (info == NULL)
return -ENOTSUP;
conv->noise_size = DITHER_SIZE;
conv->noise = calloc(conv->noise_size + 16 +
FMT_OPS_MAX_ALIGN / sizeof(float), sizeof(float));
if (conv->noise == NULL)
ninfo = find_noise_info(conv->noise_method, conv->cpu_flags);
if (ninfo == NULL)
return -ENOTSUP;
conv->noise_size = NOISE_SIZE;
data_size[0] = SPA_ROUND_UP(conv->noise_size * sizeof(float), FMT_OPS_MAX_ALIGN);
data_size[1] = SPA_ROUND_UP(RANDOM_SIZE * sizeof(uint32_t), FMT_OPS_MAX_ALIGN);
data_size[2] = SPA_ROUND_UP(RANDOM_SIZE * sizeof(int32_t), FMT_OPS_MAX_ALIGN);
conv->data = calloc(FMT_OPS_MAX_ALIGN +
data_size[0] + data_size[1] + data_size[2], 1);
if (conv->data == NULL)
return -errno;
for (i = 0; i < SPA_N_ELEMENTS(conv->random); i++)
conv->noise = SPA_PTR_ALIGN(conv->data, FMT_OPS_MAX_ALIGN, float);
conv->random = SPA_PTROFF(conv->noise, data_size[0], uint32_t);
conv->prev = SPA_PTROFF(conv->random, data_size[1], int32_t);
for (i = 0; i < RANDOM_SIZE; i++)
conv->random[i] = random();
conv->is_passthrough = conv->src_fmt == conv->dst_fmt;
conv->cpu_flags = info->cpu_flags;
conv->update_noise = ninfo->noise;
conv->process = info->process;
conv->free = impl_convert_free;
conv->func_name = info->name;

View file

@ -226,8 +226,8 @@ struct convert {
unsigned int is_passthrough:1;
float scale;
uint32_t random[16 + FMT_OPS_MAX_ALIGN/4];
int32_t prev[16 + FMT_OPS_MAX_ALIGN/4];
uint32_t *random;
int32_t *prev;
#define NOISE_METHOD_NONE 0
#define NOISE_METHOD_RECTANGULAR 1
#define NOISE_METHOD_TRIANGULAR 2
@ -240,9 +240,12 @@ struct convert {
uint32_t n_ns;
struct shaper shaper[64];
void (*update_noise) (struct convert *conv, float *noise, uint32_t n_samples);
void (*process) (struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_samples);
void (*free) (struct convert *conv);
void *data;
};
int convert_init(struct convert *conv);
@ -276,12 +279,30 @@ static inline uint32_t dither_method_from_label(const char *label)
return DITHER_METHOD_NONE;
}
#define convert_update_noise(conv,...) (conv)->update_noise(conv, __VA_ARGS__)
#define convert_process(conv,...) (conv)->process(conv, __VA_ARGS__)
#define convert_free(conv) (conv)->free(conv)
/*
 * Declares one noise generator, conv_noise_<name>_<arch>(). The macro is
 * only needed for the declarations below and is undefined right after them.
 */
#define DEFINE_NOISE_FUNCTION(name,arch) \
void conv_noise_##name##_##arch(struct convert *conv, float *noise, \
uint32_t n_samples)
DEFINE_NOISE_FUNCTION(none, c);
DEFINE_NOISE_FUNCTION(rect, c);
DEFINE_NOISE_FUNCTION(tri, c);
DEFINE_NOISE_FUNCTION(tri_hf, c);
DEFINE_NOISE_FUNCTION(pattern, c);
#if defined(HAVE_SSE2)
DEFINE_NOISE_FUNCTION(rect, sse2);
DEFINE_NOISE_FUNCTION(tri, sse2);
DEFINE_NOISE_FUNCTION(tri_hf, sse2);
#endif
#undef DEFINE_NOISE_FUNCTION
/*
 * Declares one conversion function, conv_<name>_<arch>(). The last line of
 * the macro has no line continuation, so the definition ends there.
 */
#define DEFINE_FUNCTION(name,arch) \
void conv_##name##_##arch(struct convert *conv, void * SPA_RESTRICT dst[], \
const void * SPA_RESTRICT src[], uint32_t n_samples)
DEFINE_FUNCTION(copy8d, c);
DEFINE_FUNCTION(copy8, c);