audioconvert: make separate noise functions

This lets us reuse the optimized versions in the unoptimized noise
functions.
Do the allocation a little differently so that we can align everything
from the start.
This commit is contained in:
Wim Taymans 2022-09-29 20:34:15 +02:00
parent 14028c67cd
commit a145c42ec4
4 changed files with 165 additions and 98 deletions

View file

@ -576,11 +576,10 @@ conv_f32d_to_s32_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi
i; \
})
static inline void update_noise_rect_sse2(struct convert *conv, uint32_t n_samples)
void conv_noise_rect_sse2(struct convert *conv, float *noise, uint32_t n_samples)
{
uint32_t n;
const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
const uint32_t *r = conv->random;
__m128 scale = _mm_set1_ps(conv->scale);
__m128i in[1];
__m128 out[1];
@ -593,11 +592,10 @@ static inline void update_noise_rect_sse2(struct convert *conv, uint32_t n_sampl
}
}
static inline void update_noise_tri_sse2(struct convert *conv, uint32_t n_samples)
void conv_noise_tri_sse2(struct convert *conv, float *noise, uint32_t n_samples)
{
uint32_t n;
const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
const uint32_t *r = conv->random;
__m128 scale = _mm_set1_ps(conv->scale);
__m128i in[1];
__m128 out[1];
@ -610,12 +608,11 @@ static inline void update_noise_tri_sse2(struct convert *conv, uint32_t n_sample
}
}
static inline void update_noise_tri_hf_sse2(struct convert *conv, uint32_t n_samples)
void conv_noise_tri_hf_sse2(struct convert *conv, float *noise, uint32_t n_samples)
{
uint32_t n;
int32_t *p = SPA_PTR_ALIGN(conv->prev, 16, int32_t);
const uint32_t *r = SPA_PTR_ALIGN(conv->random, 16, uint32_t);
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
int32_t *p = conv->prev;
const uint32_t *r = conv->random;
__m128 scale = _mm_set1_ps(conv->scale);
__m128i in[1], old[1], new[1];
__m128 out[1];
@ -632,42 +629,11 @@ static inline void update_noise_tri_hf_sse2(struct convert *conv, uint32_t n_sam
_mm_store_si128((__m128i*)p, old[0]);
}
/*
 * Fill the noise buffer with a two-level pattern derived from a running
 * counter.
 *
 * conv:      converter state; reads conv->scale and uses conv->prev and
 *            conv->noise as the counter storage and output buffer.
 * n_samples: number of float entries written to the noise buffer.
 *
 * Each entry is conv->scale * (1 - bit 10 of the counter), i.e. either
 * conv->scale or 0, and the counter advances by one per sample, so the
 * level flips every 1024 samples. The counter is loaded from the first
 * int32_t of the prev buffer and stored back there on exit.
 */
static inline void update_noise_pattern_sse2(struct convert *conv, uint32_t n_samples)
{
uint32_t n;
/* presumably SPA_PTR_ALIGN yields a 16-byte aligned pointer into the buffer -- confirm against the macro */
int32_t *p = SPA_PTR_ALIGN(conv->prev, 16, int32_t), op;
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
/* work on a local copy of the persistent counter */
op = *p;
for (n = 0; n < n_samples; n++)
/* NOTE(review): op is a signed int32_t incremented without bound; overflow and >> on a negative value are not portable C */
noise[n] = conv->scale * (1-((op++>>10)&1));
/* persist the advanced counter for the next call */
*p = op;
}
/*
 * Refresh the noise buffer by dispatching on conv->noise_method to the
 * matching update_noise_*_sse2 helper.
 *
 * conv:      converter state, passed through unchanged to the helper.
 * n_samples: number of noise samples to generate, passed through unchanged.
 *
 * There is no default case: a noise_method value not listed below leaves
 * the noise buffer untouched.
 */
static inline void update_noise_sse2(struct convert *conv, uint32_t n_samples)
{
switch (conv->noise_method) {
case DITHER_METHOD_RECTANGULAR:
update_noise_rect_sse2(conv, n_samples);
break;
case DITHER_METHOD_TRIANGULAR:
update_noise_tri_sse2(conv, n_samples);
break;
case DITHER_METHOD_TRIANGULAR_HF:
update_noise_tri_hf_sse2(conv, n_samples);
break;
case NOISE_METHOD_PATTERN:
update_noise_pattern_sse2(conv, n_samples);
break;
}
}
static void
conv_f32d_to_s32_1s_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
uint32_t n_channels, uint32_t n_samples)
float *noise, uint32_t n_channels, uint32_t n_samples)
{
const float *s = src;
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
int32_t *d = dst;
uint32_t n, unrolled;
__m128 in[1];
@ -713,14 +679,16 @@ conv_f32d_to_s32_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst[], con
{
int32_t *d = dst[0];
uint32_t i, k, chunk, n_channels = conv->n_channels;
float *noise = conv->noise;
update_noise_sse2(conv, SPA_MIN(n_samples, conv->noise_size));
convert_update_noise(conv, noise, SPA_MIN(n_samples, conv->noise_size));
for(i = 0; i < n_channels; i++) {
const float *s = src[i];
for(k = 0; k < n_samples; k += chunk) {
chunk = SPA_MIN(n_samples - k, conv->noise_size);
conv_f32d_to_s32_1s_noise_sse2(conv, &d[i + k*n_channels], &s[k], n_channels, chunk);
conv_f32d_to_s32_1s_noise_sse2(conv, &d[i + k*n_channels],
&s[k], noise, n_channels, chunk);
}
}
}
@ -1298,11 +1266,10 @@ conv_f32d_to_s16_sse2(struct convert *conv, void * SPA_RESTRICT dst[], const voi
static void
conv_f32d_to_s16_1s_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
uint32_t n_channels, uint32_t n_samples)
const float *noise, uint32_t n_channels, uint32_t n_samples)
{
const float *s0 = src;
int16_t *d = dst;
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
uint32_t n, unrolled;
__m128 in[2];
__m128i out[2];
@ -1349,25 +1316,26 @@ conv_f32d_to_s16_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst[], con
{
int16_t *d = dst[0];
uint32_t i, k, chunk, n_channels = conv->n_channels;
float *noise = conv->noise;
update_noise_sse2(conv, SPA_MIN(n_samples, conv->noise_size));
convert_update_noise(conv, noise, SPA_MIN(n_samples, conv->noise_size));
for(i = 0; i < n_channels; i++) {
const float *s = src[i];
for(k = 0; k < n_samples; k += chunk) {
chunk = SPA_MIN(n_samples - k, conv->noise_size);
conv_f32d_to_s16_1s_noise_sse2(conv, &d[i + k*n_channels], &s[k], n_channels, chunk);
conv_f32d_to_s16_1s_noise_sse2(conv, &d[i + k*n_channels],
&s[k], noise, n_channels, chunk);
}
}
}
static void
conv_f32_to_s16_1_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
uint32_t n_samples)
const float *noise, uint32_t n_samples)
{
const float *s = src;
int16_t *d = dst;
float *noise = SPA_PTR_ALIGN(conv->noise, 16, float);
uint32_t n, unrolled;
__m128 in[2];
__m128i out[2];
@ -1403,15 +1371,16 @@ conv_f32d_to_s16d_noise_sse2(struct convert *conv, void * SPA_RESTRICT dst[], co
uint32_t n_samples)
{
uint32_t i, k, chunk, n_channels = conv->n_channels;
float *noise = conv->noise;
update_noise_sse2(conv, SPA_MIN(n_samples, conv->noise_size));
convert_update_noise(conv, noise, SPA_MIN(n_samples, conv->noise_size));
for(i = 0; i < n_channels; i++) {
const float *s = src[i];
int16_t *d = dst[i];
for(k = 0; k < n_samples; k += chunk) {
chunk = SPA_MIN(n_samples - k, conv->noise_size);
conv_f32_to_s16_1_noise_sse2(conv, &d[k], &s[k], chunk);
conv_f32_to_s16_1_noise_sse2(conv, &d[k], &s[k], noise, chunk);
}
}
}