remap: support S32NE work format

So far PulseAudio only supported two different work formats: S16NE if
it's sufficient to represent the input and output formats without loss
of precision and FLOAT32NE in all other cases. For systems that use
S32NE exclusively, this results in unnecessary conversions from S32NE to
FLOAT32NE and back again.

Add S32NE remap operations and make use of them (for the COPY and
TRIVIAL resamplers) if both input and output format are S32NE. This
avoids the back and forth conversions between S32NE and FLOAT32NE,
significantly improving performance for those cases.
This commit is contained in:
Sascha Silbe 2019-03-26 10:35:55 +01:00 committed by Arun Raghavan
parent 1e4fb61436
commit 034b77823a
7 changed files with 327 additions and 14 deletions

View file

@ -51,6 +51,24 @@ static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
}
}
/* Expand a mono S32NE stream to stereo by writing every input sample to
 * both output channels. n is the number of input samples; m is not used. */
static void remap_mono_to_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned remaining;

    for (remaining = n; remaining > 0; remaining--) {
        const int32_t sample = *src++;

        /* left and right receive the same value */
        *dst++ = sample;
        *dst++ = sample;
    }
}
static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@ -87,6 +105,28 @@ static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
}
}
/* Downmix interleaved stereo S32NE frames to mono by averaging the two
 * channels. n is the number of frames; m is not used. */
static void remap_stereo_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned remaining = n;

    while (remaining > 0) {
        const int32_t left = src[0];
        const int32_t right = src[1];

        /* Halve each channel before adding so the sum cannot overflow;
         * the result may be off by +/- 1 from the ideal average. */
        *dst++ = left / 2 + right / 2;

        src += 2;
        remaining--;
    }
}
static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@ -123,6 +163,24 @@ static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
}
}
/* Expand a mono S32NE stream to four channels by writing every input
 * sample to all four output channels. n is the number of input samples;
 * m is not used. */
static void remap_mono_to_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned remaining;

    for (remaining = n; remaining > 0; remaining--) {
        const int32_t sample = *src++;

        dst[0] = sample;
        dst[1] = sample;
        dst[2] = sample;
        dst[3] = sample;
        dst += 4;
    }
}
static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@ -159,6 +217,28 @@ static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
}
}
/* Downmix interleaved 4-channel S32NE frames to mono by averaging the
 * four channels. n is the number of frames; m is not used. */
static void remap_ch4_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned remaining = n;

    while (remaining > 0) {
        /* Quarter each channel before adding so the sum cannot overflow;
         * the result may be off by +/- 3 from the ideal average. */
        int32_t mixed = src[0] / 4;

        mixed += src[1] / 4;
        mixed += src[2] / 4;
        mixed += src[3] / 4;
        *dst++ = mixed;

        src += 4;
        remaining--;
    }
}
static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@ -208,6 +288,36 @@ static void remap_channels_matrix_s16ne_c(pa_remap_t *m, int16_t *dst, const int
}
}
/* Add two S32 samples, clamping the result to the int32_t range instead of
 * overflowing (signed overflow is undefined behaviour in C). */
static int32_t remap_add_sat_s32(int32_t a, int32_t b) {
    const int64_t s32_max = 0x7fffffff;
    const int64_t s32_min = -s32_max - 1;
    const int64_t sum = (int64_t) a + b;

    if (sum > s32_max)
        return (int32_t) s32_max;
    if (sum < s32_min)
        return (int32_t) s32_min;
    return (int32_t) sum;
}

/* Generic S32NE remapper: mixes n interleaved frames from src (with
 * m->i_ss.channels channels) into dst (with m->o_ss.channels channels)
 * according to the fixed-point gains in m->map_table_i[oc][ic].
 *
 * A gain of 0x10000 or more is applied as unity (plain addition), gains
 * <= 0 are skipped, and anything in between scales the sample by
 * gain / 0x10000. Contributions to one output channel are accumulated with
 * saturation, so a mix that exceeds the 32-bit range clips instead of
 * invoking undefined behaviour. */
static void remap_channels_matrix_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned oc, ic, i;
    unsigned n_ic, n_oc;

    n_ic = m->i_ss.channels;
    n_oc = m->o_ss.channels;

    /* Outputs are accumulated in place, so start from silence. */
    memset(dst, 0, n * sizeof(int32_t) * n_oc);

    for (oc = 0; oc < n_oc; oc++) {
        for (ic = 0; ic < n_ic; ic++) {
            int32_t *d = dst + oc;
            const int32_t *s = src + ic;
            int32_t vol = m->map_table_i[oc][ic];

            /* This input channel does not contribute to this output. */
            if (vol <= 0)
                continue;

            if (vol >= 0x10000) {
                /* Unity gain: no multiplication needed. */
                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
                    *d = remap_add_sat_s32(*d, *s);
            } else {
                /* Scale in 64 bit so the product cannot overflow; with
                 * vol < 0x10000 the shifted result fits in int32_t. */
                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
                    *d = remap_add_sat_s32(*d, (int32_t) (((int64_t) *s * vol) >> 16));
            }
        }
    }
}
static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned oc, ic, i;
unsigned n_ic, n_oc;
@ -309,6 +419,44 @@ static void remap_arrange_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
}
}
/* Produce a mono S32NE stream by picking one channel out of each
 * interleaved input frame. The input channel index is read from the int8_t
 * array in m->state (entry 0); the input frame width is m->i_ss.channels;
 * n is the number of frames.
 *
 * A negative index is treated as "no input channel" and yields silence,
 * matching the stereo and 4-channel arrange variants, rather than reading
 * in front of the source buffer. */
static void remap_arrange_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    const unsigned n_ic = m->i_ss.channels;
    const int8_t *arrange = m->state;
    const int ic0 = arrange[0];

    if (ic0 < 0) {
        for (; n > 0; n--)
            *dst++ = 0;
        return;
    }

    src += ic0;
    for (; n > 0; n--) {
        *dst++ = *src;
        src += n_ic;
    }
}
/* Build stereo S32NE frames by picking, for each of the two output
 * channels, one channel of the interleaved input frame. The input channel
 * indices come from the int8_t array in m->state; a negative index means
 * the output channel is filled with 0. n is the number of frames. */
static void remap_arrange_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    const int first_ic = map[0];
    const int second_ic = map[1];
    unsigned remaining;

    for (remaining = n; remaining > 0; remaining--, src += stride, dst += 2) {
        dst[0] = first_ic < 0 ? 0 : src[first_ic];
        dst[1] = second_ic < 0 ? 0 : src[second_ic];
    }
}
/* Build 4-channel S32NE frames by picking, for each of the four output
 * channels, one channel of the interleaved input frame. The input channel
 * indices come from the int8_t array in m->state; a negative index means
 * the output channel is filled with 0. n is the number of frames. */
static void remap_arrange_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    const int8_t *map = m->state;
    const unsigned stride = m->i_ss.channels;
    int in_ch[4];
    unsigned ch, remaining;

    /* Widen the indices once, outside the per-frame loop. */
    for (ch = 0; ch < 4; ch++)
        in_ch[ch] = map[ch];

    for (remaining = n; remaining > 0; remaining--, src += stride) {
        for (ch = 0; ch < 4; ch++)
            *dst++ = in_ch[ch] < 0 ? 0 : src[in_ch[ch]];
    }
}
static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
const unsigned n_ic = m->i_ss.channels;
const int8_t *arrange = m->state;
@ -348,16 +496,19 @@ static void remap_arrange_ch4_float32ne_c(pa_remap_t *m, float *dst, const float
}
/* Install the remap implementation that matches the work format stored in
 * m->format: func_s16 for PA_SAMPLE_S16NE, func_s32 for PA_SAMPLE_S32NE and
 * func_float for PA_SAMPLE_FLOAT32NE. Any other format reaches
 * pa_assert_not_reached(). Both m and the selected function pointer are
 * checked with pa_assert(). */
void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
/* NOTE(review): the next two lines look like the old and the new parameter
 * list of the diff shown side by side - TODO confirm that only the second
 * one (with func_s32) belongs in the file. */
pa_do_remap_func_t func_float) {
pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float) {
pa_assert(m);
/* Dispatch on the sample format the remapper works in. */
if (m->format == PA_SAMPLE_S16NE)
m->do_remap = func_s16;
else if (m->format == PA_SAMPLE_S32NE)
m->do_remap = func_s32;
else if (m->format == PA_SAMPLE_FLOAT32NE)
m->do_remap = func_float;
else
pa_assert_not_reached();
/* The caller must have supplied a function for the selected format. */
pa_assert(m->do_remap);
}
static bool force_generic_code = false;
@ -374,6 +525,7 @@ static void init_remap_c(pa_remap_t *m) {
if (force_generic_code) {
pa_log_info("Forced to use generic matrix remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
(pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
(pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
return;
}
@ -383,12 +535,14 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c,
(pa_do_remap_func_t) remap_mono_to_stereo_s32ne_c,
(pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c);
} else if (n_ic == 2 && n_oc == 1 &&
m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
pa_log_info("Using stereo to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c,
(pa_do_remap_func_t) remap_stereo_to_mono_s32ne_c,
(pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c);
} else if (n_ic == 1 && n_oc == 4 &&
m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
@ -396,6 +550,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using mono to 4-channel remapping");
pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c,
(pa_do_remap_func_t) remap_mono_to_ch4_s32ne_c,
(pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c);
} else if (n_ic == 4 && n_oc == 1 &&
m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
@ -403,11 +558,13 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using 4-channel to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c,
(pa_do_remap_func_t) remap_ch4_to_mono_s32ne_c,
(pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c);
} else if (pa_setup_remap_arrange(m, arrange) && n_oc == 1) {
pa_log_info("Using mono arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c,
(pa_do_remap_func_t) remap_arrange_mono_s32ne_c,
(pa_do_remap_func_t) remap_arrange_mono_float32ne_c);
/* setup state */
@ -416,6 +573,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using stereo arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c,
(pa_do_remap_func_t) remap_arrange_stereo_s32ne_c,
(pa_do_remap_func_t) remap_arrange_stereo_float32ne_c);
/* setup state */
@ -424,6 +582,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using 4-channel arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c,
(pa_do_remap_func_t) remap_arrange_ch4_s32ne_c,
(pa_do_remap_func_t) remap_arrange_ch4_float32ne_c);
/* setup state */
@ -432,6 +591,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using generic matrix remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
(pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
(pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
}
}