remap: support S32NE work format

So far PulseAudio only supported two different work formats: S16NE if
it's sufficient to represent the input and output formats without loss
of precision and FLOAT32NE in all other cases. For systems that use
S32NE exclusively, this results in unnecessary conversions from S32NE to
FLOAT32NE and back again.

Add S32NE remap operations and make use of them (for the COPY and
TRIVIAL resamplers) if both input and output format are S32NE. This
avoids the back and forth conversions between S32NE and FLOAT32NE,
significantly improving performance for those cases.
This commit is contained in:
Sascha Silbe 2019-03-26 10:35:55 +01:00 committed by Arun Raghavan
parent 1e4fb61436
commit 034b77823a
7 changed files with 327 additions and 14 deletions

View file

@ -51,6 +51,24 @@ static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
} }
} }
/* Expand a mono S32NE stream to interleaved stereo.
 *
 * Reads n samples from src and writes 2 * n samples to dst; every input
 * sample is copied unchanged into both output channels. m is not used. */
static void remap_mono_to_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned frames_left;

    for (frames_left = n; frames_left > 0; frames_left--) {
        const int32_t sample = *src++;

        dst[1] = sample;
        dst[0] = sample;
        dst += 2;
    }
}
static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i; unsigned i;
@ -87,6 +105,28 @@ static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
} }
} }
/* Downmix interleaved stereo S32NE to mono.
 *
 * Reads 2 * n samples from src and writes n samples to dst. Each channel is
 * halved before the two are added, so the sum cannot overflow; the price is
 * a possible deviation of +/- 1 from the ideal average. m is not used. */
static void remap_stereo_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned frames_left;

    for (frames_left = n; frames_left > 0; frames_left--) {
        const int32_t half_first = src[0] / 2;
        const int32_t half_second = src[1] / 2;

        *dst++ = half_first + half_second;
        src += 2;
    }
}
static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i; unsigned i;
@ -123,6 +163,24 @@ static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
} }
} }
/* Expand a mono S32NE stream to four interleaved channels.
 *
 * Reads n samples from src and writes 4 * n samples to dst; every input
 * sample is copied unchanged into all four output channels. m is not
 * used. */
static void remap_mono_to_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned frames_left;

    for (frames_left = n; frames_left > 0; frames_left--) {
        const int32_t sample = *src++;

        dst[3] = sample;
        dst[2] = sample;
        dst[1] = sample;
        dst[0] = sample;
        dst += 4;
    }
}
static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i; unsigned i;
@ -159,6 +217,28 @@ static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
} }
} }
/* Downmix four interleaved S32NE channels to mono.
 *
 * Reads 4 * n samples from src and writes n samples to dst. Each channel is
 * divided by four before summing, so the sum cannot overflow; the price is
 * a possible deviation of +/- 3 from the ideal average. m is not used. */
static void remap_ch4_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    unsigned frames_left;

    for (frames_left = n; frames_left > 0; frames_left--) {
        int32_t mix = 0;
        unsigned ch;

        /* Quarter each channel first, then accumulate in channel order. */
        for (ch = 0; ch < 4; ch++)
            mix += src[ch] / 4;

        *dst++ = mix;
        src += 4;
    }
}
static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i; unsigned i;
@ -208,6 +288,36 @@ static void remap_channels_matrix_s16ne_c(pa_remap_t *m, int16_t *dst, const int
} }
} }
/* Add a 64-bit contribution to a 32-bit accumulator, saturating at the
 * int32_t limits instead of overflowing. */
static inline int32_t remap_saturating_add_s32(int32_t acc, int64_t term) {
    const int64_t sum = (int64_t) acc + term;

    if (sum > INT32_MAX)
        return INT32_MAX;
    if (sum < INT32_MIN)
        return INT32_MIN;

    return (int32_t) sum;
}

/* Generic matrix remapping for S32NE samples.
 *
 * For n frames, every output channel is the sum of the input channels
 * weighted by m->map_table_i[oc][ic], a 16.16 fixed-point gain. Gains <= 0
 * contribute nothing and gains >= 0x10000 are applied as unity.
 *
 * The accumulation is done in 64 bits and saturated to the int32_t range.
 * Adding directly into an int32_t would be signed overflow (undefined
 * behavior) as soon as several loud channels are mixed into one output. */
static void remap_channels_matrix_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    const unsigned n_ic = m->i_ss.channels;
    const unsigned n_oc = m->o_ss.channels;
    unsigned oc, ic, i;

    /* Outputs are accumulated, so they have to start from silence. */
    memset(dst, 0, n * sizeof(int32_t) * n_oc);

    for (oc = 0; oc < n_oc; oc++) {
        for (ic = 0; ic < n_ic; ic++) {
            int32_t *d = dst + oc;
            const int32_t *s = src + ic;
            const int32_t vol = m->map_table_i[oc][ic];

            if (vol <= 0)
                continue;

            if (vol >= 0x10000) {
                /* Unity gain: no scaling needed. */
                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
                    *d = remap_saturating_add_s32(*d, *s);
            } else {
                /* Scale in 64 bits; the product does not fit in 32. */
                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
                    *d = remap_saturating_add_s32(*d, ((int64_t) *s * vol) >> 16);
            }
        }
    }
}
static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned oc, ic, i; unsigned oc, ic, i;
unsigned n_ic, n_oc; unsigned n_ic, n_oc;
@ -309,6 +419,44 @@ static void remap_arrange_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
} }
} }
/* Pick a single input channel of an interleaved S32NE stream as the mono
 * output.
 *
 * m->state holds an int8_t table; entry 0 is the index of the input channel
 * to copy. A negative entry is treated as "no source channel" and produces
 * silence, matching the stereo and 4-channel arrange variants, instead of
 * reading in front of the input buffer. */
static void remap_arrange_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    const unsigned n_ic = m->i_ss.channels;
    const int8_t *arrange = m->state;
    const int ic0 = arrange[0];

    if (ic0 < 0) {
        for (; n > 0; n--)
            *dst++ = 0;
        return;
    }

    src += ic0;

    for (; n > 0; n--) {
        *dst++ = *src;
        src += n_ic;
    }
}
/* Build an interleaved stereo S32NE stream by picking input channels.
 *
 * m->state holds an int8_t table whose first two entries give, for each
 * output channel, the input channel to copy; a negative entry yields 0 for
 * that output channel. Processes n frames of m->i_ss.channels inputs. */
static void remap_arrange_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    const unsigned in_stride = m->i_ss.channels;
    const int8_t *table = m->state;
    const int pick0 = table[0];
    const int pick1 = table[1];
    unsigned frames_left;

    for (frames_left = n; frames_left > 0; frames_left--) {
        if (pick0 >= 0)
            dst[0] = src[pick0];
        else
            dst[0] = 0;

        if (pick1 >= 0)
            dst[1] = src[pick1];
        else
            dst[1] = 0;

        dst += 2;
        src += in_stride;
    }
}
/* Build an interleaved 4-channel S32NE stream by picking input channels.
 *
 * m->state holds an int8_t table whose first four entries give, for each
 * output channel, the input channel to copy; a negative entry yields 0 for
 * that output channel. Processes n frames of m->i_ss.channels inputs. */
static void remap_arrange_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
    const unsigned in_stride = m->i_ss.channels;
    const int8_t *table = m->state;
    int pick[4];
    unsigned ch, frames_left;

    /* Cache the channel picks once, before touching any sample data. */
    for (ch = 0; ch < 4; ch++)
        pick[ch] = table[ch];

    for (frames_left = n; frames_left > 0; frames_left--) {
        for (ch = 0; ch < 4; ch++) {
            if (pick[ch] >= 0)
                dst[ch] = src[pick[ch]];
            else
                dst[ch] = 0;
        }

        dst += 4;
        src += in_stride;
    }
}
static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
const unsigned n_ic = m->i_ss.channels; const unsigned n_ic = m->i_ss.channels;
const int8_t *arrange = m->state; const int8_t *arrange = m->state;
@ -348,16 +496,19 @@ static void remap_arrange_ch4_float32ne_c(pa_remap_t *m, float *dst, const float
} }
void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16, void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
pa_do_remap_func_t func_float) { pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float) {
pa_assert(m); pa_assert(m);
if (m->format == PA_SAMPLE_S16NE) if (m->format == PA_SAMPLE_S16NE)
m->do_remap = func_s16; m->do_remap = func_s16;
else if (m->format == PA_SAMPLE_S32NE)
m->do_remap = func_s32;
else if (m->format == PA_SAMPLE_FLOAT32NE) else if (m->format == PA_SAMPLE_FLOAT32NE)
m->do_remap = func_float; m->do_remap = func_float;
else else
pa_assert_not_reached(); pa_assert_not_reached();
pa_assert(m->do_remap);
} }
static bool force_generic_code = false; static bool force_generic_code = false;
@ -374,6 +525,7 @@ static void init_remap_c(pa_remap_t *m) {
if (force_generic_code) { if (force_generic_code) {
pa_log_info("Forced to use generic matrix remapping"); pa_log_info("Forced to use generic matrix remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
(pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
(pa_do_remap_func_t) remap_channels_matrix_float32ne_c); (pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
return; return;
} }
@ -383,12 +535,14 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using mono to stereo remapping"); pa_log_info("Using mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c,
(pa_do_remap_func_t) remap_mono_to_stereo_s32ne_c,
(pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c); (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c);
} else if (n_ic == 2 && n_oc == 1 && } else if (n_ic == 2 && n_oc == 1 &&
m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) { m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
pa_log_info("Using stereo to mono remapping"); pa_log_info("Using stereo to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c,
(pa_do_remap_func_t) remap_stereo_to_mono_s32ne_c,
(pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c); (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c);
} else if (n_ic == 1 && n_oc == 4 && } else if (n_ic == 1 && n_oc == 4 &&
m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
@ -396,6 +550,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using mono to 4-channel remapping"); pa_log_info("Using mono to 4-channel remapping");
pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c,
(pa_do_remap_func_t) remap_mono_to_ch4_s32ne_c,
(pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c); (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c);
} else if (n_ic == 4 && n_oc == 1 && } else if (n_ic == 4 && n_oc == 1 &&
m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 && m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
@ -403,11 +558,13 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using 4-channel to mono remapping"); pa_log_info("Using 4-channel to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c,
(pa_do_remap_func_t) remap_ch4_to_mono_s32ne_c,
(pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c); (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c);
} else if (pa_setup_remap_arrange(m, arrange) && n_oc == 1) { } else if (pa_setup_remap_arrange(m, arrange) && n_oc == 1) {
pa_log_info("Using mono arrange remapping"); pa_log_info("Using mono arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c,
(pa_do_remap_func_t) remap_arrange_mono_s32ne_c,
(pa_do_remap_func_t) remap_arrange_mono_float32ne_c); (pa_do_remap_func_t) remap_arrange_mono_float32ne_c);
/* setup state */ /* setup state */
@ -416,6 +573,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using stereo arrange remapping"); pa_log_info("Using stereo arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c,
(pa_do_remap_func_t) remap_arrange_stereo_s32ne_c,
(pa_do_remap_func_t) remap_arrange_stereo_float32ne_c); (pa_do_remap_func_t) remap_arrange_stereo_float32ne_c);
/* setup state */ /* setup state */
@ -424,6 +582,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using 4-channel arrange remapping"); pa_log_info("Using 4-channel arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c,
(pa_do_remap_func_t) remap_arrange_ch4_s32ne_c,
(pa_do_remap_func_t) remap_arrange_ch4_float32ne_c); (pa_do_remap_func_t) remap_arrange_ch4_float32ne_c);
/* setup state */ /* setup state */
@ -432,6 +591,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using generic matrix remapping"); pa_log_info("Using generic matrix remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c, pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
(pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
(pa_do_remap_func_t) remap_channels_matrix_float32ne_c); (pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
} }
} }

View file

@ -55,6 +55,6 @@ void pa_set_init_remap_func(pa_init_remap_func_t func);
bool pa_setup_remap_arrange(const pa_remap_t *m, int8_t arrange[PA_CHANNELS_MAX]); bool pa_setup_remap_arrange(const pa_remap_t *m, int8_t arrange[PA_CHANNELS_MAX]);
void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16, void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
pa_do_remap_func_t func_float); pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float);
#endif /* fooremapfoo */ #endif /* fooremapfoo */

View file

@ -111,7 +111,8 @@ static void remap_mono_to_stereo_s16ne_mmx(pa_remap_t *m, int16_t *dst, const in
); );
} }
static void remap_mono_to_stereo_float32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) { /* Works for both S32NE and FLOAT32NE */
static void remap_mono_to_stereo_any32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) {
pa_reg_x86 temp, temp2; pa_reg_x86 temp, temp2;
__asm__ __volatile__ ( __asm__ __volatile__ (
@ -135,7 +136,8 @@ static void init_remap_mmx(pa_remap_t *m) {
pa_log_info("Using MMX mono to stereo remapping"); pa_log_info("Using MMX mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx, pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx,
(pa_do_remap_func_t) remap_mono_to_stereo_float32ne_mmx); (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx,
(pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx);
} }
} }
#endif /* defined (__i386__) || defined (__amd64__) */ #endif /* defined (__i386__) || defined (__amd64__) */

View file

@ -143,6 +143,25 @@ static void remap_stereo_to_mono_float32ne_neon(pa_remap_t *m, float *dst, const
} }
} }
/* Downmix interleaved stereo S32NE to mono using ARM NEON.
 *
 * Reads 2 * n samples from src and writes n samples to dst; m is not used.
 * Groups of four output samples go through the assembly path, the remaining
 * 0..3 samples through the scalar loop at the end. */
static void remap_stereo_to_mono_s32ne_neon(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
/* Vector path, four output samples per iteration:
 *   vld2.32    loads eight 32-bit samples and de-interleaves them, so q0
 *              and q1 each hold one channel of four frames; src advances.
 *   vrhadd.s32 rounding halving add, i.e. the per-lane average of q0 and
 *              q1 computed without intermediate overflow.
 *   vst1.32    stores the four averaged samples; dst advances. */
for (; n >= 4; n -= 4) {
__asm__ __volatile__ (
"vld2.32 {q0,q1}, [%[src]]! \n\t"
"vrhadd.s32 q0, q0, q1 \n\t"
"vst1.32 {q0}, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: /* input operands */
: "memory", "q0", "q1" /* clobber list */
);
}
/* Scalar tail. Halving each channel before the add avoids overflow but
 * rounds differently from vrhadd, so results may differ by a small
 * amount between the two paths. */
for (; n > 0; n--) {
dst[0] = src[0]/2 + src[1]/2;
src += 2;
dst++;
}
}
static void remap_stereo_to_mono_s16ne_neon(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) { static void remap_stereo_to_mono_s16ne_neon(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
for (; n >= 8; n -= 8) { for (; n >= 8; n -= 8) {
__asm__ __volatile__ ( __asm__ __volatile__ (
@ -322,7 +341,8 @@ static void remap_arrange_stereo_float32ne_neon(pa_remap_t *m, float *dst, const
} }
} }
static void remap_arrange_ch2_ch4_float32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) { /* Works for both S32NE and FLOAT32NE */
static void remap_arrange_ch2_ch4_any32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) {
const uint8x8_t t0 = ((uint8x8_t *)m->state)[0]; const uint8x8_t t0 = ((uint8x8_t *)m->state)[0];
const uint8x8_t t1 = ((uint8x8_t *)m->state)[1]; const uint8x8_t t1 = ((uint8x8_t *)m->state)[1];
@ -365,39 +385,52 @@ static void init_remap_neon(pa_remap_t *m) {
n_oc = m->o_ss.channels; n_oc = m->o_ss.channels;
n_ic = m->i_ss.channels; n_ic = m->i_ss.channels;
/* We short-circuit remap function selection for S32NE in most
* cases as the corresponding generic C code is performing
* similarly or even better. However there are a few cases where
* there actually is a significant improvement from using
* hand-crafted NEON assembly so we cannot just bail out for S32NE
* here. */
if (n_ic == 1 && n_oc == 2 && if (n_ic == 1 && n_oc == 2 &&
m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) { m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {
if (m->format == PA_SAMPLE_S32NE)
return;
if (arm_flags & PA_CPU_ARM_CORTEX_A8) { if (arm_flags & PA_CPU_ARM_CORTEX_A8) {
pa_log_info("Using ARM NEON/A8 mono to stereo remapping"); pa_log_info("Using ARM NEON/A8 mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon,
(pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8); NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8);
} }
else { else {
pa_log_info("Using ARM NEON mono to stereo remapping"); pa_log_info("Using ARM NEON mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon,
(pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm); NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm);
} }
} else if (n_ic == 1 && n_oc == 4 && } else if (n_ic == 1 && n_oc == 4 &&
m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) { m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) {
if (m->format == PA_SAMPLE_S32NE)
return;
pa_log_info("Using ARM NEON mono to 4-channel remapping"); pa_log_info("Using ARM NEON mono to 4-channel remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_ch4_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_ch4_s16ne_neon,
(pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon); NULL, (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon);
} else if (n_ic == 2 && n_oc == 1 && } else if (n_ic == 2 && n_oc == 1 &&
m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) { m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
pa_log_info("Using ARM NEON stereo to mono remapping"); pa_log_info("Using ARM NEON stereo to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_neon,
(pa_do_remap_func_t) remap_stereo_to_mono_s32ne_neon,
(pa_do_remap_func_t) remap_stereo_to_mono_float32ne_neon); (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_neon);
} else if (n_ic == 4 && n_oc == 1 && } else if (n_ic == 4 && n_oc == 1 &&
m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 && m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) { m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) {
if (m->format == PA_SAMPLE_S32NE)
return;
pa_log_info("Using ARM NEON 4-channel to mono remapping"); pa_log_info("Using ARM NEON 4-channel to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_neon,
(pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon); NULL, (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon);
} else if (pa_setup_remap_arrange(m, arrange) && } else if (pa_setup_remap_arrange(m, arrange) &&
((n_ic == 2 && n_oc == 2) || ((n_ic == 2 && n_oc == 2) ||
(n_ic == 2 && n_oc == 4) || (n_ic == 2 && n_oc == 4) ||
@ -405,17 +438,22 @@ static void init_remap_neon(pa_remap_t *m) {
unsigned o; unsigned o;
if (n_ic == 2 && n_oc == 2) { if (n_ic == 2 && n_oc == 2) {
if (m->format == PA_SAMPLE_S32NE)
return;
pa_log_info("Using NEON stereo arrange remapping"); pa_log_info("Using NEON stereo arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_neon,
(pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon); NULL, (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon);
} else if (n_ic == 2 && n_oc == 4) { } else if (n_ic == 2 && n_oc == 4) {
pa_log_info("Using NEON 2-channel to 4-channel arrange remapping"); pa_log_info("Using NEON 2-channel to 4-channel arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch2_ch4_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch2_ch4_s16ne_neon,
(pa_do_remap_func_t) remap_arrange_ch2_ch4_float32ne_neon); (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon,
(pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon);
} else if (n_ic == 4 && n_oc == 4) { } else if (n_ic == 4 && n_oc == 4) {
if (m->format == PA_SAMPLE_S32NE)
return;
pa_log_info("Using NEON 4-channel arrange remapping"); pa_log_info("Using NEON 4-channel arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_neon,
(pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon); NULL, (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon);
} }
/* setup state */ /* setup state */
@ -436,6 +474,8 @@ static void init_remap_neon(pa_remap_t *m) {
} }
break; break;
} }
case PA_SAMPLE_S32NE:
/* fall-through */
case PA_SAMPLE_FLOAT32NE: { case PA_SAMPLE_FLOAT32NE: {
uint8x8_t *t = m->state = pa_xnew0(uint8x8_t, 2); uint8x8_t *t = m->state = pa_xnew0(uint8x8_t, 2);
for (o = 0; o < n_oc; o++) { for (o = 0; o < n_oc; o++) {
@ -461,8 +501,11 @@ static void init_remap_neon(pa_remap_t *m) {
} else if (n_ic == 4 && n_oc == 4) { } else if (n_ic == 4 && n_oc == 4) {
unsigned i, o; unsigned i, o;
if (m->format == PA_SAMPLE_S32NE)
return;
pa_log_info("Using ARM NEON 4-channel remapping"); pa_log_info("Using ARM NEON 4-channel remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_s16ne_neon, pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_s16ne_neon,
(pa_do_remap_func_t) NULL,
(pa_do_remap_func_t) remap_ch4_float32ne_neon); (pa_do_remap_func_t) remap_ch4_float32ne_neon);
/* setup state */ /* setup state */

View file

@ -110,7 +110,8 @@ static void remap_mono_to_stereo_s16ne_sse2(pa_remap_t *m, int16_t *dst, const i
); );
} }
static void remap_mono_to_stereo_float32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) { /* Works for both S32NE and FLOAT32NE */
static void remap_mono_to_stereo_any32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) {
pa_reg_x86 temp, temp2; pa_reg_x86 temp, temp2;
__asm__ __volatile__ ( __asm__ __volatile__ (
@ -134,7 +135,8 @@ static void init_remap_sse2(pa_remap_t *m) {
pa_log_info("Using SSE2 mono to stereo remapping"); pa_log_info("Using SSE2 mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_sse2, pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_sse2,
(pa_do_remap_func_t) remap_mono_to_stereo_float32ne_sse2); (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2,
(pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2);
} }
} }
#endif /* defined (__i386__) || defined (__amd64__) */ #endif /* defined (__i386__) || defined (__amd64__) */

View file

@ -286,6 +286,14 @@ static pa_sample_format_t choose_work_format(
work_format = a; work_format = a;
break; break;
} }
/* If both input and output are using S32NE and we don't
* need any resampling we can use S32NE directly, avoiding
* converting back and forth between S32NE and
* FLOAT32NE. */
if ((a == PA_SAMPLE_S32NE) && (b == PA_SAMPLE_S32NE)) {
work_format = PA_SAMPLE_S32NE;
break;
}
/* Else fall through */ /* Else fall through */
case PA_RESAMPLER_PEAKS: case PA_RESAMPLER_PEAKS:
/* PEAKS, COPY and TRIVIAL do not benefit from increased /* PEAKS, COPY and TRIVIAL do not benefit from increased

View file

@ -141,6 +141,60 @@ static void run_remap_test_s16(
} }
} }
/* Compare an S32NE remap implementation against a reference one.
 *
 * remap_func is the implementation under test, remap_orig the reference.
 * align selects the sample offset of the working pointers inside the
 * 8-byte aligned buffers (the buffers start 8 - align samples in).
 * With correct set, both implementations are run on the same random input
 * and every output sample must agree within +/- 4. With perf set, both are
 * timed through the PA_RUNTIME_TEST_RUN_* macros. */
static void run_remap_test_s32(
        pa_remap_t *remap_func,
        pa_remap_t *remap_orig,
        int align,
        bool correct,
        bool perf) {

    PA_DECLARE_ALIGNED(8, int32_t, out_buf_ref[SAMPLES*8]) = { 0 };
    PA_DECLARE_ALIGNED(8, int32_t, out_buf[SAMPLES*8]) = { 0 };
    PA_DECLARE_ALIGNED(8, int32_t, in_buf[SAMPLES*8]);
    int32_t *out, *out_ref;
    int32_t *in;
    unsigned n_ic = remap_func->i_ss.channels;
    unsigned n_oc = remap_func->o_ss.channels;
    unsigned i, nsamples;

    pa_assert(n_ic >= 1 && n_ic <= 8);
    pa_assert(n_oc >= 1 && n_oc <= 8);
    /* The offset below is 8 - align; keep it inside the buffers. */
    pa_assert(align >= 0 && align <= 8);

    /* Force sample alignment as requested */
    out = out_buf + (8 - align);
    out_ref = out_buf_ref + (8 - align);
    in = in_buf + (8 - align);
    nsamples = SAMPLES - (8 - align);

    pa_random(in, nsamples * n_ic * sizeof(int32_t));

    if (correct) {
        remap_orig->do_remap(remap_orig, out_ref, in, nsamples);
        remap_func->do_remap(remap_func, out, in, nsamples);

        for (i = 0; i < nsamples * n_oc; i++) {
            /* The input is random over the full 32-bit range, so the
             * difference has to be formed in 64 bits: subtracting in int
             * could overflow and hide a grossly wrong sample. */
            const int64_t diff = (int64_t) out[i] - (int64_t) out_ref[i];

            if (diff > 4 || diff < -4) {
                pa_log_debug("Correctness test failed: align=%d", align);
                pa_log_debug("%u: %d != %d", i, out[i], out_ref[i]);
                ck_abort();
            }
        }
    }

    if (perf) {
        pa_log_debug("Testing remap performance with %d sample alignment", align);

        PA_RUNTIME_TEST_RUN_START("func", TIMES, TIMES2) {
            remap_func->do_remap(remap_func, out, in, nsamples);
        } PA_RUNTIME_TEST_RUN_STOP

        PA_RUNTIME_TEST_RUN_START("orig", TIMES, TIMES2) {
            remap_orig->do_remap(remap_orig, out_ref, in, nsamples);
        } PA_RUNTIME_TEST_RUN_STOP
    }
}
static void setup_remap_channels( static void setup_remap_channels(
pa_remap_t *m, pa_remap_t *m,
pa_sample_format_t f, pa_sample_format_t f,
@ -193,6 +247,12 @@ static void remap_test_channels(
run_remap_test_float(remap_func, remap_orig, 2, true, false); run_remap_test_float(remap_func, remap_orig, 2, true, false);
run_remap_test_float(remap_func, remap_orig, 3, true, true); run_remap_test_float(remap_func, remap_orig, 3, true, true);
break; break;
case PA_SAMPLE_S32NE:
run_remap_test_s32(remap_func, remap_orig, 0, true, false);
run_remap_test_s32(remap_func, remap_orig, 1, true, false);
run_remap_test_s32(remap_func, remap_orig, 2, true, false);
run_remap_test_s32(remap_func, remap_orig, 3, true, true);
break;
case PA_SAMPLE_S16NE: case PA_SAMPLE_S16NE:
run_remap_test_s16(remap_func, remap_orig, 0, true, false); run_remap_test_s16(remap_func, remap_orig, 0, true, false);
run_remap_test_s16(remap_func, remap_orig, 1, true, false); run_remap_test_s16(remap_func, remap_orig, 1, true, false);
@ -251,6 +311,11 @@ START_TEST (remap_special_test) {
pa_log_debug("Checking special remap (float, mono->4-channel)"); pa_log_debug("Checking special remap (float, mono->4-channel)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 1, 4, false); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 1, 4, false);
pa_log_debug("Checking special remap (s32, mono->stereo)");
remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 2, false);
pa_log_debug("Checking special remap (s32, mono->4-channel)");
remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 4, false);
pa_log_debug("Checking special remap (s16, mono->stereo)"); pa_log_debug("Checking special remap (s16, mono->stereo)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 1, 2, false); remap_init2_test_channels(PA_SAMPLE_S16NE, 1, 2, false);
pa_log_debug("Checking special remap (s16, mono->4-channel)"); pa_log_debug("Checking special remap (s16, mono->4-channel)");
@ -261,6 +326,11 @@ START_TEST (remap_special_test) {
pa_log_debug("Checking special remap (float, 4-channel->mono)"); pa_log_debug("Checking special remap (float, 4-channel->mono)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 1, false); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 1, false);
pa_log_debug("Checking special remap (s32, stereo->mono)");
remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 1, false);
pa_log_debug("Checking special remap (s32, 4-channel->mono)");
remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 1, false);
pa_log_debug("Checking special remap (s16, stereo->mono)"); pa_log_debug("Checking special remap (s16, stereo->mono)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 1, false); remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 1, false);
pa_log_debug("Checking special remap (s16, 4-channel->mono)"); pa_log_debug("Checking special remap (s16, 4-channel->mono)");
@ -271,11 +341,15 @@ END_TEST
START_TEST (rearrange_special_test) { START_TEST (rearrange_special_test) {
pa_log_debug("Checking special remap (s16, stereo rearrange)"); pa_log_debug("Checking special remap (s16, stereo rearrange)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 2, true); remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 2, true);
pa_log_debug("Checking special remap (s32, stereo rearrange)");
remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 2, true);
pa_log_debug("Checking special remap (float, stereo rearrange)"); pa_log_debug("Checking special remap (float, stereo rearrange)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 2, 2, true); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 2, 2, true);
pa_log_debug("Checking special remap (s16, 4-channel rearrange)"); pa_log_debug("Checking special remap (s16, 4-channel rearrange)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 4, 4, true); remap_init2_test_channels(PA_SAMPLE_S16NE, 4, 4, true);
pa_log_debug("Checking special remap (s32, 4-channel rearrange)");
remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 4, true);
pa_log_debug("Checking special remap (float, 4-channel rearrange)"); pa_log_debug("Checking special remap (float, 4-channel rearrange)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 4, true); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 4, true);
} }
@ -298,6 +372,9 @@ START_TEST (remap_mmx_test) {
init_func = pa_get_init_remap_func(); init_func = pa_get_init_remap_func();
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false);
pa_log_debug("Checking MMX remap (s32, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
pa_log_debug("Checking MMX remap (s16, mono->stereo)"); pa_log_debug("Checking MMX remap (s16, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
} }
@ -319,6 +396,9 @@ START_TEST (remap_sse2_test) {
init_func = pa_get_init_remap_func(); init_func = pa_get_init_remap_func();
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false);
pa_log_debug("Checking SSE2 remap (s32, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
pa_log_debug("Checking SSE2 remap (s16, mono->stereo)"); pa_log_debug("Checking SSE2 remap (s16, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
} }
@ -345,6 +425,11 @@ START_TEST (remap_neon_test) {
pa_log_debug("Checking NEON remap (float, mono->4-channel)"); pa_log_debug("Checking NEON remap (float, mono->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 4, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 4, false);
pa_log_debug("Checking NEON remap (s32, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
pa_log_debug("Checking NEON remap (s32, mono->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 4, false);
pa_log_debug("Checking NEON remap (s16, mono->stereo)"); pa_log_debug("Checking NEON remap (s16, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
pa_log_debug("Checking NEON remap (s16, mono->4-channel)"); pa_log_debug("Checking NEON remap (s16, mono->4-channel)");
@ -355,6 +440,11 @@ START_TEST (remap_neon_test) {
pa_log_debug("Checking NEON remap (float, 4-channel->mono)"); pa_log_debug("Checking NEON remap (float, 4-channel->mono)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 1, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 1, false);
pa_log_debug("Checking NEON remap (s32, stereo->mono)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 1, false);
pa_log_debug("Checking NEON remap (s32, 4-channel->mono)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 1, false);
pa_log_debug("Checking NEON remap (s16, stereo->mono)"); pa_log_debug("Checking NEON remap (s16, stereo->mono)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 1, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 1, false);
pa_log_debug("Checking NEON remap (s16, 4-channel->mono)"); pa_log_debug("Checking NEON remap (s16, 4-channel->mono)");
@ -362,6 +452,8 @@ START_TEST (remap_neon_test) {
pa_log_debug("Checking NEON remap (float, 4-channel->4-channel)"); pa_log_debug("Checking NEON remap (float, 4-channel->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, false);
pa_log_debug("Checking NEON remap (s32, 4-channel->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, false);
pa_log_debug("Checking NEON remap (s16, 4-channel->4-channel)"); pa_log_debug("Checking NEON remap (s16, 4-channel->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, false); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, false);
} }
@ -383,16 +475,22 @@ START_TEST (rearrange_neon_test) {
pa_log_debug("Checking NEON remap (float, stereo rearrange)"); pa_log_debug("Checking NEON remap (float, stereo rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 2, true); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 2, true);
pa_log_debug("Checking NEON remap (s32, stereo rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 2, true);
pa_log_debug("Checking NEON remap (s16, stereo rearrange)"); pa_log_debug("Checking NEON remap (s16, stereo rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 2, true); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 2, true);
pa_log_debug("Checking NEON remap (float, 2-channel->4-channel rearrange)"); pa_log_debug("Checking NEON remap (float, 2-channel->4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 4, true); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 4, true);
pa_log_debug("Checking NEON remap (s32, 2-channel->4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 4, true);
pa_log_debug("Checking NEON remap (s16, 2-channel->4-channel rearrange)"); pa_log_debug("Checking NEON remap (s16, 2-channel->4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 4, true); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 4, true);
pa_log_debug("Checking NEON remap (float, 4-channel rearrange)"); pa_log_debug("Checking NEON remap (float, 4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, true); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, true);
pa_log_debug("Checking NEON remap (s32, 4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, true);
pa_log_debug("Checking NEON remap (s16, 4-channel rearrange)"); pa_log_debug("Checking NEON remap (s16, 4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, true); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, true);
} }