From 034b77823ad45b5f02baaeea436863ed104ee66d Mon Sep 17 00:00:00 2001 From: Sascha Silbe Date: Tue, 26 Mar 2019 10:35:55 +0100 Subject: [PATCH] remap: support S32NE work format So far PulseAudio only supported two different work formats: S16NE if it's sufficient to represent the input and output formats without loss of precision and FLOAT32NE in all other cases. For systems that use S32NE exclusively, this results in unnecessary conversions from S32NE to FLOAT32NE and back again. Add S32NE remap operations and make use of them (for the COPY and TRIVIAL resamplers) if both input and output format are S32NE. This avoids the back and forth conversions between S32NE and FLOAT32NE, significantly improving performance for those cases. --- src/pulsecore/remap.c | 162 ++++++++++++++++++++++++++++++++++++- src/pulsecore/remap.h | 2 +- src/pulsecore/remap_mmx.c | 6 +- src/pulsecore/remap_neon.c | 59 ++++++++++++-- src/pulsecore/remap_sse.c | 6 +- src/pulsecore/resampler.c | 8 ++ src/tests/cpu-remap-test.c | 98 ++++++++++++++++++++++ 7 files changed, 327 insertions(+), 14 deletions(-) diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c index 09e2c8f82..35fffd7d2 100644 --- a/src/pulsecore/remap.c +++ b/src/pulsecore/remap.c @@ -51,6 +51,24 @@ static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int1 } } +static void remap_mono_to_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + unsigned i; + + for (i = n >> 2; i; i--) { + dst[0] = dst[1] = src[0]; + dst[2] = dst[3] = src[1]; + dst[4] = dst[5] = src[2]; + dst[6] = dst[7] = src[3]; + src += 4; + dst += 8; + } + for (i = n & 3; i; i--) { + dst[0] = dst[1] = src[0]; + src++; + dst += 2; + } +} + static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -87,6 +105,28 @@ static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int1 } } +static void remap_stereo_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + unsigned i; + + for (i = n >> 2; i > 0; i--) { + /* Avoid overflow by performing division first. We accept a + * difference of +/- 1 to the ideal result. */ + dst[0] = (src[0]/2 + src[1]/2); + dst[1] = (src[2]/2 + src[3]/2); + dst[2] = (src[4]/2 + src[5]/2); + dst[3] = (src[6]/2 + src[7]/2); + src += 8; + dst += 4; + } + for (i = n & 3; i; i--) { + /* Avoid overflow by performing division first. We accept a + * difference of +/- 1 to the ideal result. */ + dst[0] = (src[0]/2 + src[1]/2); + src += 2; + dst += 1; + } +} + static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -123,6 +163,24 @@ static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t } } +static void remap_mono_to_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + unsigned i; + + for (i = n >> 2; i; i--) { + dst[0] = dst[1] = dst[2] = dst[3] = src[0]; + dst[4] = dst[5] = dst[6] = dst[7] = src[1]; + dst[8] = dst[9] = dst[10] = dst[11] = src[2]; + dst[12] = dst[13] = dst[14] = dst[15] = src[3]; + src += 4; + dst += 16; + } + for (i = n & 3; i; i--) { + dst[0] = dst[1] = dst[2] = dst[3] = src[0]; + src++; + dst += 4; + } +} + static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -159,6 +217,28 @@ static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t } } +static void remap_ch4_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + unsigned i; + + for (i = n >> 2; i > 0; i--) { + /* Avoid overflow by performing division first. We accept a + * difference of +/- 3 to the ideal result. */ + dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4); + dst[1] = (src[4]/4 + src[5]/4 + src[6]/4 + src[7]/4); + dst[2] = (src[8]/4 + src[9]/4 + src[10]/4 + src[11]/4); + dst[3] = (src[12]/4 + src[13]/4 + src[14]/4 + src[15]/4); + src += 16; + dst += 4; + } + for (i = n & 3; i; i--) { + /* Avoid overflow by performing division first. We accept a + * difference of +/- 3 to the ideal result. */ + dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4); + src += 4; + dst += 1; + } +} + static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -208,6 +288,36 @@ static void remap_channels_matrix_s16ne_c(pa_remap_t *m, int16_t *dst, const int } } +static void remap_channels_matrix_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + unsigned oc, ic, i; + unsigned n_ic, n_oc; + + n_ic = m->i_ss.channels; + n_oc = m->o_ss.channels; + + memset(dst, 0, n * sizeof(int32_t) * n_oc); + + for (oc = 0; oc < n_oc; oc++) { + + for (ic = 0; ic < n_ic; ic++) { + int32_t *d = dst + oc; + const int32_t *s = src + ic; + int32_t vol = m->map_table_i[oc][ic]; + + if (vol <= 0) + continue; + + if (vol >= 0x10000) { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += *s; + } else { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += (int32_t) (((int64_t)*s * vol) >> 16); + } + } + } +} + static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned oc, ic, i; unsigned n_ic, n_oc; @@ -309,6 +419,44 @@ static void remap_arrange_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t } } +static void remap_arrange_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + const unsigned n_ic = m->i_ss.channels; + const int8_t *arrange = m->state; + + src += arrange[0]; + for (; n > 0; n--) { + *dst++ = *src; + src += n_ic; + } +} + +static void remap_arrange_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + const unsigned n_ic = m->i_ss.channels; + const int8_t *arrange = m->state; + const int ic0 = arrange[0], ic1 = arrange[1]; + + for (; n > 0; n--) { + *dst++ = (ic0 >= 0) ? *(src + ic0) : 0; + *dst++ = (ic1 >= 0) ? *(src + ic1) : 0; + src += n_ic; + } +} + +static void remap_arrange_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + const unsigned n_ic = m->i_ss.channels; + const int8_t *arrange = m->state; + const int ic0 = arrange[0], ic1 = arrange[1], + ic2 = arrange[2], ic3 = arrange[3]; + + for (; n > 0; n--) { + *dst++ = (ic0 >= 0) ? *(src + ic0) : 0; + *dst++ = (ic1 >= 0) ? *(src + ic1) : 0; + *dst++ = (ic2 >= 0) ? *(src + ic2) : 0; + *dst++ = (ic3 >= 0) ? *(src + ic3) : 0; + src += n_ic; + } +} + static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { const unsigned n_ic = m->i_ss.channels; const int8_t *arrange = m->state; @@ -348,16 +496,19 @@ static void remap_arrange_ch4_float32ne_c(pa_remap_t *m, float *dst, const float } void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16, - pa_do_remap_func_t func_float) { + pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float) { pa_assert(m); if (m->format == PA_SAMPLE_S16NE) m->do_remap = func_s16; + else if (m->format == PA_SAMPLE_S32NE) + m->do_remap = func_s32; else if (m->format == PA_SAMPLE_FLOAT32NE) m->do_remap = func_float; else pa_assert_not_reached(); + pa_assert(m->do_remap); } static bool force_generic_code = false; @@ -374,6 +525,7 @@ static void init_remap_c(pa_remap_t *m) { if (force_generic_code) { pa_log_info("Forced to use generic matrix remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c, + (pa_do_remap_func_t) remap_channels_matrix_s32ne_c, (pa_do_remap_func_t) remap_channels_matrix_float32ne_c); return; } @@ -383,12 +535,14 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c, + (pa_do_remap_func_t) remap_mono_to_stereo_s32ne_c, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c); } else if (n_ic == 2 && n_oc == 1 && m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) { pa_log_info("Using stereo to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c, + (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_c, (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c); } else if (n_ic == 1 && n_oc == 4 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 && @@ -396,6 +550,7 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using mono to 4-channel remapping"); pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c, + (pa_do_remap_func_t) remap_mono_to_ch4_s32ne_c, (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c); } else if (n_ic == 4 && n_oc == 1 && m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 && @@ -403,11 +558,13 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using 4-channel to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c, + (pa_do_remap_func_t) remap_ch4_to_mono_s32ne_c, (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c); } else if (pa_setup_remap_arrange(m, arrange) && n_oc == 1) { pa_log_info("Using mono arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c, + (pa_do_remap_func_t) remap_arrange_mono_s32ne_c, (pa_do_remap_func_t) remap_arrange_mono_float32ne_c); /* setup state */ @@ -416,6 +573,7 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using stereo arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c, + (pa_do_remap_func_t) remap_arrange_stereo_s32ne_c, (pa_do_remap_func_t) remap_arrange_stereo_float32ne_c); /* setup state */ @@ -424,6 +582,7 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using 4-channel arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c, + (pa_do_remap_func_t) remap_arrange_ch4_s32ne_c, (pa_do_remap_func_t) remap_arrange_ch4_float32ne_c); /* setup state */ @@ -432,6 +591,7 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using generic matrix remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c, + (pa_do_remap_func_t) remap_channels_matrix_s32ne_c, (pa_do_remap_func_t) remap_channels_matrix_float32ne_c); } } diff --git a/src/pulsecore/remap.h b/src/pulsecore/remap.h index 4bad3ea33..473f0ceaf 100644 --- a/src/pulsecore/remap.h +++ b/src/pulsecore/remap.h @@ -55,6 +55,6 @@ void pa_set_init_remap_func(pa_init_remap_func_t func); bool pa_setup_remap_arrange(const pa_remap_t *m, int8_t arrange[PA_CHANNELS_MAX]); void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16, - pa_do_remap_func_t func_float); + pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float); #endif /* fooremapfoo */ diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c index 688da6c19..9d0767183 100644 --- a/src/pulsecore/remap_mmx.c +++ b/src/pulsecore/remap_mmx.c @@ -111,7 +111,8 @@ static void remap_mono_to_stereo_s16ne_mmx(pa_remap_t *m, int16_t *dst, const in ); } -static void remap_mono_to_stereo_float32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) { +/* Works for both S32NE and FLOAT32NE */ +static void remap_mono_to_stereo_any32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) { pa_reg_x86 temp, temp2; __asm__ __volatile__ ( @@ -135,7 +136,8 @@ static void init_remap_mmx(pa_remap_t *m) { pa_log_info("Using MMX mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_mmx); + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx, + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx); } } #endif /* defined (__i386__) || defined (__amd64__) */ diff --git a/src/pulsecore/remap_neon.c b/src/pulsecore/remap_neon.c index ebacf922f..41208986d 100644 --- a/src/pulsecore/remap_neon.c +++ b/src/pulsecore/remap_neon.c @@ -143,6 +143,25 @@ static void remap_stereo_to_mono_float32ne_neon(pa_remap_t *m, float *dst, const } } +static void remap_stereo_to_mono_s32ne_neon(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { + for (; n >= 4; n -= 4) { + __asm__ __volatile__ ( + "vld2.32 {q0,q1}, [%[src]]! \n\t" + "vrhadd.s32 q0, q0, q1 \n\t" + "vst1.32 {q0}, [%[dst]]! \n\t" + : [dst] "+r" (dst), [src] "+r" (src) /* output operands */ + : /* input operands */ + : "memory", "q0", "q1" /* clobber list */ + ); + } + + for (; n > 0; n--) { + dst[0] = src[0]/2 + src[1]/2; + src += 2; + dst++; + } +} + static void remap_stereo_to_mono_s16ne_neon(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) { for (; n >= 8; n -= 8) { __asm__ __volatile__ ( @@ -322,7 +341,8 @@ static void remap_arrange_stereo_float32ne_neon(pa_remap_t *m, float *dst, const } } -static void remap_arrange_ch2_ch4_float32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) { +/* Works for both S32NE and FLOAT32NE */ +static void remap_arrange_ch2_ch4_any32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) { const uint8x8_t t0 = ((uint8x8_t *)m->state)[0]; const uint8x8_t t1 = ((uint8x8_t *)m->state)[1]; @@ -365,39 +385,52 @@ static void init_remap_neon(pa_remap_t *m) { n_oc = m->o_ss.channels; n_ic = m->i_ss.channels; + /* We short-circuit remap function selection for S32NE in most + * cases as the corresponding generic C code is performing + * similarly or even better. However there are a few cases where + * there actually is a significant improvement from using + * hand-crafted NEON assembly so we cannot just bail out for S32NE + * here. */ if (n_ic == 1 && n_oc == 2 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) { + if (m->format == PA_SAMPLE_S32NE) + return; if (arm_flags & PA_CPU_ARM_CORTEX_A8) { pa_log_info("Using ARM NEON/A8 mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8); + NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8); } else { pa_log_info("Using ARM NEON mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm); + NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm); } } else if (n_ic == 1 && n_oc == 4 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 && m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using ARM NEON mono to 4-channel remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_ch4_s16ne_neon, - (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon); + NULL, (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon); } else if (n_ic == 2 && n_oc == 1 && m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) { pa_log_info("Using ARM NEON stereo to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_neon, + (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_neon, (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_neon); } else if (n_ic == 4 && n_oc == 1 && m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 && m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using ARM NEON 4-channel to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_neon, - (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon); + NULL, (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon); } else if (pa_setup_remap_arrange(m, arrange) && ((n_ic == 2 && n_oc == 2) || (n_ic == 2 && n_oc == 4) || @@ -405,17 +438,22 @@ static void init_remap_neon(pa_remap_t *m) { unsigned o; if (n_ic == 2 && n_oc == 2) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using NEON stereo arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_neon, - (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon); + NULL, (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon); } else if (n_ic == 2 && n_oc == 4) { pa_log_info("Using NEON 2-channel to 4-channel arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch2_ch4_s16ne_neon, - (pa_do_remap_func_t) remap_arrange_ch2_ch4_float32ne_neon); + (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon, + (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon); } else if (n_ic == 4 && n_oc == 4) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using NEON 4-channel arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_neon, - (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon); + NULL, (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon); } /* setup state */ @@ -436,6 +474,8 @@ static void init_remap_neon(pa_remap_t *m) { } break; } + case PA_SAMPLE_S32NE: + /* fall-through */ case PA_SAMPLE_FLOAT32NE: { uint8x8_t *t = m->state = pa_xnew0(uint8x8_t, 2); for (o = 0; o < n_oc; o++) { @@ -461,8 +501,11 @@ static void init_remap_neon(pa_remap_t *m) { } else if (n_ic == 4 && n_oc == 4) { unsigned i, o; + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using ARM NEON 4-channel remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_s16ne_neon, + (pa_do_remap_func_t) NULL, (pa_do_remap_func_t) remap_ch4_float32ne_neon); /* setup state */ diff --git a/src/pulsecore/remap_sse.c b/src/pulsecore/remap_sse.c index 73e1cc84c..5c3b931f2 100644 --- a/src/pulsecore/remap_sse.c +++ b/src/pulsecore/remap_sse.c @@ -110,7 +110,8 @@ static void remap_mono_to_stereo_s16ne_sse2(pa_remap_t *m, int16_t *dst, const i ); } -static void remap_mono_to_stereo_float32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) { +/* Works for both S32NE and FLOAT32NE */ +static void remap_mono_to_stereo_any32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) { pa_reg_x86 temp, temp2; __asm__ __volatile__ ( @@ -134,7 +135,8 @@ static void init_remap_sse2(pa_remap_t *m) { pa_log_info("Using SSE2 mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_sse2, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_sse2); + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2, + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2); } } #endif /* defined (__i386__) || defined (__amd64__) */ diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 6a4ded690..ff9795ec4 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -286,6 +286,14 @@ static pa_sample_format_t choose_work_format( work_format = a; break; } + /* If both input and output are using S32NE and we don't + * need any resampling we can use S32NE directly, avoiding + * converting back and forth between S32NE and + * FLOAT32NE. */ + if ((a == PA_SAMPLE_S32NE) && (b == PA_SAMPLE_S32NE)) { + work_format = PA_SAMPLE_S32NE; + break; + } /* Else fall through */ case PA_RESAMPLER_PEAKS: /* PEAKS, COPY and TRIVIAL do not benefit from increased diff --git a/src/tests/cpu-remap-test.c b/src/tests/cpu-remap-test.c index c8c8addde..7e2b7a498 100644 --- a/src/tests/cpu-remap-test.c +++ b/src/tests/cpu-remap-test.c @@ -141,6 +141,60 @@ static void run_remap_test_s16( } } + +static void run_remap_test_s32( + pa_remap_t *remap_func, + pa_remap_t *remap_orig, + int align, + bool correct, + bool perf) { + + PA_DECLARE_ALIGNED(8, int32_t, out_buf_ref[SAMPLES*8]) = { 0 }; + PA_DECLARE_ALIGNED(8, int32_t, out_buf[SAMPLES*8]) = { 0 }; + PA_DECLARE_ALIGNED(8, int32_t, in_buf[SAMPLES*8]); + int32_t *out, *out_ref; + int32_t *in; + unsigned n_ic = remap_func->i_ss.channels; + unsigned n_oc = remap_func->o_ss.channels; + unsigned i, nsamples; + + pa_assert(n_ic >= 1 && n_ic <= 8); + pa_assert(n_oc >= 1 && n_oc <= 8); + + /* Force sample alignment as requested */ + out = out_buf + (8 - align); + out_ref = out_buf_ref + (8 - align); + in = in_buf + (8 - align); + nsamples = SAMPLES - (8 - align); + + pa_random(in, nsamples * n_ic * sizeof(int32_t)); + + if (correct) { + remap_orig->do_remap(remap_orig, out_ref, in, nsamples); + remap_func->do_remap(remap_func, out, in, nsamples); + + for (i = 0; i < nsamples * n_oc; i++) { + if (abs(out[i] - out_ref[i]) > 4) { + pa_log_debug("Correctness test failed: align=%d", align); + pa_log_debug("%d: %d != %d", i, out[i], out_ref[i]); + ck_abort(); + } + } + } + + if (perf) { + pa_log_debug("Testing remap performance with %d sample alignment", align); + + PA_RUNTIME_TEST_RUN_START("func", TIMES, TIMES2) { + remap_func->do_remap(remap_func, out, in, nsamples); + } PA_RUNTIME_TEST_RUN_STOP + + PA_RUNTIME_TEST_RUN_START("orig", TIMES, TIMES2) { + remap_orig->do_remap(remap_orig, out_ref, in, nsamples); + } PA_RUNTIME_TEST_RUN_STOP + } +} + static void setup_remap_channels( pa_remap_t *m, pa_sample_format_t f, @@ -193,6 +247,12 @@ static void remap_test_channels( run_remap_test_float(remap_func, remap_orig, 2, true, false); run_remap_test_float(remap_func, remap_orig, 3, true, true); break; + case PA_SAMPLE_S32NE: + run_remap_test_s32(remap_func, remap_orig, 0, true, false); + run_remap_test_s32(remap_func, remap_orig, 1, true, false); + run_remap_test_s32(remap_func, remap_orig, 2, true, false); + run_remap_test_s32(remap_func, remap_orig, 3, true, true); + break; case PA_SAMPLE_S16NE: run_remap_test_s16(remap_func, remap_orig, 0, true, false); run_remap_test_s16(remap_func, remap_orig, 1, true, false); @@ -251,6 +311,11 @@ START_TEST (remap_special_test) { pa_log_debug("Checking special remap (float, mono->4-channel)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 1, 4, false); + pa_log_debug("Checking special remap (s32, mono->stereo)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking special remap (s32, mono->4-channel)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 4, false); + pa_log_debug("Checking special remap (s16, mono->stereo)"); remap_init2_test_channels(PA_SAMPLE_S16NE, 1, 2, false); pa_log_debug("Checking special remap (s16, mono->4-channel)"); @@ -261,6 +326,11 @@ START_TEST (remap_special_test) { pa_log_debug("Checking special remap (float, 4-channel->mono)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 1, false); + pa_log_debug("Checking special remap (s32, stereo->mono)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 1, false); + pa_log_debug("Checking special remap (s32, 4-channel->mono)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 1, false); + pa_log_debug("Checking special remap (s16, stereo->mono)"); remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 1, false); pa_log_debug("Checking special remap (s16, 4-channel->mono)"); @@ -271,11 +341,15 @@ END_TEST START_TEST (rearrange_special_test) { pa_log_debug("Checking special remap (s16, stereo rearrange)"); remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 2, true); + pa_log_debug("Checking special remap (s32, stereo rearrange)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 2, true); pa_log_debug("Checking special remap (float, stereo rearrange)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 2, 2, true); pa_log_debug("Checking special remap (s16, 4-channel rearrange)"); remap_init2_test_channels(PA_SAMPLE_S16NE, 4, 4, true); + pa_log_debug("Checking special remap (s32, 4-channel rearrange)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 4, true); pa_log_debug("Checking special remap (float, 4-channel rearrange)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 4, true); } @@ -298,6 +372,9 @@ START_TEST (remap_mmx_test) { init_func = pa_get_init_remap_func(); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false); + pa_log_debug("Checking MMX remap (s32, mono->stereo)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking MMX remap (s16, mono->stereo)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); } @@ -319,6 +396,9 @@ START_TEST (remap_sse2_test) { init_func = pa_get_init_remap_func(); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false); + pa_log_debug("Checking SSE2 remap (s32, mono->stereo)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking SSE2 remap (s16, mono->stereo)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); } @@ -345,6 +425,11 @@ START_TEST (remap_neon_test) { pa_log_debug("Checking NEON remap (float, mono->4-channel)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 4, false); + pa_log_debug("Checking NEON remap (s32, mono->stereo)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking NEON remap (s32, mono->4-channel)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 4, false); + pa_log_debug("Checking NEON remap (s16, mono->stereo)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); pa_log_debug("Checking NEON remap (s16, mono->4-channel)"); @@ -355,6 +440,11 @@ START_TEST (remap_neon_test) { pa_log_debug("Checking NEON remap (float, 4-channel->mono)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 1, false); + pa_log_debug("Checking NEON remap (s32, stereo->mono)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 1, false); + pa_log_debug("Checking NEON remap (s32, 4-channel->mono)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 1, false); + pa_log_debug("Checking NEON remap (s16, stereo->mono)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 1, false); pa_log_debug("Checking NEON remap (s16, 4-channel->mono)"); @@ -362,6 +452,8 @@ START_TEST (remap_neon_test) { pa_log_debug("Checking NEON remap (float, 4-channel->4-channel)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, false); + pa_log_debug("Checking NEON remap (s32, 4-channel->4-channel)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, false); pa_log_debug("Checking NEON remap (s16, 4-channel->4-channel)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, false); } @@ -383,16 +475,22 @@ START_TEST (rearrange_neon_test) { pa_log_debug("Checking NEON remap (float, stereo rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 2, true); + pa_log_debug("Checking NEON remap (s32, stereo rearrange)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 2, true); pa_log_debug("Checking NEON remap (s16, stereo rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 2, true); pa_log_debug("Checking NEON remap (float, 2-channel->4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 4, true); + pa_log_debug("Checking NEON remap (s32, 2-channel->4-channel rearrange)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 4, true); pa_log_debug("Checking NEON remap (s16, 2-channel->4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 4, true); pa_log_debug("Checking NEON remap (float, 4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, true); + pa_log_debug("Checking NEON remap (s32, 4-channel rearrange)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, true); pa_log_debug("Checking NEON remap (s16, 4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, true); }