diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c index 09e2c8f82..35fffd7d2 100644 --- a/src/pulsecore/remap.c +++ b/src/pulsecore/remap.c @@ -51,6 +51,24 @@ static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int1 } } +static void remap_mono_to_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Mono to stereo for 32-bit integer samples: every one of the n source samples is stored into two adjacent dst slots. m is not read. */ + unsigned i; + + for (i = n >> 2; i; i--) { /* unrolled: four source samples per pass */ + dst[0] = dst[1] = src[0]; + dst[2] = dst[3] = src[1]; + dst[4] = dst[5] = src[2]; + dst[6] = dst[7] = src[3]; + src += 4; + dst += 8; + } + for (i = n & 3; i; i--) { /* leftover n % 4 samples, one per pass */ + dst[0] = dst[1] = src[0]; + src++; + dst += 2; + } +} + static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -87,6 +105,28 @@ static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int1 } } +static void remap_stereo_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Stereo to mono for 32-bit integer samples: each of the n outputs is the sum of the two halved samples of one source pair. m is not read. */ + unsigned i; + + for (i = n >> 2; i > 0; i--) { /* unrolled: four outputs (eight source samples) per pass */ + /* Avoid overflow by performing division first. We accept a + * difference of +/- 1 to the ideal result. */ + dst[0] = (src[0]/2 + src[1]/2); + dst[1] = (src[2]/2 + src[3]/2); + dst[2] = (src[4]/2 + src[5]/2); + dst[3] = (src[6]/2 + src[7]/2); + src += 8; + dst += 4; + } + for (i = n & 3; i; i--) { /* leftover n % 4 outputs, one per pass */ + /* Avoid overflow by performing division first. We accept a + * difference of +/- 1 to the ideal result.
*/ + dst[0] = (src[0]/2 + src[1]/2); + src += 2; + dst += 1; + } +} + static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -123,6 +163,24 @@ static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t } } +static void remap_mono_to_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Mono to four channels for 32-bit integer samples: every one of the n source samples is stored into four adjacent dst slots. m is not read. */ + unsigned i; + + for (i = n >> 2; i; i--) { /* unrolled: four source samples (sixteen dst slots) per pass */ + dst[0] = dst[1] = dst[2] = dst[3] = src[0]; + dst[4] = dst[5] = dst[6] = dst[7] = src[1]; + dst[8] = dst[9] = dst[10] = dst[11] = src[2]; + dst[12] = dst[13] = dst[14] = dst[15] = src[3]; + src += 4; + dst += 16; + } + for (i = n & 3; i; i--) { /* leftover n % 4 samples, one per pass */ + dst[0] = dst[1] = dst[2] = dst[3] = src[0]; + src++; + dst += 4; + } +} + static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -159,6 +217,28 @@ static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t } } +static void remap_ch4_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Four channels to mono for 32-bit integer samples: each of the n outputs is the sum of four quartered source samples. m is not read. */ + unsigned i; + + for (i = n >> 2; i > 0; i--) { /* unrolled: four outputs (sixteen source samples) per pass */ + /* Avoid overflow by performing division first. We accept a + * difference of +/- 3 to the ideal result. */ + dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4); + dst[1] = (src[4]/4 + src[5]/4 + src[6]/4 + src[7]/4); + dst[2] = (src[8]/4 + src[9]/4 + src[10]/4 + src[11]/4); + dst[3] = (src[12]/4 + src[13]/4 + src[14]/4 + src[15]/4); + src += 16; + dst += 4; + } + for (i = n & 3; i; i--) { /* leftover n % 4 outputs, one per pass */ + /* Avoid overflow by performing division first. We accept a + * difference of +/- 3 to the ideal result.
*/ + dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4); + src += 4; + dst += 1; + } +} + static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned i; @@ -208,6 +288,36 @@ static void remap_channels_matrix_s16ne_c(pa_remap_t *m, int16_t *dst, const int } } +static void remap_channels_matrix_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Generic matrix remap for 32-bit integer samples: dst is cleared, then every input channel with a positive entry in m->map_table_i is accumulated into the matching output channel over n frames. */ + unsigned oc, ic, i; + unsigned n_ic, n_oc; + + n_ic = m->i_ss.channels; + n_oc = m->o_ss.channels; + + memset(dst, 0, n * sizeof(int32_t) * n_oc); + + for (oc = 0; oc < n_oc; oc++) { + + for (ic = 0; ic < n_ic; ic++) { + int32_t *d = dst + oc; + const int32_t *s = src + ic; + int32_t vol = m->map_table_i[oc][ic]; /* gain for this channel pair; the shift by 16 below treats 0x10000 as unity */ + + if (vol <= 0) /* this input channel contributes nothing to this output */ + continue; + + if (vol >= 0x10000) { /* unity or above: the sample is added unscaled */ + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += *s; /* NOTE(review): plain int32 accumulation with no clamping visible here; confirm that sums of several channels cannot overflow */ + } else { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += (int32_t) (((int64_t)*s * vol) >> 16); /* the product is formed in 64 bits before the shift */ + } + } + } +} + static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { unsigned oc, ic, i; unsigned n_ic, n_oc; @@ -309,6 +419,44 @@ static void remap_arrange_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t } } +static void remap_arrange_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Copies one input channel to a mono output: the channel index is arrange[0] read from m->state, and src advances by the input channel count per frame. */ + const unsigned n_ic = m->i_ss.channels; + const int8_t *arrange = m->state; + + src += arrange[0]; /* NOTE(review): no check for a negative index here, unlike the stereo and 4-channel variants; confirm callers never pass one */ + for (; n > 0; n--) { + *dst++ = *src; + src += n_ic; + } +} + +static void remap_arrange_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Builds each stereo frame from the input channels named by arrange[0] and arrange[1] in m->state; a negative index yields a zero sample. */ + const unsigned n_ic = m->i_ss.channels; + const int8_t *arrange = m->state; + const int ic0 = arrange[0], ic1 = arrange[1]; + + for (; n > 0; n--) { + *dst++ = (ic0 >= 0) ? *(src + ic0) : 0; + *dst++ = (ic1 >= 0) ?
*(src + ic1) : 0; + src += n_ic; + } +} + +static void remap_arrange_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Builds each 4-channel frame from the input channels named by arrange[0..3] in m->state; a negative index yields a zero sample. */ + const unsigned n_ic = m->i_ss.channels; + const int8_t *arrange = m->state; + const int ic0 = arrange[0], ic1 = arrange[1], + ic2 = arrange[2], ic3 = arrange[3]; + + for (; n > 0; n--) { + *dst++ = (ic0 >= 0) ? *(src + ic0) : 0; + *dst++ = (ic1 >= 0) ? *(src + ic1) : 0; + *dst++ = (ic2 >= 0) ? *(src + ic2) : 0; + *dst++ = (ic3 >= 0) ? *(src + ic3) : 0; + src += n_ic; + } +} + static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) { const unsigned n_ic = m->i_ss.channels; const int8_t *arrange = m->state; @@ -348,16 +496,19 @@ static void remap_arrange_ch4_float32ne_c(pa_remap_t *m, float *dst, const float } void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16, - pa_do_remap_func_t func_float) { + pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float) { /* Stores into m->do_remap the callback matching m->format (func_s16, func_s32 or func_float); any other format reaches pa_assert_not_reached(). */ pa_assert(m); if (m->format == PA_SAMPLE_S16NE) m->do_remap = func_s16; + else if (m->format == PA_SAMPLE_S32NE) + m->do_remap = func_s32; else if (m->format == PA_SAMPLE_FLOAT32NE) m->do_remap = func_float; else pa_assert_not_reached(); + pa_assert(m->do_remap); /* the callback chosen for the current format is asserted to be non-NULL */ } static bool force_generic_code = false; @@ -374,6 +525,7 @@ static void init_remap_c(pa_remap_t *m) { if (force_generic_code) { pa_log_info("Forced to use generic matrix remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c, + (pa_do_remap_func_t) remap_channels_matrix_s32ne_c, (pa_do_remap_func_t) remap_channels_matrix_float32ne_c); return; } @@ -383,12 +535,14 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c, + (pa_do_remap_func_t) remap_mono_to_stereo_s32ne_c, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c); } else if (n_ic == 2 && n_oc == 1 && m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1]
== 0x8000) { pa_log_info("Using stereo to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c, + (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_c, (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c); } else if (n_ic == 1 && n_oc == 4 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 && @@ -396,6 +550,7 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using mono to 4-channel remapping"); pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c, + (pa_do_remap_func_t) remap_mono_to_ch4_s32ne_c, (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c); } else if (n_ic == 4 && n_oc == 1 && m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 && @@ -403,11 +558,13 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using 4-channel to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c, + (pa_do_remap_func_t) remap_ch4_to_mono_s32ne_c, (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c); } else if (pa_setup_remap_arrange(m, arrange) && n_oc == 1) { pa_log_info("Using mono arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c, + (pa_do_remap_func_t) remap_arrange_mono_s32ne_c, (pa_do_remap_func_t) remap_arrange_mono_float32ne_c); /* setup state */ @@ -416,6 +573,7 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using stereo arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c, + (pa_do_remap_func_t) remap_arrange_stereo_s32ne_c, (pa_do_remap_func_t) remap_arrange_stereo_float32ne_c); /* setup state */ @@ -424,6 +582,7 @@ static void init_remap_c(pa_remap_t *m) { pa_log_info("Using 4-channel arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c, + (pa_do_remap_func_t) remap_arrange_ch4_s32ne_c, (pa_do_remap_func_t) remap_arrange_ch4_float32ne_c); /* setup state */ @@ -432,6 +591,7 @@ static void init_remap_c(pa_remap_t *m) { 
pa_log_info("Using generic matrix remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c, + (pa_do_remap_func_t) remap_channels_matrix_s32ne_c, (pa_do_remap_func_t) remap_channels_matrix_float32ne_c); } } diff --git a/src/pulsecore/remap.h b/src/pulsecore/remap.h index 4bad3ea33..473f0ceaf 100644 --- a/src/pulsecore/remap.h +++ b/src/pulsecore/remap.h @@ -55,6 +55,6 @@ void pa_set_init_remap_func(pa_init_remap_func_t func); bool pa_setup_remap_arrange(const pa_remap_t *m, int8_t arrange[PA_CHANNELS_MAX]); void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16, - pa_do_remap_func_t func_float); + pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float); #endif /* fooremapfoo */ diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c index 688da6c19..9d0767183 100644 --- a/src/pulsecore/remap_mmx.c +++ b/src/pulsecore/remap_mmx.c @@ -111,7 +111,8 @@ static void remap_mono_to_stereo_s16ne_mmx(pa_remap_t *m, int16_t *dst, const in ); } -static void remap_mono_to_stereo_float32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) { +/* Works for both S32NE and FLOAT32NE */ +static void remap_mono_to_stereo_any32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) { pa_reg_x86 temp, temp2; __asm__ __volatile__ ( @@ -135,7 +136,8 @@ static void init_remap_mmx(pa_remap_t *m) { pa_log_info("Using MMX mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_mmx); + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx, + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx); } } #endif /* defined (__i386__) || defined (__amd64__) */ diff --git a/src/pulsecore/remap_neon.c b/src/pulsecore/remap_neon.c index ebacf922f..41208986d 100644 --- a/src/pulsecore/remap_neon.c +++ b/src/pulsecore/remap_neon.c @@ -143,6 +143,25 @@ static void remap_stereo_to_mono_float32ne_neon(pa_remap_t *m, float *dst, const } } 
+static void remap_stereo_to_mono_s32ne_neon(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) { /* Stereo to mono for 32-bit integer samples, producing n outputs; m is not read. */ + for (; n >= 4; n -= 4) { /* vector path: four outputs per pass while at least four remain */ + __asm__ __volatile__ ( + "vld2.32 {q0,q1}, [%[src]]! \n\t" /* NOTE(review): per the ARM NEON reference VLD2 de-interleaves, so q0 and q1 should each hold one channel of four frames; confirm */ + "vrhadd.s32 q0, q0, q1 \n\t" /* NOTE(review): VRHADD is documented as a rounding halving add, whereas the scalar tail below truncates each half before adding, so the two paths can differ slightly; confirm this is intended */ + "vst1.32 {q0}, [%[dst]]! \n\t" + : [dst] "+r" (dst), [src] "+r" (src) /* output operands */ + : /* input operands */ + : "memory", "q0", "q1" /* clobber list */ + ); + } + + for (; n > 0; n--) { /* scalar tail for the remaining outputs (fewer than four) */ + dst[0] = src[0]/2 + src[1]/2; + src += 2; + dst++; + } +} + static void remap_stereo_to_mono_s16ne_neon(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) { for (; n >= 8; n -= 8) { __asm__ __volatile__ ( @@ -322,7 +341,8 @@ static void remap_arrange_stereo_float32ne_neon(pa_remap_t *m, float *dst, const } } -static void remap_arrange_ch2_ch4_float32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) { +/* Works for both S32NE and FLOAT32NE */ +static void remap_arrange_ch2_ch4_any32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) { const uint8x8_t t0 = ((uint8x8_t *)m->state)[0]; const uint8x8_t t1 = ((uint8x8_t *)m->state)[1]; @@ -365,39 +385,52 @@ static void init_remap_neon(pa_remap_t *m) { n_oc = m->o_ss.channels; n_ic = m->i_ss.channels; + /* We short-circuit remap function selection for S32NE in most + * cases as the corresponding generic C code is performing + * similarly or even better. However there are a few cases where + * there actually is a significant improvement from using + * hand-crafted NEON assembly so we cannot just bail out for S32NE + * here.
*/ if (n_ic == 1 && n_oc == 2 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) { + if (m->format == PA_SAMPLE_S32NE) + return; if (arm_flags & PA_CPU_ARM_CORTEX_A8) { pa_log_info("Using ARM NEON/A8 mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8); + NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8); } else { pa_log_info("Using ARM NEON mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm); + NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm); } } else if (n_ic == 1 && n_oc == 4 && m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 && m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using ARM NEON mono to 4-channel remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_ch4_s16ne_neon, - (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon); + NULL, (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon); } else if (n_ic == 2 && n_oc == 1 && m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) { pa_log_info("Using ARM NEON stereo to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_neon, + (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_neon, (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_neon); } else if (n_ic == 4 && n_oc == 1 && m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 && m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using ARM NEON 4-channel to mono remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_neon, - (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon); + NULL, 
(pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon); } else if (pa_setup_remap_arrange(m, arrange) && ((n_ic == 2 && n_oc == 2) || (n_ic == 2 && n_oc == 4) || @@ -405,17 +438,22 @@ static void init_remap_neon(pa_remap_t *m) { unsigned o; if (n_ic == 2 && n_oc == 2) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using NEON stereo arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_neon, - (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon); + NULL, (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon); } else if (n_ic == 2 && n_oc == 4) { pa_log_info("Using NEON 2-channel to 4-channel arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch2_ch4_s16ne_neon, - (pa_do_remap_func_t) remap_arrange_ch2_ch4_float32ne_neon); + (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon, + (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon); } else if (n_ic == 4 && n_oc == 4) { + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using NEON 4-channel arrange remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_neon, - (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon); + NULL, (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon); } /* setup state */ @@ -436,6 +474,8 @@ static void init_remap_neon(pa_remap_t *m) { } break; } + case PA_SAMPLE_S32NE: + /* fall-through */ case PA_SAMPLE_FLOAT32NE: { uint8x8_t *t = m->state = pa_xnew0(uint8x8_t, 2); for (o = 0; o < n_oc; o++) { @@ -461,8 +501,11 @@ static void init_remap_neon(pa_remap_t *m) { } else if (n_ic == 4 && n_oc == 4) { unsigned i, o; + if (m->format == PA_SAMPLE_S32NE) + return; pa_log_info("Using ARM NEON 4-channel remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_s16ne_neon, + (pa_do_remap_func_t) NULL, (pa_do_remap_func_t) remap_ch4_float32ne_neon); /* setup state */ diff --git a/src/pulsecore/remap_sse.c b/src/pulsecore/remap_sse.c index 73e1cc84c..5c3b931f2 100644 --- 
a/src/pulsecore/remap_sse.c +++ b/src/pulsecore/remap_sse.c @@ -110,7 +110,8 @@ static void remap_mono_to_stereo_s16ne_sse2(pa_remap_t *m, int16_t *dst, const i ); } -static void remap_mono_to_stereo_float32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) { +/* Works for both S32NE and FLOAT32NE */ +static void remap_mono_to_stereo_any32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) { pa_reg_x86 temp, temp2; __asm__ __volatile__ ( @@ -134,7 +135,8 @@ static void init_remap_sse2(pa_remap_t *m) { pa_log_info("Using SSE2 mono to stereo remapping"); pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_sse2, - (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_sse2); + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2, + (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2); } } #endif /* defined (__i386__) || defined (__amd64__) */ diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 6a4ded690..ff9795ec4 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -286,6 +286,14 @@ static pa_sample_format_t choose_work_format( work_format = a; break; } + /* If both input and output are using S32NE and we don't + * need any resampling we can use S32NE directly, avoiding + * converting back and forth between S32NE and + * FLOAT32NE. 
*/ + if ((a == PA_SAMPLE_S32NE) && (b == PA_SAMPLE_S32NE)) { + work_format = PA_SAMPLE_S32NE; + break; + } /* Else fall through */ case PA_RESAMPLER_PEAKS: /* PEAKS, COPY and TRIVIAL do not benefit from increased diff --git a/src/tests/cpu-remap-test.c b/src/tests/cpu-remap-test.c index c8c8addde..7e2b7a498 100644 --- a/src/tests/cpu-remap-test.c +++ b/src/tests/cpu-remap-test.c @@ -141,6 +141,60 @@ static void run_remap_test_s16( } } + +static void run_remap_test_s32( + pa_remap_t *remap_func, + pa_remap_t *remap_orig, + int align, + bool correct, + bool perf) { /* Runs remap_func and remap_orig on the same random 32-bit integer input. With correct set, every output sample of remap_func must lie within 4 of the remap_orig result or the test aborts; with perf set, both callbacks are timed. align shifts all three buffers by (8 - align) samples and shortens the run by the same amount. */ + + PA_DECLARE_ALIGNED(8, int32_t, out_buf_ref[SAMPLES*8]) = { 0 }; + PA_DECLARE_ALIGNED(8, int32_t, out_buf[SAMPLES*8]) = { 0 }; + PA_DECLARE_ALIGNED(8, int32_t, in_buf[SAMPLES*8]); + int32_t *out, *out_ref; + int32_t *in; + unsigned n_ic = remap_func->i_ss.channels; + unsigned n_oc = remap_func->o_ss.channels; + unsigned i, nsamples; + + pa_assert(n_ic >= 1 && n_ic <= 8); + pa_assert(n_oc >= 1 && n_oc <= 8); + + /* Force sample alignment as requested */ + out = out_buf + (8 - align); + out_ref = out_buf_ref + (8 - align); + in = in_buf + (8 - align); + nsamples = SAMPLES - (8 - align); + + pa_random(in, nsamples * n_ic * sizeof(int32_t)); + + if (correct) { + remap_orig->do_remap(remap_orig, out_ref, in, nsamples); + remap_func->do_remap(remap_func, out, in, nsamples); + + for (i = 0; i < nsamples * n_oc; i++) { + if ((int64_t) out[i] - (int64_t) out_ref[i] > 4 || (int64_t) out_ref[i] - (int64_t) out[i] > 4) { /* compare in 64 bits: a plain int32 subtraction could overflow and wrap to a small value when the two results are far apart, hiding a real mismatch */ + pa_log_debug("Correctness test failed: align=%d", align); + pa_log_debug("%d: %d != %d", i, out[i], out_ref[i]); + ck_abort(); + } + } + } + + if (perf) { + pa_log_debug("Testing remap performance with %d sample alignment", align); + + PA_RUNTIME_TEST_RUN_START("func", TIMES, TIMES2) { + remap_func->do_remap(remap_func, out, in, nsamples); + } PA_RUNTIME_TEST_RUN_STOP + + PA_RUNTIME_TEST_RUN_START("orig", TIMES, TIMES2) { + remap_orig->do_remap(remap_orig, out_ref, in, nsamples); + } PA_RUNTIME_TEST_RUN_STOP + } +} + static void setup_remap_channels(
pa_remap_t *m, pa_sample_format_t f, @@ -193,6 +247,12 @@ static void remap_test_channels( run_remap_test_float(remap_func, remap_orig, 2, true, false); run_remap_test_float(remap_func, remap_orig, 3, true, true); break; + case PA_SAMPLE_S32NE: + run_remap_test_s32(remap_func, remap_orig, 0, true, false); + run_remap_test_s32(remap_func, remap_orig, 1, true, false); + run_remap_test_s32(remap_func, remap_orig, 2, true, false); + run_remap_test_s32(remap_func, remap_orig, 3, true, true); + break; case PA_SAMPLE_S16NE: run_remap_test_s16(remap_func, remap_orig, 0, true, false); run_remap_test_s16(remap_func, remap_orig, 1, true, false); @@ -251,6 +311,11 @@ START_TEST (remap_special_test) { pa_log_debug("Checking special remap (float, mono->4-channel)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 1, 4, false); + pa_log_debug("Checking special remap (s32, mono->stereo)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking special remap (s32, mono->4-channel)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 4, false); + pa_log_debug("Checking special remap (s16, mono->stereo)"); remap_init2_test_channels(PA_SAMPLE_S16NE, 1, 2, false); pa_log_debug("Checking special remap (s16, mono->4-channel)"); @@ -261,6 +326,11 @@ START_TEST (remap_special_test) { pa_log_debug("Checking special remap (float, 4-channel->mono)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 1, false); + pa_log_debug("Checking special remap (s32, stereo->mono)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 1, false); + pa_log_debug("Checking special remap (s32, 4-channel->mono)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 1, false); + pa_log_debug("Checking special remap (s16, stereo->mono)"); remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 1, false); pa_log_debug("Checking special remap (s16, 4-channel->mono)"); @@ -271,11 +341,15 @@ END_TEST START_TEST (rearrange_special_test) { pa_log_debug("Checking special remap (s16, stereo rearrange)"); 
remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 2, true); + pa_log_debug("Checking special remap (s32, stereo rearrange)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 2, true); pa_log_debug("Checking special remap (float, stereo rearrange)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 2, 2, true); pa_log_debug("Checking special remap (s16, 4-channel rearrange)"); remap_init2_test_channels(PA_SAMPLE_S16NE, 4, 4, true); + pa_log_debug("Checking special remap (s32, 4-channel rearrange)"); + remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 4, true); pa_log_debug("Checking special remap (float, 4-channel rearrange)"); remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 4, true); } @@ -298,6 +372,9 @@ START_TEST (remap_mmx_test) { init_func = pa_get_init_remap_func(); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false); + pa_log_debug("Checking MMX remap (s32, mono->stereo)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking MMX remap (s16, mono->stereo)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); } @@ -319,6 +396,9 @@ START_TEST (remap_sse2_test) { init_func = pa_get_init_remap_func(); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false); + pa_log_debug("Checking SSE2 remap (s32, mono->stereo)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking SSE2 remap (s16, mono->stereo)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); } @@ -345,6 +425,11 @@ START_TEST (remap_neon_test) { pa_log_debug("Checking NEON remap (float, mono->4-channel)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 4, false); + pa_log_debug("Checking NEON remap (s32, mono->stereo)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false); + pa_log_debug("Checking NEON remap (s32, 
mono->4-channel)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 4, false); + pa_log_debug("Checking NEON remap (s16, mono->stereo)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false); pa_log_debug("Checking NEON remap (s16, mono->4-channel)"); @@ -355,6 +440,11 @@ START_TEST (remap_neon_test) { pa_log_debug("Checking NEON remap (float, 4-channel->mono)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 1, false); + pa_log_debug("Checking NEON remap (s32, stereo->mono)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 1, false); + pa_log_debug("Checking NEON remap (s32, 4-channel->mono)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 1, false); + pa_log_debug("Checking NEON remap (s16, stereo->mono)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 1, false); pa_log_debug("Checking NEON remap (s16, 4-channel->mono)"); @@ -362,6 +452,8 @@ START_TEST (remap_neon_test) { pa_log_debug("Checking NEON remap (float, 4-channel->4-channel)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, false); + pa_log_debug("Checking NEON remap (s32, 4-channel->4-channel)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, false); pa_log_debug("Checking NEON remap (s16, 4-channel->4-channel)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, false); } @@ -383,16 +475,22 @@ START_TEST (rearrange_neon_test) { pa_log_debug("Checking NEON remap (float, stereo rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 2, true); + pa_log_debug("Checking NEON remap (s32, stereo rearrange)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 2, true); pa_log_debug("Checking NEON remap (s16, stereo rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 2, 
true); pa_log_debug("Checking NEON remap (float, 2-channel->4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 4, true); + pa_log_debug("Checking NEON remap (s32, 2-channel->4-channel rearrange)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 4, true); pa_log_debug("Checking NEON remap (s16, 2-channel->4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 4, true); pa_log_debug("Checking NEON remap (float, 4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, true); + pa_log_debug("Checking NEON remap (s32, 4-channel rearrange)"); + remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, true); pa_log_debug("Checking NEON remap (s16, 4-channel rearrange)"); remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, true); }