core: Add ARM NEON optimized sample conversion code

final:
* includes some minor style fixes and build-time changes to allow
  building a single binary for neon and non-neon systems
v4:
* fix for sample length < 4
v3:
* convert from intrinsics to inline assembly
v2:
* load and store data with vld1/vld1q and vst1/vst1q, resp., to work
  around alignment issues of compiler-generated vldmia instruction
* remove redundant check for NEON flags

Ubuntu/Linaro gcc 4.6.3
arm-linux-gnueabi-gcc -O2 -mcpu=cortex-a8 -mfloat-abi=softfp -mfpu=neon

runtime on beagle-xm:

D: [pulseaudio] sconv_neon.c: checking NEON sconv_s16le_from_float
I: [pulseaudio] sconv_neon.c: NEON: 3754 usec.
I: [pulseaudio] sconv_neon.c: ref: 58594 usec.
D: [pulseaudio] sconv_neon.c: checking NEON sconv_s16le_to_float
I: [pulseaudio] sconv_neon.c: NEON: 1831 usec.
I: [pulseaudio] sconv_neon.c: ref: 10528 usec.
I: [pulseaudio] sconv_neon.c: Initialising ARM NEON optimized conversions.

Note: the conversion may be off by one for some samples due to rounding issues.
This commit is contained in:
Peter Meerwald 2012-10-23 17:54:57 +02:00 committed by Arun Raghavan
parent 4171df3019
commit 1319c4533a
6 changed files with 193 additions and 1 deletions

View file

@ -282,6 +282,35 @@ START_TEST (sconv_sse_test) {
END_TEST
#endif /* defined (__i386__) || defined (__amd64__) */
#if defined (__arm__) && defined (__linux__)
#if HAVE_NEON
/*
 * Compare the NEON float32ne -> s16le sample conversion against the
 * conversion function that was installed before NEON initialisation.
 *
 * The test is skipped when the CPU flags reported at runtime do not include
 * PA_CPU_ARM_NEON. It is only compiled when HAVE_NEON is set, matching the
 * guard used where the test is registered with the test case.
 */
START_TEST (sconv_neon_test) {
    pa_cpu_arm_flag_t flags = 0;
    pa_convert_func_t orig_func, neon_func;

    pa_cpu_get_arm_flags(&flags);

    if (!(flags & PA_CPU_ARM_NEON)) {
        pa_log_info("NEON not supported. Skipping");
        return;
    }

    /* Grab the currently installed converter first: it serves as the
     * reference once the NEON initialisation has replaced it. */
    orig_func = pa_get_convert_from_float32ne_function(PA_SAMPLE_S16LE);
    pa_convert_func_init_neon(flags);
    neon_func = pa_get_convert_from_float32ne_function(PA_SAMPLE_S16LE);

    pa_log_debug("Checking NEON sconv (float -> s16)");
    /* NOTE(review): the third argument (0..7) is presumably a buffer
     * alignment offset and the last two flags presumably select the
     * correctness and timing passes -- confirm against
     * run_conv_test_float_to_s16(). Only the final call passes TRUE as
     * the last argument. */
    run_conv_test_float_to_s16(neon_func, orig_func, 0, TRUE, FALSE);
    run_conv_test_float_to_s16(neon_func, orig_func, 1, TRUE, FALSE);
    run_conv_test_float_to_s16(neon_func, orig_func, 2, TRUE, FALSE);
    run_conv_test_float_to_s16(neon_func, orig_func, 3, TRUE, FALSE);
    run_conv_test_float_to_s16(neon_func, orig_func, 4, TRUE, FALSE);
    run_conv_test_float_to_s16(neon_func, orig_func, 5, TRUE, FALSE);
    run_conv_test_float_to_s16(neon_func, orig_func, 6, TRUE, FALSE);
    run_conv_test_float_to_s16(neon_func, orig_func, 7, TRUE, TRUE);
}
END_TEST
#endif /* HAVE_NEON */
#endif /* defined (__arm__) && defined (__linux__) */
#undef SAMPLES
#undef TIMES
/* End conversion tests */
@ -313,6 +342,11 @@ int main(int argc, char *argv[]) {
tc = tcase_create("sconv");
#if defined (__i386__) || defined (__amd64__)
tcase_add_test(tc, sconv_sse_test);
#endif
#if defined (__arm__) && defined (__linux__)
#if HAVE_NEON
tcase_add_test(tc, sconv_neon_test);
#endif
#endif
suite_add_tcase(s, tc);