mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-10-31 22:25:38 -04:00
audioconvert: add stereo deinterleave neon asm
This can take some shortcuts and convert twice as many samples in one iteration as the strided stereo deinterleave one.
This commit is contained in:
parent
6fab8fabca
commit
0ace131d72
3 changed files with 84 additions and 0 deletions
|
|
@ -28,6 +28,88 @@
|
||||||
|
|
||||||
#include "fmt-ops.h"
|
#include "fmt-ops.h"
|
||||||
|
|
||||||
|
void
|
||||||
|
conv_s16_to_f32d_2_neon(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||||
|
uint32_t n_samples)
|
||||||
|
{
|
||||||
|
const int16_t *s = src[0];
|
||||||
|
float *d0 = dst[0], *d1 = dst[1];
|
||||||
|
unsigned int remainder = n_samples & 7;
|
||||||
|
n_samples -= remainder;
|
||||||
|
|
||||||
|
#ifdef __aarch64__
|
||||||
|
asm volatile(
|
||||||
|
" cmp %[n_samples], #0\n"
|
||||||
|
" beq 2f\n"
|
||||||
|
"1:"
|
||||||
|
" ld2 {v2.8h, v3.8h}, [%[s]], #32\n"
|
||||||
|
" subs %w[n_samples], %w[n_samples], #8\n"
|
||||||
|
" sxtl v0.4s, v2.4h\n"
|
||||||
|
" sxtl2 v1.4s, v2.8h\n"
|
||||||
|
" sxtl v2.4s, v3.4h\n"
|
||||||
|
" sxtl2 v3.4s, v3.8h\n"
|
||||||
|
" scvtf v0.4s, v0.4s, #15\n"
|
||||||
|
" scvtf v1.4s, v1.4s, #15\n"
|
||||||
|
" scvtf v2.4s, v2.4s, #15\n"
|
||||||
|
" scvtf v3.4s, v3.4s, #15\n"
|
||||||
|
" st1 {v0.4s, v1.4s}, [%[d0]], #32\n"
|
||||||
|
" st1 {v2.4s, v3.4s}, [%[d1]], #32\n"
|
||||||
|
" b.ne 1b\n"
|
||||||
|
"2:"
|
||||||
|
" cmp %[remainder], #0\n"
|
||||||
|
" beq 4f\n"
|
||||||
|
"3:"
|
||||||
|
" ld2 { v0.h, v1.h }[0], [%[s]], #4\n"
|
||||||
|
" subs %[remainder], %[remainder], #1\n"
|
||||||
|
" sshll v2.4s, v0.4h, #0\n"
|
||||||
|
" sshll v3.4s, v1.4h, #0\n"
|
||||||
|
" scvtf v0.4s, v2.4s, #15\n"
|
||||||
|
" scvtf v1.4s, v3.4s, #15\n"
|
||||||
|
" st1 { v0.s }[0], [%[d0]], #4\n"
|
||||||
|
" st1 { v1.s }[0], [%[d1]], #4\n"
|
||||||
|
" bne 3b\n"
|
||||||
|
"4:"
|
||||||
|
: [d0] "+r" (d0), [d1] "+r" (d1), [s] "+r" (s), [n_samples] "+r" (n_samples),
|
||||||
|
[remainder] "+r" (remainder)
|
||||||
|
: : "v0", "v1", "v2", "v3", "memory", "cc");
|
||||||
|
#else
|
||||||
|
asm volatile(
|
||||||
|
" cmp %[n_samples], #0\n"
|
||||||
|
" beq 2f\n"
|
||||||
|
"1:"
|
||||||
|
" vld2.16 {d0-d3}, [%[s]]!\n"
|
||||||
|
" subs %[n_samples], #8\n"
|
||||||
|
" vmovl.s16 q3, d3\n"
|
||||||
|
" vmovl.s16 q2, d2\n"
|
||||||
|
" vmovl.s16 q1, d1\n"
|
||||||
|
" vmovl.s16 q0, d0\n"
|
||||||
|
" vcvt.f32.s32 q3, q3, #15\n"
|
||||||
|
" vcvt.f32.s32 q2, q2, #15\n"
|
||||||
|
" vcvt.f32.s32 q1, q1, #15\n"
|
||||||
|
" vcvt.f32.s32 q0, q0, #15\n"
|
||||||
|
" vst1.32 {d4-d7}, [%[d1]]!\n"
|
||||||
|
" vst1.32 {d0-d3}, [%[d0]]!\n"
|
||||||
|
" bne 1b\n"
|
||||||
|
"2:"
|
||||||
|
" cmp %[remainder], #0\n"
|
||||||
|
" beq 4f\n"
|
||||||
|
"3:"
|
||||||
|
" vld2.16 { d0[0], d1[0] }, [%[s]], #4\n"
|
||||||
|
" subs %[remainder], %[remainder], #1\n"
|
||||||
|
" vmovl.s16 q1, d1\n"
|
||||||
|
" vmovl.s16 q0, d0\n"
|
||||||
|
" vcvt.f32.s32 q1, q1, #15\n"
|
||||||
|
" vcvt.f32.s32 q0, q0, #15\n"
|
||||||
|
" vst1.32 { d2[0] }, [%[d1]]!\n"
|
||||||
|
" vst1.32 { d0[0] }, [%[d0]]!\n"
|
||||||
|
" bne 3b\n"
|
||||||
|
"4:"
|
||||||
|
: [d0] "+r" (d0), [d1] "+r" (d1), [s] "+r" (s), [n_samples] "+r" (n_samples),
|
||||||
|
[remainder] "+r" (remainder)
|
||||||
|
: : "q0", "q1", "q2", "q3", "memory", "cc");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
conv_s16_to_f32d_2s_neon(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
|
conv_s16_to_f32d_2s_neon(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
|
||||||
uint32_t n_channels, uint32_t n_samples)
|
uint32_t n_channels, uint32_t n_samples)
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ static struct conv_info conv_table[] =
|
||||||
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32, 0, 0, conv_s16_to_f32_c },
|
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32, 0, 0, conv_s16_to_f32_c },
|
||||||
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32P, 0, 0, conv_s16d_to_f32d_c },
|
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32P, 0, 0, conv_s16d_to_f32d_c },
|
||||||
#if defined (HAVE_NEON)
|
#if defined (HAVE_NEON)
|
||||||
|
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, 2, SPA_CPU_FLAG_NEON, conv_s16_to_f32d_2_neon },
|
||||||
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_NEON, conv_s16_to_f32d_neon },
|
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_NEON, conv_s16_to_f32d_neon },
|
||||||
#endif
|
#endif
|
||||||
#if defined (HAVE_AVX2)
|
#if defined (HAVE_AVX2)
|
||||||
|
|
|
||||||
|
|
@ -295,6 +295,7 @@ DEFINE_FUNCTION(interleave_32, c);
|
||||||
DEFINE_FUNCTION(interleave_32s, c);
|
DEFINE_FUNCTION(interleave_32s, c);
|
||||||
|
|
||||||
#if defined(HAVE_NEON)
|
#if defined(HAVE_NEON)
|
||||||
|
DEFINE_FUNCTION(s16_to_f32d_2, neon);
|
||||||
DEFINE_FUNCTION(s16_to_f32d, neon);
|
DEFINE_FUNCTION(s16_to_f32d, neon);
|
||||||
DEFINE_FUNCTION(f32d_to_s16, neon);
|
DEFINE_FUNCTION(f32d_to_s16, neon);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue