fmt-ops: add RVV optimizations for s16_to_f32d

This commit is contained in:
sunyuechi 2024-09-23 21:03:28 +08:00 committed by Wim Taymans
parent 588f2bcb69
commit 74832445ba
5 changed files with 64 additions and 0 deletions

View file

@ -160,6 +160,11 @@ static void test_s16_f32(void)
run_test("test_s16_f32d", "avx2", true, false, conv_s16_to_f32d_avx2);
run_testc("test_s16_f32d_2", "avx2", true, false, conv_s16_to_f32d_2_avx2, 2);
}
#endif
#if defined (HAVE_RVV)
if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
run_test("test_s16_f32d", "rvv", true, false, conv_s16_to_f32d_rvv);
}
#endif
run_test("test_s16d_f32d", "c", false, false, conv_s16d_to_f32d_c);
}

View file

@ -108,4 +108,53 @@ conv_f32d_to_s16_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void
for(i = 0; i < n_channels; i++)
f32d_to_s16(conv, &d[i], &src[i], n_channels, n_samples);
}
/*
 * Convert one channel of interleaved signed 16-bit samples to 32-bit floats
 * using the RISC-V Vector extension.
 *
 * data        unused by this function.
 * dst         dst[0] is the output buffer; n_samples floats are written
 *             contiguously starting there.
 * src         first int16 sample of the channel to extract; successive
 *             samples are read n_channels * sizeof(int16_t) bytes apart.
 * n_channels  number of interleaved channels in the source buffer.
 * n_samples   number of samples to convert. The visible caller only invokes
 *             this with n_samples > 4.
 *
 * Every sample is converted to float and multiplied by 2^-15.
 */
static void
s16_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
		uint32_t n_channels, uint32_t n_samples)
{
	float *d = dst[0];
	/* Byte distance between two consecutive samples of the same channel,
	 * computed in pointer width because it is used as an address stride. */
	const uintptr_t stride = (uintptr_t)n_channels * sizeof(int16_t);

	asm __volatile__ (
		".option       arch, +v                        \n\t"
		/* 939524096 == 0x38000000, the IEEE-754 single bit pattern of 2^-15 */
		"li            t0, 939524096                   \n\t"
		"fmv.w.x       fa5, t0                         \n\t"
		"1:                                            \n\t"
		/* t0 = number of samples handled in this iteration */
		"vsetvli       t0, %[n_samples], e16, m4, ta, ma \n\t"
		/* strided load: picks one channel out of the interleaved input */
		"vlse16.v      v8, (%[src]), %[stride]         \n\t"
		"sub           %[n_samples], %[n_samples], t0  \n\t"
		/* widening convert int16 -> float32 (v8..v11 -> v16..v23) */
		"vfwcvt.f.x.v  v16, v8                         \n\t"
		"vsetvli       zero, zero, e32, m8, ta, ma     \n\t"
		/* t4 = source bytes consumed */
		"mul           t4, t0, %[stride]               \n\t"
		"vfmul.vf      v8, v16, fa5                    \n\t"
		/* t3 = destination bytes produced (4 per float) */
		"slli          t3, t0, 2                       \n\t"
		"vse32.v       v8, (%[d])                      \n\t"
		"add           %[src], %[src], t4              \n\t"
		"add           %[d], %[d], t3                  \n\t"
		"bnez          %[n_samples], 1b                \n\t"
		: [n_samples] "+r" (n_samples),
		  [src] "+r" (src),
		  [d] "+r" (d)
		: [stride] "r" (stride)
		/* Every register written by the template must be listed here,
		 * otherwise the compiler may allocate an operand or keep a live
		 * value in one of them. */
		: "cc", "memory",
		  "t0", "t3", "t4", "fa5",
		  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
		  "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"
	);
}
/*
 * Convert interleaved signed 16-bit samples to planar 32-bit floats.
 *
 * conv       supplies n_channels and is passed through to the helpers.
 * dst        array of per-channel output buffers; dst[c] receives channel c.
 * src        src[0] is the interleaved int16 input buffer.
 * n_samples  number of samples per channel.
 *
 * Inputs of four samples or fewer are handed to the plain C implementation
 * (presumably because the vector setup cost is not worth it for such short
 * runs). Longer inputs are converted one channel at a time by s16_to_f32d().
 */
void
conv_s16_to_f32d_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
		uint32_t n_samples)
{
	if (n_samples > 4) {
		const int16_t *in = src[0];
		const uint32_t channels = conv->n_channels;
		uint32_t ch = 0;

		/* Channel ch starts at in[ch] and is written to dst[ch]. */
		while (ch < channels) {
			s16_to_f32d(conv, &dst[ch], &in[ch], channels, n_samples);
			ch++;
		}
	} else {
		conv_s16_to_f32d_c(conv, dst, src, n_samples);
	}
}
#endif

View file

@ -68,6 +68,9 @@ static struct conv_info conv_table[] =
#if defined (HAVE_SSE2)
MAKE(S16, F32P, 2, conv_s16_to_f32d_2_sse2, SPA_CPU_FLAG_SSE2),
MAKE(S16, F32P, 0, conv_s16_to_f32d_sse2, SPA_CPU_FLAG_SSE2),
#endif
#if defined (HAVE_RVV)
MAKE(S16, F32P, 0, conv_s16_to_f32d_rvv, SPA_CPU_FLAG_RISCV_V),
#endif
MAKE(S16, F32P, 0, conv_s16_to_f32d_c),
MAKE(S16P, F32, 0, conv_s16d_to_f32_c),

View file

@ -444,6 +444,7 @@ DEFINE_FUNCTION(f32d_to_s16, neon);
DEFINE_FUNCTION(f32_to_s16, rvv);
DEFINE_FUNCTION(f32d_to_s16d, rvv);
DEFINE_FUNCTION(f32d_to_s16, rvv);
DEFINE_FUNCTION(s16_to_f32d, rvv);
#endif
#if defined(HAVE_SSE2)
DEFINE_FUNCTION(s16_to_f32d_2, sse2);

View file

@ -271,6 +271,12 @@ static void test_s16_f32(void)
true, false, conv_s16_to_f32d_neon);
}
#endif
#if defined(HAVE_RVV)
if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
run_test("test_s16_f32d_rvv", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, false, conv_s16_to_f32d_rvv);
}
#endif
}
static void test_f32_u32(void)