fmt-ops: add RVV optimizations for f32d_s16d

This commit is contained in:
sunyuechi 2024-09-23 10:22:44 +08:00 committed by Wim Taymans
parent 9348ad8115
commit 852de6c35c
5 changed files with 50 additions and 25 deletions

View file

@ -136,6 +136,7 @@ static void test_f32_s16(void)
#if defined (HAVE_RVV)
if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
run_test("test_f32_s16", "rvv", true, true, conv_f32_to_s16_rvv);
run_test("test_f32d_s16d", "rvv", false, false, conv_f32d_to_s16d_rvv);
}
#endif
run_test("test_f32_s16d", "c", true, false, conv_f32_to_s16d_c);

View file

@ -5,6 +5,35 @@
#include "fmt-ops.h"
#if HAVE_RVV
/*
 * Convert n_samples contiguous 32-bit float samples at src into signed
 * 16-bit integer samples at dst, using the RISC-V vector extension.
 *
 * Every sample is multiplied by 32768.0f (the integer 1191182336 is the
 * IEEE-754 bit pattern 0x47000000) and then narrowed with vfncvt.x.f.w.
 *
 * conv is not used by this helper. dst keeps its array-style declaration,
 * but the callers in this file pass a single sample buffer (*dst, dst[i])
 * and the vector store writes through dst itself.
 *
 * The helper is file-local: it has no prototype and a very generic name, so
 * it is given internal linkage to keep it out of the global namespace.
 */
static void
f32_to_s16(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src,
		uint32_t n_samples)
{
	/* __asm__ (rather than plain asm) also compiles in strict ISO C modes. */
	__asm__ __volatile__ (
		".option       arch, +v                    \n\t"
		/* fa5 = 32768.0f, the float -> s16 scale factor */
		"li            t0, 1191182336              \n\t"
		"fmv.w.x       fa5, t0                     \n\t"
		"1:                                        \n\t"
		/* t0 = number of samples handled in this iteration */
		"vsetvli       t0, %[n_samples], e32, m8, ta, ma \n\t"
		"vle32.v       v8, (%[src])                \n\t"
		"sub           %[n_samples], %[n_samples], t0 \n\t"
		"vfmul.vf      v8, v8, fa5                 \n\t"
		/* same vl, but 16-bit elements for the narrowed result */
		"vsetvli       zero, zero, e16, m4, ta, ma \n\t"
		"vfncvt.x.f.w  v8, v8                      \n\t"
		/* t0 = bytes written (2 per sample) */
		"slli          t0, t0, 1                   \n\t"
		"vse16.v       v8, (%[dst])                \n\t"
		/* src advances 4 bytes per sample, hence t0 is added twice */
		"add           %[src], %[src], t0          \n\t"
		"add           %[dst], %[dst], t0          \n\t"
		"add           %[src], %[src], t0          \n\t"
		"bnez          %[n_samples], 1b            \n\t"
		: [n_samples] "+r" (n_samples),
		  [src] "+r" (src),
		  [dst] "+r" (dst)
		:
		/*
		 * Every register the template names explicitly must be listed,
		 * otherwise the compiler may keep live values in them across the
		 * asm statement: the t0 scratch register, the fa5 scale factor
		 * and the v8..v15 group used by the e32/m8 load and multiply.
		 */
		: "cc", "memory", "t0", "fa5",
		  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"
	);
}
void
conv_f32_to_s16_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_samples)
@ -14,30 +43,21 @@ conv_f32_to_s16_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void
return;
}
__asm__ __volatile__ (
".option arch, +v \n\t"
"ld a1, (a1) \n\t"
"ld a2, (a2) \n\t"
"lwu a0, 16(a0) \n\t"
"li t0, 1191182336 \n\t"
"mul a0, a0, a3 \n\t"
"fmv.w.x fa5, t0 \n\t"
"1: \n\t"
"vsetvli t0, a0, e32, m8, ta, ma \n\t"
"vle32.v v8, (a2) \n\t"
"sub a0, a0, t0 \n\t"
"vfmul.vf v8, v8, fa5 \n\t"
"vsetvli zero, zero, e16, m4, ta, ma \n\t"
"vfncvt.x.f.w v8, v8 \n\t"
"slli t0, t0, 1 \n\t"
"vse16.v v8, (a1) \n\t"
"add a2, a2, t0 \n\t"
"add a1, a1, t0 \n\t"
"add a2, a2, t0 \n\t"
"bnez a0, 1b \n\t"
:
:
: "cc", "memory"
);
f32_to_s16(conv, *dst, *src, n_samples * conv -> n_channels);
}
/*
 * Planar float32 -> planar s16 conversion, RVV flavour.
 *
 * Requests of at most 4 samples are forwarded to the plain C
 * implementation. Larger requests convert each of the conv->n_channels
 * planes separately through f32_to_s16().
 */
void
conv_f32d_to_s16d_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
		uint32_t n_samples)
{
	if (n_samples > 4) {
		const uint32_t channel_count = conv->n_channels;
		uint32_t ch = 0;

		/* one vector pass per channel plane */
		while (ch < channel_count) {
			f32_to_s16(conv, dst[ch], src[ch], n_samples);
			ch++;
		}
		return;
	}

	/* short run: use the C fallback */
	conv_f32d_to_s16d_c(conv, dst, src, n_samples);
}
#endif

View file

@ -179,6 +179,7 @@ static struct conv_info conv_table[] =
#endif
#if defined (HAVE_RVV)
MAKE(F32, S16, 0, conv_f32_to_s16_rvv, SPA_CPU_FLAG_RISCV_V),
MAKE(F32P, S16P, 0, conv_f32d_to_s16d_rvv, SPA_CPU_FLAG_RISCV_V),
#endif
MAKE(F32, S16, 0, conv_f32_to_s16_c),

View file

@ -442,6 +442,7 @@ DEFINE_FUNCTION(f32d_to_s16, neon);
#endif
#if defined(HAVE_RVV)
DEFINE_FUNCTION(f32_to_s16, rvv);
DEFINE_FUNCTION(f32d_to_s16d, rvv);
#endif
#if defined(HAVE_SSE2)
DEFINE_FUNCTION(s16_to_f32d_2, sse2);

View file

@ -232,6 +232,8 @@ static void test_f32_s16(void)
if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
run_test("test_f32_s16_rvv", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, true, conv_f32_to_s16_rvv);
run_test("test_f32d_s16d_rvv", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_f32d_to_s16d_rvv);
}
#endif
}