From 852de6c35c8eb9af8fc8670d94f46bfe1bb79037 Mon Sep 17 00:00:00 2001
From: sunyuechi
Date: Mon, 23 Sep 2024 10:22:44 +0800
Subject: [PATCH] fmt-ops: add RVV optimizations for f32d_s16d

---
 spa/plugins/audioconvert/benchmark-fmt-ops.c |  1 +
 spa/plugins/audioconvert/fmt-ops-rvv.c       | 88 ++++++++++++++------
 spa/plugins/audioconvert/fmt-ops.c           |  1 +
 spa/plugins/audioconvert/fmt-ops.h           |  1 +
 spa/plugins/audioconvert/test-fmt-ops.c      |  2 +
 5 files changed, 68 insertions(+), 25 deletions(-)

diff --git a/spa/plugins/audioconvert/benchmark-fmt-ops.c b/spa/plugins/audioconvert/benchmark-fmt-ops.c
index c8144b5ce..7efc809b7 100644
--- a/spa/plugins/audioconvert/benchmark-fmt-ops.c
+++ b/spa/plugins/audioconvert/benchmark-fmt-ops.c
@@ -136,6 +136,7 @@ static void test_f32_s16(void)
 #if defined (HAVE_RVV)
 	if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
 		run_test("test_f32_s16", "rvv", true, true, conv_f32_to_s16_rvv);
+		run_test("test_f32d_s16d", "rvv", false, false, conv_f32d_to_s16d_rvv);
 	}
 #endif
 	run_test("test_f32_s16d", "c", true, false, conv_f32_to_s16d_c);
diff --git a/spa/plugins/audioconvert/fmt-ops-rvv.c b/spa/plugins/audioconvert/fmt-ops-rvv.c
index b8c41a853..ea5eb955a 100644
--- a/spa/plugins/audioconvert/fmt-ops-rvv.c
+++ b/spa/plugins/audioconvert/fmt-ops-rvv.c
@@ -5,6 +5,47 @@
 #include "fmt-ops.h"
 
 #if HAVE_RVV
+/*
+ * Convert n_samples contiguous 32-bit floats at src to signed 16-bit
+ * integers at dst with the RISC-V vector extension.
+ *
+ * Every sample is multiplied by 32768.0f (loaded through its bit pattern
+ * 0x47000000) and narrowed with vfncvt.x.f.w. t0, fa5 and v8-v15 are
+ * used as scratch and are listed as clobbers so the compiler never
+ * assigns one of them to an operand. conv is not used.
+ */
+static void
+f32_to_s16(struct convert *conv, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src,
+		uint32_t n_samples)
+{
+	__asm__ __volatile__ (
+		".option push \n\t"	/* keep +v from leaking past this block */
+		".option arch, +v \n\t"
+		"li t0, 1191182336 \n\t"	/* 0x47000000 == 32768.0f */
+		"fmv.w.x fa5, t0 \n\t"
+		"1: \n\t"
+		"vsetvli t0, %[n_samples], e32, m8, ta, ma \n\t"	/* t0 = elements this pass */
+		"vle32.v v8, (%[src]) \n\t"
+		"sub %[n_samples], %[n_samples], t0 \n\t"
+		"vfmul.vf v8, v8, fa5 \n\t"
+		"vsetvli zero, zero, e16, m4, ta, ma \n\t"
+		"vfncvt.x.f.w v8, v8 \n\t"
+		"slli t0, t0, 1 \n\t"	/* t0 = bytes written (2 per sample) */
+		"vse16.v v8, (%[dst]) \n\t"
+		"add %[src], %[src], t0 \n\t"	/* src moves 4 bytes per sample: 2 + 2 */
+		"add %[dst], %[dst], t0 \n\t"
+		"add %[src], %[src], t0 \n\t"
+		"bnez %[n_samples], 1b \n\t"
+		".option pop \n\t"
+		: [n_samples] "+r" (n_samples),
+		  [src] "+r" (src),
+		  [dst] "+r" (dst)
+		:
+		: "t0", "fa5", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+		  "cc", "memory"
+	);
+}
+
 void
 conv_f32_to_s16_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
 		uint32_t n_samples)
@@ -14,30 +55,27 @@ conv_f32_to_s16_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void
 		return;
 	}
 
-	__asm__ __volatile__ (
-		".option arch, +v \n\t"
-		"ld a1, (a1) \n\t"
-		"ld a2, (a2) \n\t"
-		"lwu a0, 16(a0) \n\t"
-		"li t0, 1191182336 \n\t"
-		"mul a0, a0, a3 \n\t"
-		"fmv.w.x fa5, t0 \n\t"
-		"1: \n\t"
-		"vsetvli t0, a0, e32, m8, ta, ma \n\t"
-		"vle32.v v8, (a2) \n\t"
-		"sub a0, a0, t0 \n\t"
-		"vfmul.vf v8, v8, fa5 \n\t"
-		"vsetvli zero, zero, e16, m4, ta, ma \n\t"
-		"vfncvt.x.f.w v8, v8 \n\t"
-		"slli t0, t0, 1 \n\t"
-		"vse16.v v8, (a1) \n\t"
-		"add a2, a2, t0 \n\t"
-		"add a1, a1, t0 \n\t"
-		"add a2, a2, t0 \n\t"
-		"bnez a0, 1b \n\t"
-		:
-		:
-		: "cc", "memory"
-	);
+	f32_to_s16(conv, dst[0], src[0], n_samples * conv->n_channels);
+}
+
+/*
+ * Planar f32 -> planar s16: each of the conv->n_channels planes is
+ * converted on its own. Inputs of at most 4 samples are handed to the
+ * C implementation instead.
+ */
+void
+conv_f32d_to_s16d_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
+		uint32_t n_samples)
+{
+	uint32_t i, n_channels = conv->n_channels;
+
+	if (n_samples <= 4) {
+		conv_f32d_to_s16d_c(conv, dst, src, n_samples);
+		return;
+	}
+
+	for (i = 0; i < n_channels; i++) {
+		f32_to_s16(conv, dst[i], src[i], n_samples);
+	}
 }
 #endif
diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c
index 317926b5b..b101ba3ce 100644
--- a/spa/plugins/audioconvert/fmt-ops.c
+++ b/spa/plugins/audioconvert/fmt-ops.c
@@ -179,6 +179,7 @@ static struct conv_info conv_table[] =
 #endif
 #if defined (HAVE_RVV)
 	MAKE(F32, S16, 0, conv_f32_to_s16_rvv, SPA_CPU_FLAG_RISCV_V),
+	MAKE(F32P, S16P, 0, conv_f32d_to_s16d_rvv, SPA_CPU_FLAG_RISCV_V),
 #endif
 	MAKE(F32, S16, 0, conv_f32_to_s16_c),
 
diff --git a/spa/plugins/audioconvert/fmt-ops.h b/spa/plugins/audioconvert/fmt-ops.h
index f82465f7d..29fe2a679 100644
--- a/spa/plugins/audioconvert/fmt-ops.h
+++ b/spa/plugins/audioconvert/fmt-ops.h
@@ -442,6 +442,7 @@ DEFINE_FUNCTION(f32d_to_s16, neon);
 #endif
 #if defined(HAVE_RVV)
 DEFINE_FUNCTION(f32_to_s16, rvv);
+DEFINE_FUNCTION(f32d_to_s16d, rvv);
 #endif
 #if defined(HAVE_SSE2)
 DEFINE_FUNCTION(s16_to_f32d_2, sse2);
diff --git a/spa/plugins/audioconvert/test-fmt-ops.c b/spa/plugins/audioconvert/test-fmt-ops.c
index 388580323..10c8e1f72 100644
--- a/spa/plugins/audioconvert/test-fmt-ops.c
+++ b/spa/plugins/audioconvert/test-fmt-ops.c
@@ -232,6 +232,8 @@ static void test_f32_s16(void)
 	if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
 		run_test("test_f32_s16_rvv", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
 			true, true, conv_f32_to_s16_rvv);
+		run_test("test_f32d_s16d_rvv", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
+			false, false, conv_f32d_to_s16d_rvv);
 	}
 #endif
 }