fmt-ops: add R-V V optimizations for f32_s16

This commit is contained in:
sunyuechi 2024-09-17 08:58:14 +08:00 committed by Wim Taymans
parent 8166b9c580
commit d932e52d5b
6 changed files with 71 additions and 0 deletions

View file

@ -132,6 +132,11 @@ static void test_f32_s16(void)
run_testc("test_f32d_s16_2", "avx2", false, true, conv_f32d_to_s16_2_avx2, 2);
run_testc("test_f32d_s16_4", "avx2", false, true, conv_f32d_to_s16_4_avx2, 4);
}
#endif
#if defined (HAVE_RVV)
if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
run_test("test_f32_s16", "rvv", false, true, conv_f32_to_s16_rvv);
}
#endif
run_test("test_f32_s16d", "c", true, false, conv_f32_to_s16d_c);
run_test("test_f32d_s16d", "c", false, false, conv_f32d_to_s16d_c);

View file

@ -0,0 +1,43 @@
/* Spa */
/* SPDX-FileCopyrightText: Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS). */
/* SPDX-License-Identifier: MIT */
#include "fmt-ops.h"
#if HAVE_RVV
/*
 * Convert 32-bit float samples to signed 16-bit samples with the RISC-V
 * Vector extension.
 *
 * conv       converter state; only conv->n_channels is read here
 * dst        dst[0] receives n_samples * conv->n_channels 16-bit values
 * src        src[0] supplies n_samples * conv->n_channels float values
 * n_samples  number of samples per channel
 *
 * Only src[0] and dst[0] are used; they are treated as one contiguous run
 * of n_samples * n_channels values. Runs of four values or fewer are
 * delegated to conv_f32_to_s16_c().
 *
 * Every value the assembly reads or modifies is passed as a named operand
 * and every fixed register it overwrites is listed as a clobber, so the
 * code no longer relies on the function arguments still living in a0-a3
 * when the asm statement is reached, nor on the byte offset of n_channels
 * inside struct convert.
 */
void
conv_f32_to_s16_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
		uint32_t n_samples)
{
	/* Register-sized count of values still to convert. */
	unsigned long n = (unsigned long)n_samples * conv->n_channels;
	const float *s = src[0];
	int16_t *d = dst[0];
	unsigned long tmp;

	if (n <= 4) {
		conv_f32_to_s16_c(conv, dst, src, n_samples);
		return;
	}

	__asm__ __volatile__ (
		/* Enable V for this statement only; restore the previous arch after. */
		".option       push                                \n\t"
		".option       arch, +v                            \n\t"
		/* 1191182336 == 0x47000000, the IEEE-754 bit pattern of 32768.0f. */
		"li            %[tmp], 1191182336                  \n\t"
		"fmv.w.x       fa5, %[tmp]                         \n\t"
		"1:                                                \n\t"
		/* tmp = number of elements handled in this pass (vl). */
		"vsetvli       %[tmp], %[n], e32, m8, ta, ma       \n\t"
		"vle32.v       v8, (%[s])                          \n\t"
		"sub           %[n], %[n], %[tmp]                  \n\t"
		"vfmul.vf      v8, v8, fa5                         \n\t"
		/* Same vl, narrower elements: float32 -> int16 narrowing convert. */
		"vsetvli       zero, zero, e16, m4, ta, ma         \n\t"
		"vfncvt.x.f.w  v8, v8                              \n\t"
		/* tmp = vl * 2 bytes: dst advances once, src (4 bytes/elem) twice. */
		"slli          %[tmp], %[tmp], 1                   \n\t"
		"vse16.v       v8, (%[d])                          \n\t"
		"add           %[s], %[s], %[tmp]                  \n\t"
		"add           %[d], %[d], %[tmp]                  \n\t"
		"add           %[s], %[s], %[tmp]                  \n\t"
		"bnez          %[n], 1b                            \n\t"
		".option       pop                                 \n\t"
		: [n] "+r" (n), [s] "+r" (s), [d] "+r" (d), [tmp] "=&r" (tmp)
		:
		/* v8-v15 form the m8 register group used above. */
		: "fa5", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
		  "cc", "memory"
	);
}
#endif

View file

@ -176,6 +176,9 @@ static struct conv_info conv_table[] =
#if defined (HAVE_SSE2)
MAKE(F32, S16, 0, conv_f32_to_s16_sse2, SPA_CPU_FLAG_SSE2),
#endif
#if defined (HAVE_RVV)
MAKE(F32, S16, 0, conv_f32_to_s16_rvv, SPA_CPU_FLAG_RISCV_V),
#endif
MAKE(F32, S16, 0, conv_f32_to_s16_c),

View file

@ -440,6 +440,9 @@ DEFINE_FUNCTION(s16_to_f32d_2, neon);
DEFINE_FUNCTION(s16_to_f32d, neon);
DEFINE_FUNCTION(f32d_to_s16, neon);
#endif
#if defined(HAVE_RVV)
DEFINE_FUNCTION(f32_to_s16, rvv);
#endif
#if defined(HAVE_SSE2)
DEFINE_FUNCTION(s16_to_f32d_2, sse2);
DEFINE_FUNCTION(s16_to_f32d, sse2);

View file

@ -105,6 +105,17 @@ if have_neon
simd_dependencies += audioconvert_neon
endif
if have_rvv
audioconvert_rvv = static_library('audioconvert_rvv',
['fmt-ops-rvv.c' ],
c_args : ['-O3', '-DHAVE_RVV'],
dependencies : [ spa_dep ],
install : false
)
simd_cargs += ['-DHAVE_RVV']
simd_dependencies += audioconvert_rvv
endif
sparesampledumpcoeffs_sources = [
'resample-native.c',
'resample-native-c.c',

View file

@ -228,6 +228,12 @@ static void test_f32_s16(void)
false, true, conv_f32d_to_s16_neon);
}
#endif
#if defined(HAVE_RVV)
if (cpu_flags & SPA_CPU_FLAG_RISCV_V) {
run_test("test_f32_s16_rvv", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, true, conv_f32_to_s16_rvv);
}
#endif
}
static void test_s16_f32(void)