From ad0580a22c12ecd66b16d673748861f98d7b671a Mon Sep 17 00:00:00 2001 From: sunyuechi Date: Fri, 13 Sep 2024 11:15:43 +0800 Subject: [PATCH 1/2] core: Add RISC-V V optimized sconv pa_sconv_s16le_from_float32ne --- meson.build | 13 +++++++++ src/pulsecore/cpu-riscv.c | 57 ++++++++++++++++++++++++++++++++++++++ src/pulsecore/cpu-riscv.h | 41 +++++++++++++++++++++++++++ src/pulsecore/cpu.c | 2 ++ src/pulsecore/cpu.h | 3 ++ src/pulsecore/meson.build | 2 ++ src/pulsecore/sconv_rvv.c | 58 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 176 insertions(+) create mode 100644 src/pulsecore/cpu-riscv.c create mode 100644 src/pulsecore/cpu-riscv.h create mode 100644 src/pulsecore/sconv_rvv.c diff --git a/meson.build b/meson.build index 1aec95adc..db712380d 100644 --- a/meson.build +++ b/meson.build @@ -293,6 +293,7 @@ endif check_usable_headers = [ 'cpuid.h', + 'sys/auxv.h', ] foreach h : check_usable_headers @@ -594,6 +595,18 @@ if host_machine.cpu_family() == 'arm' endif # NEON checks are automatically done by the unstable-simd module +if host_machine.cpu_family() == 'riscv64' + if cc.compiles(''' + int main() { + __asm__ __volatile__ ( + ".option arch, +v\nvsetivli zero, 0, e8, m1, ta, ma" + ); + } + ''', name : 'rvv code') + cdata.set('HAVE_RVV', 1) + endif +endif + # Dependencies common to client, daemon and modules if get_option('ipv6') diff --git a/src/pulsecore/cpu-riscv.c b/src/pulsecore/cpu-riscv.c new file mode 100644 index 000000000..209ccf518 --- /dev/null +++ b/src/pulsecore/cpu-riscv.c @@ -0,0 +1,57 @@ +/*** + This file is part of PulseAudio. + + Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS). + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. 
+ + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, see . +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#if HAVE_SYS_AUXV_H +#include +#define HWCAP_RV(letter) (1ul << ((letter) - 'A')) +#endif + +#include + +#include "cpu-riscv.h" + +void pa_cpu_get_riscv_flags(pa_cpu_riscv_flag_t *flags) { +#if HAVE_SYS_AUXV_H + const unsigned long hwcap = getauxval(AT_HWCAP); + + if (hwcap & HWCAP_RV('V')) + *flags |= PA_CPU_RISCV_V; + + pa_log_info("CPU flags: %s", (*flags & PA_CPU_RISCV_V) ? "V" : ""); +#endif +} + +bool pa_cpu_init_riscv(pa_cpu_riscv_flag_t *flags) { + pa_cpu_get_riscv_flags(flags); + +#if HAVE_RVV + if (*flags & PA_CPU_RISCV_V) { + pa_convert_func_init_rvv(*flags); + } + return true; +#else + return false; +#endif +} diff --git a/src/pulsecore/cpu-riscv.h b/src/pulsecore/cpu-riscv.h new file mode 100644 index 000000000..352629b43 --- /dev/null +++ b/src/pulsecore/cpu-riscv.h @@ -0,0 +1,41 @@ +#ifndef foocpuriscvhfoo +#define foocpuriscvhfoo + +/*** + This file is part of PulseAudio. + + Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS). + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, see . +***/ + +#include +#include + +#ifndef PACKAGE +#error "Please include config.h before including this file!" +#endif +/* Feature bits reported for RISC-V CPUs; PA_CPU_RISCV_V marks the V (vector) extension. */ +typedef enum pa_cpu_riscv_flag { + PA_CPU_RISCV_V = (1 << 0), +} pa_cpu_riscv_flag_t; +/* pa_cpu_get_riscv_flags() only probes; pa_cpu_init_riscv() probes and then installs the optimized code. */ +void pa_cpu_get_riscv_flags(pa_cpu_riscv_flag_t *flags); +bool pa_cpu_init_riscv (pa_cpu_riscv_flag_t *flags); +/* Defined in sconv_rvv.c, which only provides it when HAVE_RVV is set. */ +#ifdef HAVE_RVV +void pa_convert_func_init_rvv(pa_cpu_riscv_flag_t flags); +#endif + +#endif /* foocpuriscvhfoo */ diff --git a/src/pulsecore/cpu.c b/src/pulsecore/cpu.c index e0c110e72..857466d8d 100644 --- a/src/pulsecore/cpu.c +++ b/src/pulsecore/cpu.c @@ -30,6 +30,8 @@ void pa_cpu_init(pa_cpu_info *cpu_info) { cpu_info->cpu_type = PA_CPU_X86; else if (pa_cpu_init_arm(&cpu_info->flags.arm)) cpu_info->cpu_type = PA_CPU_ARM; + else if (pa_cpu_init_riscv(&cpu_info->flags.riscv)) + cpu_info->cpu_type = PA_CPU_RISCV; pa_cpu_init_orc(*cpu_info); } diff --git a/src/pulsecore/cpu.h b/src/pulsecore/cpu.h index e65c4fb60..5eb071b2f 100644 --- a/src/pulsecore/cpu.h +++ b/src/pulsecore/cpu.h @@ -22,11 +22,13 @@ #include #include +#include typedef enum { PA_CPU_UNDEFINED = 0, PA_CPU_X86, PA_CPU_ARM, + PA_CPU_RISCV, } pa_cpu_type_t; typedef struct pa_cpu_info pa_cpu_info; @@ -37,6 +39,7 @@ struct pa_cpu_info { union { pa_cpu_x86_flag_t x86; pa_cpu_arm_flag_t arm; + pa_cpu_riscv_flag_t riscv; } flags; bool force_generic_code; }; diff --git a/src/pulsecore/meson.build b/src/pulsecore/meson.build index b37fec499..b6b8af0ac 100644 --- a/src/pulsecore/meson.build +++ b/src/pulsecore/meson.build @@ -13,6 +13,7 @@ libpulsecore_sources = [ 'cpu-arm.c', 'cpu-orc.c', 'cpu-x86.c', + 'cpu-riscv.c', 'device-port.c', 'database.c', 'ffmpeg/resample2.c', @@ -40,6 +41,7 @@ libpulsecore_sources = [ 'sconv-s16be.c', 'sconv-s16le.c', 'sconv.c', + 'sconv_rvv.c', 'shared.c', 'sink.c', 'sink-input.c', diff --git a/src/pulsecore/sconv_rvv.c b/src/pulsecore/sconv_rvv.c 
new file mode 100644 index 000000000..087402732 --- /dev/null +++ b/src/pulsecore/sconv_rvv.c @@ -0,0 +1,58 @@ +/*** + This file is part of PulseAudio. + + Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS). + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "cpu-riscv.h" +#include "sconv.h" + +#if HAVE_RVV +static void pa_sconv_s16le_from_f32ne_rvv(unsigned n, const float *src, int16_t *dst) { + __asm__ __volatile__ ( + ".option arch, +v \n\t" + "li t0, 1191182336 \n\t" + "fmv.w.x fa5, t0 \n\t" + "1: \n\t" + "vsetvli t0, a0, e32, m8, ta, ma \n\t" + "vle32.v v8, (a1) \n\t" + "sub a0, a0, t0 \n\t" + "vfmul.vf v8, v8, fa5 \n\t" + "vsetvli zero, zero, e16, m4, ta, ma \n\t" + "vfncvt.x.f.w v8, v8 \n\t" + "slli t0, t0, 1 \n\t" + "vse16.v v8, (a2) \n\t" + "add a1, a1, t0 \n\t" + "add a1, a1, t0 \n\t" + "add a2, a2, t0 \n\t" + "bnez a0, 1b \n\t" + + : + : + : "cc", "memory" + ); +} + +void pa_convert_func_init_rvv(pa_cpu_riscv_flag_t flags) { + pa_log_info("Initialising RVV optimized conversions."); + + pa_set_convert_from_float32ne_function(PA_SAMPLE_S16LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_rvv); +} +#endif From 790c94617bea9390bac11503a8e331261f18c0ba Mon Sep 17 00:00:00 2001 From: sunyuechi Date: Fri, 13 Sep 2024 22:26:01 +0800 Subject: [PATCH 2/2] tests: Implement test code for RISC-V V sconv pa_sconv_s16le_from_float32ne banana_f3: func: 52582 usec (avg: 525.82, min = 523, max = 565, stddev = 6.4271). 
orig: 3568592 usec (avg: 35685.9, min = 35659, max = 35790, stddev = 20.7353). 100%: Checks: 1, Failures: 0, Errors: 0 k230: func: 96150 usec (avg: 961.5, min = 919, max = 1275, stddev = 75.9664). orig: 3961687 usec (avg: 39616.9, min = 38924, max = 40617, stddev = 484.576). 100%: Checks: 1, Failures: 0, Errors: 0 --- src/tests/cpu-sconv-test.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/tests/cpu-sconv-test.c b/src/tests/cpu-sconv-test.c index 5ae939d45..e4aaf59c6 100644 --- a/src/tests/cpu-sconv-test.c +++ b/src/tests/cpu-sconv-test.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -190,6 +191,35 @@ START_TEST (sconv_sse_test) { END_TEST #endif /* (defined (__i386__) || defined (__amd64__)) && defined (HAVE_SSE) */ +#if HAVE_RVV +START_TEST (sconv_rvv_test) { + pa_cpu_riscv_flag_t flags = 0; + pa_convert_func_t orig_func, rvv_func; + + pa_cpu_get_riscv_flags(&flags); + + if (!(flags & PA_CPU_RISCV_V)) { + pa_log_info("RVV not supported. 
Skipping"); + return; + } + /* Fetch the current converter as the reference before the init call registers the RVV one. */ + orig_func = pa_get_convert_from_float32ne_function(PA_SAMPLE_S16LE); + pa_convert_func_init_rvv(PA_CPU_RISCV_V); + rvv_func = pa_get_convert_from_float32ne_function(PA_SAMPLE_S16LE); + + pa_log_debug("Checking RVV sconv (float -> s16)"); + run_conv_test_float_to_s16(rvv_func, orig_func, 0, true, false); + run_conv_test_float_to_s16(rvv_func, orig_func, 1, true, false); + run_conv_test_float_to_s16(rvv_func, orig_func, 2, true, false); + run_conv_test_float_to_s16(rvv_func, orig_func, 3, true, false); + run_conv_test_float_to_s16(rvv_func, orig_func, 4, true, false); + run_conv_test_float_to_s16(rvv_func, orig_func, 5, true, false); + run_conv_test_float_to_s16(rvv_func, orig_func, 6, true, false); + run_conv_test_float_to_s16(rvv_func, orig_func, 7, true, true); +} +END_TEST +#endif /* HAVE_RVV */ + #if defined (__arm__) && defined (__linux__) && defined (HAVE_NEON) START_TEST (sconv_neon_test) { pa_cpu_arm_flag_t flags = 0; @@ -250,6 +280,10 @@ int main(int argc, char *argv[]) { #endif #if defined (__arm__) && defined (__linux__) && defined (HAVE_NEON) tcase_add_test(tc, sconv_neon_test); +#endif +#if HAVE_RVV + tcase_set_timeout(tc, 0); + tcase_add_test(tc, sconv_rvv_test); #endif suite_add_tcase(s, tc);