fmt-ops: add RVV optimizations for f32d_to_s32

This commit is contained in:
sunyuechi 2024-09-25 13:11:08 +08:00 committed by hleft
parent c7c5b61dac
commit 79d41e183e
5 changed files with 65 additions and 0 deletions

View file

@ -157,4 +157,54 @@ conv_s16_to_f32d_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void
for(i = 0; i < n_channels; i++)
s16_to_f32d(conv, &dst[i], &s[i], n_channels, n_samples);
}
/*
 * Convert one channel of float samples to signed 32-bit integers with RVV,
 * writing the results n_channels * 4 bytes apart starting at dst.
 *
 * data:       unused by this routine.
 * dst:        address of the first output sample for this channel.
 * src:        src[0] points to the float samples to read.
 * n_channels: used only to derive the byte stride between output samples.
 * n_samples:  number of samples to convert.
 *
 * Each sample is multiplied by 2147483648.0f (2^31), clamped from above to
 * 2147483520.0f (the largest float below 2^31) and converted with
 * vfcvt.x.f.v. No explicit lower clamp is applied; the RVV specification
 * defines the float-to-integer conversion as saturating.
 */
static void
f32d_to_s32(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
		uint32_t n_channels, uint32_t n_samples)
{
	const float *s = src[0];
	/* Byte distance between two consecutive output samples (4 bytes per channel). */
	uint32_t stride = n_channels << 2;

	/*
	 * NOTE(review): ".option arch, +v" is not wrapped in .option push/pop, so
	 * it stays in effect for the assembly emitted after this statement.
	 * Left as in the original; confirm whether other asm blocks rely on it.
	 */
	__asm__ __volatile__ (
		".option       arch, +v                  \n\t"
		/* 0x4F000000: bit pattern of 2147483648.0f, the scale factor. */
		"li            t0, 1325400064            \n\t"
		/* 0x4EFFFFFF: bit pattern of 2147483520.0f, the upper clamp. */
		"li            t2, 1325400063            \n\t"
		"fmv.w.x       fa5, t0                   \n\t"
		"fmv.w.x       fa4, t2                   \n\t"
		"1:                                      \n\t"
		/* t0 = number of elements handled in this iteration. */
		"vsetvli       t0, %[n_samples], e32, m8, ta, ma \n\t"
		"vle32.v       v8, (%[s])                \n\t"
		"sub           %[n_samples], %[n_samples], t0 \n\t"
		"vfmul.vf      v8, v8, fa5               \n\t"
		"vfmin.vf      v8, v8, fa4               \n\t"
		"vfcvt.x.f.v   v8, v8                    \n\t"
		/* t2 = bytes consumed from s, t3 = bytes advanced in dst. */
		"slli          t2, t0, 2                 \n\t"
		"mul           t3, t0, %[stride]         \n\t"
		"vsse32.v      v8, (%[dst]), %[stride]   \n\t"
		"add           %[s], %[s], t2            \n\t"
		"add           %[dst], %[dst], t3        \n\t"
		"bnez          %[n_samples], 1b          \n\t"
		: [n_samples] "+r" (n_samples),
		  [s] "+r" (s),
		  [dst] "+r" (dst)
		: [stride] "r" (stride)
		/*
		 * Every register the template names explicitly must be declared,
		 * otherwise the compiler is free to allocate an operand to it or
		 * keep a live value in it across this statement. With LMUL=8 the
		 * group starting at v8 covers v8..v15.
		 */
		: "cc", "memory",
		  "t0", "t2", "t3",
		  "fa4", "fa5",
		  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"
	);
}
/*
 * Convert planar float input to signed 32-bit output for every channel
 * reported by conv->n_channels.
 *
 * Buffers of four samples or fewer are handed to conv_f32d_to_s32_c();
 * anything longer is processed one channel at a time by f32d_to_s32(),
 * with channel ch written starting at element ch of dst[0].
 */
void
conv_f32d_to_s32_rvv(struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
		uint32_t n_samples)
{
	if (n_samples > 4) {
		int32_t *out = dst[0];
		const uint32_t channels = conv->n_channels;
		uint32_t ch;

		/* One pass per channel; each pass starts at that channel's first slot. */
		for (ch = 0; ch < channels; ch++)
			f32d_to_s32(conv, out + ch, src + ch, channels, n_samples);
		return;
	}

	/* Short buffers go through the plain C implementation. */
	conv_f32d_to_s32_c(conv, dst, src, n_samples);
}
#endif