mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-03 09:01:54 -05:00
filter-chain: remove another copy
This commit is contained in:
parent
4677cc348c
commit
e90c436f3a
3 changed files with 38 additions and 88 deletions
|
|
@ -83,11 +83,6 @@ static void fft_cpx_free(struct fft_cpx *cpx)
|
|||
fft_free(cpx->v);
|
||||
}
|
||||
|
||||
static void fft_cpx_copy(struct fft_cpx *dst, struct fft_cpx *src, int size)
|
||||
{
|
||||
memcpy(dst->v, src->v, sizeof(float) * 2 * size);
|
||||
}
|
||||
|
||||
static int next_power_of_two(int val)
|
||||
{
|
||||
int r = 1;
|
||||
|
|
@ -127,9 +122,9 @@ static inline void fft_convolve(void *fft, struct fft_cpx *r,
|
|||
pffft_zconvolve(fft, a->v, b->v, r->v, scale);
|
||||
}
|
||||
static inline void fft_convolve_accum(void *fft, struct fft_cpx *r,
|
||||
const struct fft_cpx *a, const struct fft_cpx *b, int len, float scale)
|
||||
const struct fft_cpx *c, const struct fft_cpx *a, const struct fft_cpx *b, int len, float scale)
|
||||
{
|
||||
pffft_zconvolve_accumulate(fft, a->v, b->v, r->v, scale);
|
||||
pffft_zconvolve_accumulate(fft, a->v, b->v, c->v, r->v, scale);
|
||||
}
|
||||
|
||||
static inline void fft_sum(float *r, const float *a, const float *b,int len)
|
||||
|
|
@ -249,15 +244,17 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
|
|||
for (i = 2; i < conv->segCount; i++) {
|
||||
indexAudio = (conv->current + i) % conv->segCount;
|
||||
|
||||
fft_convolve_accum(conv->fft, &conv->pre_mult,
|
||||
fft_convolve_accum(conv->fft,
|
||||
&conv->pre_mult,
|
||||
&conv->pre_mult,
|
||||
&conv->segmentsIr[i],
|
||||
&conv->segments[indexAudio],
|
||||
conv->fftComplexSize, conv->scale);
|
||||
}
|
||||
}
|
||||
fft_cpx_copy(&conv->conv, &conv->pre_mult, conv->fftComplexSize);
|
||||
|
||||
fft_convolve_accum(conv->fft, &conv->conv,
|
||||
fft_convolve_accum(conv->fft,
|
||||
&conv->conv,
|
||||
&conv->pre_mult,
|
||||
&conv->segments[conv->current],
|
||||
&conv->segmentsIr[0],
|
||||
conv->fftComplexSize, conv->scale);
|
||||
|
|
|
|||
|
|
@ -1410,7 +1410,7 @@ struct funcs {
|
|||
PFFFT_Setup * (*new_setup) (int N, pffft_transform_t transform);
|
||||
void (*transform) (PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction, int ordered);
|
||||
void (*zreorder)(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
|
||||
void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
|
||||
void (*zconvolve_accumulate)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *dft_c, float *dft_ab, float scaling);
|
||||
void (*zconvolve)(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
|
||||
void (*sum)(const float *a, const float *b, float *ab, int len);
|
||||
int (*simd_size)(void);
|
||||
|
|
@ -2011,35 +2011,30 @@ static void transform_simd(PFFFT_Setup * setup, const float *finput,
|
|||
}
|
||||
|
||||
static void zconvolve_accumulate_simd(PFFFT_Setup * s, const float *a, const float *b,
|
||||
float *ab, float scaling)
|
||||
const float *c, float *ab, float scaling)
|
||||
{
|
||||
const int Ncvec2 = s->Ncvec * 2;
|
||||
const v4sf *RESTRICT va = (const v4sf *)a;
|
||||
const v4sf *RESTRICT vb = (const v4sf *)b;
|
||||
v4sf *RESTRICT vab = (v4sf *) ab;
|
||||
v4sf *RESTRICT vc = (v4sf *) c;
|
||||
v4sf vscal = LD_PS1(scaling);
|
||||
float ar, ai, br, bi, cr, ci;
|
||||
int i;
|
||||
|
||||
#ifdef __arm__
|
||||
__builtin_prefetch(va);
|
||||
__builtin_prefetch(vb);
|
||||
__builtin_prefetch(vab);
|
||||
__builtin_prefetch(c);
|
||||
__builtin_prefetch(va + 2);
|
||||
__builtin_prefetch(vb + 2);
|
||||
__builtin_prefetch(vab + 2);
|
||||
__builtin_prefetch(c + 2);
|
||||
__builtin_prefetch(va + 4);
|
||||
__builtin_prefetch(vb + 4);
|
||||
__builtin_prefetch(vab + 4);
|
||||
__builtin_prefetch(c + 4);
|
||||
__builtin_prefetch(va + 6);
|
||||
__builtin_prefetch(vb + 6);
|
||||
__builtin_prefetch(vab + 6);
|
||||
#ifndef __clang__
|
||||
#define ZCONVOLVE_USING_INLINE_NEON_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
float ar, ai, br, bi, abr, abi;
|
||||
#ifndef ZCONVOLVE_USING_INLINE_ASM
|
||||
v4sf vscal = LD_PS1(scaling);
|
||||
int i;
|
||||
__builtin_prefetch(c + 6);
|
||||
#endif
|
||||
|
||||
assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
|
||||
|
|
@ -2047,48 +2042,9 @@ static void zconvolve_accumulate_simd(PFFFT_Setup * s, const float *a, const flo
|
|||
ai = ((v4sf_union *) va)[1].f[0];
|
||||
br = ((v4sf_union *) vb)[0].f[0];
|
||||
bi = ((v4sf_union *) vb)[1].f[0];
|
||||
abr = ((v4sf_union *) vab)[0].f[0];
|
||||
abi = ((v4sf_union *) vab)[1].f[0];
|
||||
cr = ((v4sf_union *) vc)[0].f[0];
|
||||
ci = ((v4sf_union *) vc)[1].f[0];
|
||||
|
||||
#ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc
|
||||
const float *a_ = a, *b_ = b;
|
||||
float *ab_ = ab;
|
||||
int N = s->Ncvec;
|
||||
asm volatile ("mov r8, %2 \n"
|
||||
"vdup.f32 q15, %4 \n"
|
||||
"1: \n"
|
||||
"pld [%0,#64] \n"
|
||||
"pld [%1,#64] \n"
|
||||
"pld [%2,#64] \n"
|
||||
"pld [%0,#96] \n"
|
||||
"pld [%1,#96] \n"
|
||||
"pld [%2,#96] \n"
|
||||
"vld1.f32 {q0,q1}, [%0,:128]! \n"
|
||||
"vld1.f32 {q4,q5}, [%1,:128]! \n"
|
||||
"vld1.f32 {q2,q3}, [%0,:128]! \n"
|
||||
"vld1.f32 {q6,q7}, [%1,:128]! \n"
|
||||
"vld1.f32 {q8,q9}, [r8,:128]! \n"
|
||||
"vmul.f32 q10, q0, q4 \n"
|
||||
"vmul.f32 q11, q0, q5 \n"
|
||||
"vmul.f32 q12, q2, q6 \n"
|
||||
"vmul.f32 q13, q2, q7 \n"
|
||||
"vmls.f32 q10, q1, q5 \n"
|
||||
"vmla.f32 q11, q1, q4 \n"
|
||||
"vld1.f32 {q0,q1}, [r8,:128]! \n"
|
||||
"vmls.f32 q12, q3, q7 \n"
|
||||
"vmla.f32 q13, q3, q6 \n"
|
||||
"vmla.f32 q8, q10, q15 \n"
|
||||
"vmla.f32 q9, q11, q15 \n"
|
||||
"vmla.f32 q0, q12, q15 \n"
|
||||
"vmla.f32 q1, q13, q15 \n"
|
||||
"vst1.f32 {q8,q9},[%2,:128]! \n"
|
||||
"vst1.f32 {q0,q1},[%2,:128]! \n"
|
||||
"subs %3, #2 \n"
|
||||
"bne 1b \n":"+r" (a_),
|
||||
"+r"(b_), "+r"(ab_), "+r"(N):"r"(scaling):"r8", "q0",
|
||||
"q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
|
||||
"q10", "q11", "q12", "q13", "q15", "memory");
|
||||
#else // default routine, works fine for non-arm cpus with current compilers
|
||||
for (i = 0; i < Ncvec2; i += 4) {
|
||||
v4sf ar, ai, br, bi;
|
||||
ar = va[i + 0];
|
||||
|
|
@ -2096,20 +2052,19 @@ static void zconvolve_accumulate_simd(PFFFT_Setup * s, const float *a, const flo
|
|||
br = vb[i + 0];
|
||||
bi = vb[i + 1];
|
||||
VCPLXMUL(ar, ai, br, bi);
|
||||
vab[i + 0] = VMADD(ar, vscal, vab[i + 0]);
|
||||
vab[i + 1] = VMADD(ai, vscal, vab[i + 1]);
|
||||
vab[i + 0] = VMADD(ar, vscal, vc[i + 0]);
|
||||
vab[i + 1] = VMADD(ai, vscal, vc[i + 1]);
|
||||
ar = va[i + 2];
|
||||
ai = va[i + 3];
|
||||
br = vb[i + 2];
|
||||
bi = vb[i + 3];
|
||||
VCPLXMUL(ar, ai, br, bi);
|
||||
vab[i + 2] = VMADD(ar, vscal, vab[i + 2]);
|
||||
vab[i + 3] = VMADD(ai, vscal, vab[i + 3]);
|
||||
vab[i + 2] = VMADD(ar, vscal, vc[i + 2]);
|
||||
vab[i + 3] = VMADD(ai, vscal, vc[i + 3]);
|
||||
}
|
||||
#endif
|
||||
if (s->transform == PFFFT_REAL) {
|
||||
((v4sf_union *) vab)[0].f[0] = abr + ar * br * scaling;
|
||||
((v4sf_union *) vab)[1].f[0] = abi + ai * bi * scaling;
|
||||
((v4sf_union *) vab)[0].f[0] = cr + ar * br * scaling;
|
||||
((v4sf_union *) vab)[1].f[0] = ci + ai * bi * scaling;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2137,9 +2092,6 @@ static void zconvolve_simd(PFFFT_Setup * s, const float *a, const float *b,
|
|||
__builtin_prefetch(va+6);
|
||||
__builtin_prefetch(vb+6);
|
||||
__builtin_prefetch(vab+6);
|
||||
# ifndef __clang__
|
||||
# define ZCONVOLVE_USING_INLINE_NEON_ASM
|
||||
# endif
|
||||
#endif
|
||||
|
||||
assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
|
||||
|
|
@ -2285,16 +2237,16 @@ static void transform_simd(PFFFT_Setup * setup, const float *input,
|
|||
|
||||
|
||||
static void zconvolve_accumulate_simd(PFFFT_Setup * s, const float *a,
|
||||
const float *b, float *ab, float scaling)
|
||||
const float *b, const float *c, float *ab, float scaling)
|
||||
{
|
||||
int i, Ncvec2 = s->Ncvec * 2;
|
||||
|
||||
if (s->transform == PFFFT_REAL) {
|
||||
// take care of the fftpack ordering
|
||||
ab[0] += a[0] * b[0] * scaling;
|
||||
ab[Ncvec2 - 1] +=
|
||||
a[Ncvec2 - 1] * b[Ncvec2 - 1] * scaling;
|
||||
ab[0] = c[0] + a[0] * b[0] * scaling;
|
||||
ab[Ncvec2 - 1] = c[Ncvec2 - 1] + a[Ncvec2 - 1] * b[Ncvec2 - 1] * scaling;
|
||||
++ab;
|
||||
++c;
|
||||
++a;
|
||||
++b;
|
||||
Ncvec2 -= 2;
|
||||
|
|
@ -2306,13 +2258,13 @@ static void zconvolve_accumulate_simd(PFFFT_Setup * s, const float *a,
|
|||
br = b[i + 0];
|
||||
bi = b[i + 1];
|
||||
VCPLXMUL(ar, ai, br, bi);
|
||||
ab[i + 0] += ar * scaling;
|
||||
ab[i + 1] += ai * scaling;
|
||||
ab[i + 0] = c[i + 0] + ar * scaling;
|
||||
ab[i + 1] = c[i + 1] + ai * scaling;
|
||||
}
|
||||
}
|
||||
|
||||
static void zconvolve_simd(PFFFT_Setup * s, const float *a,
|
||||
const float *b, float *ab, float scaling)
|
||||
const float *b, float *ab, float scaling)
|
||||
{
|
||||
int i, Ncvec2 = s->Ncvec * 2;
|
||||
|
||||
|
|
@ -2427,9 +2379,9 @@ void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft
|
|||
return funcs->zreorder(setup, input, output, direction);
|
||||
}
|
||||
|
||||
void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling)
|
||||
void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *c, float *dft_ab, float scaling)
|
||||
{
|
||||
return funcs->zconvolve_accumulate(setup, dft_a, dft_b, dft_ab, scaling);
|
||||
return funcs->zconvolve_accumulate(setup, dft_a, dft_b, c, dft_ab, scaling);
|
||||
}
|
||||
|
||||
void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling)
|
||||
|
|
|
|||
|
|
@ -153,11 +153,12 @@ extern "C" {
|
|||
perform the operation yourself as the dft coefs are stored as
|
||||
interleaved complex numbers).
|
||||
|
||||
the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
|
||||
the operation performed is: dft_ab = dft_c + (dft_a * fdt_b)*scaling
|
||||
|
||||
The dft_a, dft_b and dft_ab pointers may alias.
|
||||
*/
|
||||
void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
|
||||
void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, const float *dft_c, float *dft_ab, float scaling);
|
||||
|
||||
void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
|
||||
|
||||
void pffft_sum(const float *a, const float *b, float *ab, int len);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue