mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2026-04-25 06:46:40 -04:00
dsp: move scaling out of complex multiply
do scaling as part of iFFT.
This commit is contained in:
parent
7fc020098c
commit
aabcbf1261
6 changed files with 108 additions and 83 deletions
|
|
@ -239,13 +239,18 @@ void dsp_sum_avx2(void *obj, float *r, const float *a, const float *b, uint32_t
|
||||||
|
|
||||||
#define FFT_BLOCK 8
|
#define FFT_BLOCK 8
|
||||||
|
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info {
|
struct fft_info {
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
fftwf_plan plan_r2c;
|
fftwf_plan plan_r2c;
|
||||||
fftwf_plan plan_c2r;
|
fftwf_plan plan_c2r;
|
||||||
|
#else
|
||||||
|
void *setup;
|
||||||
|
#endif
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
|
||||||
/* interleaved [r0,i0,...,r7,i7] -> blocked [r0..r7,i0..i7] */
|
/* interleaved [r0,i0,...,r7,i7] -> blocked [r0..r7,i0..i7] */
|
||||||
static void fft_blocked_avx2(float *data, uint32_t len)
|
static void fft_blocked_avx2(float *data, uint32_t len)
|
||||||
{
|
{
|
||||||
|
|
@ -262,16 +267,17 @@ static void fft_blocked_avx2(float *data, uint32_t len)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* blocked [r0..r7,i0..i7] -> interleaved [r0,i0,...,r7,i7] */
|
/* blocked [r0..r7,i0..i7] -> interleaved [r0,i0,...,r7,i7] with scaling */
|
||||||
static void fft_interleaved_avx2(float *data, uint32_t len)
|
static void fft_interleaved_avx2(float *data, uint32_t len, float scale)
|
||||||
{
|
{
|
||||||
const __m256i idx = _mm256_setr_epi32(0,4,1,5,2,6,3,7);
|
const __m256i idx = _mm256_setr_epi32(0,4,1,5,2,6,3,7);
|
||||||
|
__m256 s = _mm256_set1_ps(scale);
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
for (i = 0; i < len; i += FFT_BLOCK) {
|
for (i = 0; i < len; i += FFT_BLOCK) {
|
||||||
__m256 r = _mm256_load_ps(&data[0]); /* r0 r1 r2 r3 r4 r5 r6 r7 */
|
__m256 r = _mm256_mul_ps(_mm256_load_ps(&data[0]), s);
|
||||||
__m256 im = _mm256_load_ps(&data[8]); /* i0 i1 i2 i3 i4 i5 i6 i7 */
|
__m256 im = _mm256_mul_ps(_mm256_load_ps(&data[8]), s);
|
||||||
__m256 t0 = _mm256_permute2f128_ps(r, im, 0x20); /* r0 r1 r2 r3 i0 i1 i2 i3 */
|
__m256 t0 = _mm256_permute2f128_ps(r, im, 0x20);
|
||||||
__m256 t1 = _mm256_permute2f128_ps(r, im, 0x31); /* r4 r5 r6 r7 i4 i5 i6 i7 */
|
__m256 t1 = _mm256_permute2f128_ps(r, im, 0x31);
|
||||||
_mm256_store_ps(&data[0], _mm256_permutevar8x32_ps(t0, idx));
|
_mm256_store_ps(&data[0], _mm256_permutevar8x32_ps(t0, idx));
|
||||||
_mm256_store_ps(&data[8], _mm256_permutevar8x32_ps(t1, idx));
|
_mm256_store_ps(&data[8], _mm256_permutevar8x32_ps(t1, idx));
|
||||||
data += 2 * FFT_BLOCK;
|
data += 2 * FFT_BLOCK;
|
||||||
|
|
@ -303,27 +309,29 @@ void dsp_fft_memclear_avx2(void *obj, void *data, uint32_t size, bool real)
|
||||||
void dsp_fft_run_avx2(void *obj, void *fft, int direction,
|
void dsp_fft_run_avx2(void *obj, void *fft, int direction,
|
||||||
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
|
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info *info = fft;
|
struct fft_info *info = fft;
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
uint32_t freq_size = SPA_ROUND_UP_N(info->size / 2 + 1, FFT_BLOCK);
|
uint32_t freq_size = SPA_ROUND_UP_N(info->size / 2 + 1, FFT_BLOCK);
|
||||||
if (direction > 0) {
|
if (direction > 0) {
|
||||||
fftwf_execute_dft_r2c(info->plan_r2c, (float*)src, (fftwf_complex*)dst);
|
fftwf_execute_dft_r2c(info->plan_r2c, (float*)src, (fftwf_complex*)dst);
|
||||||
fft_blocked_avx2(dst, freq_size);
|
fft_blocked_avx2(dst, freq_size);
|
||||||
} else {
|
} else {
|
||||||
fft_interleaved_avx2((float*)src, freq_size);
|
fft_interleaved_avx2((float*)src, freq_size, 1.0f / info->size);
|
||||||
fftwf_execute_dft_c2r(info->plan_c2r, (fftwf_complex*)src, dst);
|
fftwf_execute_dft_c2r(info->plan_c2r, (fftwf_complex*)src, dst);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_transform(fft, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);
|
if (direction < 0)
|
||||||
|
spa_fga_dsp_linear(obj, (float*)src, (float*)src,
|
||||||
|
1.0f / info->size, 0.0f, info->size);
|
||||||
|
pffft_transform(info->setup, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsp_fft_cmul_avx2(void *obj, void *fft,
|
void dsp_fft_cmul_avx2(void *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
||||||
const float * SPA_RESTRICT b, uint32_t len, const float scale)
|
const float * SPA_RESTRICT b, uint32_t len)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
#ifdef HAVE_FFTW
|
||||||
__m256 s = _mm256_set1_ps(scale);
|
|
||||||
uint32_t i, plen = SPA_ROUND_UP_N(len, 8) * 2;
|
uint32_t i, plen = SPA_ROUND_UP_N(len, 8) * 2;
|
||||||
|
|
||||||
for (i = 0; i < plen; i += 16) {
|
for (i = 0; i < plen; i += 16) {
|
||||||
|
|
@ -335,21 +343,21 @@ void dsp_fft_cmul_avx2(void *obj, void *fft,
|
||||||
__m256 di = _mm256_mul_ps(ar, bi);
|
__m256 di = _mm256_mul_ps(ar, bi);
|
||||||
dr = _mm256_fnmadd_ps(ai, bi, dr); /* ar*br - ai*bi */
|
dr = _mm256_fnmadd_ps(ai, bi, dr); /* ar*br - ai*bi */
|
||||||
di = _mm256_fmadd_ps(ai, br, di); /* ar*bi + ai*br */
|
di = _mm256_fmadd_ps(ai, br, di); /* ar*bi + ai*br */
|
||||||
_mm256_store_ps(&dst[i], _mm256_mul_ps(dr, s));
|
_mm256_store_ps(&dst[i], dr);
|
||||||
_mm256_store_ps(&dst[i+8], _mm256_mul_ps(di, s));
|
_mm256_store_ps(&dst[i+8], di);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_zconvolve(fft, a, b, dst, scale);
|
struct fft_info *info = fft;
|
||||||
|
pffft_zconvolve(info->setup, a, b, dst, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsp_fft_cmuladd_avx2(void *obj, void *fft,
|
void dsp_fft_cmuladd_avx2(void *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
|
||||||
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
||||||
uint32_t len, const float scale)
|
uint32_t len)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
#ifdef HAVE_FFTW
|
||||||
__m256 s = _mm256_set1_ps(scale);
|
|
||||||
uint32_t i, plen = SPA_ROUND_UP_N(len, 8) * 2;
|
uint32_t i, plen = SPA_ROUND_UP_N(len, 8) * 2;
|
||||||
|
|
||||||
for (i = 0; i < plen; i += 16) {
|
for (i = 0; i < plen; i += 16) {
|
||||||
|
|
@ -361,12 +369,13 @@ void dsp_fft_cmuladd_avx2(void *obj, void *fft,
|
||||||
__m256 di = _mm256_mul_ps(ar, bi);
|
__m256 di = _mm256_mul_ps(ar, bi);
|
||||||
dr = _mm256_fnmadd_ps(ai, bi, dr); /* ar*br - ai*bi */
|
dr = _mm256_fnmadd_ps(ai, bi, dr); /* ar*br - ai*bi */
|
||||||
di = _mm256_fmadd_ps(ai, br, di); /* ar*bi + ai*br */
|
di = _mm256_fmadd_ps(ai, br, di); /* ar*bi + ai*br */
|
||||||
_mm256_store_ps(&dst[i], _mm256_fmadd_ps(dr, s,
|
_mm256_store_ps(&dst[i], _mm256_add_ps(dr,
|
||||||
_mm256_load_ps(&src[i])));
|
_mm256_load_ps(&src[i])));
|
||||||
_mm256_store_ps(&dst[i+8], _mm256_fmadd_ps(di, s,
|
_mm256_store_ps(&dst[i+8], _mm256_add_ps(di,
|
||||||
_mm256_load_ps(&src[i+8])));
|
_mm256_load_ps(&src[i+8])));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_zconvolve_accumulate(fft, a, b, src, dst, scale);
|
struct fft_info *info = fft;
|
||||||
|
pffft_zconvolve_accumulate(info->setup, a, b, src, dst, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -235,26 +235,30 @@ void dsp_delay_c(void *obj, float *buffer, uint32_t *pos, uint32_t n_buffer,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info {
|
struct fft_info {
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
fftwf_plan plan_r2c;
|
fftwf_plan plan_r2c;
|
||||||
fftwf_plan plan_c2r;
|
fftwf_plan plan_c2r;
|
||||||
|
#else
|
||||||
|
void *setup;
|
||||||
|
#endif
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
};
|
};
|
||||||
#endif
|
|
||||||
|
|
||||||
void *dsp_fft_new_c(void *obj, uint32_t size, bool real)
|
void *dsp_fft_new_c(void *obj, uint32_t size, bool real)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info *info = calloc(1, sizeof(struct fft_info));
|
struct fft_info *info = calloc(1, sizeof(struct fft_info));
|
||||||
float *rdata;
|
|
||||||
fftwf_complex *cdata;
|
|
||||||
|
|
||||||
if (info == NULL)
|
if (info == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
info->size = size;
|
info->size = size;
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
{
|
||||||
|
float *rdata;
|
||||||
|
fftwf_complex *cdata;
|
||||||
|
|
||||||
rdata = fftwf_alloc_real(size * 2);
|
rdata = fftwf_alloc_real(size * 2);
|
||||||
cdata = fftwf_alloc_complex(size + 1);
|
cdata = fftwf_alloc_complex(size + 1);
|
||||||
|
|
||||||
|
|
@ -263,23 +267,23 @@ void *dsp_fft_new_c(void *obj, uint32_t size, bool real)
|
||||||
|
|
||||||
fftwf_free(rdata);
|
fftwf_free(rdata);
|
||||||
fftwf_free(cdata);
|
fftwf_free(cdata);
|
||||||
|
}
|
||||||
return info;
|
|
||||||
#else
|
#else
|
||||||
return pffft_new_setup(size, real ? PFFFT_REAL : PFFFT_COMPLEX);
|
info->setup = pffft_new_setup(size, real ? PFFFT_REAL : PFFFT_COMPLEX);
|
||||||
#endif
|
#endif
|
||||||
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsp_fft_free_c(void *obj, void *fft)
|
void dsp_fft_free_c(void *obj, void *fft)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info *info = fft;
|
struct fft_info *info = fft;
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
fftwf_destroy_plan(info->plan_r2c);
|
fftwf_destroy_plan(info->plan_r2c);
|
||||||
fftwf_destroy_plan(info->plan_c2r);
|
fftwf_destroy_plan(info->plan_c2r);
|
||||||
free(info);
|
|
||||||
#else
|
#else
|
||||||
pffft_destroy_setup(fft);
|
pffft_destroy_setup(info->setup);
|
||||||
#endif
|
#endif
|
||||||
|
free(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *dsp_fft_memalloc_c(void *obj, uint32_t size, bool real)
|
void *dsp_fft_memalloc_c(void *obj, uint32_t size, bool real)
|
||||||
|
|
@ -318,43 +322,51 @@ void dsp_fft_memclear_c(void *obj, void *data, uint32_t size, bool real)
|
||||||
void dsp_fft_run_c(void *obj, void *fft, int direction,
|
void dsp_fft_run_c(void *obj, void *fft, int direction,
|
||||||
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
|
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info *info = fft;
|
struct fft_info *info = fft;
|
||||||
if (direction > 0)
|
#ifdef HAVE_FFTW
|
||||||
|
if (direction > 0) {
|
||||||
fftwf_execute_dft_r2c(info->plan_r2c, (float*)src, (fftwf_complex*)dst);
|
fftwf_execute_dft_r2c(info->plan_r2c, (float*)src, (fftwf_complex*)dst);
|
||||||
else
|
} else {
|
||||||
|
spa_fga_dsp_linear(obj, (float*)src, (float*)src,
|
||||||
|
1.0f / info->size, 0.0f, (info->size / 2 + 1) * 2);
|
||||||
fftwf_execute_dft_c2r(info->plan_c2r, (fftwf_complex*)src, dst);
|
fftwf_execute_dft_c2r(info->plan_c2r, (fftwf_complex*)src, dst);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
pffft_transform(fft, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);
|
if (direction < 0)
|
||||||
|
spa_fga_dsp_linear(obj, (float*)src, (float*)src,
|
||||||
|
1.0f / info->size, 0.0f, info->size);
|
||||||
|
pffft_transform(info->setup, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsp_fft_cmul_c(void *obj, void *fft,
|
void dsp_fft_cmul_c(void *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
||||||
const float * SPA_RESTRICT b, uint32_t len, const float scale)
|
const float * SPA_RESTRICT b, uint32_t len)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
#ifdef HAVE_FFTW
|
||||||
for (uint32_t i = 0; i < len; i++) {
|
for (uint32_t i = 0; i < len; i++) {
|
||||||
dst[2*i ] = (a[2*i] * b[2*i ] - a[2*i+1] * b[2*i+1]) * scale;
|
dst[2*i ] = a[2*i] * b[2*i ] - a[2*i+1] * b[2*i+1];
|
||||||
dst[2*i+1] = (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i ]) * scale;
|
dst[2*i+1] = a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i ];
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_zconvolve(fft, a, b, dst, scale);
|
struct fft_info *info = fft;
|
||||||
|
pffft_zconvolve(info->setup, a, b, dst, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsp_fft_cmuladd_c(void *obj, void *fft,
|
void dsp_fft_cmuladd_c(void *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
|
||||||
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
||||||
uint32_t len, const float scale)
|
uint32_t len)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
#ifdef HAVE_FFTW
|
||||||
for (uint32_t i = 0; i < len; i++) {
|
for (uint32_t i = 0; i < len; i++) {
|
||||||
dst[2*i ] = src[2*i ] + (a[2*i] * b[2*i ] - a[2*i+1] * b[2*i+1]) * scale;
|
dst[2*i ] = src[2*i ] + a[2*i] * b[2*i ] - a[2*i+1] * b[2*i+1];
|
||||||
dst[2*i+1] = src[2*i+1] + (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i ]) * scale;
|
dst[2*i+1] = src[2*i+1] + a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i ];
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_zconvolve_accumulate(fft, a, b, src, dst, scale);
|
struct fft_info *info = fft;
|
||||||
|
pffft_zconvolve_accumulate(info->setup, a, b, src, dst, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,12 +50,12 @@ void dsp_fft_run_##arch(void *obj, void *fft, int direction, \
|
||||||
#define MAKE_FFT_CMUL_FUNC(arch) \
|
#define MAKE_FFT_CMUL_FUNC(arch) \
|
||||||
void dsp_fft_cmul_##arch(void *obj, void *fft, \
|
void dsp_fft_cmul_##arch(void *obj, void *fft, \
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, \
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, \
|
||||||
const float * SPA_RESTRICT b, uint32_t len, const float scale)
|
const float * SPA_RESTRICT b, uint32_t len)
|
||||||
#define MAKE_FFT_CMULADD_FUNC(arch) \
|
#define MAKE_FFT_CMULADD_FUNC(arch) \
|
||||||
void dsp_fft_cmuladd_##arch(void *obj, void *fft, \
|
void dsp_fft_cmuladd_##arch(void *obj, void *fft, \
|
||||||
float * dst, const float * src, \
|
float * dst, const float * src, \
|
||||||
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, \
|
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, \
|
||||||
uint32_t len, const float scale)
|
uint32_t len)
|
||||||
|
|
||||||
|
|
||||||
MAKE_CLEAR_FUNC(c);
|
MAKE_CLEAR_FUNC(c);
|
||||||
|
|
|
||||||
|
|
@ -686,13 +686,18 @@ void dsp_delay_sse(void *obj, float *buffer, uint32_t *pos, uint32_t n_buffer, u
|
||||||
|
|
||||||
#define FFT_BLOCK 4
|
#define FFT_BLOCK 4
|
||||||
|
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info {
|
struct fft_info {
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
fftwf_plan plan_r2c;
|
fftwf_plan plan_r2c;
|
||||||
fftwf_plan plan_c2r;
|
fftwf_plan plan_c2r;
|
||||||
|
#else
|
||||||
|
void *setup;
|
||||||
|
#endif
|
||||||
uint32_t size;
|
uint32_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
|
||||||
/* interleaved [r0,i0,r1,i1,r2,i2,r3,i3] -> blocked [r0,r1,r2,r3,i0,i1,i2,i3] */
|
/* interleaved [r0,i0,r1,i1,r2,i2,r3,i3] -> blocked [r0,r1,r2,r3,i0,i1,i2,i3] */
|
||||||
static void fft_blocked_sse(float *data, uint32_t len)
|
static void fft_blocked_sse(float *data, uint32_t len)
|
||||||
{
|
{
|
||||||
|
|
@ -706,13 +711,14 @@ static void fft_blocked_sse(float *data, uint32_t len)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* blocked [r0,r1,r2,r3,i0,i1,i2,i3] -> interleaved [r0,i0,r1,i1,r2,i2,r3,i3] */
|
/* blocked [r0,r1,r2,r3,i0,i1,i2,i3] -> interleaved [r0,i0,r1,i1,r2,i2,r3,i3] with scaling */
|
||||||
static void fft_interleaved_sse(float *data, uint32_t len)
|
static void fft_interleaved_sse(float *data, uint32_t len, float scale)
|
||||||
{
|
{
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
|
__m128 s = _mm_set1_ps(scale);
|
||||||
for (i = 0; i < len; i += FFT_BLOCK) {
|
for (i = 0; i < len; i += FFT_BLOCK) {
|
||||||
__m128 r = _mm_load_ps(&data[0]); /* r0 r1 r2 r3 */
|
__m128 r = _mm_mul_ps(_mm_load_ps(&data[0]), s);
|
||||||
__m128 im = _mm_load_ps(&data[4]); /* i0 i1 i2 i3 */
|
__m128 im = _mm_mul_ps(_mm_load_ps(&data[4]), s);
|
||||||
_mm_store_ps(&data[0], _mm_unpacklo_ps(r, im));
|
_mm_store_ps(&data[0], _mm_unpacklo_ps(r, im));
|
||||||
_mm_store_ps(&data[4], _mm_unpackhi_ps(r, im));
|
_mm_store_ps(&data[4], _mm_unpackhi_ps(r, im));
|
||||||
data += 2 * FFT_BLOCK;
|
data += 2 * FFT_BLOCK;
|
||||||
|
|
@ -744,27 +750,29 @@ void dsp_fft_memclear_sse(void *obj, void *data, uint32_t size, bool real)
|
||||||
void dsp_fft_run_sse(void *obj, void *fft, int direction,
|
void dsp_fft_run_sse(void *obj, void *fft, int direction,
|
||||||
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
|
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
|
||||||
struct fft_info *info = fft;
|
struct fft_info *info = fft;
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
uint32_t freq_size = SPA_ROUND_UP_N(info->size / 2 + 1, FFT_BLOCK);
|
uint32_t freq_size = SPA_ROUND_UP_N(info->size / 2 + 1, FFT_BLOCK);
|
||||||
if (direction > 0) {
|
if (direction > 0) {
|
||||||
fftwf_execute_dft_r2c(info->plan_r2c, (float*)src, (fftwf_complex*)dst);
|
fftwf_execute_dft_r2c(info->plan_r2c, (float*)src, (fftwf_complex*)dst);
|
||||||
fft_blocked_sse(dst, freq_size);
|
fft_blocked_sse(dst, freq_size);
|
||||||
} else {
|
} else {
|
||||||
fft_interleaved_sse((float*)src, freq_size);
|
fft_interleaved_sse((float*)src, freq_size, 1.0f / info->size);
|
||||||
fftwf_execute_dft_c2r(info->plan_c2r, (fftwf_complex*)src, dst);
|
fftwf_execute_dft_c2r(info->plan_c2r, (fftwf_complex*)src, dst);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_transform(fft, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);
|
if (direction < 0)
|
||||||
|
spa_fga_dsp_linear(obj, (float*)src, (float*)src,
|
||||||
|
1.0f / info->size, 0.0f, info->size);
|
||||||
|
pffft_transform(info->setup, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsp_fft_cmul_sse(void *obj, void *fft,
|
void dsp_fft_cmul_sse(void *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
||||||
const float * SPA_RESTRICT b, uint32_t len, const float scale)
|
const float * SPA_RESTRICT b, uint32_t len)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
#ifdef HAVE_FFTW
|
||||||
__m128 s = _mm_set1_ps(scale);
|
|
||||||
uint32_t i, plen = SPA_ROUND_UP_N(len, FFT_BLOCK) * 2;
|
uint32_t i, plen = SPA_ROUND_UP_N(len, FFT_BLOCK) * 2;
|
||||||
|
|
||||||
for (i = 0; i < plen; i += 2 * FFT_BLOCK) {
|
for (i = 0; i < plen; i += 2 * FFT_BLOCK) {
|
||||||
|
|
@ -772,23 +780,23 @@ void dsp_fft_cmul_sse(void *obj, void *fft,
|
||||||
__m128 ai = _mm_load_ps(&a[i + FFT_BLOCK]);
|
__m128 ai = _mm_load_ps(&a[i + FFT_BLOCK]);
|
||||||
__m128 br = _mm_load_ps(&b[i]);
|
__m128 br = _mm_load_ps(&b[i]);
|
||||||
__m128 bi = _mm_load_ps(&b[i + FFT_BLOCK]);
|
__m128 bi = _mm_load_ps(&b[i + FFT_BLOCK]);
|
||||||
__m128 dr = _mm_sub_ps(_mm_mul_ps(ar, br), _mm_mul_ps(ai, bi));
|
_mm_store_ps(&dst[i], _mm_sub_ps(
|
||||||
__m128 di = _mm_add_ps(_mm_mul_ps(ar, bi), _mm_mul_ps(ai, br));
|
_mm_mul_ps(ar, br), _mm_mul_ps(ai, bi)));
|
||||||
_mm_store_ps(&dst[i], _mm_mul_ps(dr, s));
|
_mm_store_ps(&dst[i + FFT_BLOCK], _mm_add_ps(
|
||||||
_mm_store_ps(&dst[i + FFT_BLOCK], _mm_mul_ps(di, s));
|
_mm_mul_ps(ar, bi), _mm_mul_ps(ai, br)));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_zconvolve(fft, a, b, dst, scale);
|
struct fft_info *info = fft;
|
||||||
|
pffft_zconvolve(info->setup, a, b, dst, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void dsp_fft_cmuladd_sse(void *obj, void *fft,
|
void dsp_fft_cmuladd_sse(void *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
|
||||||
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
||||||
uint32_t len, const float scale)
|
uint32_t len)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_FFTW
|
#ifdef HAVE_FFTW
|
||||||
__m128 s = _mm_set1_ps(scale);
|
|
||||||
uint32_t i, plen = SPA_ROUND_UP_N(len, FFT_BLOCK) * 2;
|
uint32_t i, plen = SPA_ROUND_UP_N(len, FFT_BLOCK) * 2;
|
||||||
|
|
||||||
for (i = 0; i < plen; i += 2 * FFT_BLOCK) {
|
for (i = 0; i < plen; i += 2 * FFT_BLOCK) {
|
||||||
|
|
@ -796,14 +804,13 @@ void dsp_fft_cmuladd_sse(void *obj, void *fft,
|
||||||
__m128 ai = _mm_load_ps(&a[i + FFT_BLOCK]);
|
__m128 ai = _mm_load_ps(&a[i + FFT_BLOCK]);
|
||||||
__m128 br = _mm_load_ps(&b[i]);
|
__m128 br = _mm_load_ps(&b[i]);
|
||||||
__m128 bi = _mm_load_ps(&b[i + FFT_BLOCK]);
|
__m128 bi = _mm_load_ps(&b[i + FFT_BLOCK]);
|
||||||
__m128 dr = _mm_sub_ps(_mm_mul_ps(ar, br), _mm_mul_ps(ai, bi));
|
|
||||||
__m128 di = _mm_add_ps(_mm_mul_ps(ar, bi), _mm_mul_ps(ai, br));
|
|
||||||
_mm_store_ps(&dst[i], _mm_add_ps(_mm_load_ps(&src[i]),
|
_mm_store_ps(&dst[i], _mm_add_ps(_mm_load_ps(&src[i]),
|
||||||
_mm_mul_ps(dr, s)));
|
_mm_sub_ps(_mm_mul_ps(ar, br), _mm_mul_ps(ai, bi))));
|
||||||
_mm_store_ps(&dst[i + FFT_BLOCK], _mm_add_ps(_mm_load_ps(&src[i + FFT_BLOCK]),
|
_mm_store_ps(&dst[i + FFT_BLOCK], _mm_add_ps(_mm_load_ps(&src[i + FFT_BLOCK]),
|
||||||
_mm_mul_ps(di, s)));
|
_mm_add_ps(_mm_mul_ps(ar, bi), _mm_mul_ps(ai, br))));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
pffft_zconvolve_accumulate(fft, a, b, src, dst, scale);
|
struct fft_info *info = fft;
|
||||||
|
pffft_zconvolve_accumulate(info->setup, a, b, src, dst, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,11 +43,11 @@ struct spa_fga_dsp_methods {
|
||||||
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst);
|
const float * SPA_RESTRICT src, float * SPA_RESTRICT dst);
|
||||||
void (*fft_cmul) (void *obj, void *fft,
|
void (*fft_cmul) (void *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
||||||
const float * SPA_RESTRICT b, uint32_t len, const float scale);
|
const float * SPA_RESTRICT b, uint32_t len);
|
||||||
void (*fft_cmuladd) (void *obj, void *fft,
|
void (*fft_cmuladd) (void *obj, void *fft,
|
||||||
float * dst, const float * src,
|
float * dst, const float * src,
|
||||||
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
||||||
uint32_t len, const float scale);
|
uint32_t len);
|
||||||
void (*linear) (void *obj,
|
void (*linear) (void *obj,
|
||||||
float * dst, const float * SPA_RESTRICT src,
|
float * dst, const float * SPA_RESTRICT src,
|
||||||
const float mult, const float add, uint32_t n_samples);
|
const float mult, const float add, uint32_t n_samples);
|
||||||
|
|
@ -123,18 +123,18 @@ static inline void spa_fga_dsp_fft_run(struct spa_fga_dsp *obj, void *fft, int d
|
||||||
}
|
}
|
||||||
static inline void spa_fga_dsp_fft_cmul(struct spa_fga_dsp *obj, void *fft,
|
static inline void spa_fga_dsp_fft_cmul(struct spa_fga_dsp *obj, void *fft,
|
||||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
||||||
const float * SPA_RESTRICT b, uint32_t len, const float scale)
|
const float * SPA_RESTRICT b, uint32_t len)
|
||||||
{
|
{
|
||||||
spa_api_method_v(spa_fga_dsp, &obj->iface, fft_cmul, 0,
|
spa_api_method_v(spa_fga_dsp, &obj->iface, fft_cmul, 0,
|
||||||
fft, dst, a, b, len, scale);
|
fft, dst, a, b, len);
|
||||||
}
|
}
|
||||||
static inline void spa_fga_dsp_fft_cmuladd(struct spa_fga_dsp *obj, void *fft,
|
static inline void spa_fga_dsp_fft_cmuladd(struct spa_fga_dsp *obj, void *fft,
|
||||||
float * dst, const float * src,
|
float * dst, const float * src,
|
||||||
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
||||||
uint32_t len, const float scale)
|
uint32_t len)
|
||||||
{
|
{
|
||||||
spa_api_method_v(spa_fga_dsp, &obj->iface, fft_cmuladd, 0,
|
spa_api_method_v(spa_fga_dsp, &obj->iface, fft_cmuladd, 0,
|
||||||
fft, dst, src, a, b, len, scale);
|
fft, dst, src, a, b, len);
|
||||||
}
|
}
|
||||||
static inline void spa_fga_dsp_linear(struct spa_fga_dsp *obj,
|
static inline void spa_fga_dsp_linear(struct spa_fga_dsp *obj,
|
||||||
float * dst, const float * SPA_RESTRICT src,
|
float * dst, const float * SPA_RESTRICT src,
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,6 @@ struct partition {
|
||||||
struct ir *ir;
|
struct ir *ir;
|
||||||
int time_idx;
|
int time_idx;
|
||||||
int precalc_idx;
|
int precalc_idx;
|
||||||
|
|
||||||
float scale;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct convolver
|
struct convolver
|
||||||
|
|
@ -191,7 +189,6 @@ static struct partition *partition_new(struct convolver *conv, int block,
|
||||||
spa_fga_dsp_fft_run(dsp, part->fft, 1, r->time_buffer[0], r->segments[j]);
|
spa_fga_dsp_fft_run(dsp, part->fft, 1, r->time_buffer[0], r->segments[j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
part->scale = 1.0f / part->time_size;
|
|
||||||
partition_reset(dsp, part);
|
partition_reset(dsp, part);
|
||||||
|
|
||||||
return part;
|
return part;
|
||||||
|
|
@ -218,7 +215,7 @@ static int partition_run(struct spa_fga_dsp *dsp, struct partition *part, const
|
||||||
part->freq,
|
part->freq,
|
||||||
part->segments[current],
|
part->segments[current],
|
||||||
r->segments[0],
|
r->segments[0],
|
||||||
part->freq_size, part->scale);
|
part->freq_size);
|
||||||
|
|
||||||
for (j = 1; j < part->n_segments; j++) {
|
for (j = 1; j < part->n_segments; j++) {
|
||||||
if (++current == part->n_segments)
|
if (++current == part->n_segments)
|
||||||
|
|
@ -229,7 +226,7 @@ static int partition_run(struct spa_fga_dsp *dsp, struct partition *part, const
|
||||||
part->freq,
|
part->freq,
|
||||||
part->segments[current],
|
part->segments[current],
|
||||||
r->segments[j],
|
r->segments[j],
|
||||||
part->freq_size, part->scale);
|
part->freq_size);
|
||||||
}
|
}
|
||||||
spa_fga_dsp_fft_run(dsp, part->ifft, -1, part->freq, r->time_buffer[idx]);
|
spa_fga_dsp_fft_run(dsp, part->ifft, -1, part->freq, r->time_buffer[idx]);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue