mirror of https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-03 09:01:54 -05:00

filter-chain: add support for fftw in the convolver

It's faster than pffft.

parent f810c7c15f
commit 7f8ce35709

7 changed files with 267 additions and 5 deletions
meson.build

@@ -83,6 +83,7 @@ if have_sse
  filter_chain_sse = static_library('filter_chain_sse',
    ['module-filter-chain/pffft.c',
     'module-filter-chain/dsp-ops-sse.c' ],
    include_directories : [configinc],
    c_args : [sse_args, '-O3', '-DHAVE_SSE'],
    dependencies : [ spa_dep ],
    install : false
@@ -93,6 +94,7 @@ endif
if have_avx
  filter_chain_avx = static_library('filter_chain_avx',
    ['module-filter-chain/dsp-ops-avx.c' ],
    include_directories : [configinc],
    c_args : [avx_args, fma_args,'-O3', '-DHAVE_AVX'],
    dependencies : [ spa_dep ],
    install : false
@@ -115,8 +117,9 @@ filter_chain_c = static_library('filter_chain_c',
  ['module-filter-chain/pffft.c',
   'module-filter-chain/dsp-ops.c',
   'module-filter-chain/dsp-ops-c.c' ],
  include_directories : [configinc],
  c_args : [simd_cargs, '-O3', '-DPFFFT_SIMD_DISABLE'],
-  dependencies : [ spa_dep ],
+  dependencies : [ spa_dep, fftw_dep],
  install : false
)
simd_dependencies += filter_chain_c
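Note that fftw_dep is not declared in any of the hunks rendered on this page, so it presumably comes from a build-system change in this commit that is not shown; the usual way to provide it in Meson would be a dependency lookup for fftw3f.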
				
			
module-filter-chain/dsp-ops-avx.c

@@ -123,3 +123,93 @@ void dsp_sum_avx(struct dsp_ops *ops, float *r, const float *a, const float *b,
		_mm_store_ss(&r[n], in[0]);
	}
}

inline static __m256 _mm256_mul_pz(__m256 ab, __m256 cd)
{
	__m256 aa, bb, dc, x0, x1;
	aa = _mm256_moveldup_ps(ab);
	bb = _mm256_movehdup_ps(ab);
	x0 = _mm256_mul_ps(aa, cd);
	dc = _mm256_shuffle_ps(cd, cd, _MM_SHUFFLE(2,3,0,1));
	x1 = _mm256_mul_ps(bb, dc);
	return _mm256_addsub_ps(x0, x1);
}

void dsp_fft_cmul_avx(struct dsp_ops *ops, void *fft,
	float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
	const float * SPA_RESTRICT b, uint32_t len, const float scale)
{
#ifdef HAVE_FFTW
	__m256 s = _mm256_set1_ps(scale);
	__m256 aa[2], bb[2], dd[2];
	uint32_t i, unrolled;

	if (SPA_IS_ALIGNED(a, 32) &&
	    SPA_IS_ALIGNED(b, 32) &&
	    SPA_IS_ALIGNED(dst, 32))
		unrolled = len & ~7;
	else
		unrolled = 0;

	for (i = 0; i < unrolled; i+=8) {
		aa[0] = _mm256_load_ps(&a[2*i]);	/* ar0 ai0 .. ar3 ai3 */
		aa[1] = _mm256_load_ps(&a[2*i+8]);	/* ar4 ai4 .. ar7 ai7 */
		bb[0] = _mm256_load_ps(&b[2*i]);	/* br0 bi0 .. br3 bi3 */
		bb[1] = _mm256_load_ps(&b[2*i+8]);	/* br4 bi4 .. br7 bi7 */
		dd[0] = _mm256_mul_pz(aa[0], bb[0]);
		dd[1] = _mm256_mul_pz(aa[1], bb[1]);
		dd[0] = _mm256_mul_ps(dd[0], s);
		dd[1] = _mm256_mul_ps(dd[1], s);
		_mm256_store_ps(&dst[2*i], dd[0]);
		_mm256_store_ps(&dst[2*i+8], dd[1]);
	}
	for (; i < len; i++) {
		dst[2*i  ] = (a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1]) * scale;
		dst[2*i+1] = (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ]) * scale;
	}
#else
	pffft_zconvolve(fft, a, b, dst, scale);
#endif
}

void dsp_fft_cmuladd_avx(struct dsp_ops *ops, void *fft,
	float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
	uint32_t len, const float scale)
{
#ifdef HAVE_FFTW
	__m256 s = _mm256_set1_ps(scale);
	__m256 aa[2], bb[2], dd[2], t[2];
	uint32_t i, unrolled;

	if (SPA_IS_ALIGNED(a, 32) &&
	    SPA_IS_ALIGNED(b, 32) &&
	    SPA_IS_ALIGNED(dst, 32))
		unrolled = len & ~7;
	else
		unrolled = 0;

	for (i = 0; i < unrolled; i+=8) {
		aa[0] = _mm256_load_ps(&a[2*i]);	/* ar0 ai0 .. ar3 ai3 */
		aa[1] = _mm256_load_ps(&a[2*i+8]);	/* ar4 ai4 .. ar7 ai7 */
		bb[0] = _mm256_load_ps(&b[2*i]);	/* br0 bi0 .. br3 bi3 */
		bb[1] = _mm256_load_ps(&b[2*i+8]);	/* br4 bi4 .. br7 bi7 */
		dd[0] = _mm256_mul_pz(aa[0], bb[0]);
		dd[1] = _mm256_mul_pz(aa[1], bb[1]);
		dd[0] = _mm256_mul_ps(dd[0], s);
		dd[1] = _mm256_mul_ps(dd[1], s);
		t[0] = _mm256_load_ps(&dst[2*i]);
		t[1] = _mm256_load_ps(&dst[2*i+8]);
		t[0] = _mm256_add_ps(t[0], dd[0]);
		t[1] = _mm256_add_ps(t[1], dd[1]);
		_mm256_store_ps(&dst[2*i], t[0]);
		_mm256_store_ps(&dst[2*i+8], t[1]);
	}
	for (; i < len; i++) {
		dst[2*i  ] += (a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1]) * scale;
		dst[2*i+1] += (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ]) * scale;
	}
#else
	pffft_zconvolve_accumulate(fft, a, b, src, dst, scale);
#endif
}
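The _mm256_mul_pz() helper above is the classic moveldup/movehdup/addsub complex multiply: for interleaved (re, im) pairs it computes (ar*br - ai*bi) + i*(ar*bi + ai*br) for four complex values per __m256, the same per-bin product as the scalar tail loop. A small standalone check of that lane bookkeeping against the scalar formula (a sketch, not part of the commit; the test values are arbitrary and it has to be built with AVX enabled, e.g. gcc -mavx):

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
	/* four complex values, interleaved as ar0 ai0 ar1 ai1 ... */
	float a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	float b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };
	float d[8], ref[8];
	int i;
	/* same sequence as _mm256_mul_pz(), using unaligned loads/stores */
	__m256 ab = _mm256_loadu_ps(a);
	__m256 cd = _mm256_loadu_ps(b);
	__m256 aa = _mm256_moveldup_ps(ab);				/* ar ar ... */
	__m256 bb = _mm256_movehdup_ps(ab);				/* ai ai ... */
	__m256 dc = _mm256_shuffle_ps(cd, cd, _MM_SHUFFLE(2,3,0,1));	/* bi br ... */

	_mm256_storeu_ps(d, _mm256_addsub_ps(_mm256_mul_ps(aa, cd), _mm256_mul_ps(bb, dc)));

	/* scalar reference, as in the tail loops above (scale omitted) */
	for (i = 0; i < 4; i++) {
		ref[2*i  ] = a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1];
		ref[2*i+1] = a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ];
	}
	for (i = 0; i < 8; i++)
		printf("%d: simd %g  scalar %g\n", i, d[i], ref[i]);
	return 0;
}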
				
			
module-filter-chain/dsp-ops-c.c

@@ -6,10 +6,16 @@
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <errno.h>

#include <spa/utils/defs.h>

#include "config.h"
#ifdef HAVE_FFTW
#include <fftw3.h>
#else
#include "pffft.h"
#endif
#include "dsp-ops.h"

void dsp_clear_c(struct dsp_ops *ops, void * SPA_RESTRICT dst, uint32_t n_samples)
@@ -209,26 +215,75 @@ void dsp_delay_c(struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n_b
	}
}

#ifdef HAVE_FFTW
struct fft_info {
	fftwf_plan plan_r2c;
	fftwf_plan plan_c2r;
};
#endif

void *dsp_fft_new_c(struct dsp_ops *ops, int32_t size, bool real)
{
#ifdef HAVE_FFTW
	struct fft_info *info = calloc(1, sizeof(struct fft_info));
	float *rdata;
	fftwf_complex *cdata;

	if (info == NULL)
		return NULL;

	rdata = fftwf_alloc_real (size * 2);
	cdata = fftwf_alloc_complex (size + 1);

	info->plan_r2c = fftwf_plan_dft_r2c_1d(size, rdata, cdata, FFTW_ESTIMATE);
	info->plan_c2r = fftwf_plan_dft_c2r_1d(size, cdata, rdata, FFTW_ESTIMATE);

	fftwf_free(rdata);
	fftwf_free(cdata);

	return info;
#else
	return pffft_new_setup(size, real ? PFFFT_REAL : PFFFT_COMPLEX);
#endif
}

void dsp_fft_free_c(struct dsp_ops *ops, void *fft)
{
#ifdef HAVE_FFTW
	struct fft_info *info = fft;
	fftwf_destroy_plan(info->plan_r2c);
	fftwf_destroy_plan(info->plan_c2r);
	free(info);
#else
	pffft_destroy_setup(fft);
#endif
}
void dsp_fft_run_c(struct dsp_ops *ops, void *fft, int direction,
	const float * SPA_RESTRICT src, float * SPA_RESTRICT dst)
{
#ifdef HAVE_FFTW
	struct fft_info *info = fft;
	if (direction > 0)
		fftwf_execute_dft_r2c (info->plan_r2c, (float*)src, (fftwf_complex*)dst);
	else
		fftwf_execute_dft_c2r (info->plan_c2r, (fftwf_complex*)src, dst);
#else
	pffft_transform(fft, src, dst, NULL, direction < 0 ? PFFFT_BACKWARD : PFFFT_FORWARD);
#endif
}

void dsp_fft_cmul_c(struct dsp_ops *ops, void *fft,
	float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
	const float * SPA_RESTRICT b, uint32_t len, const float scale)
{
#ifdef HAVE_FFTW
	for (uint32_t i = 0; i <= len; i++) {
		dst[2*i  ] = (a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1]) * scale;
		dst[2*i+1] = (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ]) * scale;
	}
#else
	pffft_zconvolve(fft, a, b, dst, scale);
#endif
}

void dsp_fft_cmuladd_c(struct dsp_ops *ops, void *fft,
@@ -236,6 +291,13 @@ void dsp_fft_cmuladd_c(struct dsp_ops *ops, void *fft,
	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
	uint32_t len, const float scale)
{
#ifdef HAVE_FFTW
	for (uint32_t i = 0; i <= len; i++) {
		dst[2*i  ] += (a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1]) * scale;
		dst[2*i+1] += (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ]) * scale;
	}
#else
	pffft_zconvolve_accumulate(fft, a, b, src, dst, scale);
#endif
}
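A note on the FFTW usage above, for readers who know pffft but not FFTW: dsp_fft_new_c() builds the two plans once against scratch buffers with FFTW_ESTIMATE and frees the scratch right away, and dsp_fft_run_c() then applies those plans to the caller's buffers with fftwf_execute_dft_r2c()/fftwf_execute_dft_c2r(). A real-to-complex transform of size n produces n/2 + 1 complex bins (presumably why the cmul loops run i <= len, i.e. len + 1 bins), and the complex-to-real transform is unnormalized, so a full round trip needs a 1/n factor. A minimal self-contained round trip using the same calls (a sketch, not from the commit; link with -lfftw3f):

#include <stdio.h>
#include <fftw3.h>

int main(void)
{
	const int size = 8;
	float *in = fftwf_alloc_real(size);
	float *out = fftwf_alloc_real(size);
	fftwf_complex *freq = fftwf_alloc_complex(size / 2 + 1);
	fftwf_plan r2c = fftwf_plan_dft_r2c_1d(size, in, freq, FFTW_ESTIMATE);
	fftwf_plan c2r = fftwf_plan_dft_c2r_1d(size, freq, out, FFTW_ESTIMATE);
	int i;

	for (i = 0; i < size; i++)
		in[i] = (float) i;

	fftwf_execute(r2c);	/* in -> freq, size/2 + 1 complex bins */
	fftwf_execute(c2r);	/* freq -> out, unnormalized */

	for (i = 0; i < size; i++)
		printf("%d: %f (expected %f)\n", i, out[i] / size, in[i]);

	fftwf_destroy_plan(r2c);
	fftwf_destroy_plan(c2r);
	fftwf_free(in);
	fftwf_free(freq);
	fftwf_free(out);
	return 0;
}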
				
			
module-filter-chain/dsp-ops-sse.c

@@ -6,9 +6,12 @@
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <complex.h>

#include <spa/utils/defs.h>

#include "config.h"

#include "dsp-ops.h"

#include <xmmintrin.h>
@@ -533,3 +536,100 @@ void dsp_delay_sse(struct dsp_ops *ops, float *buffer, uint32_t *pos, uint32_t n
	}
	*pos = w;
}

inline static void _mm_mul_pz(__m128 *a, __m128 *b, __m128 *d)
{
    __m128 ar, ai, br, bi, arbr, arbi, aibi, aibr, dr, di;
    ar = _mm_shuffle_ps(a[0], a[1], _MM_SHUFFLE(2,0,2,0));	/* ar0 ar1 ar2 ar3 */
    ai = _mm_shuffle_ps(a[0], a[1], _MM_SHUFFLE(3,1,3,1));	/* ai0 ai1 ai2 ai3 */
    br = _mm_shuffle_ps(b[0], b[1], _MM_SHUFFLE(2,0,2,0));	/* br0 br1 br2 br3 */
    bi = _mm_shuffle_ps(b[0], b[1], _MM_SHUFFLE(3,1,3,1));	/* bi0 bi1 bi2 bi3 */

    arbr = _mm_mul_ps(ar, br);	/* ar * br */
    arbi = _mm_mul_ps(ar, bi);	/* ar * bi */

    aibi = _mm_mul_ps(ai, bi);	/* ai * bi */
    aibr = _mm_mul_ps(ai, br);	/* ai * br */

    dr = _mm_sub_ps(arbr, aibi);	/* ar * br  - ai * bi */
    di = _mm_add_ps(arbi, aibr);	/* ar * bi  + ai * br */
    d[0] = _mm_unpacklo_ps(dr, di);
    d[1] = _mm_unpackhi_ps(dr, di);
}

void dsp_fft_cmul_sse(struct dsp_ops *ops, void *fft,
	float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
	const float * SPA_RESTRICT b, uint32_t len, const float scale)
{
#ifdef HAVE_FFTW
	__m128 s = _mm_set1_ps(scale);
	__m128 aa[2], bb[2], dd[2];
	uint32_t i, unrolled;

	if (SPA_IS_ALIGNED(a, 16) &&
	    SPA_IS_ALIGNED(b, 16) &&
	    SPA_IS_ALIGNED(dst, 16))
		unrolled = len & ~3;
	else
		unrolled = 0;

	for (i = 0; i < unrolled; i+=4) {
		aa[0] = _mm_load_ps(&a[2*i]);	/* ar0 ai0 ar1 ai1 */
		aa[1] = _mm_load_ps(&a[2*i+4]);	/* ar2 ai2 ar3 ai3 */
		bb[0] = _mm_load_ps(&b[2*i]);	/* br0 bi0 br1 bi1 */
		bb[1] = _mm_load_ps(&b[2*i+4]);	/* br2 bi2 br3 bi3 */
		_mm_mul_pz(aa, bb, dd);
		dd[0] = _mm_mul_ps(dd[0], s);
		dd[1] = _mm_mul_ps(dd[1], s);
		_mm_store_ps(&dst[2*i], dd[0]);
		_mm_store_ps(&dst[2*i+4], dd[1]);
	}
	for (; i < len; i++) {
		dst[2*i  ] = (a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1]) * scale;
		dst[2*i+1] = (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ]) * scale;
	}
#else
	pffft_zconvolve(fft, a, b, dst, scale);
#endif
}

void dsp_fft_cmuladd_sse(struct dsp_ops *ops, void *fft,
	float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
	const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
	uint32_t len, const float scale)
{
#ifdef HAVE_FFTW
	__m128 s = _mm_set1_ps(scale);
	__m128 aa[2], bb[2], dd[2], t[2];
	uint32_t i, unrolled;

	if (SPA_IS_ALIGNED(a, 16) &&
	    SPA_IS_ALIGNED(b, 16) &&
	    SPA_IS_ALIGNED(dst, 16))
		unrolled = len & ~3;
	else
		unrolled = 0;

	for (i = 0; i < unrolled; i+=4) {
		aa[0] = _mm_load_ps(&a[2*i]);	/* ar0 ai0 ar1 ai1 */
		aa[1] = _mm_load_ps(&a[2*i+4]);	/* ar2 ai2 ar3 ai3 */
		bb[0] = _mm_load_ps(&b[2*i]);	/* br0 bi0 br1 bi1 */
		bb[1] = _mm_load_ps(&b[2*i+4]);	/* br2 bi2 br3 bi3 */
		_mm_mul_pz(aa, bb, dd);
		dd[0] = _mm_mul_ps(dd[0], s);
		dd[1] = _mm_mul_ps(dd[1], s);
		t[0] = _mm_load_ps(&dst[2*i]);
		t[1] = _mm_load_ps(&dst[2*i+4]);
		t[0] = _mm_add_ps(t[0], dd[0]);
		t[1] = _mm_add_ps(t[1], dd[1]);
		_mm_store_ps(&dst[2*i], t[0]);
		_mm_store_ps(&dst[2*i+4], t[1]);
	}
	for (; i < len; i++) {
		dst[2*i  ] += (a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1]) * scale;
		dst[2*i+1] += (a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ]) * scale;
	}
#else
	pffft_zconvolve_accumulate(fft, a, b, src, dst, scale);
#endif
}
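The SSE helper reaches the same per-bin product by a different route: _mm_mul_pz() deinterleaves two __m128 loads (four complex values) into real and imaginary planes with _mm_shuffle_ps, does the four multiplies, and reinterleaves the result with _mm_unpacklo_ps/_mm_unpackhi_ps. The same kind of standalone check as for the AVX version (a sketch, not part of the commit; unaligned loads are used so the test arrays need no special alignment):

#include <xmmintrin.h>
#include <stdio.h>

int main(void)
{
	/* four complex values, interleaved as ar0 ai0 ar1 ai1 ... */
	float a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	float b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };
	float d[8], ref[8];
	__m128 a0 = _mm_loadu_ps(a), a1 = _mm_loadu_ps(a + 4);
	__m128 b0 = _mm_loadu_ps(b), b1 = _mm_loadu_ps(b + 4);
	/* split into real/imaginary planes, as in _mm_mul_pz() */
	__m128 ar = _mm_shuffle_ps(a0, a1, _MM_SHUFFLE(2,0,2,0));
	__m128 ai = _mm_shuffle_ps(a0, a1, _MM_SHUFFLE(3,1,3,1));
	__m128 br = _mm_shuffle_ps(b0, b1, _MM_SHUFFLE(2,0,2,0));
	__m128 bi = _mm_shuffle_ps(b0, b1, _MM_SHUFFLE(3,1,3,1));
	__m128 dr = _mm_sub_ps(_mm_mul_ps(ar, br), _mm_mul_ps(ai, bi));
	__m128 di = _mm_add_ps(_mm_mul_ps(ar, bi), _mm_mul_ps(ai, br));
	int i;

	/* reinterleave back to (re, im) pairs */
	_mm_storeu_ps(d,     _mm_unpacklo_ps(dr, di));
	_mm_storeu_ps(d + 4, _mm_unpackhi_ps(dr, di));

	/* scalar reference, as in the tail loops above (scale omitted) */
	for (i = 0; i < 4; i++) {
		ref[2*i  ] = a[2*i] * b[2*i  ] - a[2*i+1] * b[2*i+1];
		ref[2*i+1] = a[2*i] * b[2*i+1] + a[2*i+1] * b[2*i  ];
	}
	for (i = 0; i < 8; i++)
		printf("%d: simd %g  scalar %g\n", i, d[i], ref[i]);
	return 0;
}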
				
			
module-filter-chain/dsp-ops.c

@@ -33,8 +33,8 @@ static struct dsp_info dsp_table[] =
		.funcs.fft_new = dsp_fft_new_c,
		.funcs.fft_free = dsp_fft_free_c,
		.funcs.fft_run = dsp_fft_run_c,
-		.funcs.fft_cmul = dsp_fft_cmul_c,
-		.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
+		.funcs.fft_cmul = dsp_fft_cmul_avx,
+		.funcs.fft_cmuladd = dsp_fft_cmuladd_avx,
		.funcs.biquadn_run = dsp_biquadn_run_sse,
		.funcs.delay = dsp_delay_sse,
	},
@@ -51,8 +51,8 @@ static struct dsp_info dsp_table[] =
		.funcs.fft_new = dsp_fft_new_c,
		.funcs.fft_free = dsp_fft_free_c,
		.funcs.fft_run = dsp_fft_run_c,
-		.funcs.fft_cmul = dsp_fft_cmul_c,
-		.funcs.fft_cmuladd = dsp_fft_cmuladd_c,
+		.funcs.fft_cmul = dsp_fft_cmul_sse,
+		.funcs.fft_cmuladd = dsp_fft_cmuladd_sse,
		.funcs.biquadn_run = dsp_biquadn_run_sse,
		.funcs.delay = dsp_delay_sse,
	},
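Note that only the spectrum multiplies switch to the SIMD variants here; plan setup, teardown and the transforms themselves (fft_new, fft_free, fft_run) stay on the C implementations, which pick FFTW or pffft at compile time via HAVE_FFTW.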
				
			
module-filter-chain/dsp-ops.h

@@ -148,10 +148,14 @@ MAKE_SUM_FUNC(sse);
MAKE_BIQUAD_RUN_FUNC(sse);
MAKE_BIQUADN_RUN_FUNC(sse);
MAKE_DELAY_FUNC(sse);
+MAKE_FFT_CMUL_FUNC(sse);
+MAKE_FFT_CMULADD_FUNC(sse);
#endif
#if defined (HAVE_AVX)
MAKE_MIX_GAIN_FUNC(avx);
MAKE_SUM_FUNC(avx);
+MAKE_FFT_CMUL_FUNC(avx);
+MAKE_FFT_CMULADD_FUNC(avx);
#endif

#endif /* DSP_OPS_H */