mirror of
				https://gitlab.freedesktop.org/pipewire/pipewire.git
				synced 2025-11-03 09:01:54 -05:00 
			
		
		
		
	filter-chain: use pffft for the convolver
It is faster.
This commit is contained in:
		
							parent
							
								
									9dbfa63193
								
							
						
					
					
						commit
						0f5face73f
					
				
					 9 changed files with 2550 additions and 990 deletions
				
			
		| 
						 | 
				
			
			@ -50,8 +50,7 @@ pipewire_module_filter_chain = shared_library('pipewire-module-filter-chain',
 | 
			
		|||
    'module-filter-chain/biquad.c',
 | 
			
		||||
    'module-filter-chain/ladspa_plugin.c',
 | 
			
		||||
    'module-filter-chain/builtin_plugin.c',
 | 
			
		||||
    'module-filter-chain/kiss_fft_f32.c',
 | 
			
		||||
    'module-filter-chain/kiss_fftr_f32.c',
 | 
			
		||||
    'module-filter-chain/pffft.c',
 | 
			
		||||
    'module-filter-chain/convolver.c' ],
 | 
			
		||||
  include_directories : [configinc, spa_inc],
 | 
			
		||||
  install : true,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,173 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
 *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
 | 
			
		||||
 *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
 | 
			
		||||
 *
 | 
			
		||||
 *  SPDX-License-Identifier: BSD-3-Clause
 | 
			
		||||
 *  See COPYING file for more information.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* kiss_fft_f32.h
 | 
			
		||||
   defines kiss_fft_f32_scalar as either short or a float type
 | 
			
		||||
   and defines
 | 
			
		||||
   typedef struct { kiss_fft_f32_scalar r; kiss_fft_f32_scalar i; }kiss_fft_f32_cpx; */
 | 
			
		||||
#include "kiss_fft_f32.h"
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
 | 
			
		||||
/* The 2*sizeof(size_t) alignment here is borrowed from
 | 
			
		||||
 * GNU libc, so it should be good most everywhere.
 | 
			
		||||
 * It is more conservative than is needed on some 64-bit
 | 
			
		||||
 * platforms, but ia64 does require a 16-byte alignment.
 | 
			
		||||
 * The SIMD extensions for x86 and ppc32 would want a
 | 
			
		||||
 * larger alignment than this, but we don't need to
 | 
			
		||||
 * do better than malloc.
 | 
			
		||||
 *
 | 
			
		||||
 * Borrowed from GLib's gobject/gtype.c
 | 
			
		||||
 */
 | 
			
		||||
#define STRUCT_ALIGNMENT (2 * sizeof (size_t))
 | 
			
		||||
#define ALIGN_STRUCT(offset) \
 | 
			
		||||
      ((offset + (STRUCT_ALIGNMENT - 1)) & -STRUCT_ALIGNMENT)
 | 
			
		||||
 | 
			
		||||
#define MAXFACTORS 32
 | 
			
		||||
/* e.g. an fft of length 128 has 4 factors 
 | 
			
		||||
 as far as kissfft is concerned
 | 
			
		||||
 4*4*4*2
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
struct kiss_fft_f32_state{
 | 
			
		||||
    int nfft;
 | 
			
		||||
    int inverse;
 | 
			
		||||
    int factors[2*MAXFACTORS];
 | 
			
		||||
    kiss_fft_f32_cpx twiddles[1];
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
  Explanation of macros dealing with complex math:
 | 
			
		||||
 | 
			
		||||
   C_MUL(m,a,b)         : m = a*b
 | 
			
		||||
   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
 | 
			
		||||
   C_SUB( res, a,b)     : res = a - b
 | 
			
		||||
   C_SUBFROM( res , a)  : res -= a
 | 
			
		||||
   C_ADDTO( res , a)    : res += a
 | 
			
		||||
 * */
 | 
			
		||||
#ifdef FIXED_POINT
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#if (FIXED_POINT==32)
 | 
			
		||||
# define FRACBITS 31
 | 
			
		||||
# define SAMPPROD int64_t
 | 
			
		||||
#define SAMP_MAX INT32_MAX
 | 
			
		||||
#define SAMP_MIN INT32_MIN
 | 
			
		||||
#else
 | 
			
		||||
# define FRACBITS 15
 | 
			
		||||
# define SAMPPROD int32_t
 | 
			
		||||
#define SAMP_MAX INT16_MAX
 | 
			
		||||
#define SAMP_MIN INT16_MIN
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(CHECK_OVERFLOW)
 | 
			
		||||
#  define CHECK_OVERFLOW_OP(a,op,b)  \
 | 
			
		||||
	if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
 | 
			
		||||
		g_critical("overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) );  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#   define smul(a,b) ( (SAMPPROD)(a)*(b) )
 | 
			
		||||
#   define sround( x )  (kiss_fft_f32_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
 | 
			
		||||
 | 
			
		||||
#   define S_MUL(a,b) sround( smul(a,b) )
 | 
			
		||||
 | 
			
		||||
#   define C_MUL(m,a,b) \
 | 
			
		||||
      do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
 | 
			
		||||
          (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
 | 
			
		||||
 | 
			
		||||
#   define DIVSCALAR(x,k) \
 | 
			
		||||
	(x) = sround( smul(  x, SAMP_MAX/k ) )
 | 
			
		||||
 | 
			
		||||
#   define C_FIXDIV(c,div) \
 | 
			
		||||
	do {    DIVSCALAR( (c).r , div);  \
 | 
			
		||||
		DIVSCALAR( (c).i  , div); }while (0)
 | 
			
		||||
 | 
			
		||||
#   define C_MULBYSCALAR( c, s ) \
 | 
			
		||||
    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
 | 
			
		||||
        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
 | 
			
		||||
 | 
			
		||||
#else  /* not FIXED_POINT*/
 | 
			
		||||
 | 
			
		||||
#   define S_MUL(a,b) ( (a)*(b) )
 | 
			
		||||
#define C_MUL(m,a,b) \
 | 
			
		||||
    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
 | 
			
		||||
        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
 | 
			
		||||
#   define C_FIXDIV(c,div) /* NOOP */
 | 
			
		||||
#   define C_MULBYSCALAR( c, s ) \
 | 
			
		||||
    do{ (c).r *= (s);\
 | 
			
		||||
        (c).i *= (s); }while(0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef CHECK_OVERFLOW_OP
 | 
			
		||||
#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define  C_ADD( res, a,b)\
 | 
			
		||||
    do { \
 | 
			
		||||
	    CHECK_OVERFLOW_OP((a).r,+,(b).r)\
 | 
			
		||||
	    CHECK_OVERFLOW_OP((a).i,+,(b).i)\
 | 
			
		||||
	    (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
 | 
			
		||||
    }while(0)
 | 
			
		||||
#define  C_SUB( res, a,b)\
 | 
			
		||||
    do { \
 | 
			
		||||
	    CHECK_OVERFLOW_OP((a).r,-,(b).r)\
 | 
			
		||||
	    CHECK_OVERFLOW_OP((a).i,-,(b).i)\
 | 
			
		||||
	    (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
 | 
			
		||||
    }while(0)
 | 
			
		||||
#define C_ADDTO( res , a)\
 | 
			
		||||
    do { \
 | 
			
		||||
	    CHECK_OVERFLOW_OP((res).r,+,(a).r)\
 | 
			
		||||
	    CHECK_OVERFLOW_OP((res).i,+,(a).i)\
 | 
			
		||||
	    (res).r += (a).r;  (res).i += (a).i;\
 | 
			
		||||
    }while(0)
 | 
			
		||||
 | 
			
		||||
#define C_SUBFROM( res , a)\
 | 
			
		||||
    do {\
 | 
			
		||||
	    CHECK_OVERFLOW_OP((res).r,-,(a).r)\
 | 
			
		||||
	    CHECK_OVERFLOW_OP((res).i,-,(a).i)\
 | 
			
		||||
	    (res).r -= (a).r;  (res).i -= (a).i; \
 | 
			
		||||
    }while(0)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef FIXED_POINT
 | 
			
		||||
#  define KISS_FFT_F32_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
 | 
			
		||||
#  define KISS_FFT_F32_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
 | 
			
		||||
#  define HALF_OF(x) ((x)>>1)
 | 
			
		||||
#elif defined(USE_SIMD)
 | 
			
		||||
#  define KISS_FFT_F32_COS(phase) _mm_set1_ps( cos(phase) )
 | 
			
		||||
#  define KISS_FFT_F32_SIN(phase) _mm_set1_ps( sin(phase) )
 | 
			
		||||
#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
 | 
			
		||||
#else
 | 
			
		||||
#  define KISS_FFT_F32_COS(phase) (kiss_fft_f32_scalar) cos(phase)
 | 
			
		||||
#  define KISS_FFT_F32_SIN(phase) (kiss_fft_f32_scalar) sin(phase)
 | 
			
		||||
#  define HALF_OF(x) ((x)*.5)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define  kf_cexp(x,phase) \
 | 
			
		||||
	do{ \
 | 
			
		||||
		(x)->r = KISS_FFT_F32_COS(phase);\
 | 
			
		||||
		(x)->i = KISS_FFT_F32_SIN(phase);\
 | 
			
		||||
	}while(0)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* a debugging function */
 | 
			
		||||
#define pcpx(c)\
 | 
			
		||||
    fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef KISS_FFT_F32_USE_ALLOCA
 | 
			
		||||
// define this to allow use of alloca instead of malloc for temporary buffers
 | 
			
		||||
// Temporary buffers are used in two cases: 
 | 
			
		||||
// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
 | 
			
		||||
// 2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do an in-place transform.
 | 
			
		||||
#include <alloca.h>
 | 
			
		||||
#define  KISS_FFT_F32_TMP_ALLOC(nbytes) alloca(nbytes)
 | 
			
		||||
#define  KISS_FFT_F32_TMP_FREE(ptr) 
 | 
			
		||||
#else
 | 
			
		||||
#define  KISS_FFT_F32_TMP_ALLOC(nbytes) KISS_FFT_F32_MALLOC(nbytes)
 | 
			
		||||
#define  KISS_FFT_F32_TMP_FREE(ptr) KISS_FFT_F32_FREE(ptr)
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -28,8 +28,14 @@
 | 
			
		|||
 | 
			
		||||
#include <spa/utils/defs.h>
 | 
			
		||||
 | 
			
		||||
#include "kiss_fft_f32.h"
 | 
			
		||||
#include "kiss_fftr_f32.h"
 | 
			
		||||
#include <math.h>
 | 
			
		||||
#include <xmmintrin.h>
 | 
			
		||||
 | 
			
		||||
#include "pffft.h"
 | 
			
		||||
 | 
			
		||||
/* Handle for one block of FFT data. fft_cpx_init() allocates 2 * size floats
 * for v and fft_cpx_free() releases them. */
struct fft_cpx {
 | 
			
		||||
	float *v;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct convolver1 {
 | 
			
		||||
	int blockSize;
 | 
			
		||||
| 
						 | 
				
			
			@ -37,16 +43,16 @@ struct convolver1 {
 | 
			
		|||
	int segCount;
 | 
			
		||||
	int fftComplexSize;
 | 
			
		||||
 | 
			
		||||
	kiss_fft_f32_cpx **segments;
 | 
			
		||||
	kiss_fft_f32_cpx **segmentsIr;
 | 
			
		||||
	struct fft_cpx *segments;
 | 
			
		||||
	struct fft_cpx *segmentsIr;
 | 
			
		||||
 | 
			
		||||
	float *fft_buffer;
 | 
			
		||||
 | 
			
		||||
	void *fft;
 | 
			
		||||
	void *ifft;
 | 
			
		||||
 | 
			
		||||
	kiss_fft_f32_cpx *pre_mult;
 | 
			
		||||
	kiss_fft_f32_cpx *conv;
 | 
			
		||||
	struct fft_cpx pre_mult;
 | 
			
		||||
	struct fft_cpx conv;
 | 
			
		||||
	float *overlap;
 | 
			
		||||
 | 
			
		||||
	float *inputBuffer;
 | 
			
		||||
| 
						 | 
				
			
			@ -55,6 +61,38 @@ struct convolver1 {
 | 
			
		|||
	int current;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Allocate `size` bytes with pffft_aligned_malloc() and zero-fill them.
 *
 * Returns the zeroed buffer, or NULL when the allocator fails. Callers in
 * this file compare the result against NULL, so a failed allocation has to
 * be passed through to them rather than being handed to memset().
 * Release the buffer with fft_free().
 */
static void *fft_alloc(int size)
{
	void *d;

	d = pffft_aligned_malloc(size);
	if (d == NULL)
		return NULL;	/* nothing to clear; let the caller handle the failure */
	memset(d, 0, size);
	return d;
}
 | 
			
		||||
/* Release a buffer obtained from fft_alloc() by passing it to
 * pffft_aligned_free(). */
static void fft_free(void  *data)
 | 
			
		||||
{
 | 
			
		||||
	pffft_aligned_free(data);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Give cpx a zero-filled buffer of 2 * size floats obtained from fft_alloc().
 * No status is returned: cpx->v is simply whatever fft_alloc() handed back. */
static void fft_cpx_init(struct fft_cpx *cpx, int size)
 | 
			
		||||
{
 | 
			
		||||
	cpx->v = fft_alloc(size * 2 * sizeof(float));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Release the buffer held in cpx->v through fft_free(). cpx->v itself is
 * left unchanged, so it must not be used afterwards. */
static void fft_cpx_free(struct fft_cpx *cpx)
 | 
			
		||||
{
 | 
			
		||||
	fft_free(cpx->v);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Zero the first 2 * size floats of cpx->v. `size` is expected to be the same
 * count that was given to fft_cpx_init(). */
static void fft_cpx_clear(struct fft_cpx *cpx, int size)
 | 
			
		||||
{
 | 
			
		||||
	memset(cpx->v, 0, sizeof(float) * 2 * size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Copy 2 * size floats from src->v to dst->v. Uses memcpy(), so the two
 * buffers must not overlap. */
static void fft_cpx_copy(struct fft_cpx *dst, struct fft_cpx *src, int size)
 | 
			
		||||
{
 | 
			
		||||
	memcpy(dst->v, src->v, sizeof(float) * 2 * size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int next_power_of_two(int val)
 | 
			
		||||
{
 | 
			
		||||
	int r = 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -63,6 +101,37 @@ static int next_power_of_two(int val)
 | 
			
		|||
	return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Create the setup object used for forward transforms: a PFFFT_REAL setup of
 * `size` points from pffft_new_setup(). The result is returned as an opaque
 * pointer; callers check it against NULL. Release with fft_destroy(). */
static inline void *fft_new(int size)
 | 
			
		||||
{
 | 
			
		||||
	return pffft_new_setup(size, PFFFT_REAL);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Create the setup object used for inverse transforms. The call is identical
 * to the one in fft_new(): a PFFFT_REAL setup of `size` points; the transform
 * direction is chosen later, when the transform is run. */
static inline void *ifft_new(int size)
 | 
			
		||||
{
 | 
			
		||||
	return pffft_new_setup(size, PFFFT_REAL);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Destroy a setup object created by fft_new() or ifft_new() by passing it to
 * pffft_destroy_setup(). */
static inline void fft_destroy(void *fft)
 | 
			
		||||
{
 | 
			
		||||
	pffft_destroy_setup(fft);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Forward transform: run pffft_transform() with PFFFT_FORWARD, reading the
 * floats at `in` and writing the result to out->v.
 * NOTE(review): the NULL fourth argument is presumably pffft's optional work
 * buffer, and the output is left in whatever layout pffft_transform()
 * produces -- confirm against pffft.h. */
static inline void fft_run(void *fft, float *in, struct fft_cpx *out)
 | 
			
		||||
{
 | 
			
		||||
	pffft_transform(fft, in, out->v, NULL, PFFFT_FORWARD);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Backward transform: run pffft_transform() with PFFFT_BACKWARD, reading
 * in->v and writing the floats to `out`.
 * NOTE(review): no scaling is applied here; the callers pass a
 * 1.0f / segSize factor to fft_convolve_accum() instead -- confirm that this
 * is the only normalisation pffft requires. */
static inline void ifft_run(void *ifft, struct fft_cpx *in, float *out)
 | 
			
		||||
{
 | 
			
		||||
	pffft_transform(ifft, in->v, out, NULL, PFFFT_BACKWARD);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Hand a->v, b->v, r->v and `scale` to pffft_zconvolve_accumulate() using the
 * setup `fft`. The `len` parameter is not used by the body.
 * NOTE(review): presumably this accumulates the scaled spectral product of a
 * and b into r -- confirm against pffft.h. */
static inline void fft_convolve_accum(void *fft, struct fft_cpx *r,
 | 
			
		||||
		const struct fft_cpx *a, const struct fft_cpx *b, int len, float scale)
 | 
			
		||||
{
 | 
			
		||||
	pffft_zconvolve_accumulate(fft, a->v, b->v, r->v, scale);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
 | 
			
		||||
{
 | 
			
		||||
	struct convolver1 *conv;
 | 
			
		||||
| 
						 | 
				
			
			@ -86,37 +155,37 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
 | 
			
		|||
	conv->segCount = (irlen + conv->blockSize-1) / conv->blockSize;
 | 
			
		||||
	conv->fftComplexSize = (conv->segSize / 2) + 1;
 | 
			
		||||
 | 
			
		||||
        conv->fft = kiss_fftr_f32_alloc(conv->segSize, 0, NULL, NULL);
 | 
			
		||||
        conv->fft = fft_new(conv->segSize);
 | 
			
		||||
        if (conv->fft == NULL)
 | 
			
		||||
                return NULL;
 | 
			
		||||
        conv->ifft = kiss_fftr_f32_alloc(conv->segSize, 1, NULL, NULL);
 | 
			
		||||
        conv->ifft = ifft_new(conv->segSize);
 | 
			
		||||
        if (conv->ifft == NULL)
 | 
			
		||||
                return NULL;
 | 
			
		||||
 | 
			
		||||
	conv->fft_buffer = calloc(sizeof(float), conv->segSize);
 | 
			
		||||
	conv->fft_buffer = fft_alloc(sizeof(float) * conv->segSize);
 | 
			
		||||
        if (conv->fft_buffer == NULL)
 | 
			
		||||
                return NULL;
 | 
			
		||||
 | 
			
		||||
	conv->segments = calloc(sizeof(kiss_fft_f32_cpx*), conv->segCount);
 | 
			
		||||
	conv->segmentsIr = calloc(sizeof(kiss_fft_f32_cpx*), conv->segCount);
 | 
			
		||||
	conv->segments = calloc(sizeof(struct fft_cpx), conv->segCount);
 | 
			
		||||
	conv->segmentsIr = calloc(sizeof(struct fft_cpx), conv->segCount);
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < conv->segCount; i++) {
 | 
			
		||||
		int left = irlen - (i * conv->blockSize);
 | 
			
		||||
		int copy = SPA_MIN(conv->blockSize, left);
 | 
			
		||||
 | 
			
		||||
		conv->segments[i] = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
 | 
			
		||||
		conv->segmentsIr[i] = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
 | 
			
		||||
		fft_cpx_init(&conv->segments[i], conv->fftComplexSize);
 | 
			
		||||
		fft_cpx_init(&conv->segmentsIr[i], conv->fftComplexSize);
 | 
			
		||||
 | 
			
		||||
		memcpy(conv->fft_buffer, &ir[i * conv->blockSize], copy * sizeof(float));
 | 
			
		||||
		if (copy < conv->segSize)
 | 
			
		||||
			memset(conv->fft_buffer + copy, 0, (conv->segSize - copy) * sizeof(float));
 | 
			
		||||
 | 
			
		||||
	        kiss_fftr_f32(conv->fft, conv->fft_buffer, conv->segmentsIr[i]);
 | 
			
		||||
	        fft_run(conv->fft, conv->fft_buffer, &conv->segmentsIr[i]);
 | 
			
		||||
	}
 | 
			
		||||
	conv->pre_mult = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
 | 
			
		||||
	conv->conv = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
 | 
			
		||||
	conv->overlap = calloc(sizeof(float), conv->blockSize);
 | 
			
		||||
	conv->inputBuffer = calloc(sizeof(float), conv->blockSize);
 | 
			
		||||
	fft_cpx_init(&conv->pre_mult, conv->fftComplexSize);
 | 
			
		||||
	fft_cpx_init(&conv->conv, conv->fftComplexSize);
 | 
			
		||||
	conv->overlap = fft_alloc(sizeof(float) * conv->blockSize);
 | 
			
		||||
	conv->inputBuffer = fft_alloc(sizeof(float) * conv->blockSize);
 | 
			
		||||
	conv->inputBufferFill = 0;
 | 
			
		||||
	conv->current = 0;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -127,35 +196,38 @@ static void convolver1_free(struct convolver1 *conv)
 | 
			
		|||
{
 | 
			
		||||
	int i;
 | 
			
		||||
	for (i = 0; i < conv->segCount; i++) {
 | 
			
		||||
		free(conv->segments[i]);
 | 
			
		||||
		free(conv->segmentsIr[i]);
 | 
			
		||||
		fft_cpx_free(&conv->segments[i]);
 | 
			
		||||
		fft_cpx_free(&conv->segmentsIr[i]);
 | 
			
		||||
	}
 | 
			
		||||
	free(conv->fft);
 | 
			
		||||
	free(conv->ifft);
 | 
			
		||||
	free(conv->fft_buffer);
 | 
			
		||||
	fft_destroy(conv->fft);
 | 
			
		||||
	fft_destroy(conv->ifft);
 | 
			
		||||
	fft_free(conv->fft_buffer);
 | 
			
		||||
	free(conv->segments);
 | 
			
		||||
	free(conv->segmentsIr);
 | 
			
		||||
	free(conv->pre_mult);
 | 
			
		||||
	free(conv->conv);
 | 
			
		||||
	free(conv->overlap);
 | 
			
		||||
	free(conv->inputBuffer);
 | 
			
		||||
	fft_cpx_free(&conv->pre_mult);
 | 
			
		||||
	fft_cpx_free(&conv->conv);
 | 
			
		||||
	fft_free(conv->overlap);
 | 
			
		||||
	fft_free(conv->inputBuffer);
 | 
			
		||||
	free(conv);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Sum(float* result, const float* a, const float* b, int len)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
#if defined (__SSE__)
 | 
			
		||||
	const int end4 = 4 * (len / 4);
 | 
			
		||||
	for (i = 0; i < end4; i += 4) {
 | 
			
		||||
		const __m128 va = _mm_load_ps(&a[i]);
 | 
			
		||||
		const __m128 vb = _mm_load_ps(&b[i]);
 | 
			
		||||
		_mm_store_ps(&result[i], _mm_add_ps(va,vb));
 | 
			
		||||
	}
 | 
			
		||||
	for (i = end4; i < len; ++i) {
 | 
			
		||||
		result[i] = a[i] + b[i];
 | 
			
		||||
	}
 | 
			
		||||
#else
 | 
			
		||||
	for (i = 0; i < len; i++)
 | 
			
		||||
		result[i] = a[i] + b[i];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ComplexMultiplyAccumulate(kiss_fft_f32_cpx *r, const kiss_fft_f32_cpx *a, const kiss_fft_f32_cpx *b, int len)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
	for (i = 0; i < len; i++) {
 | 
			
		||||
		r[i].r += a[i].r * b[i].r - a[i].i * b[i].i;
 | 
			
		||||
		r[i].i += a[i].r * b[i].i + a[i].i * b[i].r;
 | 
			
		||||
	}
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int convolver1_run(struct convolver1 *conv, const float *input, float *output, int len)
 | 
			
		||||
| 
						 | 
				
			
			@ -176,30 +248,27 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
 | 
			
		|||
		memcpy(conv->fft_buffer, conv->inputBuffer, conv->blockSize * sizeof(float));
 | 
			
		||||
		memset(conv->fft_buffer + conv->blockSize, 0, (conv->segSize - conv->blockSize) * sizeof(float));
 | 
			
		||||
 | 
			
		||||
		kiss_fftr_f32(conv->fft, conv->fft_buffer, conv->segments[conv->current]);
 | 
			
		||||
		fft_run(conv->fft, conv->fft_buffer, &conv->segments[conv->current]);
 | 
			
		||||
 | 
			
		||||
		if (conv->inputBufferFill == 0) {
 | 
			
		||||
			memset(conv->pre_mult, 0, sizeof(kiss_fft_f32_cpx) * conv->fftComplexSize);
 | 
			
		||||
			fft_cpx_clear(&conv->pre_mult, conv->fftComplexSize);
 | 
			
		||||
 | 
			
		||||
			for (i = 1; i < conv->segCount; i++) {
 | 
			
		||||
				const int indexIr = i;
 | 
			
		||||
				const int indexAudio = (conv->current + i) % conv->segCount;
 | 
			
		||||
 | 
			
		||||
				ComplexMultiplyAccumulate(conv->pre_mult,
 | 
			
		||||
						conv->segmentsIr[indexIr],
 | 
			
		||||
						conv->segments[indexAudio],
 | 
			
		||||
						conv->fftComplexSize);
 | 
			
		||||
				fft_convolve_accum(conv->fft, &conv->pre_mult,
 | 
			
		||||
						&conv->segmentsIr[indexIr],
 | 
			
		||||
						&conv->segments[indexAudio],
 | 
			
		||||
						conv->fftComplexSize, 1.0f / conv->segSize);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		memcpy(conv->conv, conv->pre_mult, sizeof(kiss_fft_f32_cpx) * conv->fftComplexSize);
 | 
			
		||||
		fft_cpx_copy(&conv->conv, &conv->pre_mult, conv->fftComplexSize);
 | 
			
		||||
 | 
			
		||||
		ComplexMultiplyAccumulate(conv->conv, conv->segments[conv->current], conv->segmentsIr[0],
 | 
			
		||||
				conv->fftComplexSize);
 | 
			
		||||
		fft_convolve_accum(conv->fft, &conv->conv, &conv->segments[conv->current], &conv->segmentsIr[0],
 | 
			
		||||
				conv->fftComplexSize, 1.0f / conv->segSize);
 | 
			
		||||
 | 
			
		||||
		kiss_fftri_f32(conv->ifft, conv->conv, conv->fft_buffer);
 | 
			
		||||
 | 
			
		||||
		for (i = 0; i < conv->segSize; i++)
 | 
			
		||||
			conv->fft_buffer[i] /= conv->segSize;
 | 
			
		||||
		ifft_run(conv->ifft, &conv->conv, conv->fft_buffer);
 | 
			
		||||
 | 
			
		||||
		Sum(output + processed, conv->fft_buffer + inputBufferPos, conv->overlap + inputBufferPos, processing);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -265,19 +334,19 @@ struct convolver *convolver_new(int head_block, int tail_block, const float *ir,
 | 
			
		|||
	if (irlen > conv->tailBlockSize) {
 | 
			
		||||
		int conv1IrLen = SPA_MIN(irlen - conv->tailBlockSize, conv->tailBlockSize);
 | 
			
		||||
		conv->tailConvolver0 = convolver1_new(conv->headBlockSize, ir + conv->tailBlockSize, conv1IrLen);
 | 
			
		||||
		conv->tailOutput0 = calloc(conv->tailBlockSize, sizeof(float));
 | 
			
		||||
		conv->tailPrecalculated0 = calloc(conv->tailBlockSize, sizeof(float));
 | 
			
		||||
		conv->tailOutput0 = fft_alloc(conv->tailBlockSize * sizeof(float));
 | 
			
		||||
		conv->tailPrecalculated0 = fft_alloc(conv->tailBlockSize * sizeof(float));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (irlen > 2 * conv->tailBlockSize) {
 | 
			
		||||
		int tailIrLen = irlen - (2 * conv->tailBlockSize);
 | 
			
		||||
		conv->tailConvolver = convolver1_new(conv->tailBlockSize, ir + (2 * conv->tailBlockSize), tailIrLen);
 | 
			
		||||
		conv->tailOutput = calloc(conv->tailBlockSize, sizeof(float));
 | 
			
		||||
		conv->tailPrecalculated = calloc(conv->tailBlockSize, sizeof(float));
 | 
			
		||||
		conv->tailOutput = fft_alloc(conv->tailBlockSize * sizeof(float));
 | 
			
		||||
		conv->tailPrecalculated = fft_alloc(conv->tailBlockSize * sizeof(float));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (conv->tailConvolver0 || conv->tailConvolver)
 | 
			
		||||
		conv->tailInput = calloc(conv->tailBlockSize, sizeof(float));
 | 
			
		||||
		conv->tailInput = fft_alloc(conv->tailBlockSize * sizeof(float));
 | 
			
		||||
 | 
			
		||||
	conv->tailInputFill = 0;
 | 
			
		||||
	conv->precalculatedPos = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -293,11 +362,11 @@ void convolver_free(struct convolver *conv)
 | 
			
		|||
		convolver1_free(conv->tailConvolver0);
 | 
			
		||||
	if (conv->tailConvolver)
 | 
			
		||||
		convolver1_free(conv->tailConvolver);
 | 
			
		||||
	free(conv->tailOutput0);
 | 
			
		||||
	free(conv->tailPrecalculated0);
 | 
			
		||||
	free(conv->tailOutput);
 | 
			
		||||
	free(conv->tailPrecalculated);
 | 
			
		||||
	free(conv->tailInput);
 | 
			
		||||
	fft_free(conv->tailOutput0);
 | 
			
		||||
	fft_free(conv->tailPrecalculated0);
 | 
			
		||||
	fft_free(conv->tailOutput);
 | 
			
		||||
	fft_free(conv->tailPrecalculated);
 | 
			
		||||
	fft_free(conv->tailInput);
 | 
			
		||||
	free(conv);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,442 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
 *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
 | 
			
		||||
 *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
 | 
			
		||||
 *
 | 
			
		||||
 *  SPDX-License-Identifier: BSD-3-Clause
 | 
			
		||||
 *  See COPYING file for more information.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "_kiss_fft_guts_f32.h"
 | 
			
		||||
/* The guts header contains all the multiplication and addition macros that are defined for
 | 
			
		||||
 fixed or floating point complex numbers.  It also declares the kf_ internal functions.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
kf_bfly2 (kiss_fft_f32_cpx * Fout,
 | 
			
		||||
    const size_t fstride, const kiss_fft_f32_cfg st, int m)
 | 
			
		||||
{
 | 
			
		||||
  kiss_fft_f32_cpx *Fout2;
 | 
			
		||||
  kiss_fft_f32_cpx *tw1 = st->twiddles;
 | 
			
		||||
  kiss_fft_f32_cpx t;
 | 
			
		||||
  Fout2 = Fout + m;
 | 
			
		||||
  do {
 | 
			
		||||
    C_FIXDIV (*Fout, 2);
 | 
			
		||||
    C_FIXDIV (*Fout2, 2);
 | 
			
		||||
 | 
			
		||||
    C_MUL (t, *Fout2, *tw1);
 | 
			
		||||
    tw1 += fstride;
 | 
			
		||||
    C_SUB (*Fout2, *Fout, t);
 | 
			
		||||
    C_ADDTO (*Fout, t);
 | 
			
		||||
    ++Fout2;
 | 
			
		||||
    ++Fout;
 | 
			
		||||
  } while (--m);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
kf_bfly4 (kiss_fft_f32_cpx * Fout,
 | 
			
		||||
    const size_t fstride, const kiss_fft_f32_cfg st, const size_t m)
 | 
			
		||||
{
 | 
			
		||||
  kiss_fft_f32_cpx *tw1, *tw2, *tw3;
 | 
			
		||||
  kiss_fft_f32_cpx scratch[6];
 | 
			
		||||
  size_t k = m;
 | 
			
		||||
  const size_t m2 = 2 * m;
 | 
			
		||||
  const size_t m3 = 3 * m;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  tw3 = tw2 = tw1 = st->twiddles;
 | 
			
		||||
 | 
			
		||||
  do {
 | 
			
		||||
    C_FIXDIV (*Fout, 4);
 | 
			
		||||
    C_FIXDIV (Fout[m], 4);
 | 
			
		||||
    C_FIXDIV (Fout[m2], 4);
 | 
			
		||||
    C_FIXDIV (Fout[m3], 4);
 | 
			
		||||
 | 
			
		||||
    C_MUL (scratch[0], Fout[m], *tw1);
 | 
			
		||||
    C_MUL (scratch[1], Fout[m2], *tw2);
 | 
			
		||||
    C_MUL (scratch[2], Fout[m3], *tw3);
 | 
			
		||||
 | 
			
		||||
    C_SUB (scratch[5], *Fout, scratch[1]);
 | 
			
		||||
    C_ADDTO (*Fout, scratch[1]);
 | 
			
		||||
    C_ADD (scratch[3], scratch[0], scratch[2]);
 | 
			
		||||
    C_SUB (scratch[4], scratch[0], scratch[2]);
 | 
			
		||||
    C_SUB (Fout[m2], *Fout, scratch[3]);
 | 
			
		||||
    tw1 += fstride;
 | 
			
		||||
    tw2 += fstride * 2;
 | 
			
		||||
    tw3 += fstride * 3;
 | 
			
		||||
    C_ADDTO (*Fout, scratch[3]);
 | 
			
		||||
 | 
			
		||||
    if (st->inverse) {
 | 
			
		||||
      Fout[m].r = scratch[5].r - scratch[4].i;
 | 
			
		||||
      Fout[m].i = scratch[5].i + scratch[4].r;
 | 
			
		||||
      Fout[m3].r = scratch[5].r + scratch[4].i;
 | 
			
		||||
      Fout[m3].i = scratch[5].i - scratch[4].r;
 | 
			
		||||
    } else {
 | 
			
		||||
      Fout[m].r = scratch[5].r + scratch[4].i;
 | 
			
		||||
      Fout[m].i = scratch[5].i - scratch[4].r;
 | 
			
		||||
      Fout[m3].r = scratch[5].r - scratch[4].i;
 | 
			
		||||
      Fout[m3].i = scratch[5].i + scratch[4].r;
 | 
			
		||||
    }
 | 
			
		||||
    ++Fout;
 | 
			
		||||
  } while (--k);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
kf_bfly3 (kiss_fft_f32_cpx * Fout,
 | 
			
		||||
    const size_t fstride, const kiss_fft_f32_cfg st, size_t m)
 | 
			
		||||
{
 | 
			
		||||
  size_t k = m;
 | 
			
		||||
  const size_t m2 = 2 * m;
 | 
			
		||||
  kiss_fft_f32_cpx *tw1, *tw2;
 | 
			
		||||
  kiss_fft_f32_cpx scratch[5];
 | 
			
		||||
  kiss_fft_f32_cpx epi3;
 | 
			
		||||
  epi3 = st->twiddles[fstride * m];
 | 
			
		||||
 | 
			
		||||
  tw1 = tw2 = st->twiddles;
 | 
			
		||||
 | 
			
		||||
  do {
 | 
			
		||||
    C_FIXDIV (*Fout, 3);
 | 
			
		||||
    C_FIXDIV (Fout[m], 3);
 | 
			
		||||
    C_FIXDIV (Fout[m2], 3);
 | 
			
		||||
 | 
			
		||||
    C_MUL (scratch[1], Fout[m], *tw1);
 | 
			
		||||
    C_MUL (scratch[2], Fout[m2], *tw2);
 | 
			
		||||
 | 
			
		||||
    C_ADD (scratch[3], scratch[1], scratch[2]);
 | 
			
		||||
    C_SUB (scratch[0], scratch[1], scratch[2]);
 | 
			
		||||
    tw1 += fstride;
 | 
			
		||||
    tw2 += fstride * 2;
 | 
			
		||||
 | 
			
		||||
    Fout[m].r = Fout->r - HALF_OF (scratch[3].r);
 | 
			
		||||
    Fout[m].i = Fout->i - HALF_OF (scratch[3].i);
 | 
			
		||||
 | 
			
		||||
    C_MULBYSCALAR (scratch[0], epi3.i);
 | 
			
		||||
 | 
			
		||||
    C_ADDTO (*Fout, scratch[3]);
 | 
			
		||||
 | 
			
		||||
    Fout[m2].r = Fout[m].r + scratch[0].i;
 | 
			
		||||
    Fout[m2].i = Fout[m].i - scratch[0].r;
 | 
			
		||||
 | 
			
		||||
    Fout[m].r -= scratch[0].i;
 | 
			
		||||
    Fout[m].i += scratch[0].r;
 | 
			
		||||
 | 
			
		||||
    ++Fout;
 | 
			
		||||
  } while (--k);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
kf_bfly5 (kiss_fft_f32_cpx * Fout,
 | 
			
		||||
    const size_t fstride, const kiss_fft_f32_cfg st, int m)
 | 
			
		||||
{
 | 
			
		||||
  kiss_fft_f32_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
 | 
			
		||||
  int u;
 | 
			
		||||
  kiss_fft_f32_cpx scratch[13];
 | 
			
		||||
  kiss_fft_f32_cpx *twiddles = st->twiddles;
 | 
			
		||||
  kiss_fft_f32_cpx *tw;
 | 
			
		||||
  kiss_fft_f32_cpx ya, yb;
 | 
			
		||||
  ya = twiddles[fstride * m];
 | 
			
		||||
  yb = twiddles[fstride * 2 * m];
 | 
			
		||||
 | 
			
		||||
  Fout0 = Fout;
 | 
			
		||||
  Fout1 = Fout0 + m;
 | 
			
		||||
  Fout2 = Fout0 + 2 * m;
 | 
			
		||||
  Fout3 = Fout0 + 3 * m;
 | 
			
		||||
  Fout4 = Fout0 + 4 * m;
 | 
			
		||||
 | 
			
		||||
  tw = st->twiddles;
 | 
			
		||||
  for (u = 0; u < m; ++u) {
 | 
			
		||||
    C_FIXDIV (*Fout0, 5);
 | 
			
		||||
    C_FIXDIV (*Fout1, 5);
 | 
			
		||||
    C_FIXDIV (*Fout2, 5);
 | 
			
		||||
    C_FIXDIV (*Fout3, 5);
 | 
			
		||||
    C_FIXDIV (*Fout4, 5);
 | 
			
		||||
    scratch[0] = *Fout0;
 | 
			
		||||
 | 
			
		||||
    C_MUL (scratch[1], *Fout1, tw[u * fstride]);
 | 
			
		||||
    C_MUL (scratch[2], *Fout2, tw[2 * u * fstride]);
 | 
			
		||||
    C_MUL (scratch[3], *Fout3, tw[3 * u * fstride]);
 | 
			
		||||
    C_MUL (scratch[4], *Fout4, tw[4 * u * fstride]);
 | 
			
		||||
 | 
			
		||||
    C_ADD (scratch[7], scratch[1], scratch[4]);
 | 
			
		||||
    C_SUB (scratch[10], scratch[1], scratch[4]);
 | 
			
		||||
    C_ADD (scratch[8], scratch[2], scratch[3]);
 | 
			
		||||
    C_SUB (scratch[9], scratch[2], scratch[3]);
 | 
			
		||||
 | 
			
		||||
    Fout0->r += scratch[7].r + scratch[8].r;
 | 
			
		||||
    Fout0->i += scratch[7].i + scratch[8].i;
 | 
			
		||||
 | 
			
		||||
    scratch[5].r =
 | 
			
		||||
        scratch[0].r + S_MUL (scratch[7].r, ya.r) + S_MUL (scratch[8].r, yb.r);
 | 
			
		||||
    scratch[5].i =
 | 
			
		||||
        scratch[0].i + S_MUL (scratch[7].i, ya.r) + S_MUL (scratch[8].i, yb.r);
 | 
			
		||||
 | 
			
		||||
    scratch[6].r = S_MUL (scratch[10].i, ya.i) + S_MUL (scratch[9].i, yb.i);
 | 
			
		||||
    scratch[6].i = -S_MUL (scratch[10].r, ya.i) - S_MUL (scratch[9].r, yb.i);
 | 
			
		||||
 | 
			
		||||
    C_SUB (*Fout1, scratch[5], scratch[6]);
 | 
			
		||||
    C_ADD (*Fout4, scratch[5], scratch[6]);
 | 
			
		||||
 | 
			
		||||
    scratch[11].r =
 | 
			
		||||
        scratch[0].r + S_MUL (scratch[7].r, yb.r) + S_MUL (scratch[8].r, ya.r);
 | 
			
		||||
    scratch[11].i =
 | 
			
		||||
        scratch[0].i + S_MUL (scratch[7].i, yb.r) + S_MUL (scratch[8].i, ya.r);
 | 
			
		||||
    scratch[12].r = -S_MUL (scratch[10].i, yb.i) + S_MUL (scratch[9].i, ya.i);
 | 
			
		||||
    scratch[12].i = S_MUL (scratch[10].r, yb.i) - S_MUL (scratch[9].r, ya.i);
 | 
			
		||||
 | 
			
		||||
    C_ADD (*Fout2, scratch[11], scratch[12]);
 | 
			
		||||
    C_SUB (*Fout3, scratch[11], scratch[12]);
 | 
			
		||||
 | 
			
		||||
    ++Fout0;
 | 
			
		||||
    ++Fout1;
 | 
			
		||||
    ++Fout2;
 | 
			
		||||
    ++Fout3;
 | 
			
		||||
    ++Fout4;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* perform the butterfly for one stage of a mixed radix FFT */
 | 
			
		||||
static void
 | 
			
		||||
kf_bfly_generic (kiss_fft_f32_cpx * Fout,
 | 
			
		||||
    const size_t fstride, const kiss_fft_f32_cfg st, int m, int p)
 | 
			
		||||
{
 | 
			
		||||
  int u, k, q1, q;
 | 
			
		||||
  kiss_fft_f32_cpx *twiddles = st->twiddles;
 | 
			
		||||
  kiss_fft_f32_cpx t;
 | 
			
		||||
  int Norig = st->nfft;
 | 
			
		||||
 | 
			
		||||
  kiss_fft_f32_cpx *scratch =
 | 
			
		||||
      (kiss_fft_f32_cpx *) KISS_FFT_F32_TMP_ALLOC (sizeof (kiss_fft_f32_cpx) *
 | 
			
		||||
      p);
 | 
			
		||||
 | 
			
		||||
  for (u = 0; u < m; ++u) {
 | 
			
		||||
    k = u;
 | 
			
		||||
    for (q1 = 0; q1 < p; ++q1) {
 | 
			
		||||
      scratch[q1] = Fout[k];
 | 
			
		||||
      C_FIXDIV (scratch[q1], p);
 | 
			
		||||
      k += m;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    k = u;
 | 
			
		||||
    for (q1 = 0; q1 < p; ++q1) {
 | 
			
		||||
      int twidx = 0;
 | 
			
		||||
      Fout[k] = scratch[0];
 | 
			
		||||
      for (q = 1; q < p; ++q) {
 | 
			
		||||
        twidx += fstride * k;
 | 
			
		||||
        if (twidx >= Norig)
 | 
			
		||||
          twidx -= Norig;
 | 
			
		||||
        C_MUL (t, scratch[q], twiddles[twidx]);
 | 
			
		||||
        C_ADDTO (Fout[k], t);
 | 
			
		||||
      }
 | 
			
		||||
      k += m;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  KISS_FFT_F32_TMP_FREE (scratch);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
kf_work (kiss_fft_f32_cpx * Fout,
 | 
			
		||||
    const kiss_fft_f32_cpx * f,
 | 
			
		||||
    const size_t fstride, int in_stride, int *factors,
 | 
			
		||||
    const kiss_fft_f32_cfg st)
 | 
			
		||||
{
 | 
			
		||||
  kiss_fft_f32_cpx *Fout_beg = Fout;
 | 
			
		||||
  const int p = *factors++;     /* the radix  */
 | 
			
		||||
  const int m = *factors++;     /* stage's fft length/p */
 | 
			
		||||
  const kiss_fft_f32_cpx *Fout_end = Fout + p * m;
 | 
			
		||||
 | 
			
		||||
#ifdef _OPENMP
 | 
			
		||||
  // use openmp extensions at the 
 | 
			
		||||
  // top-level (not recursive)
 | 
			
		||||
  if (fstride == 1 && p <= 5 && m != 1) {
 | 
			
		||||
    int k;
 | 
			
		||||
 | 
			
		||||
    // execute the p different work units in different threads
 | 
			
		||||
#       pragma omp parallel for
 | 
			
		||||
    for (k = 0; k < p; ++k)
 | 
			
		||||
      kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
 | 
			
		||||
          in_stride, factors, st);
 | 
			
		||||
    // all threads have joined by this point
 | 
			
		||||
 | 
			
		||||
    switch (p) {
 | 
			
		||||
      case 2:
 | 
			
		||||
        kf_bfly2 (Fout, fstride, st, m);
 | 
			
		||||
        break;
 | 
			
		||||
      case 3:
 | 
			
		||||
        kf_bfly3 (Fout, fstride, st, m);
 | 
			
		||||
        break;
 | 
			
		||||
      case 4:
 | 
			
		||||
        kf_bfly4 (Fout, fstride, st, m);
 | 
			
		||||
        break;
 | 
			
		||||
      case 5:
 | 
			
		||||
        kf_bfly5 (Fout, fstride, st, m);
 | 
			
		||||
        break;
 | 
			
		||||
      default:
 | 
			
		||||
        kf_bfly_generic (Fout, fstride, st, m, p);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  if (m == 1) {
 | 
			
		||||
    do {
 | 
			
		||||
      *Fout = *f;
 | 
			
		||||
      f += fstride * in_stride;
 | 
			
		||||
    } while (++Fout != Fout_end);
 | 
			
		||||
  } else {
 | 
			
		||||
    do {
 | 
			
		||||
      // recursive call:
 | 
			
		||||
      // DFT of size m*p performed by doing
 | 
			
		||||
      // p instances of smaller DFTs of size m, 
 | 
			
		||||
      // each one takes a decimated version of the input
 | 
			
		||||
      kf_work (Fout, f, fstride * p, in_stride, factors, st);
 | 
			
		||||
      f += fstride * in_stride;
 | 
			
		||||
    } while ((Fout += m) != Fout_end);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Fout = Fout_beg;
 | 
			
		||||
 | 
			
		||||
  // recombine the p smaller DFTs 
 | 
			
		||||
  switch (p) {
 | 
			
		||||
    case 2:
 | 
			
		||||
      kf_bfly2 (Fout, fstride, st, m);
 | 
			
		||||
      break;
 | 
			
		||||
    case 3:
 | 
			
		||||
      kf_bfly3 (Fout, fstride, st, m);
 | 
			
		||||
      break;
 | 
			
		||||
    case 4:
 | 
			
		||||
      kf_bfly4 (Fout, fstride, st, m);
 | 
			
		||||
      break;
 | 
			
		||||
    case 5:
 | 
			
		||||
      kf_bfly5 (Fout, fstride, st, m);
 | 
			
		||||
      break;
 | 
			
		||||
    default:
 | 
			
		||||
      kf_bfly_generic (Fout, fstride, st, m, p);
 | 
			
		||||
      break;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*  facbuf is populated by p1,m1,p2,m2, ...
 | 
			
		||||
    where 
 | 
			
		||||
    p[i] * m[i] = m[i-1]
 | 
			
		||||
    m0 = n                  */
 | 
			
		||||
static void
 | 
			
		||||
kf_factor (int n, int *facbuf)
 | 
			
		||||
{
 | 
			
		||||
  int p = 4;
 | 
			
		||||
  double floor_sqrt;
 | 
			
		||||
  floor_sqrt = floor (sqrt ((double) n));
 | 
			
		||||
 | 
			
		||||
  /*factor out powers of 4, powers of 2, then any remaining primes */
 | 
			
		||||
  do {
 | 
			
		||||
    while (n % p) {
 | 
			
		||||
      switch (p) {
 | 
			
		||||
        case 4:
 | 
			
		||||
          p = 2;
 | 
			
		||||
          break;
 | 
			
		||||
        case 2:
 | 
			
		||||
          p = 3;
 | 
			
		||||
          break;
 | 
			
		||||
        default:
 | 
			
		||||
          p += 2;
 | 
			
		||||
          break;
 | 
			
		||||
      }
 | 
			
		||||
      if (p > floor_sqrt)
 | 
			
		||||
        p = n;                  /* no more factors, skip to end */
 | 
			
		||||
    }
 | 
			
		||||
    n /= p;
 | 
			
		||||
    *facbuf++ = p;
 | 
			
		||||
    *facbuf++ = n;
 | 
			
		||||
  } while (n > 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *
 | 
			
		||||
 * User-callable function to allocate all necessary storage space for the fft.
 | 
			
		||||
 *
 | 
			
		||||
 * The return value is a contiguous block of memory, allocated with malloc.  As such,
 | 
			
		||||
 * It can be freed with free(), rather than a kiss_fft_f32-specific function.
 | 
			
		||||
 * */
 | 
			
		||||
kiss_fft_f32_cfg
 | 
			
		||||
kiss_fft_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
 | 
			
		||||
{
 | 
			
		||||
  kiss_fft_f32_cfg st = NULL;
 | 
			
		||||
  size_t memneeded = sizeof (struct kiss_fft_f32_state)
 | 
			
		||||
      + sizeof (kiss_fft_f32_cpx) * (nfft - 1); /* twiddle factors */
 | 
			
		||||
 | 
			
		||||
  if (lenmem == NULL) {
 | 
			
		||||
    st = (kiss_fft_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
 | 
			
		||||
  } else {
 | 
			
		||||
    if (mem != NULL && *lenmem >= memneeded)
 | 
			
		||||
      st = (kiss_fft_f32_cfg) mem;
 | 
			
		||||
    *lenmem = memneeded;
 | 
			
		||||
  }
 | 
			
		||||
  if (st) {
 | 
			
		||||
    int i;
 | 
			
		||||
    st->nfft = nfft;
 | 
			
		||||
    st->inverse = inverse_fft;
 | 
			
		||||
 | 
			
		||||
    for (i = 0; i < nfft; ++i) {
 | 
			
		||||
      const double pi =
 | 
			
		||||
          3.141592653589793238462643383279502884197169399375105820974944;
 | 
			
		||||
      double phase = -2 * pi * i / nfft;
 | 
			
		||||
      if (st->inverse)
 | 
			
		||||
        phase *= -1;
 | 
			
		||||
      kf_cexp (st->twiddles + i, phase);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    kf_factor (nfft, st->factors);
 | 
			
		||||
  }
 | 
			
		||||
  return st;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
kiss_fft_f32_stride (kiss_fft_f32_cfg st, const kiss_fft_f32_cpx * fin,
 | 
			
		||||
    kiss_fft_f32_cpx * fout, int in_stride)
 | 
			
		||||
{
 | 
			
		||||
  if (fin == fout) {
 | 
			
		||||
    //NOTE: this is not really an in-place FFT algorithm.
 | 
			
		||||
    //It just performs an out-of-place FFT into a temp buffer
 | 
			
		||||
    kiss_fft_f32_cpx *tmpbuf =
 | 
			
		||||
        (kiss_fft_f32_cpx *) KISS_FFT_F32_TMP_ALLOC (sizeof (kiss_fft_f32_cpx) *
 | 
			
		||||
        st->nfft);
 | 
			
		||||
    kf_work (tmpbuf, fin, 1, in_stride, st->factors, st);
 | 
			
		||||
    memcpy (fout, tmpbuf, sizeof (kiss_fft_f32_cpx) * st->nfft);
 | 
			
		||||
    KISS_FFT_F32_TMP_FREE (tmpbuf);
 | 
			
		||||
  } else {
 | 
			
		||||
    kf_work (fout, fin, 1, in_stride, st->factors, st);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
kiss_fft_f32 (kiss_fft_f32_cfg cfg, const kiss_fft_f32_cpx * fin,
 | 
			
		||||
    kiss_fft_f32_cpx * fout)
 | 
			
		||||
{
 | 
			
		||||
  kiss_fft_f32_stride (cfg, fin, fout, 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Kept as part of the public interface; the body is intentionally empty. */
void
kiss_fft_f32_cleanup (void)
{
  /* nothing needed any more */
}
 | 
			
		||||
 | 
			
		||||
/*
 * Return the smallest integer k such that k >= n and k has only "fast"
 * prime factors (2, 3 and 5).
 *
 * Values of n below 1 are treated as 1: with n == 0 the factor-stripping
 * loops would otherwise never terminate (0 stays divisible by 2 forever),
 * and a non-positive size is not a usable FFT length anyway.
 */
int
kiss_fft_f32_next_fast_size (int n)
{
  if (n < 1)
    n = 1;

  for (;; n++) {
    int m = n;

    /* strip every factor of 2, 3 and 5 */
    while ((m % 2) == 0)
      m /= 2;
    while ((m % 3) == 0)
      m /= 3;
    while ((m % 5) == 0)
      m /= 5;

    if (m <= 1)
      return n;                 /* n is completely factorable by twos, threes, and fives */
  }
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,112 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
 *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
 | 
			
		||||
 *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
 | 
			
		||||
 *
 | 
			
		||||
 *  SPDX-License-Identifier: BSD-3-Clause
 | 
			
		||||
 *  See COPYING file for more information.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef KISS_FFT_F32_H
 | 
			
		||||
#define KISS_FFT_F32_H
 | 
			
		||||
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C" {
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 ATTENTION!
 | 
			
		||||
 If you would like a :
 | 
			
		||||
 -- a utility that will handle the caching of fft objects
 | 
			
		||||
 -- real-only (no imaginary time component ) FFT
 | 
			
		||||
 -- a multi-dimensional FFT
 | 
			
		||||
 -- a command-line utility to perform ffts
 | 
			
		||||
 -- a command-line utility to perform fast-convolution filtering
 | 
			
		||||
 | 
			
		||||
 Then see kfc.h kiss_fftr_f32.h kiss_fft_f32nd.h fftutil.c kiss_fastfir.c
 | 
			
		||||
  in the tools/ directory.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#define KISS_FFT_F32_MALLOC malloc
 | 
			
		||||
#define KISS_FFT_F32_FREE free
 | 
			
		||||
#define kiss_fft_f32_scalar float
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    kiss_fft_f32_scalar r;
 | 
			
		||||
    kiss_fft_f32_scalar i;
 | 
			
		||||
}kiss_fft_f32_cpx;
 | 
			
		||||
 | 
			
		||||
typedef struct kiss_fft_f32_state* kiss_fft_f32_cfg;
 | 
			
		||||
 | 
			
		||||
/* 
 | 
			
		||||
 *  kiss_fft_f32_alloc
 | 
			
		||||
 *  
 | 
			
		||||
 *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
 | 
			
		||||
 *
 | 
			
		||||
 *  typical usage:      kiss_fft_f32_cfg mycfg=kiss_fft_f32_alloc(1024,0,NULL,NULL);
 | 
			
		||||
 *
 | 
			
		||||
 *  The return value from fft_alloc is a cfg buffer used internally
 | 
			
		||||
 *  by the fft routine or NULL.
 | 
			
		||||
 *
 | 
			
		||||
 *  If lenmem is NULL, then kiss_fft_f32_alloc will allocate a cfg buffer using malloc.
 | 
			
		||||
 *  The returned value should be free()d when done to avoid memory leaks.
 | 
			
		||||
 *  
 | 
			
		||||
 *  The state can be placed in a user supplied buffer 'mem':
 | 
			
		||||
 *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
 | 
			
		||||
 *      then the function places the cfg in mem and the size used in *lenmem
 | 
			
		||||
 *      and returns mem.
 | 
			
		||||
 *  
 | 
			
		||||
 *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
 | 
			
		||||
 *      then the function returns NULL and places the minimum cfg 
 | 
			
		||||
 *      buffer size in *lenmem.
 | 
			
		||||
 * */
 | 
			
		||||
 | 
			
		||||
kiss_fft_f32_cfg kiss_fft_f32_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); 
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * kiss_fft_f32(cfg,fin,fout)
 | 
			
		||||
 *
 | 
			
		||||
 * Perform an FFT on a complex input buffer.
 | 
			
		||||
 * for a forward FFT,
 | 
			
		||||
 * fin should be  f[0] , f[1] , ... ,f[nfft-1]
 | 
			
		||||
 * fout will be   F[0] , F[1] , ... ,F[nfft-1]
 | 
			
		||||
 * Note that each element is complex and can be accessed like
 | 
			
		||||
    f[k].r and f[k].i
 | 
			
		||||
 * */
 | 
			
		||||
void kiss_fft_f32(kiss_fft_f32_cfg cfg,const kiss_fft_f32_cpx *fin,kiss_fft_f32_cpx *fout);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 A more generic version of the above function. It reads its input from every Nth sample.
 | 
			
		||||
 * */
 | 
			
		||||
void kiss_fft_f32_stride(kiss_fft_f32_cfg cfg,const kiss_fft_f32_cpx *fin,kiss_fft_f32_cpx *fout,int fin_stride);
 | 
			
		||||
 | 
			
		||||
/* If kiss_fft_f32_alloc allocated a buffer, it is one contiguous 
 | 
			
		||||
   buffer and can be simply free()d when no longer needed*/
 | 
			
		||||
#define kiss_fft_f32_free KISS_FFT_F32_FREE
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up 
 | 
			
		||||
 your compiler output to call this before you exit.
 | 
			
		||||
*/
 | 
			
		||||
void kiss_fft_f32_cleanup(void);
 | 
			
		||||
	
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
 | 
			
		||||
 */
 | 
			
		||||
int kiss_fft_f32_next_fast_size(int n);
 | 
			
		||||
 | 
			
		||||
/* for real ffts, we need an even size */
 | 
			
		||||
#define kiss_fftr_f32_next_fast_size_real(n) \
 | 
			
		||||
        (kiss_fft_f32_next_fast_size( ((n)+1)>>1)<<1)
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
} 
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -1,148 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
 *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
 | 
			
		||||
 *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
 | 
			
		||||
 *
 | 
			
		||||
 *  SPDX-License-Identifier: BSD-3-Clause
 | 
			
		||||
 *  See COPYING file for more information.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "kiss_fftr_f32.h"
 | 
			
		||||
#include "_kiss_fft_guts_f32.h"
 | 
			
		||||
 | 
			
		||||
struct kiss_fftr_f32_state
 | 
			
		||||
{
 | 
			
		||||
  kiss_fft_f32_cfg substate;
 | 
			
		||||
  kiss_fft_f32_cpx *tmpbuf;
 | 
			
		||||
  kiss_fft_f32_cpx *super_twiddles;
 | 
			
		||||
#ifdef USE_SIMD
 | 
			
		||||
  void *pad;
 | 
			
		||||
#endif
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
kiss_fftr_f32_cfg
 | 
			
		||||
kiss_fftr_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
 | 
			
		||||
{
 | 
			
		||||
  int i;
 | 
			
		||||
  kiss_fftr_f32_cfg st = NULL;
 | 
			
		||||
  size_t subsize = 0, memneeded;
 | 
			
		||||
 | 
			
		||||
  nfft >>= 1;
 | 
			
		||||
 | 
			
		||||
  kiss_fft_f32_alloc (nfft, inverse_fft, NULL, &subsize);
 | 
			
		||||
  memneeded =
 | 
			
		||||
      ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state)) +
 | 
			
		||||
      ALIGN_STRUCT (subsize) + sizeof (kiss_fft_f32_cpx) * (nfft * 3 / 2);
 | 
			
		||||
 | 
			
		||||
  if (lenmem == NULL) {
 | 
			
		||||
    st = (kiss_fftr_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
 | 
			
		||||
  } else {
 | 
			
		||||
    if (*lenmem >= memneeded)
 | 
			
		||||
      st = (kiss_fftr_f32_cfg) mem;
 | 
			
		||||
    *lenmem = memneeded;
 | 
			
		||||
  }
 | 
			
		||||
  if (!st)
 | 
			
		||||
    return NULL;
 | 
			
		||||
 | 
			
		||||
  st->substate = (kiss_fft_f32_cfg) (((char *) st) + ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state)));       /*just beyond kiss_fftr_f32_state struct */
 | 
			
		||||
  st->tmpbuf =
 | 
			
		||||
      (kiss_fft_f32_cpx *) (((char *) st->substate) + ALIGN_STRUCT (subsize));
 | 
			
		||||
  st->super_twiddles = st->tmpbuf + nfft;
 | 
			
		||||
  kiss_fft_f32_alloc (nfft, inverse_fft, st->substate, &subsize);
 | 
			
		||||
 | 
			
		||||
  for (i = 0; i < nfft / 2; ++i) {
 | 
			
		||||
    double phase =
 | 
			
		||||
        -3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
 | 
			
		||||
    if (inverse_fft)
 | 
			
		||||
      phase *= -1;
 | 
			
		||||
    kf_cexp (st->super_twiddles + i, phase);
 | 
			
		||||
  }
 | 
			
		||||
  return st;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
kiss_fftr_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_scalar * timedata,
 | 
			
		||||
    kiss_fft_f32_cpx * freqdata)
 | 
			
		||||
{
 | 
			
		||||
  /* input buffer timedata is stored row-wise */
 | 
			
		||||
  int k, ncfft;
 | 
			
		||||
  kiss_fft_f32_cpx fpnk, fpk, f1k, f2k, tw, tdc;
 | 
			
		||||
 | 
			
		||||
  ncfft = st->substate->nfft;
 | 
			
		||||
 | 
			
		||||
  /*perform the parallel fft of two real signals packed in real,imag */
 | 
			
		||||
  kiss_fft_f32 (st->substate, (const kiss_fft_f32_cpx *) timedata, st->tmpbuf);
 | 
			
		||||
  /* The real part of the DC element of the frequency spectrum in st->tmpbuf
 | 
			
		||||
   * contains the sum of the even-numbered elements of the input time sequence
 | 
			
		||||
   * The imag part is the sum of the odd-numbered elements
 | 
			
		||||
   *
 | 
			
		||||
   * The sum of tdc.r and tdc.i is the sum of the input time sequence. 
 | 
			
		||||
   *      yielding DC of input time sequence
 | 
			
		||||
   * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... 
 | 
			
		||||
   *      yielding Nyquist bin of input time sequence
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  tdc.r = st->tmpbuf[0].r;
 | 
			
		||||
  tdc.i = st->tmpbuf[0].i;
 | 
			
		||||
  C_FIXDIV (tdc, 2);
 | 
			
		||||
  CHECK_OVERFLOW_OP (tdc.r, +, tdc.i);
 | 
			
		||||
  CHECK_OVERFLOW_OP (tdc.r, -, tdc.i);
 | 
			
		||||
  freqdata[0].r = tdc.r + tdc.i;
 | 
			
		||||
  freqdata[ncfft].r = tdc.r - tdc.i;
 | 
			
		||||
#ifdef USE_SIMD
 | 
			
		||||
  freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps (0);
 | 
			
		||||
#else
 | 
			
		||||
  freqdata[ncfft].i = freqdata[0].i = 0;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  for (k = 1; k <= ncfft / 2; ++k) {
 | 
			
		||||
    fpk = st->tmpbuf[k];
 | 
			
		||||
    fpnk.r = st->tmpbuf[ncfft - k].r;
 | 
			
		||||
    fpnk.i = -st->tmpbuf[ncfft - k].i;
 | 
			
		||||
    C_FIXDIV (fpk, 2);
 | 
			
		||||
    C_FIXDIV (fpnk, 2);
 | 
			
		||||
 | 
			
		||||
    C_ADD (f1k, fpk, fpnk);
 | 
			
		||||
    C_SUB (f2k, fpk, fpnk);
 | 
			
		||||
    C_MUL (tw, f2k, st->super_twiddles[k - 1]);
 | 
			
		||||
 | 
			
		||||
    freqdata[k].r = HALF_OF (f1k.r + tw.r);
 | 
			
		||||
    freqdata[k].i = HALF_OF (f1k.i + tw.i);
 | 
			
		||||
    freqdata[ncfft - k].r = HALF_OF (f1k.r - tw.r);
 | 
			
		||||
    freqdata[ncfft - k].i = HALF_OF (tw.i - f1k.i);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
kiss_fftri_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_cpx * freqdata,
 | 
			
		||||
    kiss_fft_f32_scalar * timedata)
 | 
			
		||||
{
 | 
			
		||||
  /* input buffer timedata is stored row-wise */
 | 
			
		||||
  int k, ncfft;
 | 
			
		||||
 | 
			
		||||
  ncfft = st->substate->nfft;
 | 
			
		||||
 | 
			
		||||
  st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
 | 
			
		||||
  st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
 | 
			
		||||
  C_FIXDIV (st->tmpbuf[0], 2);
 | 
			
		||||
 | 
			
		||||
  for (k = 1; k <= ncfft / 2; ++k) {
 | 
			
		||||
    kiss_fft_f32_cpx fk, fnkc, fek, fok, tmp;
 | 
			
		||||
    fk = freqdata[k];
 | 
			
		||||
    fnkc.r = freqdata[ncfft - k].r;
 | 
			
		||||
    fnkc.i = -freqdata[ncfft - k].i;
 | 
			
		||||
    C_FIXDIV (fk, 2);
 | 
			
		||||
    C_FIXDIV (fnkc, 2);
 | 
			
		||||
 | 
			
		||||
    C_ADD (fek, fk, fnkc);
 | 
			
		||||
    C_SUB (tmp, fk, fnkc);
 | 
			
		||||
    C_MUL (fok, tmp, st->super_twiddles[k - 1]);
 | 
			
		||||
    C_ADD (st->tmpbuf[k], fek, fok);
 | 
			
		||||
    C_SUB (st->tmpbuf[ncfft - k], fek, fok);
 | 
			
		||||
#ifdef USE_SIMD
 | 
			
		||||
    st->tmpbuf[ncfft - k].i *= _mm_set1_ps (-1.0);
 | 
			
		||||
#else
 | 
			
		||||
    st->tmpbuf[ncfft - k].i *= -1;
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
  kiss_fft_f32 (st->substate, st->tmpbuf, (kiss_fft_f32_cpx *) timedata);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,54 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
 *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
 | 
			
		||||
 *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
 | 
			
		||||
 *
 | 
			
		||||
 *  SPDX-License-Identifier: BSD-3-Clause
 | 
			
		||||
 *  See COPYING file for more information.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef KISS_FTR_H
 | 
			
		||||
#define KISS_FTR_H
 | 
			
		||||
 | 
			
		||||
#include "kiss_fft_f32.h"
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C" {
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
/* 
 | 
			
		||||
 
 | 
			
		||||
 Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
 
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
typedef struct kiss_fftr_f32_state *kiss_fftr_f32_cfg;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
kiss_fftr_f32_cfg kiss_fftr_f32_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
 | 
			
		||||
/*
 | 
			
		||||
 nfft must be even
 | 
			
		||||
 | 
			
		||||
 If you don't care to allocate space, use mem = lenmem = NULL 
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void kiss_fftr_f32(kiss_fftr_f32_cfg cfg,const kiss_fft_f32_scalar *timedata,kiss_fft_f32_cpx *freqdata);
 | 
			
		||||
/*
 | 
			
		||||
 input timedata has nfft scalar points
 | 
			
		||||
 output freqdata has nfft/2+1 complex points
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
void kiss_fftri_f32(kiss_fftr_f32_cfg cfg,const kiss_fft_f32_cpx *freqdata,kiss_fft_f32_scalar *timedata);
 | 
			
		||||
/*
 | 
			
		||||
 input freqdata has  nfft/2+1 complex points
 | 
			
		||||
 output timedata has nfft scalar points
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#define kiss_fftr_f32_free KISS_FFT_F32_FREE
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										2244
									
								
								src/modules/module-filter-chain/pffft.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										2244
									
								
								src/modules/module-filter-chain/pffft.c
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										177
									
								
								src/modules/module-filter-chain/pffft.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										177
									
								
								src/modules/module-filter-chain/pffft.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,177 @@
 | 
			
		|||
/* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
 | 
			
		||||
 | 
			
		||||
   Based on original fortran 77 code from FFTPACKv4 from NETLIB,
 | 
			
		||||
   authored by Dr Paul Swarztrauber of NCAR, in 1985.
 | 
			
		||||
 | 
			
		||||
   As confirmed by the NCAR fftpack software curators, the following
 | 
			
		||||
   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
 | 
			
		||||
   released under the same terms.
 | 
			
		||||
 | 
			
		||||
   FFTPACK license:
 | 
			
		||||
 | 
			
		||||
   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
 | 
			
		||||
 | 
			
		||||
   Copyright (c) 2004 the University Corporation for Atmospheric
 | 
			
		||||
   Research ("UCAR"). All rights reserved. Developed by NCAR's
 | 
			
		||||
   Computational and Information Systems Laboratory, UCAR,
 | 
			
		||||
   www.cisl.ucar.edu.
 | 
			
		||||
 | 
			
		||||
   Redistribution and use of the Software in source and binary forms,
 | 
			
		||||
   with or without modification, is permitted provided that the
 | 
			
		||||
   following conditions are met:
 | 
			
		||||
 | 
			
		||||
   - Neither the names of NCAR's Computational and Information Systems
 | 
			
		||||
   Laboratory, the University Corporation for Atmospheric Research,
 | 
			
		||||
   nor the names of its sponsors or contributors may be used to
 | 
			
		||||
   endorse or promote products derived from this Software without
 | 
			
		||||
   specific prior written permission.
 | 
			
		||||
 | 
			
		||||
   - Redistributions of source code must retain the above copyright
 | 
			
		||||
   notices, this list of conditions, and the disclaimer below.
 | 
			
		||||
 | 
			
		||||
   - Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
   notice, this list of conditions, and the disclaimer below in the
 | 
			
		||||
   documentation and/or other materials provided with the
 | 
			
		||||
   distribution.
 | 
			
		||||
 | 
			
		||||
   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 | 
			
		||||
   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
 | 
			
		||||
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 | 
			
		||||
   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
 | 
			
		||||
   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 | 
			
		||||
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 | 
			
		||||
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
 | 
			
		||||
   SOFTWARE.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
   PFFFT : a Pretty Fast FFT.
 | 
			
		||||
 | 
			
		||||
   This is basically an adaptation of the single precision fftpack
 | 
			
		||||
   (v4) as found on netlib taking advantage of SIMD instructions found
 | 
			
		||||
   on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
 | 
			
		||||
 | 
			
		||||
   For architectures where no SIMD instruction is available, the code
 | 
			
		||||
   falls back to a scalar version.
 | 
			
		||||
 | 
			
		||||
   Restrictions:
 | 
			
		||||
 | 
			
		||||
   - 1D transforms only, with 32-bit single precision.
 | 
			
		||||
 | 
			
		||||
   - supports only transforms for inputs of length N of the form
 | 
			
		||||
   N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
 | 
			
		||||
   144, 160, etc are all acceptable lengths). Performance is best for
 | 
			
		||||
   128<=N<=8192.
 | 
			
		||||
 | 
			
		||||
   - all (float*) pointers in the functions below are expected to
 | 
			
		||||
   have a "simd-compatible" alignment, that is 16 bytes on x86 and
 | 
			
		||||
   powerpc CPUs.
 | 
			
		||||
 | 
			
		||||
   You can allocate such buffers with the functions
 | 
			
		||||
   pffft_aligned_malloc / pffft_aligned_free (or with stuff like
 | 
			
		||||
   posix_memalign..)
 | 
			
		||||
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#ifndef PFFFT_H
 | 
			
		||||
#define PFFFT_H
 | 
			
		||||
 | 
			
		||||
#include <stddef.h> // for size_t
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C" {
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  /* opaque struct holding internal stuff (precomputed twiddle factors)
 | 
			
		||||
     this struct can be shared by many threads as it contains only
 | 
			
		||||
     read-only data.
 | 
			
		||||
  */
 | 
			
		||||
  typedef struct PFFFT_Setup PFFFT_Setup;
 | 
			
		||||
 | 
			
		||||
  /* direction of the transform */
 | 
			
		||||
  typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
 | 
			
		||||
 | 
			
		||||
  /* type of transform */
 | 
			
		||||
  typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
    prepare for performing transforms of size N -- the returned
 | 
			
		||||
    PFFFT_Setup structure is read-only so it can safely be shared by
 | 
			
		||||
    multiple concurrent threads.
 | 
			
		||||
  */
 | 
			
		||||
  PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
 | 
			
		||||
  void pffft_destroy_setup(PFFFT_Setup *);
 | 
			
		||||
  /*
 | 
			
		||||
     Perform a Fourier transform. The z-domain data is stored in the
 | 
			
		||||
     most efficient order for transforming it back, or using it for
 | 
			
		||||
     convolution. If you need to have its content sorted in the
 | 
			
		||||
     "usual" way, that is as an array of interleaved complex numbers,
 | 
			
		||||
     either use pffft_transform_ordered, or call pffft_zreorder after
 | 
			
		||||
     the forward fft, and before the backward fft.
 | 
			
		||||
 | 
			
		||||
     Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
 | 
			
		||||
     Typically you will want to scale the backward transform by 1/N.
 | 
			
		||||
 | 
			
		||||
     The 'work' pointer should point to an area of N (2*N for complex
 | 
			
		||||
     fft) floats, properly aligned. If 'work' is NULL, then stack will
 | 
			
		||||
     be used instead (this is probably the best strategy for small
 | 
			
		||||
     FFTs, say for N < 16384).
 | 
			
		||||
 | 
			
		||||
     input and output may alias.
 | 
			
		||||
  */
 | 
			
		||||
  void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
     Similar to pffft_transform, but makes sure that the output is
 | 
			
		||||
     ordered as expected (interleaved complex numbers).  This is
 | 
			
		||||
     similar to calling pffft_transform and then pffft_zreorder.
 | 
			
		||||
 | 
			
		||||
     input and output may alias.
 | 
			
		||||
  */
 | 
			
		||||
  void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
     call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
 | 
			
		||||
     PFFFT_FORWARD) if you want to have the frequency components in
 | 
			
		||||
     the correct "canonical" order, as interleaved complex numbers.
 | 
			
		||||
 | 
			
		||||
     (for real transforms, both 0-frequency and half frequency
 | 
			
		||||
     components, which are real, are assembled in the first entry as
 | 
			
		||||
     F(0)+i*F(n/2+1). Note that the original fftpack did place
 | 
			
		||||
     F(n/2+1) at the end of the arrays).
 | 
			
		||||
 | 
			
		||||
     input and output should not alias.
 | 
			
		||||
  */
 | 
			
		||||
  void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
     Perform a multiplication of the frequency components of dft_a and
 | 
			
		||||
     dft_b and accumulate them into dft_ab. The arrays should have
 | 
			
		||||
     been obtained with pffft_transform(.., PFFFT_FORWARD) and should
 | 
			
		||||
     *not* have been reordered with pffft_zreorder (otherwise just
 | 
			
		||||
     perform the operation yourself as the dft coefs are stored as
 | 
			
		||||
     interleaved complex numbers).
 | 
			
		||||
 | 
			
		||||
     the operation performed is: dft_ab += (dft_a * dft_b)*scaling
 | 
			
		||||
 | 
			
		||||
     The dft_a, dft_b and dft_ab pointers may alias.
 | 
			
		||||
  */
 | 
			
		||||
  void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
    the float buffers must have the correct alignment (16-byte boundary
 | 
			
		||||
    on intel and powerpc). This function may be used to obtain such
 | 
			
		||||
    correctly aligned buffers. 
 | 
			
		||||
  */
 | 
			
		||||
  void *pffft_aligned_malloc(size_t nb_bytes);
 | 
			
		||||
  void pffft_aligned_free(void *);
 | 
			
		||||
 | 
			
		||||
  /* Return 4 or 1 depending on whether support for SSE/Altivec instructions
     was enabled when building pffft.c. The explicit (void) makes this a real
     prototype in C, so calls with arguments are rejected by the compiler. */
  int pffft_simd_size(void);
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif // PFFFT_H
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue