mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-03 09:01:54 -05:00
filter-chain: use pffft for the convolver
It is faster.
This commit is contained in:
parent
9dbfa63193
commit
0f5face73f
9 changed files with 2550 additions and 990 deletions
|
|
@ -50,8 +50,7 @@ pipewire_module_filter_chain = shared_library('pipewire-module-filter-chain',
|
|||
'module-filter-chain/biquad.c',
|
||||
'module-filter-chain/ladspa_plugin.c',
|
||||
'module-filter-chain/builtin_plugin.c',
|
||||
'module-filter-chain/kiss_fft_f32.c',
|
||||
'module-filter-chain/kiss_fftr_f32.c',
|
||||
'module-filter-chain/pffft.c',
|
||||
'module-filter-chain/convolver.c' ],
|
||||
include_directories : [configinc, spa_inc],
|
||||
install : true,
|
||||
|
|
|
|||
|
|
@ -1,173 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
/* kiss_fft_f32.h
|
||||
defines kiss_fft_f32_scalar as either short or a float type
|
||||
and defines
|
||||
typedef struct { kiss_fft_f32_scalar r; kiss_fft_f32_scalar i; }kiss_fft_f32_cpx; */
|
||||
#include "kiss_fft_f32.h"
|
||||
#include <limits.h>
|
||||
|
||||
/* The 2*sizeof(size_t) alignment here is borrowed from
|
||||
* GNU libc, so it should be good most everywhere.
|
||||
* It is more conservative than is needed on some 64-bit
|
||||
* platforms, but ia64 does require a 16-byte alignment.
|
||||
* The SIMD extensions for x86 and ppc32 would want a
|
||||
* larger alignment than this, but we don't need to
|
||||
* do better than malloc.
|
||||
*
|
||||
* Borrowed from GLib's gobject/gtype.c
|
||||
*/
|
||||
#define STRUCT_ALIGNMENT (2 * sizeof (size_t))
|
||||
#define ALIGN_STRUCT(offset) \
|
||||
((offset + (STRUCT_ALIGNMENT - 1)) & -STRUCT_ALIGNMENT)
|
||||
|
||||
#define MAXFACTORS 32
|
||||
/* e.g. an fft of length 128 has 4 factors
|
||||
as far as kissfft is concerned
|
||||
4*4*4*2
|
||||
*/
|
||||
|
||||
struct kiss_fft_f32_state{
|
||||
int nfft;
|
||||
int inverse;
|
||||
int factors[2*MAXFACTORS];
|
||||
kiss_fft_f32_cpx twiddles[1];
|
||||
};
|
||||
|
||||
/*
|
||||
Explanation of macros dealing with complex math:
|
||||
|
||||
C_MUL(m,a,b) : m = a*b
|
||||
C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
|
||||
C_SUB( res, a,b) : res = a - b
|
||||
C_SUBFROM( res , a) : res -= a
|
||||
C_ADDTO( res , a) : res += a
|
||||
* */
|
||||
#ifdef FIXED_POINT
|
||||
#include <stdint.h>
|
||||
#if (FIXED_POINT==32)
|
||||
# define FRACBITS 31
|
||||
# define SAMPPROD int64_t
|
||||
#define SAMP_MAX INT32_MAX
|
||||
#define SAMP_MIN INT32_MIN
|
||||
#else
|
||||
# define FRACBITS 15
|
||||
# define SAMPPROD int32_t
|
||||
#define SAMP_MAX INT16_MAX
|
||||
#define SAMP_MIN INT16_MIN
|
||||
#endif
|
||||
|
||||
#if defined(CHECK_OVERFLOW)
|
||||
# define CHECK_OVERFLOW_OP(a,op,b) \
|
||||
if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
|
||||
g_critical("overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) ); }
|
||||
#endif
|
||||
|
||||
|
||||
# define smul(a,b) ( (SAMPPROD)(a)*(b) )
|
||||
# define sround( x ) (kiss_fft_f32_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
|
||||
|
||||
# define S_MUL(a,b) sround( smul(a,b) )
|
||||
|
||||
# define C_MUL(m,a,b) \
|
||||
do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
|
||||
(m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
|
||||
|
||||
# define DIVSCALAR(x,k) \
|
||||
(x) = sround( smul( x, SAMP_MAX/k ) )
|
||||
|
||||
# define C_FIXDIV(c,div) \
|
||||
do { DIVSCALAR( (c).r , div); \
|
||||
DIVSCALAR( (c).i , div); }while (0)
|
||||
|
||||
# define C_MULBYSCALAR( c, s ) \
|
||||
do{ (c).r = sround( smul( (c).r , s ) ) ;\
|
||||
(c).i = sround( smul( (c).i , s ) ) ; }while(0)
|
||||
|
||||
#else /* not FIXED_POINT*/
|
||||
|
||||
# define S_MUL(a,b) ( (a)*(b) )
|
||||
#define C_MUL(m,a,b) \
|
||||
do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
|
||||
(m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
|
||||
# define C_FIXDIV(c,div) /* NOOP */
|
||||
# define C_MULBYSCALAR( c, s ) \
|
||||
do{ (c).r *= (s);\
|
||||
(c).i *= (s); }while(0)
|
||||
#endif
|
||||
|
||||
#ifndef CHECK_OVERFLOW_OP
|
||||
# define CHECK_OVERFLOW_OP(a,op,b) /* noop */
|
||||
#endif
|
||||
|
||||
#define C_ADD( res, a,b)\
|
||||
do { \
|
||||
CHECK_OVERFLOW_OP((a).r,+,(b).r)\
|
||||
CHECK_OVERFLOW_OP((a).i,+,(b).i)\
|
||||
(res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \
|
||||
}while(0)
|
||||
#define C_SUB( res, a,b)\
|
||||
do { \
|
||||
CHECK_OVERFLOW_OP((a).r,-,(b).r)\
|
||||
CHECK_OVERFLOW_OP((a).i,-,(b).i)\
|
||||
(res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \
|
||||
}while(0)
|
||||
#define C_ADDTO( res , a)\
|
||||
do { \
|
||||
CHECK_OVERFLOW_OP((res).r,+,(a).r)\
|
||||
CHECK_OVERFLOW_OP((res).i,+,(a).i)\
|
||||
(res).r += (a).r; (res).i += (a).i;\
|
||||
}while(0)
|
||||
|
||||
#define C_SUBFROM( res , a)\
|
||||
do {\
|
||||
CHECK_OVERFLOW_OP((res).r,-,(a).r)\
|
||||
CHECK_OVERFLOW_OP((res).i,-,(a).i)\
|
||||
(res).r -= (a).r; (res).i -= (a).i; \
|
||||
}while(0)
|
||||
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
# define KISS_FFT_F32_COS(phase) floor(.5+SAMP_MAX * cos (phase))
|
||||
# define KISS_FFT_F32_SIN(phase) floor(.5+SAMP_MAX * sin (phase))
|
||||
# define HALF_OF(x) ((x)>>1)
|
||||
#elif defined(USE_SIMD)
|
||||
# define KISS_FFT_F32_COS(phase) _mm_set1_ps( cos(phase) )
|
||||
# define KISS_FFT_F32_SIN(phase) _mm_set1_ps( sin(phase) )
|
||||
# define HALF_OF(x) ((x)*_mm_set1_ps(.5))
|
||||
#else
|
||||
# define KISS_FFT_F32_COS(phase) (kiss_fft_f32_scalar) cos(phase)
|
||||
# define KISS_FFT_F32_SIN(phase) (kiss_fft_f32_scalar) sin(phase)
|
||||
# define HALF_OF(x) ((x)*.5)
|
||||
#endif
|
||||
|
||||
#define kf_cexp(x,phase) \
|
||||
do{ \
|
||||
(x)->r = KISS_FFT_F32_COS(phase);\
|
||||
(x)->i = KISS_FFT_F32_SIN(phase);\
|
||||
}while(0)
|
||||
|
||||
|
||||
/* a debugging function */
|
||||
#define pcpx(c)\
|
||||
fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
|
||||
|
||||
|
||||
#ifdef KISS_FFT_F32_USE_ALLOCA
|
||||
// define this to allow use of alloca instead of malloc for temporary buffers
|
||||
// Temporary buffers are used in two case:
|
||||
// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
|
||||
// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
|
||||
#include <alloca.h>
|
||||
#define KISS_FFT_F32_TMP_ALLOC(nbytes) alloca(nbytes)
|
||||
#define KISS_FFT_F32_TMP_FREE(ptr)
|
||||
#else
|
||||
#define KISS_FFT_F32_TMP_ALLOC(nbytes) KISS_FFT_F32_MALLOC(nbytes)
|
||||
#define KISS_FFT_F32_TMP_FREE(ptr) KISS_FFT_F32_FREE(ptr)
|
||||
#endif
|
||||
|
|
@ -28,8 +28,14 @@
|
|||
|
||||
#include <spa/utils/defs.h>
|
||||
|
||||
#include "kiss_fft_f32.h"
|
||||
#include "kiss_fftr_f32.h"
|
||||
#include <math.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include "pffft.h"
|
||||
|
||||
struct fft_cpx {
|
||||
float *v;
|
||||
};
|
||||
|
||||
struct convolver1 {
|
||||
int blockSize;
|
||||
|
|
@ -37,16 +43,16 @@ struct convolver1 {
|
|||
int segCount;
|
||||
int fftComplexSize;
|
||||
|
||||
kiss_fft_f32_cpx **segments;
|
||||
kiss_fft_f32_cpx **segmentsIr;
|
||||
struct fft_cpx *segments;
|
||||
struct fft_cpx *segmentsIr;
|
||||
|
||||
float *fft_buffer;
|
||||
|
||||
void *fft;
|
||||
void *ifft;
|
||||
|
||||
kiss_fft_f32_cpx *pre_mult;
|
||||
kiss_fft_f32_cpx *conv;
|
||||
struct fft_cpx pre_mult;
|
||||
struct fft_cpx conv;
|
||||
float *overlap;
|
||||
|
||||
float *inputBuffer;
|
||||
|
|
@ -55,6 +61,38 @@ struct convolver1 {
|
|||
int current;
|
||||
};
|
||||
|
||||
static void *fft_alloc(int size)
|
||||
{
|
||||
void *d;
|
||||
d = pffft_aligned_malloc(size);
|
||||
memset(d, 0, size);
|
||||
return d;
|
||||
}
|
||||
static void fft_free(void *data)
|
||||
{
|
||||
pffft_aligned_free(data);
|
||||
}
|
||||
|
||||
static void fft_cpx_init(struct fft_cpx *cpx, int size)
|
||||
{
|
||||
cpx->v = fft_alloc(size * 2 * sizeof(float));
|
||||
}
|
||||
|
||||
static void fft_cpx_free(struct fft_cpx *cpx)
|
||||
{
|
||||
fft_free(cpx->v);
|
||||
}
|
||||
|
||||
static void fft_cpx_clear(struct fft_cpx *cpx, int size)
|
||||
{
|
||||
memset(cpx->v, 0, sizeof(float) * 2 * size);
|
||||
}
|
||||
|
||||
static void fft_cpx_copy(struct fft_cpx *dst, struct fft_cpx *src, int size)
|
||||
{
|
||||
memcpy(dst->v, src->v, sizeof(float) * 2 * size);
|
||||
}
|
||||
|
||||
static int next_power_of_two(int val)
|
||||
{
|
||||
int r = 1;
|
||||
|
|
@ -63,6 +101,37 @@ static int next_power_of_two(int val)
|
|||
return r;
|
||||
}
|
||||
|
||||
static inline void *fft_new(int size)
|
||||
{
|
||||
return pffft_new_setup(size, PFFFT_REAL);
|
||||
}
|
||||
|
||||
static inline void *ifft_new(int size)
|
||||
{
|
||||
return pffft_new_setup(size, PFFFT_REAL);
|
||||
}
|
||||
|
||||
static inline void fft_destroy(void *fft)
|
||||
{
|
||||
pffft_destroy_setup(fft);
|
||||
}
|
||||
|
||||
static inline void fft_run(void *fft, float *in, struct fft_cpx *out)
|
||||
{
|
||||
pffft_transform(fft, in, out->v, NULL, PFFFT_FORWARD);
|
||||
}
|
||||
|
||||
static inline void ifft_run(void *ifft, struct fft_cpx *in, float *out)
|
||||
{
|
||||
pffft_transform(ifft, in->v, out, NULL, PFFFT_BACKWARD);
|
||||
}
|
||||
|
||||
static inline void fft_convolve_accum(void *fft, struct fft_cpx *r,
|
||||
const struct fft_cpx *a, const struct fft_cpx *b, int len, float scale)
|
||||
{
|
||||
pffft_zconvolve_accumulate(fft, a->v, b->v, r->v, scale);
|
||||
}
|
||||
|
||||
static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
|
||||
{
|
||||
struct convolver1 *conv;
|
||||
|
|
@ -86,37 +155,37 @@ static struct convolver1 *convolver1_new(int block, const float *ir, int irlen)
|
|||
conv->segCount = (irlen + conv->blockSize-1) / conv->blockSize;
|
||||
conv->fftComplexSize = (conv->segSize / 2) + 1;
|
||||
|
||||
conv->fft = kiss_fftr_f32_alloc(conv->segSize, 0, NULL, NULL);
|
||||
conv->fft = fft_new(conv->segSize);
|
||||
if (conv->fft == NULL)
|
||||
return NULL;
|
||||
conv->ifft = kiss_fftr_f32_alloc(conv->segSize, 1, NULL, NULL);
|
||||
conv->ifft = ifft_new(conv->segSize);
|
||||
if (conv->ifft == NULL)
|
||||
return NULL;
|
||||
|
||||
conv->fft_buffer = calloc(sizeof(float), conv->segSize);
|
||||
conv->fft_buffer = fft_alloc(sizeof(float) * conv->segSize);
|
||||
if (conv->fft_buffer == NULL)
|
||||
return NULL;
|
||||
|
||||
conv->segments = calloc(sizeof(kiss_fft_f32_cpx*), conv->segCount);
|
||||
conv->segmentsIr = calloc(sizeof(kiss_fft_f32_cpx*), conv->segCount);
|
||||
conv->segments = calloc(sizeof(struct fft_cpx), conv->segCount);
|
||||
conv->segmentsIr = calloc(sizeof(struct fft_cpx), conv->segCount);
|
||||
|
||||
for (i = 0; i < conv->segCount; i++) {
|
||||
int left = irlen - (i * conv->blockSize);
|
||||
int copy = SPA_MIN(conv->blockSize, left);
|
||||
|
||||
conv->segments[i] = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
|
||||
conv->segmentsIr[i] = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
|
||||
fft_cpx_init(&conv->segments[i], conv->fftComplexSize);
|
||||
fft_cpx_init(&conv->segmentsIr[i], conv->fftComplexSize);
|
||||
|
||||
memcpy(conv->fft_buffer, &ir[i * conv->blockSize], copy * sizeof(float));
|
||||
if (copy < conv->segSize)
|
||||
memset(conv->fft_buffer + copy, 0, (conv->segSize - copy) * sizeof(float));
|
||||
|
||||
kiss_fftr_f32(conv->fft, conv->fft_buffer, conv->segmentsIr[i]);
|
||||
fft_run(conv->fft, conv->fft_buffer, &conv->segmentsIr[i]);
|
||||
}
|
||||
conv->pre_mult = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
|
||||
conv->conv = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
|
||||
conv->overlap = calloc(sizeof(float), conv->blockSize);
|
||||
conv->inputBuffer = calloc(sizeof(float), conv->blockSize);
|
||||
fft_cpx_init(&conv->pre_mult, conv->fftComplexSize);
|
||||
fft_cpx_init(&conv->conv, conv->fftComplexSize);
|
||||
conv->overlap = fft_alloc(sizeof(float) * conv->blockSize);
|
||||
conv->inputBuffer = fft_alloc(sizeof(float) * conv->blockSize);
|
||||
conv->inputBufferFill = 0;
|
||||
conv->current = 0;
|
||||
|
||||
|
|
@ -127,35 +196,38 @@ static void convolver1_free(struct convolver1 *conv)
|
|||
{
|
||||
int i;
|
||||
for (i = 0; i < conv->segCount; i++) {
|
||||
free(conv->segments[i]);
|
||||
free(conv->segmentsIr[i]);
|
||||
fft_cpx_free(&conv->segments[i]);
|
||||
fft_cpx_free(&conv->segmentsIr[i]);
|
||||
}
|
||||
free(conv->fft);
|
||||
free(conv->ifft);
|
||||
free(conv->fft_buffer);
|
||||
fft_destroy(conv->fft);
|
||||
fft_destroy(conv->ifft);
|
||||
fft_free(conv->fft_buffer);
|
||||
free(conv->segments);
|
||||
free(conv->segmentsIr);
|
||||
free(conv->pre_mult);
|
||||
free(conv->conv);
|
||||
free(conv->overlap);
|
||||
free(conv->inputBuffer);
|
||||
fft_cpx_free(&conv->pre_mult);
|
||||
fft_cpx_free(&conv->conv);
|
||||
fft_free(conv->overlap);
|
||||
fft_free(conv->inputBuffer);
|
||||
free(conv);
|
||||
}
|
||||
|
||||
void Sum(float* result, const float* a, const float* b, int len)
|
||||
{
|
||||
int i;
|
||||
#if defined (__SSE__)
|
||||
const int end4 = 4 * (len / 4);
|
||||
for (i = 0; i < end4; i += 4) {
|
||||
const __m128 va = _mm_load_ps(&a[i]);
|
||||
const __m128 vb = _mm_load_ps(&b[i]);
|
||||
_mm_store_ps(&result[i], _mm_add_ps(va,vb));
|
||||
}
|
||||
for (i = end4; i < len; ++i) {
|
||||
result[i] = a[i] + b[i];
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < len; i++)
|
||||
result[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
void ComplexMultiplyAccumulate(kiss_fft_f32_cpx *r, const kiss_fft_f32_cpx *a, const kiss_fft_f32_cpx *b, int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
r[i].r += a[i].r * b[i].r - a[i].i * b[i].i;
|
||||
r[i].i += a[i].r * b[i].i + a[i].i * b[i].r;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static int convolver1_run(struct convolver1 *conv, const float *input, float *output, int len)
|
||||
|
|
@ -176,30 +248,27 @@ static int convolver1_run(struct convolver1 *conv, const float *input, float *ou
|
|||
memcpy(conv->fft_buffer, conv->inputBuffer, conv->blockSize * sizeof(float));
|
||||
memset(conv->fft_buffer + conv->blockSize, 0, (conv->segSize - conv->blockSize) * sizeof(float));
|
||||
|
||||
kiss_fftr_f32(conv->fft, conv->fft_buffer, conv->segments[conv->current]);
|
||||
fft_run(conv->fft, conv->fft_buffer, &conv->segments[conv->current]);
|
||||
|
||||
if (conv->inputBufferFill == 0) {
|
||||
memset(conv->pre_mult, 0, sizeof(kiss_fft_f32_cpx) * conv->fftComplexSize);
|
||||
fft_cpx_clear(&conv->pre_mult, conv->fftComplexSize);
|
||||
|
||||
for (i = 1; i < conv->segCount; i++) {
|
||||
const int indexIr = i;
|
||||
const int indexAudio = (conv->current + i) % conv->segCount;
|
||||
|
||||
ComplexMultiplyAccumulate(conv->pre_mult,
|
||||
conv->segmentsIr[indexIr],
|
||||
conv->segments[indexAudio],
|
||||
conv->fftComplexSize);
|
||||
fft_convolve_accum(conv->fft, &conv->pre_mult,
|
||||
&conv->segmentsIr[indexIr],
|
||||
&conv->segments[indexAudio],
|
||||
conv->fftComplexSize, 1.0f / conv->segSize);
|
||||
}
|
||||
}
|
||||
memcpy(conv->conv, conv->pre_mult, sizeof(kiss_fft_f32_cpx) * conv->fftComplexSize);
|
||||
fft_cpx_copy(&conv->conv, &conv->pre_mult, conv->fftComplexSize);
|
||||
|
||||
ComplexMultiplyAccumulate(conv->conv, conv->segments[conv->current], conv->segmentsIr[0],
|
||||
conv->fftComplexSize);
|
||||
fft_convolve_accum(conv->fft, &conv->conv, &conv->segments[conv->current], &conv->segmentsIr[0],
|
||||
conv->fftComplexSize, 1.0f / conv->segSize);
|
||||
|
||||
kiss_fftri_f32(conv->ifft, conv->conv, conv->fft_buffer);
|
||||
|
||||
for (i = 0; i < conv->segSize; i++)
|
||||
conv->fft_buffer[i] /= conv->segSize;
|
||||
ifft_run(conv->ifft, &conv->conv, conv->fft_buffer);
|
||||
|
||||
Sum(output + processed, conv->fft_buffer + inputBufferPos, conv->overlap + inputBufferPos, processing);
|
||||
|
||||
|
|
@ -265,19 +334,19 @@ struct convolver *convolver_new(int head_block, int tail_block, const float *ir,
|
|||
if (irlen > conv->tailBlockSize) {
|
||||
int conv1IrLen = SPA_MIN(irlen - conv->tailBlockSize, conv->tailBlockSize);
|
||||
conv->tailConvolver0 = convolver1_new(conv->headBlockSize, ir + conv->tailBlockSize, conv1IrLen);
|
||||
conv->tailOutput0 = calloc(conv->tailBlockSize, sizeof(float));
|
||||
conv->tailPrecalculated0 = calloc(conv->tailBlockSize, sizeof(float));
|
||||
conv->tailOutput0 = fft_alloc(conv->tailBlockSize * sizeof(float));
|
||||
conv->tailPrecalculated0 = fft_alloc(conv->tailBlockSize * sizeof(float));
|
||||
}
|
||||
|
||||
if (irlen > 2 * conv->tailBlockSize) {
|
||||
int tailIrLen = irlen - (2 * conv->tailBlockSize);
|
||||
conv->tailConvolver = convolver1_new(conv->tailBlockSize, ir + (2 * conv->tailBlockSize), tailIrLen);
|
||||
conv->tailOutput = calloc(conv->tailBlockSize, sizeof(float));
|
||||
conv->tailPrecalculated = calloc(conv->tailBlockSize, sizeof(float));
|
||||
conv->tailOutput = fft_alloc(conv->tailBlockSize * sizeof(float));
|
||||
conv->tailPrecalculated = fft_alloc(conv->tailBlockSize * sizeof(float));
|
||||
}
|
||||
|
||||
if (conv->tailConvolver0 || conv->tailConvolver)
|
||||
conv->tailInput = calloc(conv->tailBlockSize, sizeof(float));
|
||||
conv->tailInput = fft_alloc(conv->tailBlockSize * sizeof(float));
|
||||
|
||||
conv->tailInputFill = 0;
|
||||
conv->precalculatedPos = 0;
|
||||
|
|
@ -293,11 +362,11 @@ void convolver_free(struct convolver *conv)
|
|||
convolver1_free(conv->tailConvolver0);
|
||||
if (conv->tailConvolver)
|
||||
convolver1_free(conv->tailConvolver);
|
||||
free(conv->tailOutput0);
|
||||
free(conv->tailPrecalculated0);
|
||||
free(conv->tailOutput);
|
||||
free(conv->tailPrecalculated);
|
||||
free(conv->tailInput);
|
||||
fft_free(conv->tailOutput0);
|
||||
fft_free(conv->tailPrecalculated0);
|
||||
fft_free(conv->tailOutput);
|
||||
fft_free(conv->tailPrecalculated);
|
||||
fft_free(conv->tailInput);
|
||||
free(conv);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,442 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
|
||||
#include "_kiss_fft_guts_f32.h"
|
||||
/* The guts header contains all the multiplication and addition macros that are defined for
|
||||
fixed or floating point complex numbers. It also delares the kf_ internal functions.
|
||||
*/
|
||||
|
||||
static void
|
||||
kf_bfly2 (kiss_fft_f32_cpx * Fout,
|
||||
const size_t fstride, const kiss_fft_f32_cfg st, int m)
|
||||
{
|
||||
kiss_fft_f32_cpx *Fout2;
|
||||
kiss_fft_f32_cpx *tw1 = st->twiddles;
|
||||
kiss_fft_f32_cpx t;
|
||||
Fout2 = Fout + m;
|
||||
do {
|
||||
C_FIXDIV (*Fout, 2);
|
||||
C_FIXDIV (*Fout2, 2);
|
||||
|
||||
C_MUL (t, *Fout2, *tw1);
|
||||
tw1 += fstride;
|
||||
C_SUB (*Fout2, *Fout, t);
|
||||
C_ADDTO (*Fout, t);
|
||||
++Fout2;
|
||||
++Fout;
|
||||
} while (--m);
|
||||
}
|
||||
|
||||
static void
|
||||
kf_bfly4 (kiss_fft_f32_cpx * Fout,
|
||||
const size_t fstride, const kiss_fft_f32_cfg st, const size_t m)
|
||||
{
|
||||
kiss_fft_f32_cpx *tw1, *tw2, *tw3;
|
||||
kiss_fft_f32_cpx scratch[6];
|
||||
size_t k = m;
|
||||
const size_t m2 = 2 * m;
|
||||
const size_t m3 = 3 * m;
|
||||
|
||||
|
||||
tw3 = tw2 = tw1 = st->twiddles;
|
||||
|
||||
do {
|
||||
C_FIXDIV (*Fout, 4);
|
||||
C_FIXDIV (Fout[m], 4);
|
||||
C_FIXDIV (Fout[m2], 4);
|
||||
C_FIXDIV (Fout[m3], 4);
|
||||
|
||||
C_MUL (scratch[0], Fout[m], *tw1);
|
||||
C_MUL (scratch[1], Fout[m2], *tw2);
|
||||
C_MUL (scratch[2], Fout[m3], *tw3);
|
||||
|
||||
C_SUB (scratch[5], *Fout, scratch[1]);
|
||||
C_ADDTO (*Fout, scratch[1]);
|
||||
C_ADD (scratch[3], scratch[0], scratch[2]);
|
||||
C_SUB (scratch[4], scratch[0], scratch[2]);
|
||||
C_SUB (Fout[m2], *Fout, scratch[3]);
|
||||
tw1 += fstride;
|
||||
tw2 += fstride * 2;
|
||||
tw3 += fstride * 3;
|
||||
C_ADDTO (*Fout, scratch[3]);
|
||||
|
||||
if (st->inverse) {
|
||||
Fout[m].r = scratch[5].r - scratch[4].i;
|
||||
Fout[m].i = scratch[5].i + scratch[4].r;
|
||||
Fout[m3].r = scratch[5].r + scratch[4].i;
|
||||
Fout[m3].i = scratch[5].i - scratch[4].r;
|
||||
} else {
|
||||
Fout[m].r = scratch[5].r + scratch[4].i;
|
||||
Fout[m].i = scratch[5].i - scratch[4].r;
|
||||
Fout[m3].r = scratch[5].r - scratch[4].i;
|
||||
Fout[m3].i = scratch[5].i + scratch[4].r;
|
||||
}
|
||||
++Fout;
|
||||
} while (--k);
|
||||
}
|
||||
|
||||
static void
|
||||
kf_bfly3 (kiss_fft_f32_cpx * Fout,
|
||||
const size_t fstride, const kiss_fft_f32_cfg st, size_t m)
|
||||
{
|
||||
size_t k = m;
|
||||
const size_t m2 = 2 * m;
|
||||
kiss_fft_f32_cpx *tw1, *tw2;
|
||||
kiss_fft_f32_cpx scratch[5];
|
||||
kiss_fft_f32_cpx epi3;
|
||||
epi3 = st->twiddles[fstride * m];
|
||||
|
||||
tw1 = tw2 = st->twiddles;
|
||||
|
||||
do {
|
||||
C_FIXDIV (*Fout, 3);
|
||||
C_FIXDIV (Fout[m], 3);
|
||||
C_FIXDIV (Fout[m2], 3);
|
||||
|
||||
C_MUL (scratch[1], Fout[m], *tw1);
|
||||
C_MUL (scratch[2], Fout[m2], *tw2);
|
||||
|
||||
C_ADD (scratch[3], scratch[1], scratch[2]);
|
||||
C_SUB (scratch[0], scratch[1], scratch[2]);
|
||||
tw1 += fstride;
|
||||
tw2 += fstride * 2;
|
||||
|
||||
Fout[m].r = Fout->r - HALF_OF (scratch[3].r);
|
||||
Fout[m].i = Fout->i - HALF_OF (scratch[3].i);
|
||||
|
||||
C_MULBYSCALAR (scratch[0], epi3.i);
|
||||
|
||||
C_ADDTO (*Fout, scratch[3]);
|
||||
|
||||
Fout[m2].r = Fout[m].r + scratch[0].i;
|
||||
Fout[m2].i = Fout[m].i - scratch[0].r;
|
||||
|
||||
Fout[m].r -= scratch[0].i;
|
||||
Fout[m].i += scratch[0].r;
|
||||
|
||||
++Fout;
|
||||
} while (--k);
|
||||
}
|
||||
|
||||
static void
|
||||
kf_bfly5 (kiss_fft_f32_cpx * Fout,
|
||||
const size_t fstride, const kiss_fft_f32_cfg st, int m)
|
||||
{
|
||||
kiss_fft_f32_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
|
||||
int u;
|
||||
kiss_fft_f32_cpx scratch[13];
|
||||
kiss_fft_f32_cpx *twiddles = st->twiddles;
|
||||
kiss_fft_f32_cpx *tw;
|
||||
kiss_fft_f32_cpx ya, yb;
|
||||
ya = twiddles[fstride * m];
|
||||
yb = twiddles[fstride * 2 * m];
|
||||
|
||||
Fout0 = Fout;
|
||||
Fout1 = Fout0 + m;
|
||||
Fout2 = Fout0 + 2 * m;
|
||||
Fout3 = Fout0 + 3 * m;
|
||||
Fout4 = Fout0 + 4 * m;
|
||||
|
||||
tw = st->twiddles;
|
||||
for (u = 0; u < m; ++u) {
|
||||
C_FIXDIV (*Fout0, 5);
|
||||
C_FIXDIV (*Fout1, 5);
|
||||
C_FIXDIV (*Fout2, 5);
|
||||
C_FIXDIV (*Fout3, 5);
|
||||
C_FIXDIV (*Fout4, 5);
|
||||
scratch[0] = *Fout0;
|
||||
|
||||
C_MUL (scratch[1], *Fout1, tw[u * fstride]);
|
||||
C_MUL (scratch[2], *Fout2, tw[2 * u * fstride]);
|
||||
C_MUL (scratch[3], *Fout3, tw[3 * u * fstride]);
|
||||
C_MUL (scratch[4], *Fout4, tw[4 * u * fstride]);
|
||||
|
||||
C_ADD (scratch[7], scratch[1], scratch[4]);
|
||||
C_SUB (scratch[10], scratch[1], scratch[4]);
|
||||
C_ADD (scratch[8], scratch[2], scratch[3]);
|
||||
C_SUB (scratch[9], scratch[2], scratch[3]);
|
||||
|
||||
Fout0->r += scratch[7].r + scratch[8].r;
|
||||
Fout0->i += scratch[7].i + scratch[8].i;
|
||||
|
||||
scratch[5].r =
|
||||
scratch[0].r + S_MUL (scratch[7].r, ya.r) + S_MUL (scratch[8].r, yb.r);
|
||||
scratch[5].i =
|
||||
scratch[0].i + S_MUL (scratch[7].i, ya.r) + S_MUL (scratch[8].i, yb.r);
|
||||
|
||||
scratch[6].r = S_MUL (scratch[10].i, ya.i) + S_MUL (scratch[9].i, yb.i);
|
||||
scratch[6].i = -S_MUL (scratch[10].r, ya.i) - S_MUL (scratch[9].r, yb.i);
|
||||
|
||||
C_SUB (*Fout1, scratch[5], scratch[6]);
|
||||
C_ADD (*Fout4, scratch[5], scratch[6]);
|
||||
|
||||
scratch[11].r =
|
||||
scratch[0].r + S_MUL (scratch[7].r, yb.r) + S_MUL (scratch[8].r, ya.r);
|
||||
scratch[11].i =
|
||||
scratch[0].i + S_MUL (scratch[7].i, yb.r) + S_MUL (scratch[8].i, ya.r);
|
||||
scratch[12].r = -S_MUL (scratch[10].i, yb.i) + S_MUL (scratch[9].i, ya.i);
|
||||
scratch[12].i = S_MUL (scratch[10].r, yb.i) - S_MUL (scratch[9].r, ya.i);
|
||||
|
||||
C_ADD (*Fout2, scratch[11], scratch[12]);
|
||||
C_SUB (*Fout3, scratch[11], scratch[12]);
|
||||
|
||||
++Fout0;
|
||||
++Fout1;
|
||||
++Fout2;
|
||||
++Fout3;
|
||||
++Fout4;
|
||||
}
|
||||
}
|
||||
|
||||
/* perform the butterfly for one stage of a mixed radix FFT */
|
||||
static void
|
||||
kf_bfly_generic (kiss_fft_f32_cpx * Fout,
|
||||
const size_t fstride, const kiss_fft_f32_cfg st, int m, int p)
|
||||
{
|
||||
int u, k, q1, q;
|
||||
kiss_fft_f32_cpx *twiddles = st->twiddles;
|
||||
kiss_fft_f32_cpx t;
|
||||
int Norig = st->nfft;
|
||||
|
||||
kiss_fft_f32_cpx *scratch =
|
||||
(kiss_fft_f32_cpx *) KISS_FFT_F32_TMP_ALLOC (sizeof (kiss_fft_f32_cpx) *
|
||||
p);
|
||||
|
||||
for (u = 0; u < m; ++u) {
|
||||
k = u;
|
||||
for (q1 = 0; q1 < p; ++q1) {
|
||||
scratch[q1] = Fout[k];
|
||||
C_FIXDIV (scratch[q1], p);
|
||||
k += m;
|
||||
}
|
||||
|
||||
k = u;
|
||||
for (q1 = 0; q1 < p; ++q1) {
|
||||
int twidx = 0;
|
||||
Fout[k] = scratch[0];
|
||||
for (q = 1; q < p; ++q) {
|
||||
twidx += fstride * k;
|
||||
if (twidx >= Norig)
|
||||
twidx -= Norig;
|
||||
C_MUL (t, scratch[q], twiddles[twidx]);
|
||||
C_ADDTO (Fout[k], t);
|
||||
}
|
||||
k += m;
|
||||
}
|
||||
}
|
||||
KISS_FFT_F32_TMP_FREE (scratch);
|
||||
}
|
||||
|
||||
static void
|
||||
kf_work (kiss_fft_f32_cpx * Fout,
|
||||
const kiss_fft_f32_cpx * f,
|
||||
const size_t fstride, int in_stride, int *factors,
|
||||
const kiss_fft_f32_cfg st)
|
||||
{
|
||||
kiss_fft_f32_cpx *Fout_beg = Fout;
|
||||
const int p = *factors++; /* the radix */
|
||||
const int m = *factors++; /* stage's fft length/p */
|
||||
const kiss_fft_f32_cpx *Fout_end = Fout + p * m;
|
||||
|
||||
#ifdef _OPENMP
|
||||
// use openmp extensions at the
|
||||
// top-level (not recursive)
|
||||
if (fstride == 1 && p <= 5 && m != 1) {
|
||||
int k;
|
||||
|
||||
// execute the p different work units in different threads
|
||||
# pragma omp parallel for
|
||||
for (k = 0; k < p; ++k)
|
||||
kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
|
||||
in_stride, factors, st);
|
||||
// all threads have joined by this point
|
||||
|
||||
switch (p) {
|
||||
case 2:
|
||||
kf_bfly2 (Fout, fstride, st, m);
|
||||
break;
|
||||
case 3:
|
||||
kf_bfly3 (Fout, fstride, st, m);
|
||||
break;
|
||||
case 4:
|
||||
kf_bfly4 (Fout, fstride, st, m);
|
||||
break;
|
||||
case 5:
|
||||
kf_bfly5 (Fout, fstride, st, m);
|
||||
break;
|
||||
default:
|
||||
kf_bfly_generic (Fout, fstride, st, m, p);
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (m == 1) {
|
||||
do {
|
||||
*Fout = *f;
|
||||
f += fstride * in_stride;
|
||||
} while (++Fout != Fout_end);
|
||||
} else {
|
||||
do {
|
||||
// recursive call:
|
||||
// DFT of size m*p performed by doing
|
||||
// p instances of smaller DFTs of size m,
|
||||
// each one takes a decimated version of the input
|
||||
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
||||
f += fstride * in_stride;
|
||||
} while ((Fout += m) != Fout_end);
|
||||
}
|
||||
|
||||
Fout = Fout_beg;
|
||||
|
||||
// recombine the p smaller DFTs
|
||||
switch (p) {
|
||||
case 2:
|
||||
kf_bfly2 (Fout, fstride, st, m);
|
||||
break;
|
||||
case 3:
|
||||
kf_bfly3 (Fout, fstride, st, m);
|
||||
break;
|
||||
case 4:
|
||||
kf_bfly4 (Fout, fstride, st, m);
|
||||
break;
|
||||
case 5:
|
||||
kf_bfly5 (Fout, fstride, st, m);
|
||||
break;
|
||||
default:
|
||||
kf_bfly_generic (Fout, fstride, st, m, p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* facbuf is populated by p1,m1,p2,m2, ...
|
||||
where
|
||||
p[i] * m[i] = m[i-1]
|
||||
m0 = n */
|
||||
static void
|
||||
kf_factor (int n, int *facbuf)
|
||||
{
|
||||
int p = 4;
|
||||
double floor_sqrt;
|
||||
floor_sqrt = floor (sqrt ((double) n));
|
||||
|
||||
/*factor out powers of 4, powers of 2, then any remaining primes */
|
||||
do {
|
||||
while (n % p) {
|
||||
switch (p) {
|
||||
case 4:
|
||||
p = 2;
|
||||
break;
|
||||
case 2:
|
||||
p = 3;
|
||||
break;
|
||||
default:
|
||||
p += 2;
|
||||
break;
|
||||
}
|
||||
if (p > floor_sqrt)
|
||||
p = n; /* no more factors, skip to end */
|
||||
}
|
||||
n /= p;
|
||||
*facbuf++ = p;
|
||||
*facbuf++ = n;
|
||||
} while (n > 1);
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* User-callable function to allocate all necessary storage space for the fft.
|
||||
*
|
||||
* The return value is a contiguous block of memory, allocated with malloc. As such,
|
||||
* It can be freed with free(), rather than a kiss_fft_f32-specific function.
|
||||
* */
|
||||
kiss_fft_f32_cfg
|
||||
kiss_fft_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||
{
|
||||
kiss_fft_f32_cfg st = NULL;
|
||||
size_t memneeded = sizeof (struct kiss_fft_f32_state)
|
||||
+ sizeof (kiss_fft_f32_cpx) * (nfft - 1); /* twiddle factors */
|
||||
|
||||
if (lenmem == NULL) {
|
||||
st = (kiss_fft_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
|
||||
} else {
|
||||
if (mem != NULL && *lenmem >= memneeded)
|
||||
st = (kiss_fft_f32_cfg) mem;
|
||||
*lenmem = memneeded;
|
||||
}
|
||||
if (st) {
|
||||
int i;
|
||||
st->nfft = nfft;
|
||||
st->inverse = inverse_fft;
|
||||
|
||||
for (i = 0; i < nfft; ++i) {
|
||||
const double pi =
|
||||
3.141592653589793238462643383279502884197169399375105820974944;
|
||||
double phase = -2 * pi * i / nfft;
|
||||
if (st->inverse)
|
||||
phase *= -1;
|
||||
kf_cexp (st->twiddles + i, phase);
|
||||
}
|
||||
|
||||
kf_factor (nfft, st->factors);
|
||||
}
|
||||
return st;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
kiss_fft_f32_stride (kiss_fft_f32_cfg st, const kiss_fft_f32_cpx * fin,
|
||||
kiss_fft_f32_cpx * fout, int in_stride)
|
||||
{
|
||||
if (fin == fout) {
|
||||
//NOTE: this is not really an in-place FFT algorithm.
|
||||
//It just performs an out-of-place FFT into a temp buffer
|
||||
kiss_fft_f32_cpx *tmpbuf =
|
||||
(kiss_fft_f32_cpx *) KISS_FFT_F32_TMP_ALLOC (sizeof (kiss_fft_f32_cpx) *
|
||||
st->nfft);
|
||||
kf_work (tmpbuf, fin, 1, in_stride, st->factors, st);
|
||||
memcpy (fout, tmpbuf, sizeof (kiss_fft_f32_cpx) * st->nfft);
|
||||
KISS_FFT_F32_TMP_FREE (tmpbuf);
|
||||
} else {
|
||||
kf_work (fout, fin, 1, in_stride, st->factors, st);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
kiss_fft_f32 (kiss_fft_f32_cfg cfg, const kiss_fft_f32_cpx * fin,
|
||||
kiss_fft_f32_cpx * fout)
|
||||
{
|
||||
kiss_fft_f32_stride (cfg, fin, fout, 1);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
kiss_fft_f32_cleanup (void)
|
||||
{
|
||||
// nothing needed any more
|
||||
}
|
||||
|
||||
int
|
||||
kiss_fft_f32_next_fast_size (int n)
|
||||
{
|
||||
while (1) {
|
||||
int m = n;
|
||||
while ((m % 2) == 0)
|
||||
m /= 2;
|
||||
while ((m % 3) == 0)
|
||||
m /= 3;
|
||||
while ((m % 5) == 0)
|
||||
m /= 5;
|
||||
if (m <= 1)
|
||||
break; /* n is completely factorable by twos, threes, and fives */
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
|
@ -1,112 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
#ifndef KISS_FFT_F32_H
|
||||
#define KISS_FFT_F32_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
ATTENTION!
|
||||
If you would like a :
|
||||
-- a utility that will handle the caching of fft objects
|
||||
-- real-only (no imaginary time component ) FFT
|
||||
-- a multi-dimensional FFT
|
||||
-- a command-line utility to perform ffts
|
||||
-- a command-line utility to perform fast-convolution filtering
|
||||
|
||||
Then see kfc.h kiss_fftr_f32.h kiss_fft_f32nd.h fftutil.c kiss_fastfir.c
|
||||
in the tools/ directory.
|
||||
*/
|
||||
|
||||
#define KISS_FFT_F32_MALLOC malloc
|
||||
#define KISS_FFT_F32_FREE free
|
||||
#define kiss_fft_f32_scalar float
|
||||
|
||||
typedef struct {
|
||||
kiss_fft_f32_scalar r;
|
||||
kiss_fft_f32_scalar i;
|
||||
}kiss_fft_f32_cpx;
|
||||
|
||||
typedef struct kiss_fft_f32_state* kiss_fft_f32_cfg;
|
||||
|
||||
/*
|
||||
* kiss_fft_f32_alloc
|
||||
*
|
||||
* Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
|
||||
*
|
||||
* typical usage: kiss_fft_f32_cfg mycfg=kiss_fft_f32_alloc(1024,0,NULL,NULL);
|
||||
*
|
||||
* The return value from fft_alloc is a cfg buffer used internally
|
||||
* by the fft routine or NULL.
|
||||
*
|
||||
* If lenmem is NULL, then kiss_fft_f32_alloc will allocate a cfg buffer using malloc.
|
||||
* The returned value should be free()d when done to avoid memory leaks.
|
||||
*
|
||||
* The state can be placed in a user supplied buffer 'mem':
|
||||
* If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
|
||||
* then the function places the cfg in mem and the size used in *lenmem
|
||||
* and returns mem.
|
||||
*
|
||||
* If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
|
||||
* then the function returns NULL and places the minimum cfg
|
||||
* buffer size in *lenmem.
|
||||
* */
|
||||
|
||||
kiss_fft_f32_cfg kiss_fft_f32_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
|
||||
|
||||
/*
|
||||
* kiss_fft_f32(cfg,in_out_buf)
|
||||
*
|
||||
* Perform an FFT on a complex input buffer.
|
||||
* for a forward FFT,
|
||||
* fin should be f[0] , f[1] , ... ,f[nfft-1]
|
||||
* fout will be F[0] , F[1] , ... ,F[nfft-1]
|
||||
* Note that each element is complex and can be accessed like
|
||||
f[k].r and f[k].i
|
||||
* */
|
||||
void kiss_fft_f32(kiss_fft_f32_cfg cfg,const kiss_fft_f32_cpx *fin,kiss_fft_f32_cpx *fout);
|
||||
|
||||
/*
|
||||
A more generic version of the above function. It reads its input from every Nth sample.
|
||||
* */
|
||||
void kiss_fft_f32_stride(kiss_fft_f32_cfg cfg,const kiss_fft_f32_cpx *fin,kiss_fft_f32_cpx *fout,int fin_stride);
|
||||
|
||||
/* If kiss_fft_f32_alloc allocated a buffer, it is one contiguous
|
||||
buffer and can be simply free()d when no longer needed*/
|
||||
#define kiss_fft_f32_free KISS_FFT_F32_FREE
|
||||
|
||||
/*
|
||||
Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up
|
||||
your compiler output to call this before you exit.
|
||||
*/
|
||||
void kiss_fft_f32_cleanup(void);
|
||||
|
||||
|
||||
/*
|
||||
* Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
|
||||
*/
|
||||
int kiss_fft_f32_next_fast_size(int n);
|
||||
|
||||
/* for real ffts, we need an even size */
|
||||
#define kiss_fftr_f32_next_fast_size_real(n) \
|
||||
(kiss_fft_f32_next_fast_size( ((n)+1)>>1)<<1)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
#include "kiss_fftr_f32.h"
|
||||
#include "_kiss_fft_guts_f32.h"
|
||||
|
||||
struct kiss_fftr_f32_state
|
||||
{
|
||||
kiss_fft_f32_cfg substate;
|
||||
kiss_fft_f32_cpx *tmpbuf;
|
||||
kiss_fft_f32_cpx *super_twiddles;
|
||||
#ifdef USE_SIMD
|
||||
void *pad;
|
||||
#endif
|
||||
};
|
||||
|
||||
kiss_fftr_f32_cfg
|
||||
kiss_fftr_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||
{
|
||||
int i;
|
||||
kiss_fftr_f32_cfg st = NULL;
|
||||
size_t subsize = 0, memneeded;
|
||||
|
||||
nfft >>= 1;
|
||||
|
||||
kiss_fft_f32_alloc (nfft, inverse_fft, NULL, &subsize);
|
||||
memneeded =
|
||||
ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state)) +
|
||||
ALIGN_STRUCT (subsize) + sizeof (kiss_fft_f32_cpx) * (nfft * 3 / 2);
|
||||
|
||||
if (lenmem == NULL) {
|
||||
st = (kiss_fftr_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
|
||||
} else {
|
||||
if (*lenmem >= memneeded)
|
||||
st = (kiss_fftr_f32_cfg) mem;
|
||||
*lenmem = memneeded;
|
||||
}
|
||||
if (!st)
|
||||
return NULL;
|
||||
|
||||
st->substate = (kiss_fft_f32_cfg) (((char *) st) + ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state))); /*just beyond kiss_fftr_f32_state struct */
|
||||
st->tmpbuf =
|
||||
(kiss_fft_f32_cpx *) (((char *) st->substate) + ALIGN_STRUCT (subsize));
|
||||
st->super_twiddles = st->tmpbuf + nfft;
|
||||
kiss_fft_f32_alloc (nfft, inverse_fft, st->substate, &subsize);
|
||||
|
||||
for (i = 0; i < nfft / 2; ++i) {
|
||||
double phase =
|
||||
-3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
|
||||
if (inverse_fft)
|
||||
phase *= -1;
|
||||
kf_cexp (st->super_twiddles + i, phase);
|
||||
}
|
||||
return st;
|
||||
}
|
||||
|
||||
void
|
||||
kiss_fftr_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_scalar * timedata,
|
||||
kiss_fft_f32_cpx * freqdata)
|
||||
{
|
||||
/* input buffer timedata is stored row-wise */
|
||||
int k, ncfft;
|
||||
kiss_fft_f32_cpx fpnk, fpk, f1k, f2k, tw, tdc;
|
||||
|
||||
ncfft = st->substate->nfft;
|
||||
|
||||
/*perform the parallel fft of two real signals packed in real,imag */
|
||||
kiss_fft_f32 (st->substate, (const kiss_fft_f32_cpx *) timedata, st->tmpbuf);
|
||||
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
|
||||
* contains the sum of the even-numbered elements of the input time sequence
|
||||
* The imag part is the sum of the odd-numbered elements
|
||||
*
|
||||
* The sum of tdc.r and tdc.i is the sum of the input time sequence.
|
||||
* yielding DC of input time sequence
|
||||
* The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
|
||||
* yielding Nyquist bin of input time sequence
|
||||
*/
|
||||
|
||||
tdc.r = st->tmpbuf[0].r;
|
||||
tdc.i = st->tmpbuf[0].i;
|
||||
C_FIXDIV (tdc, 2);
|
||||
CHECK_OVERFLOW_OP (tdc.r, +, tdc.i);
|
||||
CHECK_OVERFLOW_OP (tdc.r, -, tdc.i);
|
||||
freqdata[0].r = tdc.r + tdc.i;
|
||||
freqdata[ncfft].r = tdc.r - tdc.i;
|
||||
#ifdef USE_SIMD
|
||||
freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps (0);
|
||||
#else
|
||||
freqdata[ncfft].i = freqdata[0].i = 0;
|
||||
#endif
|
||||
|
||||
for (k = 1; k <= ncfft / 2; ++k) {
|
||||
fpk = st->tmpbuf[k];
|
||||
fpnk.r = st->tmpbuf[ncfft - k].r;
|
||||
fpnk.i = -st->tmpbuf[ncfft - k].i;
|
||||
C_FIXDIV (fpk, 2);
|
||||
C_FIXDIV (fpnk, 2);
|
||||
|
||||
C_ADD (f1k, fpk, fpnk);
|
||||
C_SUB (f2k, fpk, fpnk);
|
||||
C_MUL (tw, f2k, st->super_twiddles[k - 1]);
|
||||
|
||||
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
||||
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
||||
freqdata[ncfft - k].r = HALF_OF (f1k.r - tw.r);
|
||||
freqdata[ncfft - k].i = HALF_OF (tw.i - f1k.i);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
kiss_fftri_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_cpx * freqdata,
|
||||
kiss_fft_f32_scalar * timedata)
|
||||
{
|
||||
/* input buffer timedata is stored row-wise */
|
||||
int k, ncfft;
|
||||
|
||||
ncfft = st->substate->nfft;
|
||||
|
||||
st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
|
||||
st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
|
||||
C_FIXDIV (st->tmpbuf[0], 2);
|
||||
|
||||
for (k = 1; k <= ncfft / 2; ++k) {
|
||||
kiss_fft_f32_cpx fk, fnkc, fek, fok, tmp;
|
||||
fk = freqdata[k];
|
||||
fnkc.r = freqdata[ncfft - k].r;
|
||||
fnkc.i = -freqdata[ncfft - k].i;
|
||||
C_FIXDIV (fk, 2);
|
||||
C_FIXDIV (fnkc, 2);
|
||||
|
||||
C_ADD (fek, fk, fnkc);
|
||||
C_SUB (tmp, fk, fnkc);
|
||||
C_MUL (fok, tmp, st->super_twiddles[k - 1]);
|
||||
C_ADD (st->tmpbuf[k], fek, fok);
|
||||
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
||||
#ifdef USE_SIMD
|
||||
st->tmpbuf[ncfft - k].i *= _mm_set1_ps (-1.0);
|
||||
#else
|
||||
st->tmpbuf[ncfft - k].i *= -1;
|
||||
#endif
|
||||
}
|
||||
kiss_fft_f32 (st->substate, st->tmpbuf, (kiss_fft_f32_cpx *) timedata);
|
||||
}
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
#ifndef KISS_FTR_H
|
||||
#define KISS_FTR_H
|
||||
|
||||
#include "kiss_fft_f32.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
|
||||
Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
|
||||
|
||||
|
||||
|
||||
*/
|
||||
|
||||
typedef struct kiss_fftr_f32_state *kiss_fftr_f32_cfg;
|
||||
|
||||
|
||||
kiss_fftr_f32_cfg kiss_fftr_f32_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
|
||||
/*
|
||||
nfft must be even
|
||||
|
||||
If you don't care to allocate space, use mem = lenmem = NULL
|
||||
*/
|
||||
|
||||
|
||||
void kiss_fftr_f32(kiss_fftr_f32_cfg cfg,const kiss_fft_f32_scalar *timedata,kiss_fft_f32_cpx *freqdata);
|
||||
/*
|
||||
input timedata has nfft scalar points
|
||||
output freqdata has nfft/2+1 complex points
|
||||
*/
|
||||
|
||||
void kiss_fftri_f32(kiss_fftr_f32_cfg cfg,const kiss_fft_f32_cpx *freqdata,kiss_fft_f32_scalar *timedata);
|
||||
/*
|
||||
input freqdata has nfft/2+1 complex points
|
||||
output timedata has nfft scalar points
|
||||
*/
|
||||
|
||||
#define kiss_fftr_f32_free KISS_FFT_F32_FREE
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
2244
src/modules/module-filter-chain/pffft.c
Normal file
2244
src/modules/module-filter-chain/pffft.c
Normal file
File diff suppressed because it is too large
Load diff
177
src/modules/module-filter-chain/pffft.h
Normal file
177
src/modules/module-filter-chain/pffft.h
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
|
||||
|
||||
Based on original fortran 77 code from FFTPACKv4 from NETLIB,
|
||||
authored by Dr Paul Swarztrauber of NCAR, in 1985.
|
||||
|
||||
As confirmed by the NCAR fftpack software curators, the following
|
||||
FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
|
||||
released under the same terms.
|
||||
|
||||
FFTPACK license:
|
||||
|
||||
http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
|
||||
|
||||
Copyright (c) 2004 the University Corporation for Atmospheric
|
||||
Research ("UCAR"). All rights reserved. Developed by NCAR's
|
||||
Computational and Information Systems Laboratory, UCAR,
|
||||
www.cisl.ucar.edu.
|
||||
|
||||
Redistribution and use of the Software in source and binary forms,
|
||||
with or without modification, is permitted provided that the
|
||||
following conditions are met:
|
||||
|
||||
- Neither the names of NCAR's Computational and Information Systems
|
||||
Laboratory, the University Corporation for Atmospheric Research,
|
||||
nor the names of its sponsors or contributors may be used to
|
||||
endorse or promote products derived from this Software without
|
||||
specific prior written permission.
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notices, this list of conditions, and the disclaimer below.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions, and the disclaimer below in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
PFFFT : a Pretty Fast FFT.
|
||||
|
||||
This is basically an adaptation of the single precision fftpack
|
||||
(v4) as found on netlib taking advantage of SIMD instruction found
|
||||
on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
|
||||
|
||||
For architectures where no SIMD instruction is available, the code
|
||||
falls back to a scalar version.
|
||||
|
||||
Restrictions:
|
||||
|
||||
- 1D transforms only, with 32-bit single precision.
|
||||
|
||||
- supports only transforms for inputs of length N of the form
|
||||
N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
|
||||
144, 160, etc are all acceptable lengths). Performance is best for
|
||||
128<=N<=8192.
|
||||
|
||||
- all (float*) pointers in the functions below are expected to
|
||||
have an "simd-compatible" alignment, that is 16 bytes on x86 and
|
||||
powerpc CPUs.
|
||||
|
||||
You can allocate such buffers with the functions
|
||||
pffft_aligned_malloc / pffft_aligned_free (or with stuff like
|
||||
posix_memalign..)
|
||||
|
||||
*/
|
||||
|
||||
#ifndef PFFFT_H
|
||||
#define PFFFT_H
|
||||
|
||||
#include <stddef.h> // for size_t
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* opaque struct holding internal stuff (precomputed twiddle factors)
|
||||
this struct can be shared by many threads as it contains only
|
||||
read-only data.
|
||||
*/
|
||||
typedef struct PFFFT_Setup PFFFT_Setup;
|
||||
|
||||
/* direction of the transform */
|
||||
typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
|
||||
|
||||
/* type of transform */
|
||||
typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
|
||||
|
||||
/*
|
||||
prepare for performing transforms of size N -- the returned
|
||||
PFFFT_Setup structure is read-only so it can safely be shared by
|
||||
multiple concurrent threads.
|
||||
*/
|
||||
PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
|
||||
void pffft_destroy_setup(PFFFT_Setup *);
|
||||
/*
|
||||
Perform a Fourier transform , The z-domain data is stored in the
|
||||
most efficient order for transforming it back, or using it for
|
||||
convolution. If you need to have its content sorted in the
|
||||
"usual" way, that is as an array of interleaved complex numbers,
|
||||
either use pffft_transform_ordered , or call pffft_zreorder after
|
||||
the forward fft, and before the backward fft.
|
||||
|
||||
Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
|
||||
Typically you will want to scale the backward transform by 1/N.
|
||||
|
||||
The 'work' pointer should point to an area of N (2*N for complex
|
||||
fft) floats, properly aligned. If 'work' is NULL, then stack will
|
||||
be used instead (this is probably the best strategy for small
|
||||
FFTs, say for N < 16384).
|
||||
|
||||
input and output may alias.
|
||||
*/
|
||||
void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
|
||||
|
||||
/*
|
||||
Similar to pffft_transform, but makes sure that the output is
|
||||
ordered as expected (interleaved complex numbers). This is
|
||||
similar to calling pffft_transform and then pffft_zreorder.
|
||||
|
||||
input and output may alias.
|
||||
*/
|
||||
void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
|
||||
|
||||
/*
|
||||
call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
|
||||
PFFFT_FORWARD) if you want to have the frequency components in
|
||||
the correct "canonical" order, as interleaved complex numbers.
|
||||
|
||||
(for real transforms, both 0-frequency and half frequency
|
||||
components, which are real, are assembled in the first entry as
|
||||
F(0)+i*F(n/2+1). Note that the original fftpack did place
|
||||
F(n/2+1) at the end of the arrays).
|
||||
|
||||
input and output should not alias.
|
||||
*/
|
||||
void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
|
||||
|
||||
/*
|
||||
Perform a multiplication of the frequency components of dft_a and
|
||||
dft_b and accumulate them into dft_ab. The arrays should have
|
||||
been obtained with pffft_transform(.., PFFFT_FORWARD) and should
|
||||
*not* have been reordered with pffft_zreorder (otherwise just
|
||||
perform the operation yourself as the dft coefs are stored as
|
||||
interleaved complex numbers).
|
||||
|
||||
the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
|
||||
|
||||
The dft_a, dft_b and dft_ab pointers may alias.
|
||||
*/
|
||||
void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
|
||||
|
||||
/*
|
||||
the float buffers must have the correct alignment (16-byte boundary
|
||||
on intel and powerpc). This function may be used to obtain such
|
||||
correctly aligned buffers.
|
||||
*/
|
||||
void *pffft_aligned_malloc(size_t nb_bytes);
|
||||
void pffft_aligned_free(void *);
|
||||
|
||||
/* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */
|
||||
int pffft_simd_size();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // PFFFT_H
|
||||
Loading…
Add table
Add a link
Reference in a new issue