filter-chain: add convolver

2026-05-28 21:37:33 -04:00 · 2021-08-10 12:26:31 +02:00 · 2021-08-10 12:26:31 +02:00 · 67eb485811
commit 67eb485811
parent b4976728c3
10 changed files with 1330 additions and 2 deletions
--- a/src/daemon/filter-chain/sink-convolver.conf
+++ b/src/daemon/filter-chain/sink-convolver.conf
@ -0,0 +1,54 @@
+# Convolver sink
+#
+# start with pipewire -c filter-chain/sink-convolver.conf
+#
+context.properties = {
+    log.level        = 0
+}
+
+context.spa-libs = {
+    audio.convert.* = audioconvert/libspa-audioconvert
+    support.*       = support/libspa-support
+}
+
+context.modules = [
+    {   name = libpipewire-module-rtkit
+        args = {
+            #nice.level   = -11
+            #rt.prio      = 88
+            #rt.time.soft = 2000000
+            #rt.time.hard = 2000000
+        }
+        flags = [ ifexists nofail ]
+    }
+    {   name = libpipewire-module-protocol-native }
+    {   name = libpipewire-module-client-node }
+    {   name = libpipewire-module-adapter }
+
+    {   name = libpipewire-module-filter-chain
+        args = {
+            node.name = "effect_output.convolver"
+            node.description = "Convolver Sink"
+            media.name = "Convolver Sink"
+            filter.graph = {
+                nodes = [
+                    {
+                        type = builtin
+                        name = convolver
+                        label = convolver
+                    }
+                ]
+            }
+            capture.props = {
+                media.class = Audio/Sink
+                audio.channels = 2
+                audio.position = [ FL FR ]
+            }
+            playback.props = {
+                node.passive = true
+                audio.channels = 2
+                audio.position = [ FL FR ]
+            }
+        }
+    }
+]
--- a/src/modules/meson.build
+++ b/src/modules/meson.build
@ -47,12 +47,15 @@ pipewire_module_loopback = shared_library('pipewire-module-loopback',

 pipewire_module_filter_chain = shared_library('pipewire-module-filter-chain',
  [ 'module-filter-chain.c',
-    'module-filter-chain/biquad.c' ],
+    'module-filter-chain/biquad.c',
+    'module-filter-chain/kiss_fft_f32.c',
+    'module-filter-chain/kiss_fftr_f32.c',
+    'module-filter-chain/convolver.c' ],
  include_directories : [configinc, spa_inc],
  install : true,
  install_dir : modules_install_dir,
  install_rpath: modules_install_dir,
-  dependencies : [mathlib, dl_lib, pipewire_dep],
+  dependencies : [mathlib, dl_lib, pipewire_dep, sndfile_dep],
 )

 pipewire_module_echo_cancel_sources = [
--- a/src/modules/module-filter-chain/_kiss_fft_guts_f32.h
+++ b/src/modules/module-filter-chain/_kiss_fft_guts_f32.h
@ -0,0 +1,173 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+/* kiss_fft_f32.h
+   defines kiss_fft_f32_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_f32_scalar r; kiss_fft_f32_scalar i; }kiss_fft_f32_cpx; */
+#include "kiss_fft_f32.h"
+#include <limits.h>
+
+/* The 2*sizeof(size_t) alignment here is borrowed from
+ * GNU libc, so it should be good most everywhere.
+ * It is more conservative than is needed on some 64-bit
+ * platforms, but ia64 does require a 16-byte alignment.
+ * The SIMD extensions for x86 and ppc32 would want a
+ * larger alignment than this, but we don't need to
+ * do better than malloc.
+ *
+ * Borrowed from GLib's gobject/gtype.c
+ */
+#define STRUCT_ALIGNMENT (2 * sizeof (size_t))
+#define ALIGN_STRUCT(offset) \
+      ((offset + (STRUCT_ALIGNMENT - 1)) & -STRUCT_ALIGNMENT)
+
+#define MAXFACTORS 32
+/* e.g. an fft of length 128 has 4 factors 
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+struct kiss_fft_f32_state{
+    int nfft;
+    int inverse;
+    int factors[2*MAXFACTORS];
+    kiss_fft_f32_cpx twiddles[1];
+};
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef FIXED_POINT
+#include <stdint.h>
+#if (FIXED_POINT==32)
+# define FRACBITS 31
+# define SAMPPROD int64_t
+#define SAMP_MAX INT32_MAX
+#define SAMP_MIN INT32_MIN
+#else
+# define FRACBITS 15
+# define SAMPPROD int32_t
+#define SAMP_MAX INT16_MAX
+#define SAMP_MIN INT16_MIN
+#endif
+
+#if defined(CHECK_OVERFLOW)
+#  define CHECK_OVERFLOW_OP(a,op,b)  \
+	if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
+		g_critical("overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) );  }
+#endif
+
+
+#   define smul(a,b) ( (SAMPPROD)(a)*(b) )
+#   define sround( x )  (kiss_fft_f32_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
+
+#   define S_MUL(a,b) sround( smul(a,b) )
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+          (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
+
+#   define DIVSCALAR(x,k) \
+	(x) = sround( smul(  x, SAMP_MAX/k ) )
+
+#   define C_FIXDIV(c,div) \
+	do {    DIVSCALAR( (c).r , div);  \
+		DIVSCALAR( (c).i  , div); }while (0)
+
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
+        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
+
+#else  /* not FIXED_POINT*/
+
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#define  C_ADD( res, a,b)\
+    do { \
+	    CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+	    CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+	    (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do { \
+	    CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+	    CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+	    (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do { \
+	    CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+	    CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+	    (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {\
+	    CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+	    CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+	    (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+
+
+#ifdef FIXED_POINT
+#  define KISS_FFT_F32_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
+#  define KISS_FFT_F32_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_F32_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_F32_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
+#else
+#  define KISS_FFT_F32_COS(phase) (kiss_fft_f32_scalar) cos(phase)
+#  define KISS_FFT_F32_SIN(phase) (kiss_fft_f32_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*.5)
+#endif
+
+#define  kf_cexp(x,phase) \
+	do{ \
+		(x)->r = KISS_FFT_F32_COS(phase);\
+		(x)->i = KISS_FFT_F32_SIN(phase);\
+	}while(0)
+
+
+/* a debugging function */
+#define pcpx(c)\
+    fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
+
+
+#ifdef KISS_FFT_F32_USE_ALLOCA
+// define this to allow use of alloca instead of malloc for temporary buffers
+// Temporary buffers are used in two case: 
+// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
+// 2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do an in-place transform.
+#include <alloca.h>
+#define  KISS_FFT_F32_TMP_ALLOC(nbytes) alloca(nbytes)
+#define  KISS_FFT_F32_TMP_FREE(ptr) 
+#else
+#define  KISS_FFT_F32_TMP_ALLOC(nbytes) KISS_FFT_F32_MALLOC(nbytes)
+#define  KISS_FFT_F32_TMP_FREE(ptr) KISS_FFT_F32_FREE(ptr)
+#endif
--- a/src/modules/module-filter-chain/builtin.h
+++ b/src/modules/module-filter-chain/builtin.h
@ -22,7 +22,10 @@
 * DEALINGS IN THE SOFTWARE.
 */

+#include <sndfile.h>
+
 #include "biquad.h"
+#include "convolver.h"

 struct builtin {
 	unsigned long rate;
@ -392,6 +395,99 @@ static const LADSPA_Descriptor bq_allpass_desc = {
 	.cleanup = builtin_cleanup,
 };

+/** convolve */
+struct convolver_impl {
+	unsigned long rate;
+	LADSPA_Data *port[64];
+
+	struct convolver *conv;
+};
+
+static LADSPA_Handle convolver_instantiate(const struct _LADSPA_Descriptor * Descriptor,
+		unsigned long SampleRate)
+{
+	struct convolver_impl *impl;
+	SF_INFO info;
+	SNDFILE *f;
+	float *samples;
+	const char *filename;
+
+	filename = "src/modules/module-filter-chain/convolve.wav";
+	filename = "src/modules/module-filter-chain/street2-L.wav";
+	spa_zero(info);
+	f = sf_open(filename, SFM_READ, &info) ;
+	if (f == NULL) {
+		pw_log_error("can't open %s", filename);
+		return NULL;
+	}
+
+	impl = calloc(1, sizeof(*impl));
+	if (impl == NULL)
+		return NULL;
+
+	impl->rate = SampleRate;
+
+	samples = malloc(info.frames * sizeof(float) * info.channels);
+        if (samples == NULL)
+		return NULL;
+
+	sf_read_float(f, samples, info.frames);
+
+	impl->conv = convolver_new(256, samples, info.frames);
+
+	free(samples);
+	sf_close(f);
+
+	return impl;
+}
+
+static void convolver_connect_port(LADSPA_Handle Instance, unsigned long Port,
+                        LADSPA_Data * DataLocation)
+{
+	struct convolver_impl *impl = Instance;
+	impl->port[Port] = DataLocation;
+}
+
+static void convolver_cleanup(LADSPA_Handle Instance)
+{
+	struct convolver_impl *impl = Instance;
+	free(impl);
+}
+
+static const LADSPA_PortDescriptor convolve_port_desc[] = {
+	LADSPA_PORT_OUTPUT | LADSPA_PORT_AUDIO,
+	LADSPA_PORT_INPUT | LADSPA_PORT_AUDIO,
+};
+
+static const char * const convolve_port_names[] = {
+	"Out", "In"
+};
+
+static const LADSPA_PortRangeHint convolve_range_hints[] = {
+	{ 0, }, { 0, },
+};
+
+static void convolve_run(LADSPA_Handle Instance, unsigned long SampleCount)
+{
+	struct convolver_impl *impl = Instance;
+	convolver_run(impl->conv, impl->port[1], impl->port[0], SampleCount);
+}
+
+static const LADSPA_Descriptor convolve_desc = {
+	.Label = "convolver",
+	.Name = "Convolver",
+	.Maker = "PipeWire",
+	.Copyright = "MIT",
+	.PortCount = 2,
+	.PortDescriptors = convolve_port_desc,
+	.PortNames = convolve_port_names,
+	.PortRangeHints = convolve_range_hints,
+	.instantiate = convolver_instantiate,
+	.connect_port = convolver_connect_port,
+	.run = convolve_run,
+	.cleanup = convolver_cleanup,
+};
+
 static const LADSPA_Descriptor * builtin_ladspa_descriptor(unsigned long Index)
 {
 	switch(Index) {
@ -415,6 +511,8 @@ static const LADSPA_Descriptor * builtin_ladspa_descriptor(unsigned long Index)
 		return &bq_allpass_desc;
 	case 9:
 		return &copy_desc;
+	case 10:
+		return &convolve_desc;
 	}
 	return NULL;
 }
--- a/src/modules/module-filter-chain/convolver.c
+++ b/src/modules/module-filter-chain/convolver.c
@ -0,0 +1,213 @@
+// ==================================================================================
+// Copyright (c) 2017 HiFi-LoFi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is furnished
+// to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+// ==================================================================================
+
+#include "convolver.h"
+
+#include <spa/utils/defs.h>
+
+#include "kiss_fft_f32.h"
+#include "kiss_fftr_f32.h"
+
+struct convolver {
+	int blockSize;
+	int segSize;
+	int segCount;
+	int fftComplexSize;
+
+	kiss_fft_f32_cpx **segments;
+	kiss_fft_f32_cpx **segmentsIr;
+
+	float *fft_buffer;
+
+	void *fft;
+	void *ifft;
+
+	kiss_fft_f32_cpx *pre_mult;
+	kiss_fft_f32_cpx *conv;
+	float *overlap;
+
+	float *inputBuffer;
+	int inputBufferFill;
+
+	int current;
+};
+
+static int next_power_of_two(int val)
+{
+	int r = 1;
+	while (r < val)
+		r *= 2;
+	return r;
+}
+
+struct convolver *convolver_new(int block, const float *ir, int irlen)
+{
+	struct convolver *conv;
+	int i, j;
+
+	if (block == 0)
+		return NULL;
+
+	while (irlen > 0 && fabs(ir[irlen-1]) < 0.000001f)
+		irlen--;
+
+	conv = calloc(1, sizeof(*conv));
+	if (conv == NULL)
+		return NULL;
+
+	if (irlen == 0)
+		return conv;
+
+	conv->blockSize = next_power_of_two(block);
+	conv->segSize = 2 * conv->blockSize;
+	conv->segCount = (irlen + conv->blockSize-1) / conv->blockSize;
+	conv->fftComplexSize = (conv->segSize / 2) + 1;
+
+	fprintf(stderr, "blockSize:%d segSize:%d segCount:%d fftComplexSize:%d\n",
+			conv->blockSize, conv->segSize, conv->segCount,
+			conv->fftComplexSize);
+
+        conv->fft = kiss_fftr_f32_alloc(conv->segSize, 0, NULL, NULL);
+        if (conv->fft == NULL)
+                return NULL;
+        conv->ifft = kiss_fftr_f32_alloc(conv->segSize, 1, NULL, NULL);
+        if (conv->ifft == NULL)
+                return NULL;
+
+	conv->fft_buffer = calloc(sizeof(float), conv->segSize);
+        if (conv->fft_buffer == NULL)
+                return NULL;
+
+	conv->segments = calloc(sizeof(kiss_fft_f32_cpx*), conv->segCount);
+	conv->segmentsIr = calloc(sizeof(kiss_fft_f32_cpx*), conv->segCount);
+
+	for (i = 0; i < conv->segCount; i++) {
+		int left = irlen - (i * conv->blockSize);
+		int copy = SPA_MIN(conv->blockSize, left);
+
+		conv->segments[i] = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
+		conv->segmentsIr[i] = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
+
+		memcpy(conv->fft_buffer, &ir[i * conv->blockSize], copy * sizeof(float));
+		if (copy < conv->segSize)
+			memset(conv->fft_buffer + copy, 0, (conv->segSize - copy) * sizeof(float));
+
+	        kiss_fftr_f32(conv->fft, conv->fft_buffer, conv->segmentsIr[i]);
+
+//		for (j = 0; j < conv->fftComplexSize; j++) {
+//			conv->segmentsIr[i][j].r /= conv->segSize * 2;
+//			conv->segmentsIr[i][j].i /= conv->segSize * 2;
+//		}
+
+	}
+	conv->pre_mult = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
+	conv->conv = calloc(sizeof(kiss_fft_f32_cpx), conv->fftComplexSize);
+	conv->overlap = calloc(sizeof(float), conv->blockSize);
+	conv->inputBuffer = calloc(sizeof(float), conv->blockSize);
+	conv->inputBufferFill = 0;
+	conv->current = 0;
+
+	return conv;
+}
+
+void convolver_free(struct convolver *conv)
+{
+	free(conv);
+}
+
+void Sum(float* result, const float* a, const float* b, int len)
+{
+	int i;
+	for (i = 0; i < len; i++)
+		result[i] = a[i] + b[i];
+}
+
+void ComplexMultiplyAccumulate(kiss_fft_f32_cpx *r, const kiss_fft_f32_cpx *a, const kiss_fft_f32_cpx *b, int len)
+{
+	int i;
+	for (i = 0; i < len; i++) {
+		r[i].r += a[i].r * b[i].r - a[i].i * b[i].i;
+		r[i].i += a[i].r * b[i].i + a[i].i * b[i].r;
+	}
+}
+
+int convolver_run(struct convolver *conv, const float *input, float *output, int len)
+{
+	int i, processed = 0;
+
+	if (conv->segCount == 0) {
+		memset(output, 0, len * sizeof(float));
+		return len;
+	}
+
+	while (processed < len) {
+		const int processing = SPA_MIN(len - processed, conv->blockSize - conv->inputBufferFill);
+		const int inputBufferPos = conv->inputBufferFill;
+
+		fprintf(stderr, "len:%d processing:%d fill:%d processed:%d\n",
+				len, processing, inputBufferPos, processed);
+
+		memcpy(conv->inputBuffer + inputBufferPos, input + processed, processing * sizeof(float));
+
+		memcpy(conv->fft_buffer, conv->inputBuffer, conv->blockSize * sizeof(float));
+		memset(conv->fft_buffer + conv->blockSize, 0, (conv->segSize - conv->blockSize) * sizeof(float));
+
+		kiss_fftr_f32(conv->fft, conv->fft_buffer, conv->segments[conv->current]);
+
+		if (conv->inputBufferFill == 0) {
+			memset(conv->pre_mult, 0, sizeof(kiss_fft_f32_cpx) * conv->fftComplexSize);
+
+			for (i = 1; i < conv->segCount; i++) {
+				const int indexIr = i;
+				const int indexAudio = (conv->current + i) % conv->segCount;
+
+				ComplexMultiplyAccumulate(conv->pre_mult,
+						conv->segmentsIr[indexIr],
+						conv->segments[indexAudio],
+						conv->fftComplexSize);
+			}
+		}
+		memcpy(conv->conv, conv->pre_mult, sizeof(kiss_fft_f32_cpx) * conv->fftComplexSize);
+
+		ComplexMultiplyAccumulate(conv->conv, conv->segments[conv->current], conv->segmentsIr[0],
+				conv->fftComplexSize);
+
+		kiss_fftri_f32(conv->ifft, conv->conv, conv->fft_buffer);
+
+		for (i = 0; i < conv->segSize; i++)
+			conv->fft_buffer[i] /= conv->segSize * 4;
+
+		Sum(output + processed, conv->fft_buffer + inputBufferPos, conv->overlap + inputBufferPos, processing);
+
+		conv->inputBufferFill += processing;
+		if (conv->inputBufferFill == conv->blockSize) {
+			memset(conv->inputBuffer, 0, sizeof(float) * conv->blockSize);
+			conv->inputBufferFill = 0;
+
+			memcpy(conv->overlap, conv->fft_buffer + conv->blockSize, conv->blockSize * sizeof(float));
+
+			conv->current = (conv->current > 0) ? (conv->current - 1) : (conv->segCount - 1);
+		}
+
+		processed += processing;
+	}
+	return len;
+}
--- a/src/modules/module-filter-chain/convolver.h
+++ b/src/modules/module-filter-chain/convolver.h
@ -0,0 +1,31 @@
+/* PipeWire
+ *
+ * Copyright © 2021 Wim Taymans
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+struct convolver *convolver_new(int block, const float *ir, int irlen);
+void convolver_free(struct convolver *conv);
+
+int convolver_run(struct convolver *conv, const float *input, float *output, int length);
--- a/src/modules/module-filter-chain/kiss_fft_f32.c
+++ b/src/modules/module-filter-chain/kiss_fft_f32.c
@ -0,0 +1,442 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+
+#include "_kiss_fft_guts_f32.h"
+/* The guts header contains all the multiplication and addition macros that are defined for
+ fixed or floating point complex numbers.  It also delares the kf_ internal functions.
+ */
+
+static void
+kf_bfly2 (kiss_fft_f32_cpx * Fout,
+    const size_t fstride, const kiss_fft_f32_cfg st, int m)
+{
+  kiss_fft_f32_cpx *Fout2;
+  kiss_fft_f32_cpx *tw1 = st->twiddles;
+  kiss_fft_f32_cpx t;
+  Fout2 = Fout + m;
+  do {
+    C_FIXDIV (*Fout, 2);
+    C_FIXDIV (*Fout2, 2);
+
+    C_MUL (t, *Fout2, *tw1);
+    tw1 += fstride;
+    C_SUB (*Fout2, *Fout, t);
+    C_ADDTO (*Fout, t);
+    ++Fout2;
+    ++Fout;
+  } while (--m);
+}
+
+static void
+kf_bfly4 (kiss_fft_f32_cpx * Fout,
+    const size_t fstride, const kiss_fft_f32_cfg st, const size_t m)
+{
+  kiss_fft_f32_cpx *tw1, *tw2, *tw3;
+  kiss_fft_f32_cpx scratch[6];
+  size_t k = m;
+  const size_t m2 = 2 * m;
+  const size_t m3 = 3 * m;
+
+
+  tw3 = tw2 = tw1 = st->twiddles;
+
+  do {
+    C_FIXDIV (*Fout, 4);
+    C_FIXDIV (Fout[m], 4);
+    C_FIXDIV (Fout[m2], 4);
+    C_FIXDIV (Fout[m3], 4);
+
+    C_MUL (scratch[0], Fout[m], *tw1);
+    C_MUL (scratch[1], Fout[m2], *tw2);
+    C_MUL (scratch[2], Fout[m3], *tw3);
+
+    C_SUB (scratch[5], *Fout, scratch[1]);
+    C_ADDTO (*Fout, scratch[1]);
+    C_ADD (scratch[3], scratch[0], scratch[2]);
+    C_SUB (scratch[4], scratch[0], scratch[2]);
+    C_SUB (Fout[m2], *Fout, scratch[3]);
+    tw1 += fstride;
+    tw2 += fstride * 2;
+    tw3 += fstride * 3;
+    C_ADDTO (*Fout, scratch[3]);
+
+    if (st->inverse) {
+      Fout[m].r = scratch[5].r - scratch[4].i;
+      Fout[m].i = scratch[5].i + scratch[4].r;
+      Fout[m3].r = scratch[5].r + scratch[4].i;
+      Fout[m3].i = scratch[5].i - scratch[4].r;
+    } else {
+      Fout[m].r = scratch[5].r + scratch[4].i;
+      Fout[m].i = scratch[5].i - scratch[4].r;
+      Fout[m3].r = scratch[5].r - scratch[4].i;
+      Fout[m3].i = scratch[5].i + scratch[4].r;
+    }
+    ++Fout;
+  } while (--k);
+}
+
+static void
+kf_bfly3 (kiss_fft_f32_cpx * Fout,
+    const size_t fstride, const kiss_fft_f32_cfg st, size_t m)
+{
+  size_t k = m;
+  const size_t m2 = 2 * m;
+  kiss_fft_f32_cpx *tw1, *tw2;
+  kiss_fft_f32_cpx scratch[5];
+  kiss_fft_f32_cpx epi3;
+  epi3 = st->twiddles[fstride * m];
+
+  tw1 = tw2 = st->twiddles;
+
+  do {
+    C_FIXDIV (*Fout, 3);
+    C_FIXDIV (Fout[m], 3);
+    C_FIXDIV (Fout[m2], 3);
+
+    C_MUL (scratch[1], Fout[m], *tw1);
+    C_MUL (scratch[2], Fout[m2], *tw2);
+
+    C_ADD (scratch[3], scratch[1], scratch[2]);
+    C_SUB (scratch[0], scratch[1], scratch[2]);
+    tw1 += fstride;
+    tw2 += fstride * 2;
+
+    Fout[m].r = Fout->r - HALF_OF (scratch[3].r);
+    Fout[m].i = Fout->i - HALF_OF (scratch[3].i);
+
+    C_MULBYSCALAR (scratch[0], epi3.i);
+
+    C_ADDTO (*Fout, scratch[3]);
+
+    Fout[m2].r = Fout[m].r + scratch[0].i;
+    Fout[m2].i = Fout[m].i - scratch[0].r;
+
+    Fout[m].r -= scratch[0].i;
+    Fout[m].i += scratch[0].r;
+
+    ++Fout;
+  } while (--k);
+}
+
+static void
+kf_bfly5 (kiss_fft_f32_cpx * Fout,
+    const size_t fstride, const kiss_fft_f32_cfg st, int m)
+{
+  kiss_fft_f32_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
+  int u;
+  kiss_fft_f32_cpx scratch[13];
+  kiss_fft_f32_cpx *twiddles = st->twiddles;
+  kiss_fft_f32_cpx *tw;
+  kiss_fft_f32_cpx ya, yb;
+  ya = twiddles[fstride * m];
+  yb = twiddles[fstride * 2 * m];
+
+  Fout0 = Fout;
+  Fout1 = Fout0 + m;
+  Fout2 = Fout0 + 2 * m;
+  Fout3 = Fout0 + 3 * m;
+  Fout4 = Fout0 + 4 * m;
+
+  tw = st->twiddles;
+  for (u = 0; u < m; ++u) {
+    C_FIXDIV (*Fout0, 5);
+    C_FIXDIV (*Fout1, 5);
+    C_FIXDIV (*Fout2, 5);
+    C_FIXDIV (*Fout3, 5);
+    C_FIXDIV (*Fout4, 5);
+    scratch[0] = *Fout0;
+
+    C_MUL (scratch[1], *Fout1, tw[u * fstride]);
+    C_MUL (scratch[2], *Fout2, tw[2 * u * fstride]);
+    C_MUL (scratch[3], *Fout3, tw[3 * u * fstride]);
+    C_MUL (scratch[4], *Fout4, tw[4 * u * fstride]);
+
+    C_ADD (scratch[7], scratch[1], scratch[4]);
+    C_SUB (scratch[10], scratch[1], scratch[4]);
+    C_ADD (scratch[8], scratch[2], scratch[3]);
+    C_SUB (scratch[9], scratch[2], scratch[3]);
+
+    Fout0->r += scratch[7].r + scratch[8].r;
+    Fout0->i += scratch[7].i + scratch[8].i;
+
+    scratch[5].r =
+        scratch[0].r + S_MUL (scratch[7].r, ya.r) + S_MUL (scratch[8].r, yb.r);
+    scratch[5].i =
+        scratch[0].i + S_MUL (scratch[7].i, ya.r) + S_MUL (scratch[8].i, yb.r);
+
+    scratch[6].r = S_MUL (scratch[10].i, ya.i) + S_MUL (scratch[9].i, yb.i);
+    scratch[6].i = -S_MUL (scratch[10].r, ya.i) - S_MUL (scratch[9].r, yb.i);
+
+    C_SUB (*Fout1, scratch[5], scratch[6]);
+    C_ADD (*Fout4, scratch[5], scratch[6]);
+
+    scratch[11].r =
+        scratch[0].r + S_MUL (scratch[7].r, yb.r) + S_MUL (scratch[8].r, ya.r);
+    scratch[11].i =
+        scratch[0].i + S_MUL (scratch[7].i, yb.r) + S_MUL (scratch[8].i, ya.r);
+    scratch[12].r = -S_MUL (scratch[10].i, yb.i) + S_MUL (scratch[9].i, ya.i);
+    scratch[12].i = S_MUL (scratch[10].r, yb.i) - S_MUL (scratch[9].r, ya.i);
+
+    C_ADD (*Fout2, scratch[11], scratch[12]);
+    C_SUB (*Fout3, scratch[11], scratch[12]);
+
+    ++Fout0;
+    ++Fout1;
+    ++Fout2;
+    ++Fout3;
+    ++Fout4;
+  }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+static void
+kf_bfly_generic (kiss_fft_f32_cpx * Fout,
+    const size_t fstride, const kiss_fft_f32_cfg st, int m, int p)
+{
+  int u, k, q1, q;
+  kiss_fft_f32_cpx *twiddles = st->twiddles;
+  kiss_fft_f32_cpx t;
+  int Norig = st->nfft;
+
+  kiss_fft_f32_cpx *scratch =
+      (kiss_fft_f32_cpx *) KISS_FFT_F32_TMP_ALLOC (sizeof (kiss_fft_f32_cpx) *
+      p);
+
+  for (u = 0; u < m; ++u) {
+    k = u;
+    for (q1 = 0; q1 < p; ++q1) {
+      scratch[q1] = Fout[k];
+      C_FIXDIV (scratch[q1], p);
+      k += m;
+    }
+
+    k = u;
+    for (q1 = 0; q1 < p; ++q1) {
+      int twidx = 0;
+      Fout[k] = scratch[0];
+      for (q = 1; q < p; ++q) {
+        twidx += fstride * k;
+        if (twidx >= Norig)
+          twidx -= Norig;
+        C_MUL (t, scratch[q], twiddles[twidx]);
+        C_ADDTO (Fout[k], t);
+      }
+      k += m;
+    }
+  }
+  KISS_FFT_F32_TMP_FREE (scratch);
+}
+
+static void
+kf_work (kiss_fft_f32_cpx * Fout,
+    const kiss_fft_f32_cpx * f,
+    const size_t fstride, int in_stride, int *factors,
+    const kiss_fft_f32_cfg st)
+{
+  kiss_fft_f32_cpx *Fout_beg = Fout;
+  const int p = *factors++;     /* the radix  */
+  const int m = *factors++;     /* stage's fft length/p */
+  const kiss_fft_f32_cpx *Fout_end = Fout + p * m;
+
+#ifdef _OPENMP
+  // use openmp extensions at the 
+  // top-level (not recursive)
+  if (fstride == 1 && p <= 5 && m != 1) {
+    int k;
+
+    // execute the p different work units in different threads
+#       pragma omp parallel for
+    for (k = 0; k < p; ++k)
+      kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
+          in_stride, factors, st);
+    // all threads have joined by this point
+
+    switch (p) {
+      case 2:
+        kf_bfly2 (Fout, fstride, st, m);
+        break;
+      case 3:
+        kf_bfly3 (Fout, fstride, st, m);
+        break;
+      case 4:
+        kf_bfly4 (Fout, fstride, st, m);
+        break;
+      case 5:
+        kf_bfly5 (Fout, fstride, st, m);
+        break;
+      default:
+        kf_bfly_generic (Fout, fstride, st, m, p);
+        break;
+    }
+    return;
+  }
+#endif
+
+  if (m == 1) {
+    do {
+      *Fout = *f;
+      f += fstride * in_stride;
+    } while (++Fout != Fout_end);
+  } else {
+    do {
+      // recursive call:
+      // DFT of size m*p performed by doing
+      // p instances of smaller DFTs of size m, 
+      // each one takes a decimated version of the input
+      kf_work (Fout, f, fstride * p, in_stride, factors, st);
+      f += fstride * in_stride;
+    } while ((Fout += m) != Fout_end);
+  }
+
+  Fout = Fout_beg;
+
+  // recombine the p smaller DFTs 
+  switch (p) {
+    case 2:
+      kf_bfly2 (Fout, fstride, st, m);
+      break;
+    case 3:
+      kf_bfly3 (Fout, fstride, st, m);
+      break;
+    case 4:
+      kf_bfly4 (Fout, fstride, st, m);
+      break;
+    case 5:
+      kf_bfly5 (Fout, fstride, st, m);
+      break;
+    default:
+      kf_bfly_generic (Fout, fstride, st, m, p);
+      break;
+  }
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where 
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+static void
+kf_factor (int n, int *facbuf)
+{
+  int p = 4;
+  double floor_sqrt;
+  floor_sqrt = floor (sqrt ((double) n));
+
+  /*factor out powers of 4, powers of 2, then any remaining primes */
+  do {
+    while (n % p) {
+      switch (p) {
+        case 4:
+          p = 2;
+          break;
+        case 2:
+          p = 3;
+          break;
+        default:
+          p += 2;
+          break;
+      }
+      if (p > floor_sqrt)
+        p = n;                  /* no more factors, skip to end */
+    }
+    n /= p;
+    *facbuf++ = p;
+    *facbuf++ = n;
+  } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * The return value is a contiguous block of memory, allocated with malloc.  As such,
+ * It can be freed with free(), rather than a kiss_fft_f32-specific function.
+ * */
+kiss_fft_f32_cfg
+kiss_fft_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
+{
+  kiss_fft_f32_cfg st = NULL;
+  size_t memneeded = sizeof (struct kiss_fft_f32_state)
+      + sizeof (kiss_fft_f32_cpx) * (nfft - 1); /* twiddle factors */
+
+  if (lenmem == NULL) {
+    st = (kiss_fft_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
+  } else {
+    if (mem != NULL && *lenmem >= memneeded)
+      st = (kiss_fft_f32_cfg) mem;
+    *lenmem = memneeded;
+  }
+  if (st) {
+    int i;
+    st->nfft = nfft;
+    st->inverse = inverse_fft;
+
+    for (i = 0; i < nfft; ++i) {
+      const double pi =
+          3.141592653589793238462643383279502884197169399375105820974944;
+      double phase = -2 * pi * i / nfft;
+      if (st->inverse)
+        phase *= -1;
+      kf_cexp (st->twiddles + i, phase);
+    }
+
+    kf_factor (nfft, st->factors);
+  }
+  return st;
+}
+
+
+void
+kiss_fft_f32_stride (kiss_fft_f32_cfg st, const kiss_fft_f32_cpx * fin,
+    kiss_fft_f32_cpx * fout, int in_stride)
+{
+  if (fin == fout) {
+    //NOTE: this is not really an in-place FFT algorithm.
+    //It just performs an out-of-place FFT into a temp buffer
+    kiss_fft_f32_cpx *tmpbuf =
+        (kiss_fft_f32_cpx *) KISS_FFT_F32_TMP_ALLOC (sizeof (kiss_fft_f32_cpx) *
+        st->nfft);
+    kf_work (tmpbuf, fin, 1, in_stride, st->factors, st);
+    memcpy (fout, tmpbuf, sizeof (kiss_fft_f32_cpx) * st->nfft);
+    KISS_FFT_F32_TMP_FREE (tmpbuf);
+  } else {
+    kf_work (fout, fin, 1, in_stride, st->factors, st);
+  }
+}
+
+void
+kiss_fft_f32 (kiss_fft_f32_cfg cfg, const kiss_fft_f32_cpx * fin,
+    kiss_fft_f32_cpx * fout)
+{
+  kiss_fft_f32_stride (cfg, fin, fout, 1);
+}
+
+
+void
+kiss_fft_f32_cleanup (void)
+{
+  // nothing needed any more
+}
+
+int
+kiss_fft_f32_next_fast_size (int n)
+{
+  while (1) {
+    int m = n;
+    while ((m % 2) == 0)
+      m /= 2;
+    while ((m % 3) == 0)
+      m /= 3;
+    while ((m % 5) == 0)
+      m /= 5;
+    if (m <= 1)
+      break;                    /* n is completely factorable by twos, threes, and fives */
+    n++;
+  }
+  return n;
+}
--- a/src/modules/module-filter-chain/kiss_fft_f32.h
+++ b/src/modules/module-filter-chain/kiss_fft_f32.h
@ -0,0 +1,112 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_FFT_F32_H
+#define KISS_FFT_F32_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ ATTENTION!
+ If you would like a :
+ -- a utility that will handle the caching of fft objects
+ -- real-only (no imaginary time component ) FFT
+ -- a multi-dimensional FFT
+ -- a command-line utility to perform ffts
+ -- a command-line utility to perform fast-convolution filtering
+
+ Then see kfc.h kiss_fftr_f32.h kiss_fft_f32nd.h fftutil.c kiss_fastfir.c
+  in the tools/ directory.
+*/
+
+#define KISS_FFT_F32_MALLOC malloc
+#define KISS_FFT_F32_FREE free
+#define kiss_fft_f32_scalar float
+
+typedef struct {
+    kiss_fft_f32_scalar r;
+    kiss_fft_f32_scalar i;
+}kiss_fft_f32_cpx;
+
+typedef struct kiss_fft_f32_state* kiss_fft_f32_cfg;
+
+/* 
+ *  kiss_fft_f32_alloc
+ *  
+ *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ *  typical usage:      kiss_fft_f32_cfg mycfg=kiss_fft_f32_alloc(1024,0,NULL,NULL);
+ *
+ *  The return value from fft_alloc is a cfg buffer used internally
+ *  by the fft routine or NULL.
+ *
+ *  If lenmem is NULL, then kiss_fft_f32_alloc will allocate a cfg buffer using malloc.
+ *  The returned value should be free()d when done to avoid memory leaks.
+ *  
+ *  The state can be placed in a user supplied buffer 'mem':
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *  
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg 
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_f32_cfg kiss_fft_f32_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); 
+
+/*
+ * kiss_fft_f32(cfg,in_out_buf)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+    f[k].r and f[k].i
+ * */
+void kiss_fft_f32(kiss_fft_f32_cfg cfg,const kiss_fft_f32_cpx *fin,kiss_fft_f32_cpx *fout);
+
+/*
+ A more generic version of the above function. It reads its input from every Nth sample.
+ * */
+void kiss_fft_f32_stride(kiss_fft_f32_cfg cfg,const kiss_fft_f32_cpx *fin,kiss_fft_f32_cpx *fout,int fin_stride);
+
+/* If kiss_fft_f32_alloc allocated a buffer, it is one contiguous 
+   buffer and can be simply free()d when no longer needed*/
+#define kiss_fft_f32_free KISS_FFT_F32_FREE
+
+/*
+ Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up 
+ your compiler output to call this before you exit.
+*/
+void kiss_fft_f32_cleanup(void);
+	
+
+/*
+ * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
+ */
+int kiss_fft_f32_next_fast_size(int n);
+
+/* for real ffts, we need an even size */
+#define kiss_fftr_f32_next_fast_size_real(n) \
+        (kiss_fft_f32_next_fast_size( ((n)+1)>>1)<<1)
+
+#ifdef __cplusplus
+} 
+#endif
+
+#endif
--- a/src/modules/module-filter-chain/kiss_fftr_f32.c
+++ b/src/modules/module-filter-chain/kiss_fftr_f32.c
@ -0,0 +1,148 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#include "kiss_fftr_f32.h"
+#include "_kiss_fft_guts_f32.h"
+
+struct kiss_fftr_f32_state
+{
+  kiss_fft_f32_cfg substate;
+  kiss_fft_f32_cpx *tmpbuf;
+  kiss_fft_f32_cpx *super_twiddles;
+#ifdef USE_SIMD
+  void *pad;
+#endif
+};
+
+kiss_fftr_f32_cfg
+kiss_fftr_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
+{
+  int i;
+  kiss_fftr_f32_cfg st = NULL;
+  size_t subsize = 0, memneeded;
+
+  nfft >>= 1;
+
+  kiss_fft_f32_alloc (nfft, inverse_fft, NULL, &subsize);
+  memneeded =
+      ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state)) +
+      ALIGN_STRUCT (subsize) + sizeof (kiss_fft_f32_cpx) * (nfft * 3 / 2);
+
+  if (lenmem == NULL) {
+    st = (kiss_fftr_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
+  } else {
+    if (*lenmem >= memneeded)
+      st = (kiss_fftr_f32_cfg) mem;
+    *lenmem = memneeded;
+  }
+  if (!st)
+    return NULL;
+
+  st->substate = (kiss_fft_f32_cfg) (((char *) st) + ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state)));       /*just beyond kiss_fftr_f32_state struct */
+  st->tmpbuf =
+      (kiss_fft_f32_cpx *) (((char *) st->substate) + ALIGN_STRUCT (subsize));
+  st->super_twiddles = st->tmpbuf + nfft;
+  kiss_fft_f32_alloc (nfft, inverse_fft, st->substate, &subsize);
+
+  for (i = 0; i < nfft / 2; ++i) {
+    double phase =
+        -3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
+    if (inverse_fft)
+      phase *= -1;
+    kf_cexp (st->super_twiddles + i, phase);
+  }
+  return st;
+}
+
+void
+kiss_fftr_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_scalar * timedata,
+    kiss_fft_f32_cpx * freqdata)
+{
+  /* input buffer timedata is stored row-wise */
+  int k, ncfft;
+  kiss_fft_f32_cpx fpnk, fpk, f1k, f2k, tw, tdc;
+
+  ncfft = st->substate->nfft;
+
+  /*perform the parallel fft of two real signals packed in real,imag */
+  kiss_fft_f32 (st->substate, (const kiss_fft_f32_cpx *) timedata, st->tmpbuf);
+  /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+   * contains the sum of the even-numbered elements of the input time sequence
+   * The imag part is the sum of the odd-numbered elements
+   *
+   * The sum of tdc.r and tdc.i is the sum of the input time sequence. 
+   *      yielding DC of input time sequence
+   * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... 
+   *      yielding Nyquist bin of input time sequence
+   */
+
+  tdc.r = st->tmpbuf[0].r;
+  tdc.i = st->tmpbuf[0].i;
+  C_FIXDIV (tdc, 2);
+  CHECK_OVERFLOW_OP (tdc.r, +, tdc.i);
+  CHECK_OVERFLOW_OP (tdc.r, -, tdc.i);
+  freqdata[0].r = tdc.r + tdc.i;
+  freqdata[ncfft].r = tdc.r - tdc.i;
+#ifdef USE_SIMD
+  freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps (0);
+#else
+  freqdata[ncfft].i = freqdata[0].i = 0;
+#endif
+
+  for (k = 1; k <= ncfft / 2; ++k) {
+    fpk = st->tmpbuf[k];
+    fpnk.r = st->tmpbuf[ncfft - k].r;
+    fpnk.i = -st->tmpbuf[ncfft - k].i;
+    C_FIXDIV (fpk, 2);
+    C_FIXDIV (fpnk, 2);
+
+    C_ADD (f1k, fpk, fpnk);
+    C_SUB (f2k, fpk, fpnk);
+    C_MUL (tw, f2k, st->super_twiddles[k - 1]);
+
+    freqdata[k].r = HALF_OF (f1k.r + tw.r);
+    freqdata[k].i = HALF_OF (f1k.i + tw.i);
+    freqdata[ncfft - k].r = HALF_OF (f1k.r - tw.r);
+    freqdata[ncfft - k].i = HALF_OF (tw.i - f1k.i);
+  }
+}
+
+void
+kiss_fftri_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_cpx * freqdata,
+    kiss_fft_f32_scalar * timedata)
+{
+  /* input buffer timedata is stored row-wise */
+  int k, ncfft;
+
+  ncfft = st->substate->nfft;
+
+  st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+  st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+  C_FIXDIV (st->tmpbuf[0], 2);
+
+  for (k = 1; k <= ncfft / 2; ++k) {
+    kiss_fft_f32_cpx fk, fnkc, fek, fok, tmp;
+    fk = freqdata[k];
+    fnkc.r = freqdata[ncfft - k].r;
+    fnkc.i = -freqdata[ncfft - k].i;
+    C_FIXDIV (fk, 2);
+    C_FIXDIV (fnkc, 2);
+
+    C_ADD (fek, fk, fnkc);
+    C_SUB (tmp, fk, fnkc);
+    C_MUL (fok, tmp, st->super_twiddles[k - 1]);
+    C_ADD (st->tmpbuf[k], fek, fok);
+    C_SUB (st->tmpbuf[ncfft - k], fek, fok);
+#ifdef USE_SIMD
+    st->tmpbuf[ncfft - k].i *= _mm_set1_ps (-1.0);
+#else
+    st->tmpbuf[ncfft - k].i *= -1;
+#endif
+  }
+  kiss_fft_f32 (st->substate, st->tmpbuf, (kiss_fft_f32_cpx *) timedata);
+}
--- a/src/modules/module-filter-chain/kiss_fftr_f32.h
+++ b/src/modules/module-filter-chain/kiss_fftr_f32.h
@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_FTR_H
+#define KISS_FTR_H
+
+#include "kiss_fft_f32.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    
+/* 
+ 
+ Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
+
+ 
+ 
+ */
+
+typedef struct kiss_fftr_f32_state *kiss_fftr_f32_cfg;
+
+
+kiss_fftr_f32_cfg kiss_fftr_f32_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
+/*
+ nfft must be even
+
+ If you don't care to allocate space, use mem = lenmem = NULL 
+*/
+
+
+void kiss_fftr_f32(kiss_fftr_f32_cfg cfg,const kiss_fft_f32_scalar *timedata,kiss_fft_f32_cpx *freqdata);
+/*
+ input timedata has nfft scalar points
+ output freqdata has nfft/2+1 complex points
+*/
+
+void kiss_fftri_f32(kiss_fftr_f32_cfg cfg,const kiss_fft_f32_cpx *freqdata,kiss_fft_f32_scalar *timedata);
+/*
+ input freqdata has  nfft/2+1 complex points
+ output timedata has nfft scalar points
+*/
+
+#define kiss_fftr_f32_free KISS_FFT_F32_FREE
+
+#ifdef __cplusplus
+}
+#endif
+#endif