From a389a553e3ea7c21ff7b3cb9d5ac11023d5e54e8 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Mon, 22 Jun 2026 13:05:32 +0200 Subject: [PATCH] filter-graph: use pffft aligned alloc in all cases Always use the pffft aligned alloc function. The fftw alloc function only aligns to 16 bytes and the AVX code uses stores that rely on an alignment of 32 bytes. The pffft alloc aligns to 64 bytes. Fixes #5320 --- spa/plugins/filter-graph/audio-dsp-avx2.c | 17 +++++------- spa/plugins/filter-graph/audio-dsp-c.c | 32 +++++------------------ spa/plugins/filter-graph/audio-dsp-impl.h | 2 ++ spa/plugins/filter-graph/audio-dsp-sse.c | 18 ++++++------- spa/plugins/filter-graph/audio-dsp.c | 4 +-- 5 files changed, 26 insertions(+), 47 deletions(-) diff --git a/spa/plugins/filter-graph/audio-dsp-avx2.c b/spa/plugins/filter-graph/audio-dsp-avx2.c index 710231ab4..5b20b625d 100644 --- a/spa/plugins/filter-graph/audio-dsp-avx2.c +++ b/spa/plugins/filter-graph/audio-dsp-avx2.c @@ -12,9 +12,8 @@ #ifdef HAVE_FFTW #include -#else -#include "pffft.h" #endif +#include "pffft.h" #include "audio-dsp-impl.h" #include @@ -386,14 +385,12 @@ static void fft_interleaved_avx2(float *data, uint32_t len, float scale) void *dsp_fft_memalloc_avx2(void *obj, uint32_t size, bool real) { -#ifdef HAVE_FFTW - return fftwf_alloc_real(real ? size : SPA_ROUND_UP_N(size, FFT_BLOCK) * 2); -#else - if (real) - return pffft_aligned_malloc(size * sizeof(float)); - else - return pffft_aligned_malloc(size * 2 * sizeof(float)); -#endif + uint32_t asize = real ? 
size : SPA_ROUND_UP_N(size, FFT_BLOCK) * 2; + return pffft_aligned_malloc(asize * sizeof(float)); +} +void dsp_fft_memfree_avx2(void *obj, void *data) +{ + pffft_aligned_free(data); } void dsp_fft_memclear_avx2(void *obj, void *data, uint32_t size, bool real) diff --git a/spa/plugins/filter-graph/audio-dsp-c.c b/spa/plugins/filter-graph/audio-dsp-c.c index e430aae0b..82d71bc52 100644 --- a/spa/plugins/filter-graph/audio-dsp-c.c +++ b/spa/plugins/filter-graph/audio-dsp-c.c @@ -14,9 +14,8 @@ #ifdef HAVE_FFTW #include -#else -#include "pffft.h" #endif +#include "pffft.h" #include "audio-dsp-impl.h" void dsp_clear_c(void *obj, float * SPA_RESTRICT dst, uint32_t n_samples) @@ -261,14 +260,14 @@ void *dsp_fft_new_c(void *obj, uint32_t size, bool real) float *rdata; fftwf_complex *cdata; - rdata = fftwf_alloc_real(size * 2); - cdata = fftwf_alloc_complex(size + 1); + rdata = spa_fga_dsp_fft_memalloc(obj, size * 2, true); + cdata = spa_fga_dsp_fft_memalloc(obj, size + 1, false); info->plan_r2c = fftwf_plan_dft_r2c_1d(size, rdata, cdata, FFTW_ESTIMATE); info->plan_c2r = fftwf_plan_dft_c2r_1d(size, cdata, rdata, FFTW_ESTIMATE); - fftwf_free(rdata); - fftwf_free(cdata); + spa_fga_dsp_fft_memfree(obj, rdata); + spa_fga_dsp_fft_memfree(obj, cdata); } #else info->setup = pffft_new_setup(size, real ? PFFFT_REAL : PFFFT_COMPLEX); @@ -290,35 +289,18 @@ void dsp_fft_free_c(void *obj, void *fft) void *dsp_fft_memalloc_c(void *obj, uint32_t size, bool real) { -#ifdef HAVE_FFTW - if (real) - return fftwf_alloc_real(size); - else - return fftwf_alloc_complex(size); -#else - if (real) - return pffft_aligned_malloc(size * sizeof(float)); - else - return pffft_aligned_malloc(size * 2 * sizeof(float)); -#endif + uint32_t asize = real ? 
size : size * 2; + return pffft_aligned_malloc(asize * sizeof(float)); } void dsp_fft_memfree_c(void *obj, void *data) { -#ifdef HAVE_FFTW - fftwf_free(data); -#else pffft_aligned_free(data); -#endif } void dsp_fft_memclear_c(void *obj, void *data, uint32_t size, bool real) { -#ifdef HAVE_FFTW spa_fga_dsp_clear(obj, data, real ? size : size * 2); -#else - spa_fga_dsp_clear(obj, data, real ? size : size * 2); -#endif } void dsp_fft_run_c(void *obj, void *fft, int direction, diff --git a/spa/plugins/filter-graph/audio-dsp-impl.h b/spa/plugins/filter-graph/audio-dsp-impl.h index 4b63bf22f..da0222351 100644 --- a/spa/plugins/filter-graph/audio-dsp-impl.h +++ b/spa/plugins/filter-graph/audio-dsp-impl.h @@ -84,6 +84,7 @@ MAKE_MULT_FUNC(sse); MAKE_BIQUAD_RUN_FUNC(sse); MAKE_DELAY_FUNC(sse); MAKE_FFT_MEMALLOC_FUNC(sse); +MAKE_FFT_MEMFREE_FUNC(sse); MAKE_FFT_MEMCLEAR_FUNC(sse); MAKE_FFT_RUN_FUNC(sse); MAKE_FFT_CMUL_FUNC(sse); @@ -95,6 +96,7 @@ MAKE_SUM_FUNC(avx2); MAKE_LINEAR_FUNC(avx2); MAKE_MULT_FUNC(avx2); MAKE_FFT_MEMALLOC_FUNC(avx2); +MAKE_FFT_MEMFREE_FUNC(avx2); MAKE_FFT_MEMCLEAR_FUNC(avx2); MAKE_FFT_RUN_FUNC(avx2); MAKE_FFT_CMUL_FUNC(avx2); diff --git a/spa/plugins/filter-graph/audio-dsp-sse.c b/spa/plugins/filter-graph/audio-dsp-sse.c index 54f2f212c..36ba569a1 100644 --- a/spa/plugins/filter-graph/audio-dsp-sse.c +++ b/spa/plugins/filter-graph/audio-dsp-sse.c @@ -14,9 +14,8 @@ #ifdef HAVE_FFTW #include -#else -#include "pffft.h" #endif +#include "pffft.h" #include "audio-dsp-impl.h" @@ -827,14 +826,13 @@ static void fft_interleaved_sse(float *data, uint32_t len, float scale) void *dsp_fft_memalloc_sse(void *obj, uint32_t size, bool real) { -#ifdef HAVE_FFTW - return fftwf_alloc_real(real ? size : SPA_ROUND_UP_N(size, FFT_BLOCK) * 2); -#else - if (real) - return pffft_aligned_malloc(size * sizeof(float)); - else - return pffft_aligned_malloc(size * 2 * sizeof(float)); -#endif + uint32_t asize = real ? 
size : SPA_ROUND_UP_N(size, FFT_BLOCK) * 2; + return pffft_aligned_malloc(asize * sizeof(float)); +} + +void dsp_fft_memfree_sse(void *obj, void *data) +{ + pffft_aligned_free(data); } void dsp_fft_memclear_sse(void *obj, void *data, uint32_t size, bool real) diff --git a/spa/plugins/filter-graph/audio-dsp.c b/spa/plugins/filter-graph/audio-dsp.c index d72c46d87..a899820f9 100644 --- a/spa/plugins/filter-graph/audio-dsp.c +++ b/spa/plugins/filter-graph/audio-dsp.c @@ -35,7 +35,7 @@ static const struct dsp_info dsp_table[] = .funcs.fft_new = dsp_fft_new_c, .funcs.fft_free = dsp_fft_free_c, .funcs.fft_memalloc = dsp_fft_memalloc_avx2, - .funcs.fft_memfree = dsp_fft_memfree_c, + .funcs.fft_memfree = dsp_fft_memfree_avx2, .funcs.fft_memclear = dsp_fft_memclear_avx2, .funcs.fft_run = dsp_fft_run_avx2, .funcs.fft_cmul = dsp_fft_cmul_avx2, @@ -55,7 +55,7 @@ static const struct dsp_info dsp_table[] = .funcs.fft_new = dsp_fft_new_c, .funcs.fft_free = dsp_fft_free_c, .funcs.fft_memalloc = dsp_fft_memalloc_sse, - .funcs.fft_memfree = dsp_fft_memfree_c, + .funcs.fft_memfree = dsp_fft_memfree_sse, .funcs.fft_memclear = dsp_fft_memclear_sse, .funcs.fft_run = dsp_fft_run_sse, .funcs.fft_cmul = dsp_fft_cmul_sse,