diff --git a/meson.build b/meson.build index 6807ab174..8a6c3b07d 100644 --- a/meson.build +++ b/meson.build @@ -50,6 +50,16 @@ if cc.get_id() == 'gcc' language : 'c') endif +sse_args = '-msse' +sse2_args = '-msse2' +ssse3_args = '-mssse3' +sse41_args = '-msse4.1' + +have_sse = cc.has_argument(sse_args) +have_sse2 = cc.has_argument(sse2_args) +have_ssse3 = cc.has_argument(ssse3_args) +have_sse41 = cc.has_argument(sse41_args) + cdata = configuration_data() cdata.set('PIPEWIRE_VERSION_MAJOR', pipewire_version_major) cdata.set('PIPEWIRE_VERSION_MINOR', pipewire_version_minor) diff --git a/spa/plugins/audioconvert/fmt-ops-sse2.c b/spa/plugins/audioconvert/fmt-ops-sse2.c index 896a3fa10..c14c4da86 100644 --- a/spa/plugins/audioconvert/fmt-ops-sse2.c +++ b/spa/plugins/audioconvert/fmt-ops-sse2.c @@ -22,10 +22,7 @@ * DEALINGS IN THE SOFTWARE. */ -#include -#include - -#include +#include "fmt-ops.h" #include @@ -109,7 +106,7 @@ conv_s16_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_ } } -static void +void conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) { const int16_t *s = src[0]; @@ -301,7 +298,7 @@ conv_s24_to_f32d_4_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_ } } -static void +void conv_s24_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) { const int8_t *s = src[0]; @@ -469,7 +466,7 @@ conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE } } -static void +void conv_f32d_to_s32_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) { int32_t *d = dst[0]; @@ -638,7 +635,7 @@ conv_f32d_to_s16_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE } } -static void +void conv_f32d_to_s16_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) { int16_t *d = dst[0]; diff --git a/spa/plugins/audioconvert/fmt-ops-sse41.c b/spa/plugins/audioconvert/fmt-ops-sse41.c index 567f368bb..2cfe83c43 100644 --- a/spa/plugins/audioconvert/fmt-ops-sse41.c +++ b/spa/plugins/audioconvert/fmt-ops-sse41.c @@ -22,10 +22,7 @@ * DEALINGS IN THE SOFTWARE. */ -#include -#include - -#include +#include "fmt-ops.h" #include @@ -53,7 +50,7 @@ conv_s24_to_f32d_1_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA in = _mm_srai_epi32(in, 8); out = _mm_cvtepi32_ps(in); out = _mm_mul_ps(out, factor); - _mm_storeu_ps(&d0[n], out); + _mm_store_ps(&d0[n], out); s += 12 * n_channels; } for(; n < n_samples; n++) { @@ -64,16 +61,23 @@ conv_s24_to_f32d_1_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA } } -static void +extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples); +extern void conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples); + +void conv_s24_to_f32d_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) { const int8_t *s = src[0]; uint32_t i = 0; +#if defined (HAVE_SSSE3) for(; i + 3 < n_channels; i += 4) conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples); +#endif +#if defined (HAVE_SSE2) for(; i + 1 < n_channels; i += 2) conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples); +#endif for(; i < n_channels; i++) conv_s24_to_f32d_1_sse41(data, &dst[i], &s[3*i], n_channels, n_samples); } diff --git a/spa/plugins/audioconvert/fmt-ops-ssse3.c b/spa/plugins/audioconvert/fmt-ops-ssse3.c index 45374b1ea..dcfc81c03 100644 --- a/spa/plugins/audioconvert/fmt-ops-ssse3.c +++ b/spa/plugins/audioconvert/fmt-ops-ssse3.c @@ -22,10 +22,7 @@ * DEALINGS IN THE SOFTWARE. */ -#include -#include - -#include +#include "fmt-ops.h" #include @@ -38,8 +35,8 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA uint32_t n, unrolled; __m128i in[4]; __m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE); - //const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11); - const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1); + const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11); + //const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1); if (SPA_IS_ALIGNED(d0, 16)) unrolled = n_samples / 4; @@ -93,7 +90,10 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA } } -static void +extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples); +extern void conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples); + +void conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) { const int8_t *s = src[0]; @@ -101,8 +101,10 @@ conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_R for(; i + 3 < n_channels; i += 4) conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples); +#if defined (HAVE_SSE2) for(; i + 1 < n_channels; i += 2) conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples); for(; i < n_channels; i++) conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples); +#endif } diff --git a/spa/plugins/audioconvert/fmt-ops.c b/spa/plugins/audioconvert/fmt-ops.c index 988f426e5..32569d9ad 100644 --- a/spa/plugins/audioconvert/fmt-ops.c +++ b/spa/plugins/audioconvert/fmt-ops.c @@ -30,66 +30,7 @@ #include #include -#define U8_MIN 0 -#define U8_MAX 255 -#define U8_SCALE 127.5f -#define U8_OFFS 128 -#define U8_TO_F32(v) ((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0) -#define F32_TO_U8(v) (uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS) - -#define S16_MIN -32767 -#define S16_MAX 32767 -#define S16_MAX_F 32767.0f -#define S16_SCALE 32767.0f -#define S16_TO_F32(v) (((int16_t)(v)) * (1.0f / S16_SCALE)) -#define F32_TO_S16(v) (int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE) - -#define S24_MIN -8388607 -#define S24_MAX 8388607 -#define S24_MAX_F 8388607.0f -#define S24_SCALE 8388607.0f -#define S24_TO_F32(v) (((int32_t)(v)) * (1.0f / S24_SCALE)) -#define F32_TO_S24(v) (int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE) - -#define S32_SCALE 2147483648.0f -#define S32_MIN 2147483520.0f - -#define S32_TO_F32(v) S24_TO_F32((v) >> 8) -#define F32_TO_S32(v) (F32_TO_S24(v) << 8) - -static inline int32_t read_s24(const void *src) -{ - const int8_t *s = src; -#if __BYTE_ORDER == __LITTLE_ENDIAN - return (((int32_t)s[2] << 16) | ((uint32_t)(uint8_t)s[1] << 8) | (uint32_t)(uint8_t)s[0]); -#else - return (((int32_t)s[0] << 16) | ((uint32_t)(uint8_t)s[1] << 8) | (uint32_t)(uint8_t)s[2]); -#endif -} - -static inline void write_s24(void *dst, int32_t val) -{ - uint8_t *d = dst; -#if __BYTE_ORDER == __LITTLE_ENDIAN - d[0] = (uint8_t) (val); - d[1] = (uint8_t) (val >> 8); - d[2] = (uint8_t) (val >> 16); -#else - d[0] = (uint8_t) (val >> 16); - d[1] = (uint8_t) (val >> 8); - d[2] = (uint8_t) (val); -#endif -} - -#if defined (__SSE2__) -#include "fmt-ops-sse2.c" -#endif -#if defined (__SSSE3__) -#include "fmt-ops-ssse3.c" -#endif -#if defined (__SSE4_1__) -#include "fmt-ops-sse41.c" -#endif +#include "fmt-ops.h" static void conv_copy8d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples) @@ -728,19 +669,18 @@ interleave_32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT s } } - -typedef void (*convert_func_t) (void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], - uint32_t n_channels, uint32_t n_samples); - -static const struct conv_info { +struct conv_info { uint32_t src_fmt; uint32_t dst_fmt; #define FEATURE_SSE2 SPA_CPU_FLAG_SSE2 +#define FEATURE_SSE41 SPA_CPU_FLAG_SSE41 #define FEATURE_SSSE3 SPA_CPU_FLAG_SSSE3 uint32_t features; convert_func_t func; -} conv_table[] = +}; + +static struct conv_info conv_table[] = { /* to f32 */ { SPA_AUDIO_FORMAT_U8, SPA_AUDIO_FORMAT_F32, 0, conv_u8_to_f32 }, @@ -751,7 +691,7 @@ static const struct conv_info { { SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32, 0, conv_s16_to_f32 }, { SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32P, 0, conv_s16d_to_f32d }, -#if defined (__SSE2__) +#if defined (HAVE_SSE2) { SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s16_to_f32d_sse2 }, #endif { SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, 0, conv_s16_to_f32d }, @@ -769,13 +709,13 @@ static const struct conv_info { { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 }, { SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24d_to_f32d }, -#if defined (__SSE4_1__) - { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse41 }, +#if defined (HAVE_SSSE3) +// { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSSE3, conv_s24_to_f32d_ssse3 }, #endif -#if defined (__SSSE3__) - { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSSE3, conv_s24_to_f32d_ssse3 }, +#if defined (HAVE_SSE41) + { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE41, conv_s24_to_f32d_sse41 }, #endif -#if defined (__SSE2__) +#if defined (HAVE_SSE2) { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse2 }, #endif { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32d }, @@ -795,7 +735,7 @@ static const struct conv_info { { SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16, 0, conv_f32_to_s16 }, { SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16P, 0, conv_f32d_to_s16d }, { SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16P, 0, conv_f32_to_s16d }, -#if defined (__SSE2__) +#if defined (HAVE_SSE2) { SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, FEATURE_SSE2, conv_f32d_to_s16_sse2 }, #endif { SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, 0, conv_f32d_to_s16 }, @@ -803,7 +743,7 @@ static const struct conv_info { { SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32, 0, conv_f32_to_s32 }, { SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32P, 0, conv_f32d_to_s32d }, { SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32P, 0, conv_f32_to_s32d }, -#if defined (__SSE2__) +#if defined (HAVE_SSE2) { SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, FEATURE_SSE2, conv_f32d_to_s32_sse2 }, #endif { SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, 0, conv_f32d_to_s32 }, diff --git a/spa/plugins/audioconvert/fmt-ops.h b/spa/plugins/audioconvert/fmt-ops.h new file mode 100644 index 000000000..a7c7f4397 --- /dev/null +++ b/spa/plugins/audioconvert/fmt-ops.h @@ -0,0 +1,101 @@ +/* Spa + * + * Copyright © 2019 Wim Taymans + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include + +#define U8_MIN 0 +#define U8_MAX 255 +#define U8_SCALE 127.5f +#define U8_OFFS 128 +#define U8_TO_F32(v) ((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0) +#define F32_TO_U8(v) (uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS) + +#define S16_MIN -32767 +#define S16_MAX 32767 +#define S16_MAX_F 32767.0f +#define S16_SCALE 32767.0f +#define S16_TO_F32(v) (((int16_t)(v)) * (1.0f / S16_SCALE)) +#define F32_TO_S16(v) (int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE) + +#define S24_MIN -8388607 +#define S24_MAX 8388607 +#define S24_MAX_F 8388607.0f +#define S24_SCALE 8388607.0f +#define S24_TO_F32(v) (((int32_t)(v)) * (1.0f / S24_SCALE)) +#define F32_TO_S24(v) (int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE) + +#define S32_SCALE 2147483648.0f +#define S32_MIN 2147483520.0f + +#define S32_TO_F32(v) S24_TO_F32((v) >> 8) +#define F32_TO_S32(v) (F32_TO_S24(v) << 8) + +static inline int32_t read_s24(const void *src) +{ + const int8_t *s = src; +#if __BYTE_ORDER == __LITTLE_ENDIAN + return (((int32_t)s[2] << 16) | ((uint32_t)(uint8_t)s[1] << 8) | (uint32_t)(uint8_t)s[0]); +#else + return (((int32_t)s[0] << 16) | ((uint32_t)(uint8_t)s[1] << 8) | (uint32_t)(uint8_t)s[2]); +#endif +} + +static inline void write_s24(void *dst, int32_t val) +{ + uint8_t *d = dst; +#if __BYTE_ORDER == __LITTLE_ENDIAN + d[0] = (uint8_t) (val); + d[1] = (uint8_t) (val >> 8); + d[2] = (uint8_t) (val >> 16); +#else + d[0] = (uint8_t) (val >> 16); + d[1] = (uint8_t) (val >> 8); + d[2] = (uint8_t) (val); +#endif +} + +typedef void (*convert_func_t) (void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_channels, uint32_t n_samples); + +#if defined(HAVE_SSE2) +void conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_channels, uint32_t n_samples); +void conv_s24_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_channels, uint32_t n_samples); +void conv_f32d_to_s32_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_channels, uint32_t n_samples); +void conv_f32d_to_s16_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_channels, uint32_t n_samples); +#endif +#if defined(HAVE_SSSE3) +void conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_channels, uint32_t n_samples); +#endif +#if defined(HAVE_SSE41) +void conv_s24_to_f32d_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], + uint32_t n_channels, uint32_t n_samples); + +#endif diff --git a/spa/plugins/audioconvert/meson.build b/spa/plugins/audioconvert/meson.build index e323a2870..0c57790bd 100644 --- a/spa/plugins/audioconvert/meson.build +++ b/spa/plugins/audioconvert/meson.build @@ -6,10 +6,57 @@ audioconvert_sources = ['fmtconvert.c', 'audioconvert.c', 'plugin.c'] +simd_cargs = [] +simd_dependencies = [] + +if have_sse + audioconvert_sse = static_library('audioconvert_sse', + ['resample-native-sse.c'], + c_args : [sse_args], + include_directories : [spa_inc], + install : false + ) + simd_cargs += ['-DHAVE_SSE'] + simd_dependencies += audioconvert_sse +endif +if have_sse2 + audioconvert_sse2 = static_library('audioconvert_sse2', + ['fmt-ops-sse2.c'], + c_args : [sse2_args], + include_directories : [spa_inc], + install : false + ) + simd_cargs += ['-DHAVE_SSE2'] + simd_dependencies += audioconvert_sse2 +endif +if have_ssse3 + audioconvert_ssse3 = static_library('audioconvert_ssse3', + ['fmt-ops-ssse3.c', + 'resample-native-ssse3.c' ], + c_args : [ssse3_args], + include_directories : [spa_inc], + install : false + ) + simd_cargs += ['-DHAVE_SSSE3'] + simd_dependencies += audioconvert_ssse3 +endif +if have_sse41 + audioconvert_sse41 = static_library('audioconvert_sse41', + ['fmt-ops-sse41.c'], + c_args : [sse41_args], + include_directories : [spa_inc], + install : false + ) + simd_cargs += ['-DHAVE_SSE41'] + simd_dependencies += audioconvert_sse41 +endif + audioconvertlib = shared_library('spa-audioconvert', audioconvert_sources, + c_args : simd_cargs, include_directories : [spa_inc], dependencies : [ speexdsp_dep, mathlib ], + link_with : simd_dependencies, install : true, install_dir : '@0@/spa/audioconvert/'.format(get_option('libdir'))) diff --git a/spa/plugins/audioconvert/resample-native-impl.h b/spa/plugins/audioconvert/resample-native-impl.h index 4c5f01b57..36b8cec3d 100644 --- a/spa/plugins/audioconvert/resample-native-impl.h +++ b/spa/plugins/audioconvert/resample-native-impl.h @@ -22,6 +22,45 @@ * DEALINGS IN THE SOFTWARE. */ +#include + +#include + +#include "resample.h" + +typedef void (*resample_func_t)(struct resample *r, + const void * SPA_RESTRICT src[], uint32_t *in_len, + void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len); + +struct native_data { + double rate; + uint32_t n_taps; + uint32_t n_phases; + uint32_t in_rate; + uint32_t out_rate; + uint32_t index; + uint32_t phase; + uint32_t inc; + uint32_t frac; + uint32_t filter_stride; + uint32_t filter_stride_os; + uint32_t hist; + float **history; + resample_func_t func; + float *filter; + float *hist_mem; +}; + +#define DEFINE_RESAMPLER_FULL(arch) \ +void do_resample_full_##arch(struct resample *r, \ + const void * SPA_RESTRICT src[], uint32_t *in_len, \ + void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) + +#define DEFINE_RESAMPLER_INTER(arch) \ +void do_resample_inter_##arch(struct resample *r, \ + const void * SPA_RESTRICT src[], uint32_t *in_len, \ + void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) + #define MAKE_RESAMPLER_COPY(arch) \ static void do_resample_copy_##arch(struct resample *r, \ const void * SPA_RESTRICT src[], uint32_t *in_len, \ @@ -52,9 +91,7 @@ static void do_resample_copy_##arch(struct resample *r, \ } #define MAKE_RESAMPLER_FULL(arch) \ -static void do_resample_full_##arch(struct resample *r, \ - const void * SPA_RESTRICT src[], uint32_t *in_len, \ - void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) \ +DEFINE_RESAMPLER_FULL(arch) \ { \ struct native_data *data = r->data; \ uint32_t n_taps = data->n_taps, stride = data->filter_stride_os; \ @@ -93,9 +130,7 @@ static void do_resample_full_##arch(struct resample *r, \ } #define MAKE_RESAMPLER_INTER(arch) \ -static void do_resample_inter_##arch(struct resample *r, \ - const void * SPA_RESTRICT src[], uint32_t *in_len, \ - void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) \ +DEFINE_RESAMPLER_INTER(arch) \ { \ struct native_data *data = r->data; \ uint32_t index, phase, stride = data->filter_stride; \ @@ -140,3 +175,16 @@ static void do_resample_inter_##arch(struct resample *r, \ data->index = index; \ data->phase = phase; \ } + + +DEFINE_RESAMPLER_FULL(c); +DEFINE_RESAMPLER_INTER(c); + +#if defined (HAVE_SSE) +DEFINE_RESAMPLER_FULL(sse); +DEFINE_RESAMPLER_INTER(sse); +#endif +#if defined (HAVE_SSSE3) +DEFINE_RESAMPLER_FULL(ssse3); +DEFINE_RESAMPLER_INTER(ssse3); +#endif diff --git a/spa/plugins/audioconvert/resample-native-sse.h b/spa/plugins/audioconvert/resample-native-sse.c similarity index 98% rename from spa/plugins/audioconvert/resample-native-sse.h rename to spa/plugins/audioconvert/resample-native-sse.c index 8762c2996..479bacdc8 100644 --- a/spa/plugins/audioconvert/resample-native-sse.h +++ b/spa/plugins/audioconvert/resample-native-sse.c @@ -22,6 +22,8 @@ * DEALINGS IN THE SOFTWARE. */ +#include "resample-native-impl.h" + #include static void inner_product_sse(float *d, const float * SPA_RESTRICT s, diff --git a/spa/plugins/audioconvert/resample-native-ssse3.h b/spa/plugins/audioconvert/resample-native-ssse3.c similarity index 91% rename from spa/plugins/audioconvert/resample-native-ssse3.h rename to spa/plugins/audioconvert/resample-native-ssse3.c index 4812d891f..c39bc610a 100644 --- a/spa/plugins/audioconvert/resample-native-ssse3.h +++ b/spa/plugins/audioconvert/resample-native-ssse3.c @@ -22,6 +22,8 @@ * DEALINGS IN THE SOFTWARE. */ +#include "resample-native-impl.h" + #include static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s, @@ -90,8 +92,8 @@ static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s, } break; } - sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); - sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); + sum = _mm_add_ps(sum, _mm_movehdup_ps(sum)); + sum = _mm_add_ss(sum, _mm_movehl_ps(sum, sum)); _mm_store_ss(d, sum); } @@ -99,11 +101,14 @@ static void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s, const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x, uint32_t n_taps) { -#if defined (__SSE__) - inner_product_ip_sse(d, s, t0, t1, x, n_taps); -#else - inner_product_ip_c(d, s, t0, t1, x, n_taps); -#endif + float sum[2] = { 0.0f, 0.0f }; + uint32_t i; + + for (i = 0; i < n_taps; i++) { + sum[0] += s[i] * t0[i]; + sum[1] += s[i] * t1[i]; + } + *d = (sum[1] - sum[0]) * x + sum[0]; } MAKE_RESAMPLER_FULL(ssse3); diff --git a/spa/plugins/audioconvert/resample-native.h b/spa/plugins/audioconvert/resample-native.h index f6d44c95f..1e3faee36 100644 --- a/spa/plugins/audioconvert/resample-native.h +++ b/spa/plugins/audioconvert/resample-native.h @@ -22,39 +22,8 @@ * DEALINGS IN THE SOFTWARE. */ -#include - -typedef void (*resample_func_t)(struct resample *r, - const void * SPA_RESTRICT src[], uint32_t *in_len, - void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len); - -struct native_data { - double rate; - uint32_t n_taps; - uint32_t n_phases; - uint32_t in_rate; - uint32_t out_rate; - uint32_t index; - uint32_t phase; - uint32_t inc; - uint32_t frac; - uint32_t filter_stride; - uint32_t filter_stride_os; - uint32_t hist; - float **history; - resample_func_t func; - float *filter; - float *hist_mem; -}; - #include "resample-native-impl.h" #include "resample-native-c.h" -#if defined (__SSE__) -#include "resample-native-sse.h" -#endif -#if defined (__SSSE3__) -#include "resample-native-ssse3.h" -#endif struct quality { uint32_t n_taps; @@ -149,11 +118,11 @@ static void impl_native_update_rate(struct resample *r, double rate) bool is_full = r->i_rate == in_rate; data->func = is_full ? do_resample_full_c : do_resample_inter_c; -#if defined (__SSE__) +#if defined (HAVE_SSE) if (r->cpu_flags & SPA_CPU_FLAG_SSE) data->func = is_full ? do_resample_full_sse : do_resample_inter_sse; #endif -#if defined (__SSSE3__) +#if defined (HAVE_SSSE3) if (r->cpu_flags & SPA_CPU_FLAG_SSSE3) data->func = is_full ? do_resample_full_ssse3 : do_resample_inter_ssse3; #endif diff --git a/spa/plugins/audioconvert/resample.h b/spa/plugins/audioconvert/resample.h index 4f46e6d3d..5344fe81e 100644 --- a/spa/plugins/audioconvert/resample.h +++ b/spa/plugins/audioconvert/resample.h @@ -22,6 +22,9 @@ * DEALINGS IN THE SOFTWARE. */ +#ifndef RESAMPLE_H +#define RESAMPLE_H + #include struct resample { @@ -45,3 +48,5 @@ struct resample { #define resample_process(r,...) (r)->process(r,__VA_ARGS__) #define resample_reset(r) (r)->reset(r) #define resample_delay(r) (r)->delay(r) + +#endif /* RESAMPLE_H */