audioconvert: handle more optimizations

Compile each SIMD-optimized implementation (SSE, SSE2, SSSE3, SSE4.1) as its
own static library with the matching compiler flags, then link those
libraries into the main library.
This commit is contained in:
Wim Taymans 2019-03-27 17:58:48 +01:00
parent eaffb25cc2
commit c8d3d475bb
12 changed files with 271 additions and 141 deletions

View file

@ -50,6 +50,16 @@ if cc.get_id() == 'gcc'
language : 'c')
endif
# Compiler flags that enable code generation for each x86 SIMD instruction set.
sse_args = '-msse'
sse2_args = '-msse2'
ssse3_args = '-mssse3'
sse41_args = '-msse4.1'
# Ask the C compiler whether it accepts each flag. This is a build-time
# compiler capability check, not a test of the CPU the code will run on.
have_sse = cc.has_argument(sse_args)
have_sse2 = cc.has_argument(sse2_args)
have_ssse3 = cc.has_argument(ssse3_args)
have_sse41 = cc.has_argument(sse41_args)
cdata = configuration_data()
cdata.set('PIPEWIRE_VERSION_MAJOR', pipewire_version_major)
cdata.set('PIPEWIRE_VERSION_MINOR', pipewire_version_minor)

View file

@ -22,10 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <stdio.h>
#include <spa/utils/defs.h>
#include "fmt-ops.h"
#include <emmintrin.h>
@ -109,7 +106,7 @@ conv_s16_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
}
}
static void
void
conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
const int16_t *s = src[0];
@ -301,7 +298,7 @@ conv_s24_to_f32d_4_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
}
}
static void
void
conv_s24_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
const int8_t *s = src[0];
@ -469,7 +466,7 @@ conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
}
}
static void
void
conv_f32d_to_s32_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
int32_t *d = dst[0];
@ -638,7 +635,7 @@ conv_f32d_to_s16_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
}
}
static void
void
conv_f32d_to_s16_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
int16_t *d = dst[0];

View file

@ -22,10 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <stdio.h>
#include <spa/utils/defs.h>
#include "fmt-ops.h"
#include <smmintrin.h>
@ -53,7 +50,7 @@ conv_s24_to_f32d_1_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA
in = _mm_srai_epi32(in, 8);
out = _mm_cvtepi32_ps(in);
out = _mm_mul_ps(out, factor);
_mm_storeu_ps(&d0[n], out);
_mm_store_ps(&d0[n], out);
s += 12 * n_channels;
}
for(; n < n_samples; n++) {
@ -64,16 +61,23 @@ conv_s24_to_f32d_1_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA
}
}
static void
/* Multi-channel kernels that live in other translation units. */
extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
extern void conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);

/*
 * Convert the packed 24-bit samples found at src[0] into one float buffer
 * per channel in dst[].
 *
 * Channels are consumed in the widest groups available: four at a time when
 * HAVE_SSSE3 is defined, then two at a time when HAVE_SSE2 is defined, and
 * finally one at a time with the SSE4.1 kernel. Each kernel receives the
 * destination pointers starting at its first channel and the source advanced
 * by 3 bytes per channel already handled.
 */
void
conv_s24_to_f32d_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
	const int8_t *in = src[0];
	uint32_t ch = 0;

#if defined (HAVE_SSSE3)
	while (ch + 3 < n_channels) {
		conv_s24_to_f32d_4_ssse3(data, dst + ch, in + 3*ch, n_channels, n_samples);
		ch += 4;
	}
#endif
#if defined (HAVE_SSE2)
	while (ch + 1 < n_channels) {
		conv_s24_to_f32d_2_sse2(data, dst + ch, in + 3*ch, n_channels, n_samples);
		ch += 2;
	}
#endif
	/* Whatever is left is done channel by channel. */
	while (ch < n_channels) {
		conv_s24_to_f32d_1_sse41(data, dst + ch, in + 3*ch, n_channels, n_samples);
		ch++;
	}
}

View file

@ -22,10 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <stdio.h>
#include <spa/utils/defs.h>
#include "fmt-ops.h"
#include <tmmintrin.h>
@ -38,8 +35,8 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
uint32_t n, unrolled;
__m128i in[4];
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
//const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
//const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
if (SPA_IS_ALIGNED(d0, 16))
unrolled = n_samples / 4;
@ -93,7 +90,10 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
}
}
static void
extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
extern void conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
void
conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
const int8_t *s = src[0];
@ -101,8 +101,10 @@ conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_R
for(; i + 3 < n_channels; i += 4)
conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples);
#if defined (HAVE_SSE2)
for(; i + 1 < n_channels; i += 2)
conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
for(; i < n_channels; i++)
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
#endif
}

View file

@ -30,66 +30,7 @@
#include <spa/utils/defs.h>
#include <spa/param/audio/format-utils.h>
/*
 * Range constants and float <-> integer sample conversion helpers.
 *
 * Every expansion is fully parenthesized so it can be used inside larger
 * expressions. The F32_TO_* helpers clamp their input to [-1.0, 1.0] with
 * SPA_CLAMP before scaling.
 */
#define U8_MIN		0
#define U8_MAX		255
#define U8_SCALE	127.5f
#define U8_OFFS		128
/* Uses 1.0f (not 1.0) so the result stays a float instead of a double. */
#define U8_TO_F32(v)	((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0f)
#define F32_TO_U8(v)	((uint8_t)((SPA_CLAMP((v), -1.0f, 1.0f) * U8_SCALE) + U8_OFFS))

#define S16_MIN		(-32767)
#define S16_MAX		32767
#define S16_MAX_F	32767.0f
#define S16_SCALE	32767.0f
#define S16_TO_F32(v)	(((int16_t)(v)) * (1.0f / S16_SCALE))
#define F32_TO_S16(v)	((int16_t)(SPA_CLAMP((v), -1.0f, 1.0f) * S16_SCALE))

#define S24_MIN		(-8388607)
#define S24_MAX		8388607
#define S24_MAX_F	8388607.0f
#define S24_SCALE	8388607.0f
#define S24_TO_F32(v)	(((int32_t)(v)) * (1.0f / S24_SCALE))
#define F32_TO_S24(v)	((int32_t)(SPA_CLAMP((v), -1.0f, 1.0f) * S24_SCALE))

#define S32_SCALE	2147483648.0f
/* NOTE(review): despite the name this is a positive value; confirm intent. */
#define S32_MIN		2147483520.0f
/* 32-bit samples go through the 24-bit path: the low 8 bits are dropped. */
#define S32_TO_F32(v)	S24_TO_F32((v) >> 8)
/* Multiply instead of "<< 8": left-shifting a negative int is undefined. */
#define F32_TO_S32(v)	(F32_TO_S24(v) * 256)
/**
 * Read one packed signed 24-bit sample and sign-extend it to 32 bits.
 *
 * The byte order is selected at compile time from __BYTE_ORDER.
 *
 * \param src pointer to the 3 bytes holding the sample
 * \return the sample value, in the range [-8388608, 8388607]
 */
static inline int32_t read_s24(const void *src)
{
	const int8_t *s = src;
	/* The most significant byte is signed. It is scaled by multiplication
	 * because left-shifting a negative value is undefined behaviour in C;
	 * the two lower bytes are taken as unsigned and simply added in. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return (int32_t)s[2] * 65536 + (int32_t)(uint8_t)s[1] * 256 + (int32_t)(uint8_t)s[0];
#else
	return (int32_t)s[0] * 65536 + (int32_t)(uint8_t)s[1] * 256 + (int32_t)(uint8_t)s[2];
#endif
}
/**
 * Store the low 24 bits of \a val as a packed 3-byte sample.
 *
 * The byte order is selected at compile time from __BYTE_ORDER. Exactly
 * three bytes are written at \a dst.
 */
static inline void write_s24(void *dst, int32_t val)
{
	uint8_t *out = dst;
	const uint8_t lo = (uint8_t) (val);
	const uint8_t mid = (uint8_t) (val >> 8);
	const uint8_t hi = (uint8_t) (val >> 16);

#if __BYTE_ORDER == __LITTLE_ENDIAN
	out[0] = lo;
	out[1] = mid;
	out[2] = hi;
#else
	out[0] = hi;
	out[1] = mid;
	out[2] = lo;
#endif
}
#if defined (__SSE2__)
#include "fmt-ops-sse2.c"
#endif
#if defined (__SSSE3__)
#include "fmt-ops-ssse3.c"
#endif
#if defined (__SSE4_1__)
#include "fmt-ops-sse41.c"
#endif
#include "fmt-ops.h"
static void
conv_copy8d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
@ -728,19 +669,18 @@ interleave_32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT s
}
}
/* Signature shared by the conversion routines stored in the table:
 * destination and source buffer arrays plus channel and sample counts. */
typedef void (*convert_func_t) (void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
/* One row of the conversion table: a source/destination format pair, the
 * FEATURE_* flags attached to the implementation (0 for the plain C ones)
 * and the function itself.
 * NOTE(review): presumably the lookup code matches `features` against the
 * runtime CPU flags; that code is not visible here. */
static const struct conv_info {
struct conv_info {
uint32_t src_fmt;
uint32_t dst_fmt;
/* FEATURE_* values are aliases for the SPA_CPU_FLAG_* bits. */
#define FEATURE_SSE2 SPA_CPU_FLAG_SSE2
#define FEATURE_SSE41 SPA_CPU_FLAG_SSE41
#define FEATURE_SSSE3 SPA_CPU_FLAG_SSSE3
uint32_t features;
convert_func_t func;
} conv_table[] =
};
static struct conv_info conv_table[] =
{
/* to f32 */
{ SPA_AUDIO_FORMAT_U8, SPA_AUDIO_FORMAT_F32, 0, conv_u8_to_f32 },
@ -751,7 +691,7 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32, 0, conv_s16_to_f32 },
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32P, 0, conv_s16d_to_f32d },
#if defined (__SSE2__)
#if defined (HAVE_SSE2)
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s16_to_f32d_sse2 },
#endif
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, 0, conv_s16_to_f32d },
@ -769,13 +709,13 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 },
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24d_to_f32d },
#if defined (__SSE4_1__)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse41 },
#if defined (HAVE_SSSE3)
// { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSSE3, conv_s24_to_f32d_ssse3 },
#endif
#if defined (__SSSE3__)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSSE3, conv_s24_to_f32d_ssse3 },
#if defined (HAVE_SSE41)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE41, conv_s24_to_f32d_sse41 },
#endif
#if defined (__SSE2__)
#if defined (HAVE_SSE2)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse2 },
#endif
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32d },
@ -795,7 +735,7 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16, 0, conv_f32_to_s16 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16P, 0, conv_f32d_to_s16d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16P, 0, conv_f32_to_s16d },
#if defined (__SSE2__)
#if defined (HAVE_SSE2)
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, FEATURE_SSE2, conv_f32d_to_s16_sse2 },
#endif
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, 0, conv_f32d_to_s16 },
@ -803,7 +743,7 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32, 0, conv_f32_to_s32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32P, 0, conv_f32d_to_s32d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32P, 0, conv_f32_to_s32d },
#if defined (__SSE2__)
#if defined (HAVE_SSE2)
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, FEATURE_SSE2, conv_f32d_to_s32_sse2 },
#endif
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, 0, conv_f32d_to_s32 },

View file

@ -0,0 +1,101 @@
/* Spa
*
* Copyright © 2019 Wim Taymans
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <math.h>
#include <spa/utils/defs.h>
/*
 * Range constants and float <-> integer sample conversion helpers.
 *
 * Every expansion is fully parenthesized so it can be used inside larger
 * expressions. The F32_TO_* helpers clamp their input to [-1.0, 1.0] with
 * SPA_CLAMP before scaling.
 */
#define U8_MIN		0
#define U8_MAX		255
#define U8_SCALE	127.5f
#define U8_OFFS		128
/* Uses 1.0f (not 1.0) so the result stays a float instead of a double. */
#define U8_TO_F32(v)	((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0f)
#define F32_TO_U8(v)	((uint8_t)((SPA_CLAMP((v), -1.0f, 1.0f) * U8_SCALE) + U8_OFFS))

#define S16_MIN		(-32767)
#define S16_MAX		32767
#define S16_MAX_F	32767.0f
#define S16_SCALE	32767.0f
#define S16_TO_F32(v)	(((int16_t)(v)) * (1.0f / S16_SCALE))
#define F32_TO_S16(v)	((int16_t)(SPA_CLAMP((v), -1.0f, 1.0f) * S16_SCALE))

#define S24_MIN		(-8388607)
#define S24_MAX		8388607
#define S24_MAX_F	8388607.0f
#define S24_SCALE	8388607.0f
#define S24_TO_F32(v)	(((int32_t)(v)) * (1.0f / S24_SCALE))
#define F32_TO_S24(v)	((int32_t)(SPA_CLAMP((v), -1.0f, 1.0f) * S24_SCALE))

#define S32_SCALE	2147483648.0f
/* NOTE(review): despite the name this is a positive value; confirm intent. */
#define S32_MIN		2147483520.0f
/* 32-bit samples go through the 24-bit path: the low 8 bits are dropped. */
#define S32_TO_F32(v)	S24_TO_F32((v) >> 8)
/* Multiply instead of "<< 8": left-shifting a negative int is undefined. */
#define F32_TO_S32(v)	(F32_TO_S24(v) * 256)
/**
 * Read one packed signed 24-bit sample and sign-extend it to 32 bits.
 *
 * The byte order is selected at compile time from __BYTE_ORDER.
 *
 * \param src pointer to the 3 bytes holding the sample
 * \return the sample value, in the range [-8388608, 8388607]
 */
static inline int32_t read_s24(const void *src)
{
	const int8_t *s = src;
	/* The most significant byte is signed. It is scaled by multiplication
	 * because left-shifting a negative value is undefined behaviour in C;
	 * the two lower bytes are taken as unsigned and simply added in. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return (int32_t)s[2] * 65536 + (int32_t)(uint8_t)s[1] * 256 + (int32_t)(uint8_t)s[0];
#else
	return (int32_t)s[0] * 65536 + (int32_t)(uint8_t)s[1] * 256 + (int32_t)(uint8_t)s[2];
#endif
}
/**
 * Store the low 24 bits of \a val as a packed 3-byte sample.
 *
 * The byte order is selected at compile time from __BYTE_ORDER. Exactly
 * three bytes are written at \a dst.
 */
static inline void write_s24(void *dst, int32_t val)
{
	uint8_t *out = dst;
	const uint8_t lo = (uint8_t) (val);
	const uint8_t mid = (uint8_t) (val >> 8);
	const uint8_t hi = (uint8_t) (val >> 16);

#if __BYTE_ORDER == __LITTLE_ENDIAN
	out[0] = lo;
	out[1] = mid;
	out[2] = hi;
#else
	out[0] = hi;
	out[1] = mid;
	out[2] = lo;
#endif
}
/* Common signature of the format conversion functions: destination and
 * source buffer arrays plus the channel and sample counts. */
typedef void (*convert_func_t) (void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
/* Prototypes of the SIMD conversion entry points. Each group is only
 * declared when the build defines the matching HAVE_* macro, so callers
 * must guard their references with the same macro. */
#if defined(HAVE_SSE2)
void conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
void conv_s24_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
void conv_f32d_to_s32_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
void conv_f32d_to_s16_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
#endif
#if defined(HAVE_SSSE3)
void conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
#endif
#if defined(HAVE_SSE41)
void conv_s24_to_f32d_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
uint32_t n_channels, uint32_t n_samples);
#endif

View file

@ -6,10 +6,57 @@ audioconvert_sources = ['fmtconvert.c',
'audioconvert.c',
'plugin.c']
# simd_cargs collects the -DHAVE_* defines and simd_dependencies the static
# libraries; both are handed to the shared library target further down.
simd_cargs = []
simd_dependencies = []
# Each block below is enabled by the corresponding have_* compiler probe and
# builds the sources for one instruction set with only that set's -m flag.
# NOTE(review): the static libraries receive just the -m flag in c_args; the
# -DHAVE_* defines are only added to simd_cargs, which is passed to the
# shared library. SIMD sources that test HAVE_* themselves (the SSSE3 and
# SSE4.1 format converters appear to) will see them undefined - confirm
# that this is intended.
if have_sse
audioconvert_sse = static_library('audioconvert_sse',
['resample-native-sse.c'],
c_args : [sse_args],
include_directories : [spa_inc],
install : false
)
simd_cargs += ['-DHAVE_SSE']
simd_dependencies += audioconvert_sse
endif
if have_sse2
audioconvert_sse2 = static_library('audioconvert_sse2',
['fmt-ops-sse2.c'],
c_args : [sse2_args],
include_directories : [spa_inc],
install : false
)
simd_cargs += ['-DHAVE_SSE2']
simd_dependencies += audioconvert_sse2
endif
if have_ssse3
audioconvert_ssse3 = static_library('audioconvert_ssse3',
['fmt-ops-ssse3.c',
'resample-native-ssse3.c' ],
c_args : [ssse3_args],
include_directories : [spa_inc],
install : false
)
simd_cargs += ['-DHAVE_SSSE3']
simd_dependencies += audioconvert_ssse3
endif
if have_sse41
audioconvert_sse41 = static_library('audioconvert_sse41',
['fmt-ops-sse41.c'],
c_args : [sse41_args],
include_directories : [spa_inc],
install : false
)
simd_cargs += ['-DHAVE_SSE41']
simd_dependencies += audioconvert_sse41
endif
# The plugin itself: built from audioconvert_sources with the collected
# -DHAVE_* defines (simd_cargs) and linked with the collected static
# libraries (simd_dependencies). No instruction-set -m flag is passed here.
audioconvertlib = shared_library('spa-audioconvert',
audioconvert_sources,
c_args : simd_cargs,
include_directories : [spa_inc],
dependencies : [ speexdsp_dep, mathlib ],
link_with : simd_dependencies,
install : true,
install_dir : '@0@/spa/audioconvert/'.format(get_option('libdir')))

View file

@ -22,6 +22,45 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <math.h>
#include <spa/utils/defs.h>
#include "resample.h"
/* Signature of a resampling kernel. in_len and out_len are passed by
 * pointer. NOTE(review): presumably they are updated with the amounts
 * actually consumed/produced - confirm against the kernel bodies. */
typedef void (*resample_func_t)(struct resample *r,
const void * SPA_RESTRICT src[], uint32_t *in_len,
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len);
/* Private state of the native resampler, reached through r->data by the
 * generated do_resample_* kernels. */
struct native_data {
double rate;
uint32_t n_taps;
uint32_t n_phases;
uint32_t in_rate;
uint32_t out_rate;
uint32_t index; /* written back by the kernels when they finish */
uint32_t phase; /* written back by the kernels when they finish */
uint32_t inc;
uint32_t frac;
uint32_t filter_stride; /* stride read by the "inter" kernels */
uint32_t filter_stride_os; /* stride read by the "full" kernels */
uint32_t hist;
float **history;
resample_func_t func; /* kernel currently selected for this state */
float *filter;
float *hist_mem;
};
/* Expands to the signature of do_resample_full_<arch>, with no body and no
 * trailing semicolon. It serves both as a prototype (followed by ';') and
 * as the head of the definition emitted by MAKE_RESAMPLER_FULL. */
#define DEFINE_RESAMPLER_FULL(arch) \
void do_resample_full_##arch(struct resample *r, \
const void * SPA_RESTRICT src[], uint32_t *in_len, \
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len)
/* Same as above for do_resample_inter_<arch>, used by MAKE_RESAMPLER_INTER. */
#define DEFINE_RESAMPLER_INTER(arch) \
void do_resample_inter_##arch(struct resample *r, \
const void * SPA_RESTRICT src[], uint32_t *in_len, \
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len)
#define MAKE_RESAMPLER_COPY(arch) \
static void do_resample_copy_##arch(struct resample *r, \
const void * SPA_RESTRICT src[], uint32_t *in_len, \
@ -52,9 +91,7 @@ static void do_resample_copy_##arch(struct resample *r, \
}
#define MAKE_RESAMPLER_FULL(arch) \
static void do_resample_full_##arch(struct resample *r, \
const void * SPA_RESTRICT src[], uint32_t *in_len, \
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) \
DEFINE_RESAMPLER_FULL(arch) \
{ \
struct native_data *data = r->data; \
uint32_t n_taps = data->n_taps, stride = data->filter_stride_os; \
@ -93,9 +130,7 @@ static void do_resample_full_##arch(struct resample *r, \
}
#define MAKE_RESAMPLER_INTER(arch) \
static void do_resample_inter_##arch(struct resample *r, \
const void * SPA_RESTRICT src[], uint32_t *in_len, \
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) \
DEFINE_RESAMPLER_INTER(arch) \
{ \
struct native_data *data = r->data; \
uint32_t index, phase, stride = data->filter_stride; \
@ -140,3 +175,16 @@ static void do_resample_inter_##arch(struct resample *r, \
data->index = index; \
data->phase = phase; \
}
/* Prototypes of the generated resampling kernels. The C versions are always
 * declared; the SSE and SSSE3 versions only when the build defines the
 * matching HAVE_* macro. */
DEFINE_RESAMPLER_FULL(c);
DEFINE_RESAMPLER_INTER(c);
#if defined (HAVE_SSE)
DEFINE_RESAMPLER_FULL(sse);
DEFINE_RESAMPLER_INTER(sse);
#endif
#if defined (HAVE_SSSE3)
DEFINE_RESAMPLER_FULL(ssse3);
DEFINE_RESAMPLER_INTER(ssse3);
#endif

View file

@ -22,6 +22,8 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "resample-native-impl.h"
#include <xmmintrin.h>
static void inner_product_sse(float *d, const float * SPA_RESTRICT s,

View file

@ -22,6 +22,8 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "resample-native-impl.h"
#include <tmmintrin.h>
static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
@ -90,8 +92,8 @@ static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
}
break;
}
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
sum = _mm_add_ps(sum, _mm_movehdup_ps(sum));
sum = _mm_add_ss(sum, _mm_movehl_ps(sum, sum));
_mm_store_ss(d, sum);
}
@ -99,11 +101,14 @@ static void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
uint32_t n_taps)
{
#if defined (__SSE__)
inner_product_ip_sse(d, s, t0, t1, x, n_taps);
#else
inner_product_ip_c(d, s, t0, t1, x, n_taps);
#endif
float sum[2] = { 0.0f, 0.0f };
uint32_t i;
for (i = 0; i < n_taps; i++) {
sum[0] += s[i] * t0[i];
sum[1] += s[i] * t1[i];
}
*d = (sum[1] - sum[0]) * x + sum[0];
}
MAKE_RESAMPLER_FULL(ssse3);

View file

@ -22,39 +22,8 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <math.h>
/* Signature of a resampling kernel. in_len and out_len are passed by
 * pointer. NOTE(review): presumably they are updated with the amounts
 * actually consumed/produced - confirm against the kernel bodies. */
typedef void (*resample_func_t)(struct resample *r,
const void * SPA_RESTRICT src[], uint32_t *in_len,
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len);
/* Private state of the native resampler. */
struct native_data {
double rate;
uint32_t n_taps;
uint32_t n_phases;
uint32_t in_rate;
uint32_t out_rate;
uint32_t index;
uint32_t phase;
uint32_t inc;
uint32_t frac;
uint32_t filter_stride;
uint32_t filter_stride_os;
uint32_t hist;
float **history;
resample_func_t func; /* kernel currently selected for this state */
float *filter;
float *hist_mem;
};
#include "resample-native-impl.h"
#include "resample-native-c.h"
#if defined (__SSE__)
#include "resample-native-sse.h"
#endif
#if defined (__SSSE3__)
#include "resample-native-ssse3.h"
#endif
struct quality {
uint32_t n_taps;
@ -149,11 +118,11 @@ static void impl_native_update_rate(struct resample *r, double rate)
bool is_full = r->i_rate == in_rate;
data->func = is_full ? do_resample_full_c : do_resample_inter_c;
#if defined (__SSE__)
#if defined (HAVE_SSE)
if (r->cpu_flags & SPA_CPU_FLAG_SSE)
data->func = is_full ? do_resample_full_sse : do_resample_inter_sse;
#endif
#if defined (__SSSE3__)
#if defined (HAVE_SSSE3)
if (r->cpu_flags & SPA_CPU_FLAG_SSSE3)
data->func = is_full ? do_resample_full_ssse3 : do_resample_inter_ssse3;
#endif

View file

@ -22,6 +22,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#ifndef RESAMPLE_H
#define RESAMPLE_H
#include <spa/support/cpu.h>
struct resample {
@ -45,3 +48,5 @@ struct resample {
#define resample_process(r,...) (r)->process(r,__VA_ARGS__)
#define resample_reset(r) (r)->reset(r)
#define resample_delay(r) (r)->delay(r)
#endif /* RESAMPLE_H */