mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-10-29 05:40:27 -04:00
audioconvert: handle more optimizations
Compile an optimized library for the given CPU with the right flags, then link it with the main library.
This commit is contained in:
parent
eaffb25cc2
commit
c8d3d475bb
12 changed files with 271 additions and 141 deletions
10
meson.build
10
meson.build
|
|
@ -50,6 +50,16 @@ if cc.get_id() == 'gcc'
|
|||
language : 'c')
|
||||
endif
|
||||
|
||||
sse_args = '-msse'
|
||||
sse2_args = '-msse2'
|
||||
ssse3_args = '-mssse3'
|
||||
sse41_args = '-msse4.1'
|
||||
|
||||
have_sse = cc.has_argument(sse_args)
|
||||
have_sse2 = cc.has_argument(sse2_args)
|
||||
have_ssse3 = cc.has_argument(ssse3_args)
|
||||
have_sse41 = cc.has_argument(sse41_args)
|
||||
|
||||
cdata = configuration_data()
|
||||
cdata.set('PIPEWIRE_VERSION_MAJOR', pipewire_version_major)
|
||||
cdata.set('PIPEWIRE_VERSION_MINOR', pipewire_version_minor)
|
||||
|
|
|
|||
|
|
@ -22,10 +22,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <spa/utils/defs.h>
|
||||
#include "fmt-ops.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
|
|
@ -109,7 +106,7 @@ conv_s16_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const int16_t *s = src[0];
|
||||
|
|
@ -301,7 +298,7 @@ conv_s24_to_f32d_4_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
conv_s24_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const int8_t *s = src[0];
|
||||
|
|
@ -469,7 +466,7 @@ conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
conv_f32d_to_s32_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
int32_t *d = dst[0];
|
||||
|
|
@ -638,7 +635,7 @@ conv_f32d_to_s16_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RE
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
conv_f32d_to_s16_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
int16_t *d = dst[0];
|
||||
|
|
|
|||
|
|
@ -22,10 +22,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <spa/utils/defs.h>
|
||||
#include "fmt-ops.h"
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
|
|
@ -53,7 +50,7 @@ conv_s24_to_f32d_1_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
in = _mm_srai_epi32(in, 8);
|
||||
out = _mm_cvtepi32_ps(in);
|
||||
out = _mm_mul_ps(out, factor);
|
||||
_mm_storeu_ps(&d0[n], out);
|
||||
_mm_store_ps(&d0[n], out);
|
||||
s += 12 * n_channels;
|
||||
}
|
||||
for(; n < n_samples; n++) {
|
||||
|
|
@ -64,16 +61,23 @@ conv_s24_to_f32d_1_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
|
||||
extern void conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
|
||||
|
||||
void
|
||||
conv_s24_to_f32d_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const int8_t *s = src[0];
|
||||
uint32_t i = 0;
|
||||
|
||||
#if defined (HAVE_SSSE3)
|
||||
for(; i + 3 < n_channels; i += 4)
|
||||
conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||
#endif
|
||||
#if defined (HAVE_SSE2)
|
||||
for(; i + 1 < n_channels; i += 2)
|
||||
conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||
#endif
|
||||
for(; i < n_channels; i++)
|
||||
conv_s24_to_f32d_1_sse41(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,10 +22,7 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <spa/utils/defs.h>
|
||||
#include "fmt-ops.h"
|
||||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
|
|
@ -38,8 +35,8 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
uint32_t n, unrolled;
|
||||
__m128i in[4];
|
||||
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
|
||||
//const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
|
||||
const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
|
||||
const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
|
||||
//const __m128i mask = _mm_set_epi8(15, 14, 13, -1, 12, 11, 10, -1, 9, 8, 7, -1, 6, 5, 4, -1);
|
||||
|
||||
if (SPA_IS_ALIGNED(d0, 16))
|
||||
unrolled = n_samples / 4;
|
||||
|
|
@ -93,7 +90,10 @@ conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
extern void conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
|
||||
extern void conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples);
|
||||
|
||||
void
|
||||
conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||
{
|
||||
const int8_t *s = src[0];
|
||||
|
|
@ -101,8 +101,10 @@ conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_R
|
|||
|
||||
for(; i + 3 < n_channels; i += 4)
|
||||
conv_s24_to_f32d_4_ssse3(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||
#if defined (HAVE_SSE2)
|
||||
for(; i + 1 < n_channels; i += 2)
|
||||
conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||
for(; i < n_channels; i++)
|
||||
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,66 +30,7 @@
|
|||
#include <spa/utils/defs.h>
|
||||
#include <spa/param/audio/format-utils.h>
|
||||
|
||||
#define U8_MIN 0
|
||||
#define U8_MAX 255
|
||||
#define U8_SCALE 127.5f
|
||||
#define U8_OFFS 128
|
||||
#define U8_TO_F32(v) ((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0)
|
||||
#define F32_TO_U8(v) (uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS)
|
||||
|
||||
#define S16_MIN -32767
|
||||
#define S16_MAX 32767
|
||||
#define S16_MAX_F 32767.0f
|
||||
#define S16_SCALE 32767.0f
|
||||
#define S16_TO_F32(v) (((int16_t)(v)) * (1.0f / S16_SCALE))
|
||||
#define F32_TO_S16(v) (int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE)
|
||||
|
||||
#define S24_MIN -8388607
|
||||
#define S24_MAX 8388607
|
||||
#define S24_MAX_F 8388607.0f
|
||||
#define S24_SCALE 8388607.0f
|
||||
#define S24_TO_F32(v) (((int32_t)(v)) * (1.0f / S24_SCALE))
|
||||
#define F32_TO_S24(v) (int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE)
|
||||
|
||||
#define S32_SCALE 2147483648.0f
|
||||
#define S32_MIN 2147483520.0f
|
||||
|
||||
#define S32_TO_F32(v) S24_TO_F32((v) >> 8)
|
||||
#define F32_TO_S32(v) (F32_TO_S24(v) << 8)
|
||||
|
||||
static inline int32_t read_s24(const void *src)
|
||||
{
|
||||
const int8_t *s = src;
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
return (((int32_t)s[2] << 16) | ((uint32_t)(uint8_t)s[1] << 8) | (uint32_t)(uint8_t)s[0]);
|
||||
#else
|
||||
return (((int32_t)s[0] << 16) | ((uint32_t)(uint8_t)s[1] << 8) | (uint32_t)(uint8_t)s[2]);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void write_s24(void *dst, int32_t val)
|
||||
{
|
||||
uint8_t *d = dst;
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
d[0] = (uint8_t) (val);
|
||||
d[1] = (uint8_t) (val >> 8);
|
||||
d[2] = (uint8_t) (val >> 16);
|
||||
#else
|
||||
d[0] = (uint8_t) (val >> 16);
|
||||
d[1] = (uint8_t) (val >> 8);
|
||||
d[2] = (uint8_t) (val);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined (__SSE2__)
|
||||
#include "fmt-ops-sse2.c"
|
||||
#endif
|
||||
#if defined (__SSSE3__)
|
||||
#include "fmt-ops-ssse3.c"
|
||||
#endif
|
||||
#if defined (__SSE4_1__)
|
||||
#include "fmt-ops-sse41.c"
|
||||
#endif
|
||||
#include "fmt-ops.h"
|
||||
|
||||
static void
|
||||
conv_copy8d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
|
||||
|
|
@ -728,19 +669,18 @@ interleave_32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT s
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
typedef void (*convert_func_t) (void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
|
||||
static const struct conv_info {
|
||||
struct conv_info {
|
||||
uint32_t src_fmt;
|
||||
uint32_t dst_fmt;
|
||||
#define FEATURE_SSE2 SPA_CPU_FLAG_SSE2
|
||||
#define FEATURE_SSE41 SPA_CPU_FLAG_SSE41
|
||||
#define FEATURE_SSSE3 SPA_CPU_FLAG_SSSE3
|
||||
uint32_t features;
|
||||
|
||||
convert_func_t func;
|
||||
} conv_table[] =
|
||||
};
|
||||
|
||||
static struct conv_info conv_table[] =
|
||||
{
|
||||
/* to f32 */
|
||||
{ SPA_AUDIO_FORMAT_U8, SPA_AUDIO_FORMAT_F32, 0, conv_u8_to_f32 },
|
||||
|
|
@ -751,7 +691,7 @@ static const struct conv_info {
|
|||
|
||||
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32, 0, conv_s16_to_f32 },
|
||||
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32P, 0, conv_s16d_to_f32d },
|
||||
#if defined (__SSE2__)
|
||||
#if defined (HAVE_SSE2)
|
||||
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s16_to_f32d_sse2 },
|
||||
#endif
|
||||
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, 0, conv_s16_to_f32d },
|
||||
|
|
@ -769,13 +709,13 @@ static const struct conv_info {
|
|||
|
||||
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 },
|
||||
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24d_to_f32d },
|
||||
#if defined (__SSE4_1__)
|
||||
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse41 },
|
||||
#if defined (HAVE_SSSE3)
|
||||
// { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSSE3, conv_s24_to_f32d_ssse3 },
|
||||
#endif
|
||||
#if defined (__SSSE3__)
|
||||
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSSE3, conv_s24_to_f32d_ssse3 },
|
||||
#if defined (HAVE_SSE41)
|
||||
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE41, conv_s24_to_f32d_sse41 },
|
||||
#endif
|
||||
#if defined (__SSE2__)
|
||||
#if defined (HAVE_SSE2)
|
||||
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse2 },
|
||||
#endif
|
||||
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32d },
|
||||
|
|
@ -795,7 +735,7 @@ static const struct conv_info {
|
|||
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16, 0, conv_f32_to_s16 },
|
||||
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16P, 0, conv_f32d_to_s16d },
|
||||
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16P, 0, conv_f32_to_s16d },
|
||||
#if defined (__SSE2__)
|
||||
#if defined (HAVE_SSE2)
|
||||
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, FEATURE_SSE2, conv_f32d_to_s16_sse2 },
|
||||
#endif
|
||||
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, 0, conv_f32d_to_s16 },
|
||||
|
|
@ -803,7 +743,7 @@ static const struct conv_info {
|
|||
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32, 0, conv_f32_to_s32 },
|
||||
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32P, 0, conv_f32d_to_s32d },
|
||||
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32P, 0, conv_f32_to_s32d },
|
||||
#if defined (__SSE2__)
|
||||
#if defined (HAVE_SSE2)
|
||||
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, FEATURE_SSE2, conv_f32d_to_s32_sse2 },
|
||||
#endif
|
||||
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, 0, conv_f32d_to_s32 },
|
||||
|
|
|
|||
101
spa/plugins/audioconvert/fmt-ops.h
Normal file
101
spa/plugins/audioconvert/fmt-ops.h
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
/* Spa
|
||||
*
|
||||
* Copyright © 2019 Wim Taymans
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <spa/utils/defs.h>
|
||||
|
||||
/*
 * Sample format limits, scale factors and scalar conversion helpers.
 *
 * The F32_TO_* macros clamp their argument to [-1.0, 1.0] with SPA_CLAMP
 * before scaling. Because SPA_CLAMP is a macro, do not pass arguments
 * with side effects.
 */

/* unsigned 8 bit */
#define U8_MIN		0
#define U8_MAX		255
#define U8_SCALE	127.5f
#define U8_OFFS		128
/* use a float constant so the result stays float instead of promoting to double */
#define U8_TO_F32(v)	((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0f)
#define F32_TO_U8(v)	((uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS))

/* signed 16 bit; the range is kept symmetric (-32767..32767) */
#define S16_MIN		(-32767)
#define S16_MAX		32767
#define S16_MAX_F	32767.0f
#define S16_SCALE	32767.0f
#define S16_TO_F32(v)	(((int16_t)(v)) * (1.0f / S16_SCALE))
#define F32_TO_S16(v)	((int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE))

/* signed 24 bit; the range is kept symmetric (-8388607..8388607) */
#define S24_MIN		(-8388607)
#define S24_MAX		8388607
#define S24_MAX_F	8388607.0f
#define S24_SCALE	8388607.0f
#define S24_TO_F32(v)	(((int32_t)(v)) * (1.0f / S24_SCALE))
#define F32_TO_S24(v)	((int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE))

/* signed 32 bit; 2147483520.0f is the largest float below 2^31 */
#define S32_SCALE	2147483648.0f
#define S32_MIN		2147483520.0f

/* 32 bit samples are converted through their upper 24 bits */
#define S32_TO_F32(v)	S24_TO_F32((v) >> 8)
/*
 * Multiply instead of shifting: left-shifting a negative int is undefined
 * behaviour, while |F32_TO_S24(v)| * 256 always fits in an int32_t.
 */
#define F32_TO_S32(v)	(F32_TO_S24(v) * 256)
|
||||
|
||||
/*
 * Read one packed 24-bit signed sample (3 bytes) from src and return it
 * sign-extended to 32 bits.
 *
 * The byte order is selected at compile time from __BYTE_ORDER. If
 * neither __BYTE_ORDER nor __LITTLE_ENDIAN has been defined by an
 * earlier include, the preprocessor compares 0 == 0 and the
 * little-endian path is used.
 *
 * The bytes are combined as unsigned values and the sign is restored
 * arithmetically, so no negative value is ever left-shifted (which would
 * be undefined behaviour).
 */
static inline int32_t read_s24(const void *src)
{
	const uint8_t *s = src;
	uint32_t v;

#if __BYTE_ORDER == __LITTLE_ENDIAN
	v = ((uint32_t)s[2] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[0];
#else
	v = ((uint32_t)s[0] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[2];
#endif
	/* sign-extend from bit 23: flip the sign bit, then remove the bias */
	return (int32_t)(v ^ 0x800000u) - 0x800000;
}
|
||||
|
||||
/*
 * Store the low 24 bits of val at dst as one packed 3-byte sample.
 * Exactly three bytes are written.
 *
 * The byte order is selected at compile time from __BYTE_ORDER. If
 * neither __BYTE_ORDER nor __LITTLE_ENDIAN has been defined by an
 * earlier include, the preprocessor compares 0 == 0 and the
 * little-endian path is used.
 *
 * The value is converted to unsigned before the bytes are extracted, so
 * the result does not depend on how the compiler right-shifts negative
 * integers (that behaviour is implementation-defined).
 */
static inline void write_s24(void *dst, int32_t val)
{
	uint8_t *d = dst;
	const uint32_t bits = (uint32_t)val;

#if __BYTE_ORDER == __LITTLE_ENDIAN
	d[0] = (uint8_t)(bits);
	d[1] = (uint8_t)(bits >> 8);
	d[2] = (uint8_t)(bits >> 16);
#else
	d[0] = (uint8_t)(bits >> 16);
	d[1] = (uint8_t)(bits >> 8);
	d[2] = (uint8_t)(bits);
#endif
}
|
||||
|
||||
typedef void (*convert_func_t) (void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
|
||||
#if defined(HAVE_SSE2)
|
||||
void conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
void conv_s24_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
void conv_f32d_to_s32_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
void conv_f32d_to_s16_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
#endif
|
||||
#if defined(HAVE_SSSE3)
|
||||
void conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
#endif
|
||||
#if defined(HAVE_SSE41)
|
||||
void conv_s24_to_f32d_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
|
||||
uint32_t n_channels, uint32_t n_samples);
|
||||
|
||||
#endif
|
||||
|
|
@ -6,10 +6,57 @@ audioconvert_sources = ['fmtconvert.c',
|
|||
'audioconvert.c',
|
||||
'plugin.c']
|
||||
|
||||
simd_cargs = []
|
||||
simd_dependencies = []
|
||||
|
||||
if have_sse
|
||||
audioconvert_sse = static_library('audioconvert_sse',
|
||||
['resample-native-sse.c'],
|
||||
c_args : [sse_args],
|
||||
include_directories : [spa_inc],
|
||||
install : false
|
||||
)
|
||||
simd_cargs += ['-DHAVE_SSE']
|
||||
simd_dependencies += audioconvert_sse
|
||||
endif
|
||||
if have_sse2
|
||||
audioconvert_sse2 = static_library('audioconvert_sse2',
|
||||
['fmt-ops-sse2.c'],
|
||||
c_args : [sse2_args],
|
||||
include_directories : [spa_inc],
|
||||
install : false
|
||||
)
|
||||
simd_cargs += ['-DHAVE_SSE2']
|
||||
simd_dependencies += audioconvert_sse2
|
||||
endif
|
||||
if have_ssse3
|
||||
audioconvert_ssse3 = static_library('audioconvert_ssse3',
|
||||
['fmt-ops-ssse3.c',
|
||||
'resample-native-ssse3.c' ],
|
||||
c_args : [ssse3_args],
|
||||
include_directories : [spa_inc],
|
||||
install : false
|
||||
)
|
||||
simd_cargs += ['-DHAVE_SSSE3']
|
||||
simd_dependencies += audioconvert_ssse3
|
||||
endif
|
||||
if have_sse41
|
||||
audioconvert_sse41 = static_library('audioconvert_sse41',
|
||||
['fmt-ops-sse41.c'],
|
||||
c_args : [sse41_args],
|
||||
include_directories : [spa_inc],
|
||||
install : false
|
||||
)
|
||||
simd_cargs += ['-DHAVE_SSE41']
|
||||
simd_dependencies += audioconvert_sse41
|
||||
endif
|
||||
|
||||
audioconvertlib = shared_library('spa-audioconvert',
|
||||
audioconvert_sources,
|
||||
c_args : simd_cargs,
|
||||
include_directories : [spa_inc],
|
||||
dependencies : [ speexdsp_dep, mathlib ],
|
||||
link_with : simd_dependencies,
|
||||
install : true,
|
||||
install_dir : '@0@/spa/audioconvert/'.format(get_option('libdir')))
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,45 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <spa/utils/defs.h>
|
||||
|
||||
#include "resample.h"
|
||||
|
||||
typedef void (*resample_func_t)(struct resample *r,
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
||||
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len);
|
||||
|
||||
struct native_data {
|
||||
double rate;
|
||||
uint32_t n_taps;
|
||||
uint32_t n_phases;
|
||||
uint32_t in_rate;
|
||||
uint32_t out_rate;
|
||||
uint32_t index;
|
||||
uint32_t phase;
|
||||
uint32_t inc;
|
||||
uint32_t frac;
|
||||
uint32_t filter_stride;
|
||||
uint32_t filter_stride_os;
|
||||
uint32_t hist;
|
||||
float **history;
|
||||
resample_func_t func;
|
||||
float *filter;
|
||||
float *hist_mem;
|
||||
};
|
||||
|
||||
#define DEFINE_RESAMPLER_FULL(arch) \
|
||||
void do_resample_full_##arch(struct resample *r, \
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len, \
|
||||
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len)
|
||||
|
||||
#define DEFINE_RESAMPLER_INTER(arch) \
|
||||
void do_resample_inter_##arch(struct resample *r, \
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len, \
|
||||
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len)
|
||||
|
||||
#define MAKE_RESAMPLER_COPY(arch) \
|
||||
static void do_resample_copy_##arch(struct resample *r, \
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len, \
|
||||
|
|
@ -52,9 +91,7 @@ static void do_resample_copy_##arch(struct resample *r, \
|
|||
}
|
||||
|
||||
#define MAKE_RESAMPLER_FULL(arch) \
|
||||
static void do_resample_full_##arch(struct resample *r, \
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len, \
|
||||
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) \
|
||||
DEFINE_RESAMPLER_FULL(arch) \
|
||||
{ \
|
||||
struct native_data *data = r->data; \
|
||||
uint32_t n_taps = data->n_taps, stride = data->filter_stride_os; \
|
||||
|
|
@ -93,9 +130,7 @@ static void do_resample_full_##arch(struct resample *r, \
|
|||
}
|
||||
|
||||
#define MAKE_RESAMPLER_INTER(arch) \
|
||||
static void do_resample_inter_##arch(struct resample *r, \
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len, \
|
||||
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len) \
|
||||
DEFINE_RESAMPLER_INTER(arch) \
|
||||
{ \
|
||||
struct native_data *data = r->data; \
|
||||
uint32_t index, phase, stride = data->filter_stride; \
|
||||
|
|
@ -140,3 +175,16 @@ static void do_resample_inter_##arch(struct resample *r, \
|
|||
data->index = index; \
|
||||
data->phase = phase; \
|
||||
}
|
||||
|
||||
|
||||
DEFINE_RESAMPLER_FULL(c);
|
||||
DEFINE_RESAMPLER_INTER(c);
|
||||
|
||||
#if defined (HAVE_SSE)
|
||||
DEFINE_RESAMPLER_FULL(sse);
|
||||
DEFINE_RESAMPLER_INTER(sse);
|
||||
#endif
|
||||
#if defined (HAVE_SSSE3)
|
||||
DEFINE_RESAMPLER_FULL(ssse3);
|
||||
DEFINE_RESAMPLER_INTER(ssse3);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "resample-native-impl.h"
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
static void inner_product_sse(float *d, const float * SPA_RESTRICT s,
|
||||
|
|
@ -22,6 +22,8 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "resample-native-impl.h"
|
||||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||
|
|
@ -90,8 +92,8 @@ static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
|||
}
|
||||
break;
|
||||
}
|
||||
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
|
||||
sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
|
||||
sum = _mm_add_ps(sum, _mm_movehdup_ps(sum));
|
||||
sum = _mm_add_ss(sum, _mm_movehl_ps(sum, sum));
|
||||
_mm_store_ss(d, sum);
|
||||
}
|
||||
|
||||
|
|
@ -99,11 +101,14 @@ static void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
|
|||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||
uint32_t n_taps)
|
||||
{
|
||||
#if defined (__SSE__)
|
||||
inner_product_ip_sse(d, s, t0, t1, x, n_taps);
|
||||
#else
|
||||
inner_product_ip_c(d, s, t0, t1, x, n_taps);
|
||||
#endif
|
||||
float sum[2] = { 0.0f, 0.0f };
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < n_taps; i++) {
|
||||
sum[0] += s[i] * t0[i];
|
||||
sum[1] += s[i] * t1[i];
|
||||
}
|
||||
*d = (sum[1] - sum[0]) * x + sum[0];
|
||||
}
|
||||
|
||||
MAKE_RESAMPLER_FULL(ssse3);
|
||||
|
|
@ -22,39 +22,8 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
typedef void (*resample_func_t)(struct resample *r,
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
||||
void * SPA_RESTRICT dst[], uint32_t offs, uint32_t *out_len);
|
||||
|
||||
struct native_data {
|
||||
double rate;
|
||||
uint32_t n_taps;
|
||||
uint32_t n_phases;
|
||||
uint32_t in_rate;
|
||||
uint32_t out_rate;
|
||||
uint32_t index;
|
||||
uint32_t phase;
|
||||
uint32_t inc;
|
||||
uint32_t frac;
|
||||
uint32_t filter_stride;
|
||||
uint32_t filter_stride_os;
|
||||
uint32_t hist;
|
||||
float **history;
|
||||
resample_func_t func;
|
||||
float *filter;
|
||||
float *hist_mem;
|
||||
};
|
||||
|
||||
#include "resample-native-impl.h"
|
||||
#include "resample-native-c.h"
|
||||
#if defined (__SSE__)
|
||||
#include "resample-native-sse.h"
|
||||
#endif
|
||||
#if defined (__SSSE3__)
|
||||
#include "resample-native-ssse3.h"
|
||||
#endif
|
||||
|
||||
struct quality {
|
||||
uint32_t n_taps;
|
||||
|
|
@ -149,11 +118,11 @@ static void impl_native_update_rate(struct resample *r, double rate)
|
|||
bool is_full = r->i_rate == in_rate;
|
||||
|
||||
data->func = is_full ? do_resample_full_c : do_resample_inter_c;
|
||||
#if defined (__SSE__)
|
||||
#if defined (HAVE_SSE)
|
||||
if (r->cpu_flags & SPA_CPU_FLAG_SSE)
|
||||
data->func = is_full ? do_resample_full_sse : do_resample_inter_sse;
|
||||
#endif
|
||||
#if defined (__SSSE3__)
|
||||
#if defined (HAVE_SSSE3)
|
||||
if (r->cpu_flags & SPA_CPU_FLAG_SSSE3)
|
||||
data->func = is_full ? do_resample_full_ssse3 : do_resample_inter_ssse3;
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -22,6 +22,9 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef RESAMPLE_H
|
||||
#define RESAMPLE_H
|
||||
|
||||
#include <spa/support/cpu.h>
|
||||
|
||||
struct resample {
|
||||
|
|
@ -45,3 +48,5 @@ struct resample {
|
|||
#define resample_process(r,...) (r)->process(r,__VA_ARGS__)
|
||||
#define resample_reset(r) (r)->reset(r)
|
||||
#define resample_delay(r) (r)->delay(r)
|
||||
|
||||
#endif /* RESAMPLE_H */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue