audioconvert: some more optimizations

This commit is contained in:
Wim Taymans 2019-03-20 13:04:44 +01:00
parent 67f26c9caf
commit fa3bcabbca
6 changed files with 580 additions and 150 deletions

View file

@ -160,6 +160,14 @@ struct spa_param_info {
#define SPA_EXPORT #define SPA_EXPORT
#endif #endif
/* SPA_RESTRICT: portable 'restrict' qualifier.
 * Expands to C99 'restrict' when the compiler advertises C99 support,
 * to the GNU '__restrict__' extension on GCC >= 4, and to nothing
 * otherwise.  Applied to the pointer parameters of the conversion
 * kernels below to promise the compiler that source and destination
 * buffers do not alias, enabling better vectorization. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define SPA_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define SPA_RESTRICT __restrict__
#else
#define SPA_RESTRICT
#endif
#define SPA_ROUND_DOWN_N(num,align) ((num) & ~((align) - 1)) #define SPA_ROUND_DOWN_N(num,align) ((num) & ~((align) - 1))
#define SPA_ROUND_UP_N(num,align) SPA_ROUND_DOWN_N((num) + ((align) - 1),align) #define SPA_ROUND_UP_N(num,align) SPA_ROUND_DOWN_N((num) + ((align) - 1),align)

View file

@ -224,7 +224,7 @@ channelmix_f32_5p1_3p1_sse(void *data, int n_dst, void *dst[n_dst],
float **s = (float **) src; float **s = (float **) src;
__m128 mix = _mm_set1_ps(v * 0.5f); __m128 mix = _mm_set1_ps(v * 0.5f);
__m128 vol = _mm_set1_ps(v); __m128 vol = _mm_set1_ps(v);
__m128 avg; __m128 avg[2];
float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3], *sSL = s[4], *sSR = s[5]; float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3], *sSL = s[4], *sSR = s[5];
float *dFL = d[0], *dFR = d[1], *dFC = d[2], *dLFE = d[3]; float *dFL = d[0], *dFR = d[1], *dFC = d[2], *dLFE = d[3];
@ -238,7 +238,7 @@ channelmix_f32_5p1_3p1_sse(void *data, int n_dst, void *dst[n_dst],
SPA_IS_ALIGNED(dFR, 16) && SPA_IS_ALIGNED(dFR, 16) &&
SPA_IS_ALIGNED(dFC, 16) && SPA_IS_ALIGNED(dFC, 16) &&
SPA_IS_ALIGNED(dLFE, 16)) SPA_IS_ALIGNED(dLFE, 16))
unrolled = n_samples / 4; unrolled = n_samples / 8;
else else
unrolled = 0; unrolled = 0;
@ -247,37 +247,49 @@ channelmix_f32_5p1_3p1_sse(void *data, int n_dst, void *dst[n_dst],
memset(d[i], 0, n_samples * sizeof(float)); memset(d[i], 0, n_samples * sizeof(float));
} }
else if (v == VOLUME_NORM) { else if (v == VOLUME_NORM) {
for(n = 0; unrolled--; n += 4) { for(n = 0; unrolled--; n += 8) {
avg = _mm_add_ps(_mm_load_ps(&sFL[n]), _mm_load_ps(&sSL[n])); avg[0] = _mm_add_ps(_mm_load_ps(&sFL[n]), _mm_load_ps(&sSL[n]));
_mm_store_ps(&dFL[n], _mm_mul_ps(avg, mix)); avg[1] = _mm_add_ps(_mm_load_ps(&sFL[n+4]), _mm_load_ps(&sSL[n+4]));
avg = _mm_add_ps(_mm_load_ps(&sFR[n]), _mm_load_ps(&sSR[n])); _mm_store_ps(&dFL[n], _mm_mul_ps(avg[0], mix));
_mm_store_ps(&dFR[n], _mm_mul_ps(avg, mix)); _mm_store_ps(&dFL[n+4], _mm_mul_ps(avg[1], mix));
avg[0] = _mm_add_ps(_mm_load_ps(&sFR[n]), _mm_load_ps(&sSR[n]));
avg[1] = _mm_add_ps(_mm_load_ps(&sFR[n+4]), _mm_load_ps(&sSR[n+4]));
_mm_store_ps(&dFR[n], _mm_mul_ps(avg[0], mix));
_mm_store_ps(&dFR[n+4], _mm_mul_ps(avg[1], mix));
_mm_store_ps(&dFC[n], _mm_load_ps(&sFC[n])); _mm_store_ps(&dFC[n], _mm_load_ps(&sFC[n]));
_mm_store_ps(&dFC[n+4], _mm_load_ps(&sFC[n+4]));
_mm_store_ps(&dLFE[n], _mm_load_ps(&sLFE[n])); _mm_store_ps(&dLFE[n], _mm_load_ps(&sLFE[n]));
_mm_store_ps(&dLFE[n+4], _mm_load_ps(&sLFE[n+4]));
} }
for(; n < n_samples; n++) { for(; n < n_samples; n++) {
avg = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n])); avg[0] = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n]));
_mm_store_ss(&dFL[n], _mm_mul_ss(avg, mix)); _mm_store_ss(&dFL[n], _mm_mul_ss(avg[0], mix));
avg = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n])); avg[0] = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
_mm_store_ss(&dFR[n], _mm_mul_ss(avg, mix)); _mm_store_ss(&dFR[n], _mm_mul_ss(avg[0], mix));
_mm_store_ss(&dFC[n], _mm_load_ss(&sFC[n])); _mm_store_ss(&dFC[n], _mm_load_ss(&sFC[n]));
_mm_store_ss(&dLFE[n], _mm_load_ss(&sLFE[n])); _mm_store_ss(&dLFE[n], _mm_load_ss(&sLFE[n]));
} }
} }
else { else {
for(n = 0; unrolled--; n += 4) { for(n = 0; unrolled--; n += 8) {
avg = _mm_add_ps(_mm_load_ps(&sFL[n]), _mm_load_ps(&sSL[n])); avg[0] = _mm_add_ps(_mm_load_ps(&sFL[n]), _mm_load_ps(&sSL[n]));
_mm_store_ps(&dFL[n], _mm_mul_ps(avg, mix)); avg[1] = _mm_add_ps(_mm_load_ps(&sFL[n+4]), _mm_load_ps(&sSL[n+4]));
avg = _mm_add_ps(_mm_load_ps(&sFR[n]), _mm_load_ps(&sSR[n])); _mm_store_ps(&dFL[n], _mm_mul_ps(avg[0], mix));
_mm_store_ps(&dFR[n], _mm_mul_ps(avg, mix)); _mm_store_ps(&dFL[n+4], _mm_mul_ps(avg[1], mix));
avg[0] = _mm_add_ps(_mm_load_ps(&sFR[n]), _mm_load_ps(&sSR[n]));
avg[1] = _mm_add_ps(_mm_load_ps(&sFR[n+4]), _mm_load_ps(&sSR[n+4]));
_mm_store_ps(&dFR[n], _mm_mul_ps(avg[0], mix));
_mm_store_ps(&dFR[n+4], _mm_mul_ps(avg[1], mix));
_mm_store_ps(&dFC[n], _mm_mul_ps(_mm_load_ps(&sFC[n]), vol)); _mm_store_ps(&dFC[n], _mm_mul_ps(_mm_load_ps(&sFC[n]), vol));
_mm_store_ps(&dFC[n+4], _mm_mul_ps(_mm_load_ps(&sFC[n+4]), vol));
_mm_store_ps(&dLFE[n], _mm_mul_ps(_mm_load_ps(&sLFE[n]), vol)); _mm_store_ps(&dLFE[n], _mm_mul_ps(_mm_load_ps(&sLFE[n]), vol));
_mm_store_ps(&dLFE[n+4], _mm_mul_ps(_mm_load_ps(&sLFE[n+4]), vol));
} }
for(; n < n_samples; n++) { for(; n < n_samples; n++) {
avg = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n])); avg[0] = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n]));
_mm_store_ss(&dFL[n], _mm_mul_ss(avg, mix)); _mm_store_ss(&dFL[n], _mm_mul_ss(avg[0], mix));
avg = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n])); avg[0] = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
_mm_store_ss(&dFR[n], _mm_mul_ss(avg, mix)); _mm_store_ss(&dFR[n], _mm_mul_ss(avg[0], mix));
_mm_store_ss(&dFC[n], _mm_mul_ss(_mm_load_ss(&sFC[n]), vol)); _mm_store_ss(&dFC[n], _mm_mul_ss(_mm_load_ss(&sFC[n]), vol));
_mm_store_ss(&dLFE[n], _mm_mul_ss(_mm_load_ss(&sLFE[n]), vol)); _mm_store_ss(&dLFE[n], _mm_mul_ss(_mm_load_ss(&sLFE[n]), vol));
} }

View file

@ -30,12 +30,12 @@
#include <emmintrin.h> #include <emmintrin.h>
static void static void
conv_s16_to_f32d_1_sse2(void *data, void *dst[], const void *src, int n_channels, int n_samples) conv_s16_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{ {
const int16_t *s = src; const int16_t *s = src;
float **d = (float **) dst; float **d = (float **) dst;
float *d0 = d[0]; float *d0 = d[0];
int n, unrolled; uint32_t n, unrolled;
__m128i in; __m128i in;
__m128 out, factor = _mm_set1_ps(1.0f / S16_SCALE); __m128 out, factor = _mm_set1_ps(1.0f / S16_SCALE);
@ -64,12 +64,12 @@ conv_s16_to_f32d_1_sse2(void *data, void *dst[], const void *src, int n_channels
} }
static void static void
conv_s16_to_f32d_2_sse2(void *data, void *dst[], const void *src, int n_channels, int n_samples) conv_s16_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{ {
const int16_t *s = src; const int16_t *s = src;
float **d = (float **) dst; float **d = (float **) dst;
float *d0 = d[0], *d1 = d[1]; float *d0 = d[0], *d1 = d[1];
int n, unrolled; uint32_t n, unrolled;
__m128i in, t[2]; __m128i in, t[2];
__m128 out[2], factor = _mm_set1_ps(1.0f / S16_SCALE); __m128 out[2], factor = _mm_set1_ps(1.0f / S16_SCALE);
@ -110,10 +110,10 @@ conv_s16_to_f32d_2_sse2(void *data, void *dst[], const void *src, int n_channels
} }
static void static void
conv_s16_to_f32d_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s16_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int16_t *s = src[0]; const int16_t *s = src[0];
int i = 0; uint32_t i = 0;
for(; i + 1 < n_channels; i += 2) for(; i + 1 < n_channels; i += 2)
conv_s16_to_f32d_2_sse2(data, &dst[i], &s[i], n_channels, n_samples); conv_s16_to_f32d_2_sse2(data, &dst[i], &s[i], n_channels, n_samples);
@ -122,16 +122,16 @@ conv_s16_to_f32d_sse2(void *data, void *dst[], const void *src[], int n_channels
} }
static void static void
conv_s24_to_f32d_1_sse2(void *data, void *dst[], const void *src, int n_channels, int n_samples) conv_s24_to_f32d_1_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{ {
const uint8_t *s = src; const uint8_t *s = src;
float **d = (float **) dst; float **d = (float **) dst;
float *d0 = d[0]; float *d0 = d[0];
int n, unrolled; uint32_t n, unrolled;
__m128i in; __m128i in;
__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE); __m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
if (SPA_IS_ALIGNED(d0, 16) && n_samples > 4) { if (SPA_IS_ALIGNED(d0, 16)) {
unrolled = n_samples / 4; unrolled = n_samples / 4;
if ((n_samples & 3) == 0) if ((n_samples & 3) == 0)
unrolled--; unrolled--;
@ -161,22 +161,167 @@ conv_s24_to_f32d_1_sse2(void *data, void *dst[], const void *src, int n_channels
} }
static void static void
conv_s24_to_f32d_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24_to_f32d_2_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{
const uint8_t *s = src;
float **d = (float **) dst;
float *d0 = d[0], *d1 = d[1];
uint32_t n, unrolled;
__m128i in[2];
__m128 out[2], factor = _mm_set1_ps(1.0f / S24_SCALE);
if (SPA_IS_ALIGNED(d0, 16)) {
unrolled = n_samples / 4;
if ((n_samples & 3) == 0)
unrolled--;
}
else
unrolled = 0;
for(n = 0; unrolled--; n += 4) {
in[0] = _mm_setr_epi32(
*((uint32_t*)&s[0 + 0*n_channels]),
*((uint32_t*)&s[0 + 3*n_channels]),
*((uint32_t*)&s[0 + 6*n_channels]),
*((uint32_t*)&s[0 + 9*n_channels]));
in[1] = _mm_setr_epi32(
*((uint32_t*)&s[3 + 0*n_channels]),
*((uint32_t*)&s[3 + 3*n_channels]),
*((uint32_t*)&s[3 + 6*n_channels]),
*((uint32_t*)&s[3 + 9*n_channels]));
in[0] = _mm_slli_epi32(in[0], 8);
in[1] = _mm_slli_epi32(in[1], 8);
in[0] = _mm_srai_epi32(in[0], 8);
in[1] = _mm_srai_epi32(in[1], 8);
out[0] = _mm_cvtepi32_ps(in[0]);
out[1] = _mm_cvtepi32_ps(in[1]);
out[0] = _mm_mul_ps(out[0], factor);
out[1] = _mm_mul_ps(out[1], factor);
_mm_store_ps(&d0[n], out[0]);
_mm_store_ps(&d1[n], out[1]);
s += 12 * n_channels;
}
for(; n < n_samples; n++) {
out[0] = _mm_cvtsi32_ss(out[0], read_s24(s));
out[1] = _mm_cvtsi32_ss(out[1], read_s24(s+3));
out[0] = _mm_mul_ss(out[0], factor);
out[1] = _mm_mul_ss(out[1], factor);
_mm_store_ss(&d0[n], out[0]);
_mm_store_ss(&d1[n], out[1]);
s += 3 * n_channels;
}
}
static void
conv_s24_to_f32d_4_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{
const uint8_t *s = src;
float **d = (float **) dst;
float *d0 = d[0], *d1 = d[1], *d2 = d[2], *d3 = d[3];
uint32_t n, unrolled;
__m128i in[4];
__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
if (SPA_IS_ALIGNED(d0, 16)) {
unrolled = n_samples / 4;
if ((n_samples & 3) == 0)
unrolled--;
}
else
unrolled = 0;
for(n = 0; unrolled--; n += 4) {
in[0] = _mm_setr_epi32(
*((uint32_t*)&s[0 + 0*n_channels]),
*((uint32_t*)&s[0 + 3*n_channels]),
*((uint32_t*)&s[0 + 6*n_channels]),
*((uint32_t*)&s[0 + 9*n_channels]));
in[1] = _mm_setr_epi32(
*((uint32_t*)&s[3 + 0*n_channels]),
*((uint32_t*)&s[3 + 3*n_channels]),
*((uint32_t*)&s[3 + 6*n_channels]),
*((uint32_t*)&s[3 + 9*n_channels]));
in[2] = _mm_setr_epi32(
*((uint32_t*)&s[6 + 0*n_channels]),
*((uint32_t*)&s[6 + 3*n_channels]),
*((uint32_t*)&s[6 + 6*n_channels]),
*((uint32_t*)&s[6 + 9*n_channels]));
in[3] = _mm_setr_epi32(
*((uint32_t*)&s[9 + 0*n_channels]),
*((uint32_t*)&s[9 + 3*n_channels]),
*((uint32_t*)&s[9 + 6*n_channels]),
*((uint32_t*)&s[9 + 9*n_channels]));
in[0] = _mm_slli_epi32(in[0], 8);
in[1] = _mm_slli_epi32(in[1], 8);
in[2] = _mm_slli_epi32(in[2], 8);
in[3] = _mm_slli_epi32(in[3], 8);
in[0] = _mm_srai_epi32(in[0], 8);
in[1] = _mm_srai_epi32(in[1], 8);
in[2] = _mm_srai_epi32(in[2], 8);
in[3] = _mm_srai_epi32(in[3], 8);
out[0] = _mm_cvtepi32_ps(in[0]);
out[1] = _mm_cvtepi32_ps(in[1]);
out[2] = _mm_cvtepi32_ps(in[2]);
out[3] = _mm_cvtepi32_ps(in[3]);
out[0] = _mm_mul_ps(out[0], factor);
out[1] = _mm_mul_ps(out[1], factor);
out[2] = _mm_mul_ps(out[2], factor);
out[3] = _mm_mul_ps(out[3], factor);
_mm_store_ps(&d0[n], out[0]);
_mm_store_ps(&d1[n], out[1]);
_mm_store_ps(&d2[n], out[2]);
_mm_store_ps(&d3[n], out[3]);
s += 12 * n_channels;
}
for(; n < n_samples; n++) {
out[0] = _mm_cvtsi32_ss(out[0], read_s24(s));
out[1] = _mm_cvtsi32_ss(out[1], read_s24(s+3));
out[2] = _mm_cvtsi32_ss(out[2], read_s24(s+6));
out[3] = _mm_cvtsi32_ss(out[3], read_s24(s+9));
out[0] = _mm_mul_ss(out[0], factor);
out[1] = _mm_mul_ss(out[1], factor);
out[2] = _mm_mul_ss(out[2], factor);
out[3] = _mm_mul_ss(out[3], factor);
_mm_store_ss(&d0[n], out[0]);
_mm_store_ss(&d1[n], out[1]);
_mm_store_ss(&d2[n], out[2]);
_mm_store_ss(&d3[n], out[3]);
s += 3 * n_channels;
}
}
static void
conv_s24_to_f32d_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int8_t *s = src[0]; const int8_t *s = src[0];
int i = 0; uint32_t i = 0;
for(; i + 3 < n_channels; i += 4)
conv_s24_to_f32d_4_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
for(; i + 1 < n_channels; i += 2)
conv_s24_to_f32d_2_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
for(; i < n_channels; i++) for(; i < n_channels; i++)
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples); conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
} }
static void static void
conv_f32d_to_s32_1_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples) conv_f32d_to_s32_1_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
const float *s0 = s[0]; const float *s0 = s[0];
int32_t *d = dst; int32_t *d = dst;
int n, unrolled; uint32_t n, unrolled;
__m128 in[1]; __m128 in[1];
__m128i out[4]; __m128i out[4];
__m128 int_max = _mm_set1_ps(S24_MAX_F); __m128 int_max = _mm_set1_ps(S24_MAX_F);
@ -212,12 +357,12 @@ conv_f32d_to_s32_1_sse2(void *data, void *dst, const void *src[], int n_channels
} }
static void static void
conv_f32d_to_s32_2_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples) conv_f32d_to_s32_2_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
const float *s0 = s[0], *s1 = s[1]; const float *s0 = s[0], *s1 = s[1];
int32_t *d = dst; int32_t *d = dst;
int n, unrolled; uint32_t n, unrolled;
__m128 in[2]; __m128 in[2];
__m128i out[2], t[2]; __m128i out[2], t[2];
__m128 int_max = _mm_set1_ps(S24_MAX_F); __m128 int_max = _mm_set1_ps(S24_MAX_F);
@ -265,12 +410,12 @@ conv_f32d_to_s32_2_sse2(void *data, void *dst, const void *src[], int n_channels
} }
static void static void
conv_f32d_to_s32_4_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples) conv_f32d_to_s32_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
const float *s0 = s[0], *s1 = s[1], *s2 = s[2], *s3 = s[3]; const float *s0 = s[0], *s1 = s[1], *s2 = s[2], *s3 = s[3];
int32_t *d = dst; int32_t *d = dst;
int n, unrolled; uint32_t n, unrolled;
__m128 in[4]; __m128 in[4];
__m128i out[4], t[4]; __m128i out[4], t[4];
__m128 int_max = _mm_set1_ps(S24_MAX_F); __m128 int_max = _mm_set1_ps(S24_MAX_F);
@ -279,7 +424,8 @@ conv_f32d_to_s32_4_sse2(void *data, void *dst, const void *src[], int n_channels
if (SPA_IS_ALIGNED(s0, 16) && if (SPA_IS_ALIGNED(s0, 16) &&
SPA_IS_ALIGNED(s1, 16) && SPA_IS_ALIGNED(s1, 16) &&
SPA_IS_ALIGNED(s2, 16) && SPA_IS_ALIGNED(s2, 16) &&
SPA_IS_ALIGNED(s3, 16)) SPA_IS_ALIGNED(s3, 16) &&
SPA_IS_ALIGNED(d, 16))
unrolled = n_samples / 4; unrolled = n_samples / 4;
else else
unrolled = 0; unrolled = 0;
@ -310,10 +456,10 @@ conv_f32d_to_s32_4_sse2(void *data, void *dst, const void *src[], int n_channels
out[2] = _mm_unpacklo_epi64(t[2], t[3]); out[2] = _mm_unpacklo_epi64(t[2], t[3]);
out[3] = _mm_unpackhi_epi64(t[2], t[3]); out[3] = _mm_unpackhi_epi64(t[2], t[3]);
_mm_storeu_si128((__m128i*)(d + 0*n_channels), out[0]); _mm_store_si128((__m128i*)(d + 0*n_channels), out[0]);
_mm_storeu_si128((__m128i*)(d + 1*n_channels), out[1]); _mm_store_si128((__m128i*)(d + 1*n_channels), out[1]);
_mm_storeu_si128((__m128i*)(d + 2*n_channels), out[2]); _mm_store_si128((__m128i*)(d + 2*n_channels), out[2]);
_mm_storeu_si128((__m128i*)(d + 3*n_channels), out[3]); _mm_store_si128((__m128i*)(d + 3*n_channels), out[3]);
d += 4*n_channels; d += 4*n_channels;
} }
for(; n < n_samples; n++) { for(; n < n_samples; n++) {
@ -335,10 +481,10 @@ conv_f32d_to_s32_4_sse2(void *data, void *dst, const void *src[], int n_channels
} }
static void static void
conv_f32d_to_s32_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s32_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int32_t *d = dst[0]; int32_t *d = dst[0];
int i = 0; uint32_t i = 0;
for(; i + 3 < n_channels; i += 4) for(; i + 3 < n_channels; i += 4)
conv_f32d_to_s32_4_sse2(data, &d[i], &src[i], n_channels, n_samples); conv_f32d_to_s32_4_sse2(data, &d[i], &src[i], n_channels, n_samples);
@ -349,12 +495,12 @@ conv_f32d_to_s32_sse2(void *data, void *dst[], const void *src[], int n_channels
} }
static void static void
conv_f32d_to_s16_1_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples) conv_f32d_to_s16_1_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
const float *s0 = s[0]; const float *s0 = s[0];
int16_t *d = dst; int16_t *d = dst;
int n, unrolled; uint32_t n, unrolled;
__m128 in[2]; __m128 in[2];
__m128i out[2]; __m128i out[2];
__m128 int_max = _mm_set1_ps(S16_MAX_F); __m128 int_max = _mm_set1_ps(S16_MAX_F);
@ -391,12 +537,12 @@ conv_f32d_to_s16_1_sse2(void *data, void *dst, const void *src[], int n_channels
} }
static void static void
conv_f32d_to_s16_2_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples) conv_f32d_to_s16_2_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
const float *s0 = s[0], *s1 = s[1]; const float *s0 = s[0], *s1 = s[1];
int16_t *d = dst; int16_t *d = dst;
int n, unrolled; uint32_t n, unrolled;
__m128 in[2]; __m128 in[2];
__m128i out[4], t[2]; __m128i out[4], t[2];
__m128 int_max = _mm_set1_ps(S16_MAX_F); __m128 int_max = _mm_set1_ps(S16_MAX_F);
@ -441,11 +587,76 @@ conv_f32d_to_s16_2_sse2(void *data, void *dst, const void *src[], int n_channels
} }
static void static void
conv_f32d_to_s16_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s16_4_sse2(void *data, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
const float **s = (const float **) src;
const float *s0 = s[0], *s1 = s[1], *s2 = s[2], *s3 = s[3];
int16_t *d = dst;
uint32_t n, unrolled;
__m128 in[4];
__m128i out[4], t[4];
__m128 int_max = _mm_set1_ps(S16_MAX_F);
__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
if (SPA_IS_ALIGNED(s0, 16) &&
SPA_IS_ALIGNED(s1, 16) &&
SPA_IS_ALIGNED(s2, 16) &&
SPA_IS_ALIGNED(s3, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for(n = 0; unrolled--; n += 4) {
in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_load_ps(&s1[n]), int_max);
in[2] = _mm_mul_ps(_mm_load_ps(&s2[n]), int_max);
in[3] = _mm_mul_ps(_mm_load_ps(&s3[n]), int_max);
t[0] = _mm_cvtps_epi32(in[0]);
t[1] = _mm_cvtps_epi32(in[1]);
t[2] = _mm_cvtps_epi32(in[2]);
t[3] = _mm_cvtps_epi32(in[3]);
t[0] = _mm_packs_epi32(t[0], t[2]);
t[1] = _mm_packs_epi32(t[1], t[3]);
out[0] = _mm_unpacklo_epi16(t[0], t[1]);
out[1] = _mm_unpackhi_epi16(t[0], t[1]);
out[2] = _mm_unpacklo_epi32(out[0], out[1]);
out[3] = _mm_unpackhi_epi32(out[0], out[1]);
_mm_storel_pi((__m64*)(d + 0*n_channels), (__m128)out[2]);
_mm_storeh_pi((__m64*)(d + 1*n_channels), (__m128)out[2]);
_mm_storel_pi((__m64*)(d + 2*n_channels), (__m128)out[3]);
_mm_storeh_pi((__m64*)(d + 3*n_channels), (__m128)out[3]);
d += 4*n_channels;
}
for(; n < n_samples; n++) {
in[0] = _mm_mul_ss(_mm_load_ss(&s0[n]), int_max);
in[1] = _mm_mul_ss(_mm_load_ss(&s1[n]), int_max);
in[2] = _mm_mul_ss(_mm_load_ss(&s2[n]), int_max);
in[3] = _mm_mul_ss(_mm_load_ss(&s3[n]), int_max);
in[0] = _mm_min_ss(int_max, _mm_max_ss(in[0], int_min));
in[1] = _mm_min_ss(int_max, _mm_max_ss(in[1], int_min));
in[2] = _mm_min_ss(int_max, _mm_max_ss(in[2], int_min));
in[3] = _mm_min_ss(int_max, _mm_max_ss(in[3], int_min));
d[0] = _mm_cvtss_si32(in[0]);
d[1] = _mm_cvtss_si32(in[1]);
d[2] = _mm_cvtss_si32(in[2]);
d[3] = _mm_cvtss_si32(in[3]);
d += n_channels;
}
}
static void
conv_f32d_to_s16_sse2(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int16_t *d = dst[0]; int16_t *d = dst[0];
int i = 0; uint32_t i = 0;
for(; i + 3 < n_channels; i += 4)
conv_f32d_to_s16_4_sse2(data, &d[i], &src[i], n_channels, n_samples);
for(; i + 1 < n_channels; i += 2) for(; i + 1 < n_channels; i += 2)
conv_f32d_to_s16_2_sse2(data, &d[i], &src[i], n_channels, n_samples); conv_f32d_to_s16_2_sse2(data, &d[i], &src[i], n_channels, n_samples);
for(; i < n_channels; i++) for(; i < n_channels; i++)

View file

@ -0,0 +1,79 @@
/* Spa
*
* Copyright © 2018 Wim Taymans
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <stdio.h>
#include <spa/utils/defs.h>
#include <smmintrin.h>
/* Convert one channel of interleaved signed 24-bit LE samples to one
 * planar float buffer (scaled by 1/S24_SCALE), gathering with SSE4.1
 * pinsrd. */
static void
conv_s24_to_f32d_1_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{
	const uint8_t *s = src;
	float **d = (float **) dst;
	float *d0 = d[0];
	uint32_t n, unrolled;
	/* _mm_insert_epi32/_mm_cvtsi32_ss read their first operand, which
	 * would otherwise be an indeterminate value */
	__m128i in = _mm_setzero_si128();
	__m128 out = _mm_setzero_ps(), factor = _mm_set1_ps(1.0f / S24_SCALE);

	if (SPA_IS_ALIGNED(d0, 16)) {
		unrolled = n_samples / 4;
		/* each 32-bit insert of a 24-bit sample reads one byte past
		 * it; keep at least one sample for the scalar tail so the
		 * last load stays inside the source buffer (same guard as
		 * the SSE2 variant).  'unrolled > 0' avoids wrap-around
		 * when n_samples == 0. */
		if (unrolled > 0 && (n_samples & 3) == 0)
			unrolled--;
	}
	else
		unrolled = 0;

	for(n = 0; unrolled--; n += 4) {
		in = _mm_insert_epi32(in, *((uint32_t*)&s[0 * n_channels]), 0);
		in = _mm_insert_epi32(in, *((uint32_t*)&s[3 * n_channels]), 1);
		in = _mm_insert_epi32(in, *((uint32_t*)&s[6 * n_channels]), 2);
		in = _mm_insert_epi32(in, *((uint32_t*)&s[9 * n_channels]), 3);
		/* shift left then arithmetic right to sign-extend 24 -> 32 */
		in = _mm_slli_epi32(in, 8);
		in = _mm_srai_epi32(in, 8);
		out = _mm_cvtepi32_ps(in);
		out = _mm_mul_ps(out, factor);
		/* d0 was verified 16-byte aligned above */
		_mm_store_ps(&d0[n], out);
		s += 12 * n_channels;
	}
	for(; n < n_samples; n++) {
		out = _mm_cvtsi32_ss(out, read_s24(s));
		out = _mm_mul_ss(out, factor);
		_mm_store_ss(&d0[n], out);
		s += 3 * n_channels;
	}
}
/* Dispatch s24 -> planar f32 conversion over channel groups, widest
 * group first: four channels at a time, then pairs, then singles. */
static void
conv_s24_to_f32d_sse41(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
	const int8_t *s = src[0];
	uint32_t c = 0;

	while (c + 4 <= n_channels) {
		conv_s24_to_f32d_4_ssse3(data, &dst[c], &s[3*c], n_channels, n_samples);
		c += 4;
	}
	while (c + 2 <= n_channels) {
		conv_s24_to_f32d_2_sse2(data, &dst[c], &s[3*c], n_channels, n_samples);
		c += 2;
	}
	while (c < n_channels) {
		conv_s24_to_f32d_1_sse41(data, &dst[c], &s[3*c], n_channels, n_samples);
		c += 1;
	}
}

View file

@ -0,0 +1,108 @@
/* Spa
*
* Copyright © 2018 Wim Taymans
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <stdio.h>
#include <spa/utils/defs.h>
#include <smmintrin.h>
/* Convert 4 channels of interleaved signed 24-bit LE samples to four
 * planar float buffers, using pshufb to expand the packed 3-byte
 * samples.  in[j] holds sample j of channels 0..3; the transpose at
 * the end turns per-sample rows into per-channel vectors.
 * NOTE(review): only d0 is alignment-checked before the aligned stores
 * to all four planes — assumes the planes share 16-byte alignment. */
static void
conv_s24_to_f32d_4_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src, uint32_t n_channels, uint32_t n_samples)
{
	const uint8_t *s = src;
	float **d = (float **) dst;
	float *d0 = d[0], *d1 = d[1], *d2 = d[2], *d3 = d[3];
	uint32_t n, unrolled;
	__m128i in[4];
	__m128 out[4], factor = _mm_set1_ps(1.0f / S24_SCALE);
	/* place 3-byte sample k (source bytes 3k..3k+2) into bytes 1..3 of
	 * 32-bit lane k with a zero low byte; the arithmetic right shift
	 * by 8 below then sign-extends 24 -> 32 bits.  (The previous
	 * active mask was offset by 4 source bytes and decoded samples at
	 * the wrong positions.) */
	const __m128i mask = _mm_setr_epi8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);

	/* the scalar tail reads 'out' before fully writing it */
	out[0] = out[1] = out[2] = out[3] = _mm_setzero_ps();

	if (SPA_IS_ALIGNED(d0, 16)) {
		unrolled = n_samples / 4;
		/* each 16-byte load covers only 12 payload bytes, so it
		 * reads 4 bytes past its sample group; keep at least one
		 * sample for the scalar tail so the last load stays inside
		 * the source buffer.  'unrolled > 0' avoids wrap-around
		 * when n_samples == 0. */
		if (unrolled > 0 && (n_samples & 3) == 0)
			unrolled--;
	}
	else
		unrolled = 0;

	for(n = 0; unrolled--; n += 4) {
		/* in[j]: the 12 bytes of sample j for channels 0..3 */
		in[0] = _mm_loadu_si128((__m128i*)(s + 0*n_channels));
		in[1] = _mm_loadu_si128((__m128i*)(s + 3*n_channels));
		in[2] = _mm_loadu_si128((__m128i*)(s + 6*n_channels));
		in[3] = _mm_loadu_si128((__m128i*)(s + 9*n_channels));
		in[0] = _mm_shuffle_epi8(in[0], mask);
		in[1] = _mm_shuffle_epi8(in[1], mask);
		in[2] = _mm_shuffle_epi8(in[2], mask);
		in[3] = _mm_shuffle_epi8(in[3], mask);
		in[0] = _mm_srai_epi32(in[0], 8);
		in[1] = _mm_srai_epi32(in[1], 8);
		in[2] = _mm_srai_epi32(in[2], 8);
		in[3] = _mm_srai_epi32(in[3], 8);
		out[0] = _mm_cvtepi32_ps(in[0]);
		out[1] = _mm_cvtepi32_ps(in[1]);
		out[2] = _mm_cvtepi32_ps(in[2]);
		out[3] = _mm_cvtepi32_ps(in[3]);
		out[0] = _mm_mul_ps(out[0], factor);
		out[1] = _mm_mul_ps(out[1], factor);
		out[2] = _mm_mul_ps(out[2], factor);
		out[3] = _mm_mul_ps(out[3], factor);
		/* rows are per-sample; make them per-channel */
		_MM_TRANSPOSE4_PS(out[0], out[1], out[2], out[3]);
		_mm_store_ps(&d0[n], out[0]);
		_mm_store_ps(&d1[n], out[1]);
		_mm_store_ps(&d2[n], out[2]);
		_mm_store_ps(&d3[n], out[3]);
		s += 12 * n_channels;
	}
	for(; n < n_samples; n++) {
		out[0] = _mm_cvtsi32_ss(out[0], read_s24(s));
		out[1] = _mm_cvtsi32_ss(out[1], read_s24(s+3));
		out[2] = _mm_cvtsi32_ss(out[2], read_s24(s+6));
		out[3] = _mm_cvtsi32_ss(out[3], read_s24(s+9));
		out[0] = _mm_mul_ss(out[0], factor);
		out[1] = _mm_mul_ss(out[1], factor);
		out[2] = _mm_mul_ss(out[2], factor);
		out[3] = _mm_mul_ss(out[3], factor);
		_mm_store_ss(&d0[n], out[0]);
		_mm_store_ss(&d1[n], out[1]);
		_mm_store_ss(&d2[n], out[2]);
		_mm_store_ss(&d3[n], out[3]);
		s += 3 * n_channels;
	}
}
/* Dispatch s24 -> planar f32 conversion over channel groups, widest
 * group first: four channels at a time, then pairs, then singles. */
static void
conv_s24_to_f32d_ssse3(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{
	const int8_t *s = src[0];
	uint32_t c = 0;

	while (c + 4 <= n_channels) {
		conv_s24_to_f32d_4_ssse3(data, &dst[c], &s[3*c], n_channels, n_samples);
		c += 4;
	}
	while (c + 2 <= n_channels) {
		conv_s24_to_f32d_2_sse2(data, &dst[c], &s[3*c], n_channels, n_samples);
		c += 2;
	}
	while (c < n_channels) {
		conv_s24_to_f32d_1_sse2(data, &dst[c], &s[3*c], n_channels, n_samples);
		c += 1;
	}
}

View file

@ -83,68 +83,74 @@ static inline void write_s24(void *dst, int32_t val)
#if defined (__SSE2__) #if defined (__SSE2__)
#include "fmt-ops-sse2.c" #include "fmt-ops-sse2.c"
#endif #endif
#if defined (__SSSE3__)
#include "fmt-ops-ssse3.c"
#endif
#if defined (__SSE4_1__)
#include "fmt-ops-sse41.c"
#endif
static void static void
conv_copy8d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy8d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i; uint32_t i;
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples); memcpy(dst[i], src[i], n_samples);
} }
static void static void
conv_copy8(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy8(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
memcpy(dst[0], src[0], n_samples * n_channels); memcpy(dst[0], src[0], n_samples * n_channels);
} }
static void static void
conv_copy16d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy16d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i; uint32_t i;
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples * sizeof(int16_t)); memcpy(dst[i], src[i], n_samples * sizeof(int16_t));
} }
static void static void
conv_copy16(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy16(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
memcpy(dst[0], src[0], n_samples * sizeof(int16_t) * n_channels); memcpy(dst[0], src[0], n_samples * sizeof(int16_t) * n_channels);
} }
static void static void
conv_copy24d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy24d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i; uint32_t i;
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples * 3); memcpy(dst[i], src[i], n_samples * 3);
} }
static void static void
conv_copy24(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy24(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
memcpy(dst[0], src[0], n_samples * 3 * n_channels); memcpy(dst[0], src[0], n_samples * 3 * n_channels);
} }
static void static void
conv_copy32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i; uint32_t i;
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples * sizeof(int32_t)); memcpy(dst[i], src[i], n_samples * sizeof(int32_t));
} }
static void static void
conv_copy32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_copy32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
memcpy(dst[0], src[0], n_samples * sizeof(int32_t) * n_channels); memcpy(dst[0], src[0], n_samples * sizeof(int32_t) * n_channels);
} }
static void static void
conv_u8d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_u8d_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const uint8_t *s = src[i]; const uint8_t *s = src[i];
@ -156,17 +162,17 @@ conv_u8d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_u8_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_u8_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_u8d_to_f32d(data, dst, src, 1, n_samples * n_channels); conv_u8d_to_f32d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_u8_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_u8_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint8_t *s = src[0]; const uint8_t *s = src[0];
float **d = (float **) dst; float **d = (float **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -175,11 +181,11 @@ conv_u8_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_u8d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_u8d_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint8_t **s = (const uint8_t **) src; const uint8_t **s = (const uint8_t **) src;
float *d = dst[0]; float *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -188,9 +194,9 @@ conv_u8d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_s16d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s16d_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const int16_t *s = src[i]; const int16_t *s = src[i];
@ -201,17 +207,17 @@ conv_s16d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, in
} }
static void static void
conv_s16_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s16_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_s16d_to_f32d(data, dst, src, 1, n_samples * n_channels); conv_s16d_to_f32d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_s16_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s16_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int16_t *s = src[0]; const int16_t *s = src[0];
float **d = (float **) dst; float **d = (float **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -220,11 +226,11 @@ conv_s16_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_s16d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s16d_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int16_t **s = (const int16_t **) src; const int16_t **s = (const int16_t **) src;
float *d = dst[0]; float *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -233,9 +239,9 @@ conv_s16d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_s32d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s32d_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const int32_t *s = src[i]; const int32_t *s = src[i];
@ -247,17 +253,17 @@ conv_s32d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, in
} }
static void static void
conv_s32_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s32_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_s32d_to_f32d(data, dst, src, 1, n_samples * n_channels); conv_s32d_to_f32d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_s32_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s32_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int32_t *s = src[0]; const int32_t *s = src[0];
float **d = (float **) dst; float **d = (float **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -266,11 +272,11 @@ conv_s32_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_s32d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s32d_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int32_t **s = (const int32_t **) src; const int32_t **s = (const int32_t **) src;
float *d = dst[0]; float *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -279,9 +285,9 @@ conv_s32d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_s24d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24d_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const int8_t *s = src[i]; const int8_t *s = src[i];
@ -295,17 +301,17 @@ conv_s24d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, in
} }
static void static void
conv_s24_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_s24d_to_f32d(data, dst, src, 1, n_samples * n_channels); conv_s24d_to_f32d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_s24_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint8_t *s = src[0]; const uint8_t *s = src[0];
float **d = (float **) dst; float **d = (float **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
@ -316,11 +322,11 @@ conv_s24_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_s24d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24d_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint8_t **s = (const uint8_t **) src; const uint8_t **s = (const uint8_t **) src;
float *d = dst[0]; float *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
@ -330,9 +336,9 @@ conv_s24d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_s24_32d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24_32d_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const int32_t *s = src[i]; const int32_t *s = src[i];
@ -344,17 +350,17 @@ conv_s24_32d_to_f32d(void *data, void *dst[], const void *src[], int n_channels,
} }
static void static void
conv_s24_32_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24_32_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_s24_32d_to_f32d(data, dst, src, 1, n_samples * n_channels); conv_s24_32d_to_f32d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_s24_32_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24_32_to_f32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int32_t *s = src[0]; const int32_t *s = src[0];
float **d = (float **) dst; float **d = (float **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -363,11 +369,11 @@ conv_s24_32_to_f32d(void *data, void *dst[], const void *src[], int n_channels,
} }
static void static void
conv_s24_32d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_s24_32d_to_f32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int32_t **s = (const int32_t **) src; const int32_t **s = (const int32_t **) src;
float *d = dst[0]; float *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -376,9 +382,9 @@ conv_s24_32d_to_f32(void *data, void *dst[], const void *src[], int n_channels,
} }
static void static void
conv_f32d_to_u8d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_u8d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const float *s = src[i]; const float *s = src[i];
@ -390,17 +396,17 @@ conv_f32d_to_u8d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_f32_to_u8(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_u8(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_f32d_to_u8d(data, dst, src, 1, n_samples * n_channels); conv_f32d_to_u8d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_f32_to_u8d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_u8d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float *s = src[0]; const float *s = src[0];
uint8_t **d = (uint8_t **) dst; uint8_t **d = (uint8_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -409,11 +415,11 @@ conv_f32_to_u8d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_f32d_to_u8(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_u8(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
uint8_t *d = dst[0]; uint8_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -422,9 +428,9 @@ conv_f32d_to_u8(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_f32d_to_s16d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s16d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const float *s = src[i]; const float *s = src[i];
@ -436,17 +442,17 @@ conv_f32d_to_s16d(void *data, void *dst[], const void *src[], int n_channels, in
} }
static void static void
conv_f32_to_s16(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s16(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_f32d_to_s16d(data, dst, src, 1, n_samples * n_channels); conv_f32d_to_s16d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_f32_to_s16d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s16d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float *s = src[0]; const float *s = src[0];
int16_t **d = (int16_t **) dst; int16_t **d = (int16_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -455,11 +461,11 @@ conv_f32_to_s16d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_f32d_to_s16(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s16(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
int16_t *d = dst[0]; int16_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -468,9 +474,9 @@ conv_f32d_to_s16(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_f32d_to_s32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const float *s = src[i]; const float *s = src[i];
@ -482,17 +488,17 @@ conv_f32d_to_s32d(void *data, void *dst[], const void *src[], int n_channels, in
} }
static void static void
conv_f32_to_s32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_f32d_to_s32d(data, dst, src, 1, n_samples * n_channels); conv_f32d_to_s32d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_f32_to_s32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float *s = src[0]; const float *s = src[0];
int32_t **d = (int32_t **) dst; int32_t **d = (int32_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -501,11 +507,11 @@ conv_f32_to_s32d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_f32d_to_s32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
int32_t *d = dst[0]; int32_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -516,9 +522,9 @@ conv_f32d_to_s32(void *data, void *dst[], const void *src[], int n_channels, int
static void static void
conv_f32d_to_s24d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s24d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const float *s = src[i]; const float *s = src[i];
@ -532,17 +538,17 @@ conv_f32d_to_s24d(void *data, void *dst[], const void *src[], int n_channels, in
} }
static void static void
conv_f32_to_s24(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s24(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_f32d_to_s24d(data, dst, src, 1, n_samples * n_channels); conv_f32d_to_s24d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_f32_to_s24d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s24d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float *s = src[0]; const float *s = src[0];
uint8_t **d = (uint8_t **) dst; uint8_t **d = (uint8_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
@ -552,11 +558,11 @@ conv_f32_to_s24d(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
conv_f32d_to_s24(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s24(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
uint8_t *d = dst[0]; uint8_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
@ -568,9 +574,9 @@ conv_f32d_to_s24(void *data, void *dst[], const void *src[], int n_channels, int
static void static void
conv_f32d_to_s24_32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s24_32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
int i, j; uint32_t i, j;
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
const float *s = src[i]; const float *s = src[i];
@ -582,17 +588,17 @@ conv_f32d_to_s24_32d(void *data, void *dst[], const void *src[], int n_channels,
} }
static void static void
conv_f32_to_s24_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s24_32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
conv_f32d_to_s24_32d(data, dst, src, 1, n_samples * n_channels); conv_f32d_to_s24_32d(data, dst, src, 1, n_samples * n_channels);
} }
static void static void
conv_f32_to_s24_32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32_to_s24_32d(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float *s = src[0]; const float *s = src[0];
int32_t **d = (int32_t **) dst; int32_t **d = (int32_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -601,11 +607,11 @@ conv_f32_to_s24_32d(void *data, void *dst[], const void *src[], int n_channels,
} }
static void static void
conv_f32d_to_s24_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) conv_f32d_to_s24_32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const float **s = (const float **) src; const float **s = (const float **) src;
int32_t *d = dst[0]; int32_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -614,11 +620,11 @@ conv_f32d_to_s24_32(void *data, void *dst[], const void *src[], int n_channels,
} }
static void static void
deinterleave_8(void *data, void *dst[], const void *src[], int n_channels, int n_samples) deinterleave_8(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint8_t *s = src[0]; const uint8_t *s = src[0];
uint8_t **d = (uint8_t **) dst; uint8_t **d = (uint8_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -627,11 +633,11 @@ deinterleave_8(void *data, void *dst[], const void *src[], int n_channels, int n
} }
static void static void
deinterleave_16(void *data, void *dst[], const void *src[], int n_channels, int n_samples) deinterleave_16(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint16_t *s = src[0]; const uint16_t *s = src[0];
uint16_t **d = (uint16_t **) dst; uint16_t **d = (uint16_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -640,11 +646,11 @@ deinterleave_16(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
deinterleave_24(void *data, void *dst[], const void *src[], int n_channels, int n_samples) deinterleave_24(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint8_t *s = src[0]; const uint8_t *s = src[0];
uint8_t **d = (uint8_t **) dst; uint8_t **d = (uint8_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
@ -655,11 +661,11 @@ deinterleave_24(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
deinterleave_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) deinterleave_32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const uint32_t *s = src[0]; const uint32_t *s = src[0];
uint32_t **d = (uint32_t **) dst; uint32_t **d = (uint32_t **) dst;
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -668,11 +674,11 @@ deinterleave_32(void *data, void *dst[], const void *src[], int n_channels, int
} }
static void static void
interleave_8(void *data, void *dst[], const void *src[], int n_channels, int n_samples) interleave_8(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int8_t **s = (const int8_t **) src; const int8_t **s = (const int8_t **) src;
uint8_t *d = dst[0]; uint8_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -681,11 +687,11 @@ interleave_8(void *data, void *dst[], const void *src[], int n_channels, int n_s
} }
static void static void
interleave_16(void *data, void *dst[], const void *src[], int n_channels, int n_samples) interleave_16(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int16_t **s = (const int16_t **) src; const int16_t **s = (const int16_t **) src;
uint16_t *d = dst[0]; uint16_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -694,11 +700,11 @@ interleave_16(void *data, void *dst[], const void *src[], int n_channels, int n_
} }
static void static void
interleave_24(void *data, void *dst[], const void *src[], int n_channels, int n_samples) interleave_24(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int8_t **s = (const int8_t **) src; const int8_t **s = (const int8_t **) src;
uint8_t *d = dst[0]; uint8_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) { for (i = 0; i < n_channels; i++) {
@ -709,11 +715,11 @@ interleave_24(void *data, void *dst[], const void *src[], int n_channels, int n_
} }
static void static void
interleave_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples) interleave_32(void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[], uint32_t n_channels, uint32_t n_samples)
{ {
const int32_t **s = (const int32_t **) src; const int32_t **s = (const int32_t **) src;
uint32_t *d = dst[0]; uint32_t *d = dst[0];
int i, j; uint32_t i, j;
for (j = 0; j < n_samples; j++) { for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_channels; i++) for (i = 0; i < n_channels; i++)
@ -722,8 +728,8 @@ interleave_32(void *data, void *dst[], const void *src[], int n_channels, int n_
} }
/* Common signature of every format-conversion routine in this file.
 * dst/src are arrays of channel-plane pointers (a single entry for
 * interleaved layouts); the SPA_RESTRICT qualifier promises the planes
 * do not alias, which lets the compiler vectorize the inner loops. */
typedef void (*convert_func_t) (void *data, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
		uint32_t n_channels, uint32_t n_samples);
static const struct conv_info { static const struct conv_info {
uint32_t src_fmt; uint32_t src_fmt;
@ -761,6 +767,12 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 }, { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 },
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24d_to_f32d }, { SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24d_to_f32d },
#if defined (__SSE4_1__)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse41 },
#endif
#if defined (__SSSE3__)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_ssse3 },
#endif
#if defined (__SSE2__) #if defined (__SSE2__)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse2 }, { SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse2 },
#endif #endif