mem: align memory to requested alignment

Improve the allocators to always align the buffer memory to the
requested alignment.
Use aligned reads and writes in the SSE functions after checking the
buffer alignment, falling back to the unaligned path when necessary.
Add more tests and benchmark cases.
Check and warn for misaligned memory in plugins.
This commit is contained in:
Wim Taymans 2019-01-24 18:28:52 +01:00
parent dd66469570
commit 13bf70a8dd
19 changed files with 736 additions and 516 deletions

@ -1 +1 @@
Subproject commit c404942e9d15bd3340c57121753fed8d38b247c6
Subproject commit 1cf3e01219d66f92ea655ddf5c2f4caa9b96bcf7

View file

@ -66,7 +66,7 @@ static inline int spa_buffer_alloc_fill_info(struct spa_buffer_alloc_info *info,
info->skel_size += n_datas * sizeof(struct spa_data);
for (i = 0, size = 0; i < n_metas; i++)
size += metas[i].size;
size += SPA_ROUND_UP_N(metas[i].size, 8);
info->meta_size = size;
if (SPA_FLAG_CHECK(info->flags, SPA_BUFFER_ALLOC_FLAG_INLINE_META))
@ -76,13 +76,18 @@ static inline int spa_buffer_alloc_fill_info(struct spa_buffer_alloc_info *info,
if (SPA_FLAG_CHECK(info->flags, SPA_BUFFER_ALLOC_FLAG_INLINE_CHUNK))
info->skel_size += info->chunk_size;
for (i = 0, size = 0; i < n_datas; i++)
for (i = 0, size = 0; i < n_datas; i++) {
size = SPA_ROUND_UP_N(size, data_aligns[i]);
size += datas[i].maxsize;
}
info->data_size = size;
if (!SPA_FLAG_CHECK(info->flags, SPA_BUFFER_ALLOC_FLAG_NO_DATA) &&
SPA_FLAG_CHECK(info->flags, SPA_BUFFER_ALLOC_FLAG_INLINE_DATA))
info->skel_size += size;
SPA_FLAG_CHECK(info->flags, SPA_BUFFER_ALLOC_FLAG_INLINE_DATA)) {
info->skel_size += n_datas ? data_aligns[0] - 1 : 0;
info->skel_size += info->data_size;
}
info->skel_size = SPA_ROUND_UP_N(info->skel_size, 8);
return 0;
}
@ -114,7 +119,7 @@ spa_buffer_alloc_layout(struct spa_buffer_alloc_info *info,
struct spa_meta *m = &b->metas[i];
*m = info->metas[i];
m->data = *dp;
*dp = SPA_MEMBER(*dp, m->size, void);
*dp = SPA_MEMBER(*dp, SPA_ROUND_UP_N(m->size, 8), void);
}
size = info->n_datas * sizeof(struct spa_chunk);
@ -138,6 +143,7 @@ spa_buffer_alloc_layout(struct spa_buffer_alloc_info *info,
*d = info->datas[i];
d->chunk = &cp[i];
if (!SPA_FLAG_CHECK(info->flags, SPA_BUFFER_ALLOC_FLAG_NO_DATA)) {
*dp = SPA_PTR_ALIGN(*dp, info->data_aligns[i], void);
d->data = *dp;
*dp = SPA_MEMBER(*dp, d->maxsize, void);
}
@ -173,8 +179,6 @@ spa_buffer_alloc_array(uint32_t n_buffers, uint32_t flags,
spa_buffer_alloc_fill_info(&info, n_metas, metas, n_datas, datas, data_aligns);
info.skel_size = SPA_ROUND_UP_N(info.skel_size, 16);
buffers = (struct spa_buffer **)calloc(n_buffers, sizeof(struct spa_buffer *) + info.skel_size);
skel = SPA_MEMBER(buffers, sizeof(struct spa_buffer *) * n_buffers, void);

View file

@ -147,6 +147,9 @@ struct spa_fraction {
#define SPA_ROUND_DOWN_N(num,align) ((num) & ~((align) - 1))
#define SPA_ROUND_UP_N(num,align) SPA_ROUND_DOWN_N((num) + ((align) - 1),align)
#define SPA_IS_ALIGNED(p,align) (((intptr_t)(p) & ((align)-1)) == 0)
#define SPA_PTR_ALIGN(p,align,type) (type*)SPA_ROUND_UP_N((intptr_t)(p), (intptr_t)(align))
#ifndef SPA_LIKELY
#ifdef __GNUC__
#define SPA_LIKELY(x) (__builtin_expect(!!(x),1))

View file

@ -487,8 +487,6 @@ static int port_set_format(struct spa_node *node,
info.media_subtype != SPA_MEDIA_SUBTYPE_raw)
return -EINVAL;
spa_debug_pod(0, NULL, format);
if (spa_format_audio_raw_parse(format, &info.info.raw) < 0)
return -EINVAL;

View file

@ -31,52 +31,65 @@
#include "fmt-ops.c"
#define N_SAMPLES 4096
#define N_CHANNELS 5
#define MAX_SAMPLES 4096
#define MAX_CHANNELS 11
#define MAX_COUNT 1000
static uint8_t samp_in[N_SAMPLES * N_CHANNELS * 4];
static uint8_t samp_out[N_SAMPLES * N_CHANNELS * 4];
static uint8_t samp_in[MAX_SAMPLES * MAX_CHANNELS * 4];
static uint8_t samp_out[MAX_SAMPLES * MAX_CHANNELS * 4];
static void run_test(const char *name, bool in_packed, bool out_packed, convert_func_t func)
static const int sample_sizes[] = { 0, 1, 128, 513, 4096 };
static const int channel_counts[] = { 1, 2, 4, 6, 8, 11 };
static void run_test1(const char *name, bool in_packed, bool out_packed, convert_func_t func,
int n_channels, int n_samples)
{
const void *ip[N_CHANNELS];
void *op[N_CHANNELS];
int i, j, ic, oc, ns;
int i, j;
const void *ip[n_channels];
void *op[n_channels];
struct timespec ts;
uint64_t t1, t2;
uint64_t count = 0;
uint64_t count, t1, t2;
for (j = 0; j < N_CHANNELS; j++) {
ip[j] = &samp_in[j * N_SAMPLES * 4];
op[j] = &samp_out[j * N_SAMPLES * 4];
for (j = 0; j < n_channels; j++) {
ip[j] = &samp_in[j * n_samples * 4];
op[j] = &samp_out[j * n_samples * 4];
}
ic = in_packed ? 1 : N_CHANNELS;
oc = out_packed ? 1 : N_CHANNELS;
ns = (in_packed && out_packed) ? N_SAMPLES * N_CHANNELS : N_SAMPLES;
clock_gettime(CLOCK_MONOTONIC, &ts);
t1 = SPA_TIMESPEC_TO_NSEC(&ts);
count = 0;
for (i = 0; i < MAX_COUNT; i++) {
func(NULL, oc, op, ic, ip, ns);
func(NULL, op, ip, n_channels, n_samples);
count++;
}
count *= N_SAMPLES;
clock_gettime(CLOCK_MONOTONIC, &ts);
t2 = SPA_TIMESPEC_TO_NSEC(&ts);
fprintf(stderr, "%s: elapsed %"PRIu64" count %"PRIu64" = %"PRIu64"/sec\n", name,
fprintf(stderr, "%s: samples %d, channels %d: elapsed %"PRIu64" count %"
PRIu64" = %"PRIu64"/sec\n", name, n_samples, n_channels,
t2 - t1, count, count * (uint64_t)SPA_NSEC_PER_SEC / (t2 - t1));
}
static void run_test(const char *name, bool in_packed, bool out_packed, convert_func_t func)
{
size_t i, j;
for (i = 0; i < SPA_N_ELEMENTS(sample_sizes); i++) {
for (j = 0; j < SPA_N_ELEMENTS(channel_counts); j++) {
run_test1(name, in_packed, out_packed, func, channel_counts[j],
(sample_sizes[i] + (channel_counts[j] -1)) / channel_counts[j]);
}
}
}
static void test_f32_u8(void)
{
run_test("test_f32_u8", true, true, conv_f32_to_u8);
run_test("test_f32d_u8", false, true, conv_f32d_to_u8);
run_test("test_f32_u8d", true, false, conv_f32_to_u8d);
run_test("test_f32d_u8d", false, false, conv_f32d_to_u8d);
}
static void test_u8_f32(void)

View file

@ -26,31 +26,34 @@
static void
channelmix_copy_sse(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float), unrolled, remain;
int i, n, unrolled;
float **d = (float **)dst;
float **s = (float **)src;
__m128 vol = _mm_set1_ps(v);
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (i = 0; i < n_dst; i++)
memcpy(d[i], s[i], n_bytes);
memcpy(d[i], s[i], n_samples * sizeof(float));
}
else {
for (i = 0; i < n_dst; i++) {
float *di = d[i], *si = s[i];
unrolled = n_samples / 4;
remain = n_samples & 3;
if (SPA_IS_ALIGNED(di, 16) &&
SPA_IS_ALIGNED(si, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for(n = 0; unrolled--; n += 4)
_mm_storeu_ps(&di[n], _mm_mul_ps(_mm_loadu_ps(&si[n]), vol));
for(; remain--; n++)
_mm_store_ps(&di[n], _mm_mul_ps(_mm_load_ps(&si[n]), vol));
for(; n < n_samples; n++)
_mm_store_ss(&di[n], _mm_mul_ss(_mm_load_ss(&si[n]), vol));
}
}
@ -58,33 +61,40 @@ channelmix_copy_sse(void *data, int n_dst, void *dst[n_dst],
static void
channelmix_f32_2_4_sse(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float), unrolled, remain;
int i, n, unrolled;
float **d = (float **)dst;
float **s = (float **)src;
__m128 vol = _mm_set1_ps(v);
__m128 in;
float *dFL = d[0], *dFR = d[1], *dRL = d[2], *dRR = d[3];
float *sFL = s[0], *sFR = s[1];
float *dFL = d[0], *dFR = d[1], *dRL = d[2], *dRR = d[3];
if (SPA_IS_ALIGNED(sFL, 16) &&
SPA_IS_ALIGNED(sFR, 16) &&
SPA_IS_ALIGNED(dFL, 16) &&
SPA_IS_ALIGNED(dFR, 16) &&
SPA_IS_ALIGNED(dRL, 16) &&
SPA_IS_ALIGNED(dRR, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
in = _mm_loadu_ps(&sFL[n]);
_mm_storeu_ps(&dFL[n], in);
_mm_storeu_ps(&dRL[n], in);
in = _mm_loadu_ps(&sFR[n]);
_mm_storeu_ps(&dFR[n], in);
_mm_storeu_ps(&dRR[n], in);
in = _mm_load_ps(&sFL[n]);
_mm_store_ps(&dFL[n], in);
_mm_store_ps(&dRL[n], in);
in = _mm_load_ps(&sFR[n]);
_mm_store_ps(&dFR[n], in);
_mm_store_ps(&dRR[n], in);
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
in = _mm_load_ss(&sFL[n]);
_mm_store_ss(&dFL[n], in);
_mm_store_ss(&dRL[n], in);
@ -94,18 +104,15 @@ channelmix_f32_2_4_sse(void *data, int n_dst, void *dst[n_dst],
}
}
else {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
in = _mm_mul_ps(_mm_loadu_ps(&sFL[n]), vol);
_mm_storeu_ps(&dFL[n], in);
_mm_storeu_ps(&dRL[n], in);
in = _mm_mul_ps(_mm_loadu_ps(&sFR[n]), vol);
_mm_storeu_ps(&dFR[n], in);
_mm_storeu_ps(&dRR[n], in);
in = _mm_mul_ps(_mm_load_ps(&sFL[n]), vol);
_mm_store_ps(&dFL[n], in);
_mm_store_ps(&dRL[n], in);
in = _mm_mul_ps(_mm_load_ps(&sFR[n]), vol);
_mm_store_ps(&dFR[n], in);
_mm_store_ps(&dRR[n], in);
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
in = _mm_mul_ss(_mm_load_ss(&sFL[n]), vol);
_mm_store_ss(&dFL[n], in);
_mm_store_ss(&dRL[n], in);
@ -119,9 +126,9 @@ channelmix_f32_2_4_sse(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR -> FL+FR */
static void
channelmix_f32_5p1_2_sse(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int n, n_samples = n_bytes / sizeof(float), unrolled, remain;
int n, unrolled;
float **d = (float **) dst;
float **s = (float **) src;
float *m = matrix;
@ -130,33 +137,41 @@ channelmix_f32_5p1_2_sse(void *data, int n_dst, void *dst[n_dst],
__m128 slev = _mm_set1_ps(m[4]);
__m128 vol = _mm_set1_ps(v);
__m128 in, ctr;
float *dFL = d[0], *dFR = d[1];
float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3], *sSL = s[4], *sSR = s[5];
float *dFL = d[0], *dFR = d[1];
if (SPA_IS_ALIGNED(sFL, 16) &&
SPA_IS_ALIGNED(sFR, 16) &&
SPA_IS_ALIGNED(sFC, 16) &&
SPA_IS_ALIGNED(sLFE, 16) &&
SPA_IS_ALIGNED(sSL, 16) &&
SPA_IS_ALIGNED(sSR, 16) &&
SPA_IS_ALIGNED(dFL, 16) &&
SPA_IS_ALIGNED(dFR, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
if (v <= VOLUME_MIN) {
memset(dFL, 0, n_bytes);
memset(dFR, 0, n_bytes);
memset(dFL, 0, n_samples * sizeof(float));
memset(dFR, 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
ctr = _mm_mul_ps(_mm_loadu_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
in = _mm_mul_ps(_mm_loadu_ps(&sSL[n]), slev);
ctr = _mm_mul_ps(_mm_load_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_load_ps(&sLFE[n]), llev));
in = _mm_mul_ps(_mm_load_ps(&sSL[n]), slev);
in = _mm_add_ps(in, ctr);
in = _mm_add_ps(in, _mm_loadu_ps(&sFL[n]));
_mm_storeu_ps(&dFL[n], in);
in = _mm_mul_ps(_mm_loadu_ps(&sSR[n]), slev);
in = _mm_add_ps(in, _mm_load_ps(&sFL[n]));
_mm_store_ps(&dFL[n], in);
in = _mm_mul_ps(_mm_load_ps(&sSR[n]), slev);
in = _mm_add_ps(in, ctr);
in = _mm_add_ps(in, _mm_loadu_ps(&sFR[n]));
_mm_storeu_ps(&dFR[n], in);
in = _mm_add_ps(in, _mm_load_ps(&sFR[n]));
_mm_store_ps(&dFR[n], in);
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
ctr = _mm_mul_ss(_mm_load_ss(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
ctr = _mm_add_ss(ctr, _mm_mul_ss(_mm_load_ss(&sLFE[n]), llev));
in = _mm_mul_ss(_mm_load_ss(&sSL[n]), slev);
in = _mm_add_ss(in, ctr);
in = _mm_add_ss(in, _mm_load_ss(&sFL[n]));
@ -168,26 +183,23 @@ channelmix_f32_5p1_2_sse(void *data, int n_dst, void *dst[n_dst],
}
}
else {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
ctr = _mm_mul_ps(_mm_loadu_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
in = _mm_mul_ps(_mm_loadu_ps(&sSL[n]), slev);
ctr = _mm_mul_ps(_mm_load_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_load_ps(&sLFE[n]), llev));
in = _mm_mul_ps(_mm_load_ps(&sSL[n]), slev);
in = _mm_add_ps(in, ctr);
in = _mm_add_ps(in, _mm_loadu_ps(&sFL[n]));
in = _mm_add_ps(in, _mm_load_ps(&sFL[n]));
in = _mm_mul_ps(in, vol);
_mm_storeu_ps(&dFL[n], in);
in = _mm_mul_ps(_mm_loadu_ps(&sSR[n]), slev);
_mm_store_ps(&dFL[n], in);
in = _mm_mul_ps(_mm_load_ps(&sSR[n]), slev);
in = _mm_add_ps(in, ctr);
in = _mm_add_ps(in, _mm_loadu_ps(&sFR[n]));
in = _mm_add_ps(in, _mm_load_ps(&sFR[n]));
in = _mm_mul_ps(in, vol);
_mm_storeu_ps(&dFR[n], in);
_mm_store_ps(&dFR[n], in);
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
ctr = _mm_mul_ss(_mm_load_ss(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
ctr = _mm_add_ss(ctr, _mm_mul_ss(_mm_load_ss(&sLFE[n]), llev));
in = _mm_mul_ss(_mm_load_ss(&sSL[n]), slev);
in = _mm_add_ss(in, ctr);
in = _mm_add_ss(in, _mm_load_ss(&sFL[n]));
@ -205,58 +217,66 @@ channelmix_f32_5p1_2_sse(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR -> FL+FR+FC+LFE*/
static void
channelmix_f32_5p1_3p1_sse(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float), unrolled, remain;
int i, n, unrolled;
float **d = (float **) dst;
float **s = (float **) src;
__m128 mix = _mm_set1_ps(v * 0.5f);
__m128 vol = _mm_set1_ps(v);
__m128 avg;
float *dFL = d[0], *dFR = d[1], *dFC = d[2], *dLFE = d[3];
float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3], *sSL = s[4], *sSR = s[5];
float *dFL = d[0], *dFR = d[1], *dFC = d[2], *dLFE = d[3];
if (SPA_IS_ALIGNED(sFL, 16) &&
SPA_IS_ALIGNED(sFR, 16) &&
SPA_IS_ALIGNED(sFC, 16) &&
SPA_IS_ALIGNED(sLFE, 16) &&
SPA_IS_ALIGNED(sSL, 16) &&
SPA_IS_ALIGNED(sSR, 16) &&
SPA_IS_ALIGNED(dFL, 16) &&
SPA_IS_ALIGNED(dFR, 16) &&
SPA_IS_ALIGNED(dFC, 16) &&
SPA_IS_ALIGNED(dLFE, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
avg = _mm_add_ps(_mm_loadu_ps(&sFL[n]), _mm_loadu_ps(&sSL[n]));
_mm_storeu_ps(&dFL[n], _mm_mul_ps(avg, mix));
avg = _mm_add_ps(_mm_loadu_ps(&sFR[n]), _mm_loadu_ps(&sSR[n]));
_mm_storeu_ps(&dFR[n], _mm_mul_ps(avg, mix));
_mm_storeu_ps(&dFC[n], _mm_loadu_ps(&sFC[n]));
_mm_storeu_ps(&dLFE[n], _mm_loadu_ps(&sLFE[n]));
avg = _mm_add_ps(_mm_load_ps(&sFL[n]), _mm_load_ps(&sSL[n]));
_mm_store_ps(&dFL[n], _mm_mul_ps(avg, mix));
avg = _mm_add_ps(_mm_load_ps(&sFR[n]), _mm_load_ps(&sSR[n]));
_mm_store_ps(&dFR[n], _mm_mul_ps(avg, mix));
_mm_store_ps(&dFC[n], _mm_load_ps(&sFC[n]));
_mm_store_ps(&dLFE[n], _mm_load_ps(&sLFE[n]));
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
avg = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n]));
_mm_store_ss(&dFL[n], _mm_mul_ss(avg, mix));
avg = _mm_add_ps(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
avg = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
_mm_store_ss(&dFR[n], _mm_mul_ss(avg, mix));
_mm_store_ss(&dFC[n], _mm_load_ss(&sFC[n]));
_mm_store_ss(&dLFE[n], _mm_load_ss(&sLFE[n]));
}
}
else {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
avg = _mm_add_ps(_mm_loadu_ps(&sFL[n]), _mm_loadu_ps(&sSL[n]));
_mm_storeu_ps(&dFL[n], _mm_mul_ps(avg, mix));
avg = _mm_add_ps(_mm_loadu_ps(&sFR[n]), _mm_loadu_ps(&sSR[n]));
_mm_storeu_ps(&dFR[n], _mm_mul_ps(avg, mix));
_mm_storeu_ps(&dFC[n], _mm_mul_ps(_mm_loadu_ps(&sFC[n]), vol));
_mm_storeu_ps(&dLFE[n], _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), vol));
avg = _mm_add_ps(_mm_load_ps(&sFL[n]), _mm_load_ps(&sSL[n]));
_mm_store_ps(&dFL[n], _mm_mul_ps(avg, mix));
avg = _mm_add_ps(_mm_load_ps(&sFR[n]), _mm_load_ps(&sSR[n]));
_mm_store_ps(&dFR[n], _mm_mul_ps(avg, mix));
_mm_store_ps(&dFC[n], _mm_mul_ps(_mm_load_ps(&sFC[n]), vol));
_mm_store_ps(&dLFE[n], _mm_mul_ps(_mm_load_ps(&sLFE[n]), vol));
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
avg = _mm_add_ss(_mm_load_ss(&sFL[n]), _mm_load_ss(&sSL[n]));
_mm_store_ss(&dFL[n], _mm_mul_ss(avg, mix));
avg = _mm_add_ps(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
avg = _mm_add_ss(_mm_load_ss(&sFR[n]), _mm_load_ss(&sSR[n]));
_mm_store_ss(&dFR[n], _mm_mul_ss(avg, mix));
_mm_store_ss(&dFC[n], _mm_mul_ss(_mm_load_ss(&sFC[n]), vol));
_mm_store_ss(&dLFE[n], _mm_mul_ss(_mm_load_ss(&sLFE[n]), vol));
@ -267,9 +287,9 @@ channelmix_f32_5p1_3p1_sse(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR -> FL+FR+RL+RR*/
static void
channelmix_f32_5p1_4_sse(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float), unrolled, remain;
int i, n, unrolled;
float **d = (float **) dst;
float **s = (float **) src;
float *m = matrix;
@ -277,28 +297,39 @@ channelmix_f32_5p1_4_sse(void *data, int n_dst, void *dst[n_dst],
__m128 llev = _mm_set1_ps(m[3]);
__m128 vol = _mm_set1_ps(v);
__m128 ctr;
float *dFL = d[0], *dFR = d[1], *dRL = d[2], *dRR = d[3];
float *sFL = s[0], *sFR = s[1], *sFC = s[2], *sLFE = s[3], *sSL = s[4], *sSR = s[5];
float *dFL = d[0], *dFR = d[1], *dRL = d[2], *dRR = d[3];
if (SPA_IS_ALIGNED(sFL, 16) &&
SPA_IS_ALIGNED(sFR, 16) &&
SPA_IS_ALIGNED(sFC, 16) &&
SPA_IS_ALIGNED(sLFE, 16) &&
SPA_IS_ALIGNED(sSL, 16) &&
SPA_IS_ALIGNED(sSR, 16) &&
SPA_IS_ALIGNED(dFL, 16) &&
SPA_IS_ALIGNED(dFR, 16) &&
SPA_IS_ALIGNED(dRL, 16) &&
SPA_IS_ALIGNED(dRR, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
ctr = _mm_mul_ps(_mm_loadu_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
_mm_storeu_ps(&dFL[n], _mm_add_ps(_mm_loadu_ps(&sFL[n]), ctr));
_mm_storeu_ps(&dFR[n], _mm_add_ps(_mm_loadu_ps(&sFR[n]), ctr));
_mm_storeu_ps(&dRL[n], _mm_loadu_ps(&sSL[n]));
_mm_storeu_ps(&dRR[n], _mm_loadu_ps(&sSR[n]));
ctr = _mm_mul_ps(_mm_load_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_load_ps(&sLFE[n]), llev));
_mm_store_ps(&dFL[n], _mm_add_ps(_mm_load_ps(&sFL[n]), ctr));
_mm_store_ps(&dFR[n], _mm_add_ps(_mm_load_ps(&sFR[n]), ctr));
_mm_store_ps(&dRL[n], _mm_load_ps(&sSL[n]));
_mm_store_ps(&dRR[n], _mm_load_ps(&sSR[n]));
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
ctr = _mm_mul_ss(_mm_load_ss(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
ctr = _mm_add_ss(ctr, _mm_mul_ss(_mm_load_ss(&sLFE[n]), llev));
_mm_store_ss(&dFL[n], _mm_add_ss(_mm_load_ss(&sFL[n]), ctr));
_mm_store_ss(&dFR[n], _mm_add_ss(_mm_load_ss(&sFR[n]), ctr));
_mm_store_ss(&dRL[n], _mm_load_ss(&sSL[n]));
@ -306,20 +337,17 @@ channelmix_f32_5p1_4_sse(void *data, int n_dst, void *dst[n_dst],
}
}
else {
unrolled = n_samples / 4;
remain = n_samples & 3;
for(n = 0; unrolled--; n += 4) {
ctr = _mm_mul_ps(_mm_loadu_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
_mm_storeu_ps(&dFL[n], _mm_mul_ps(_mm_add_ps(_mm_loadu_ps(&sFL[n]), ctr), vol));
_mm_storeu_ps(&dFR[n], _mm_mul_ps(_mm_add_ps(_mm_loadu_ps(&sFR[n]), ctr), vol));
_mm_storeu_ps(&dRL[n], _mm_mul_ps(_mm_loadu_ps(&sSL[n]), vol));
_mm_storeu_ps(&dRR[n], _mm_mul_ps(_mm_loadu_ps(&sSR[n]), vol));
ctr = _mm_mul_ps(_mm_load_ps(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_load_ps(&sLFE[n]), llev));
_mm_store_ps(&dFL[n], _mm_mul_ps(_mm_add_ps(_mm_load_ps(&sFL[n]), ctr), vol));
_mm_store_ps(&dFR[n], _mm_mul_ps(_mm_add_ps(_mm_load_ps(&sFR[n]), ctr), vol));
_mm_store_ps(&dRL[n], _mm_mul_ps(_mm_load_ps(&sSL[n]), vol));
_mm_store_ps(&dRR[n], _mm_mul_ps(_mm_load_ps(&sSR[n]), vol));
}
for(; remain--; n++) {
for(; n < n_samples; n++) {
ctr = _mm_mul_ss(_mm_load_ss(&sFC[n]), clev);
ctr = _mm_add_ps(ctr, _mm_mul_ps(_mm_loadu_ps(&sLFE[n]), llev));
ctr = _mm_add_ss(ctr, _mm_mul_ss(_mm_load_ss(&sLFE[n]), llev));
_mm_store_ss(&dFL[n], _mm_mul_ss(_mm_add_ss(_mm_load_ss(&sFL[n]), ctr), vol));
_mm_store_ss(&dFR[n], _mm_mul_ss(_mm_add_ss(_mm_load_ss(&sFR[n]), ctr), vol));
_mm_store_ss(&dRL[n], _mm_mul_ss(_mm_load_ss(&sSL[n]), vol));

View file

@ -37,19 +37,19 @@
static void
channelmix_copy(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float);
int i, n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (i = 0; i < n_dst; i++)
memcpy(d[i], s[i], n_bytes);
memcpy(d[i], s[i], n_samples * sizeof(float));
}
else {
for (i = 0; i < n_dst; i++)
@ -62,9 +62,9 @@ channelmix_copy(void *data, int n_dst, void *dst[n_dst],
static void
channelmix_f32_n_m(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, j, n, n_samples = n_bytes / sizeof(float);
int i, j, n;
float **d = (float **) dst;
float **s = (float **) src;
float *m = matrix;
@ -84,15 +84,15 @@ channelmix_f32_n_m(void *data, int n_dst, void *dst[n_dst],
static void
channelmix_f32_1_2(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int n, n_samples = n_bytes / sizeof(float);
int n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
memset(d[0], 0, n_bytes);
memset(d[1], 0, n_bytes);
memset(d[0], 0, n_samples * sizeof(float));
memset(d[1], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++)
@ -106,14 +106,14 @@ channelmix_f32_1_2(void *data, int n_dst, void *dst[n_dst],
static void
channelmix_f32_2_1(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int n, n_samples = n_bytes / sizeof(float);
int n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
memset(d[0], 0, n_bytes);
memset(d[0], 0, n_samples * sizeof(float));
}
else {
const float f = v * 0.5f;
@ -124,14 +124,14 @@ channelmix_f32_2_1(void *data, int n_dst, void *dst[n_dst],
static void
channelmix_f32_4_1(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int n, n_samples = n_bytes / sizeof(float);
int n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
memset(d[0], 0, n_bytes);
memset(d[0], 0, n_samples * sizeof(float));
}
else {
const float f = v * 0.25f;
@ -142,14 +142,14 @@ channelmix_f32_4_1(void *data, int n_dst, void *dst[n_dst],
static void
channelmix_f32_3p1_1(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int n, n_samples = n_bytes / sizeof(float);
int n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
memset(d[0], 0, n_bytes);
memset(d[0], 0, n_samples * sizeof(float));
}
else {
const float f = v * 0.5f;
@ -163,15 +163,15 @@ channelmix_f32_3p1_1(void *data, int n_dst, void *dst[n_dst],
static void
channelmix_f32_2_4(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float);
int i, n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++) {
@ -190,15 +190,15 @@ channelmix_f32_2_4(void *data, int n_dst, void *dst[n_dst],
#define MASK_3_1 _M(FL)|_M(FR)|_M(FC)|_M(LFE)
static void
channelmix_f32_2_3p1(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float);
int i, n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++) {
@ -222,15 +222,15 @@ channelmix_f32_2_3p1(void *data, int n_dst, void *dst[n_dst],
#define MASK_5_1 _M(FL)|_M(FR)|_M(FC)|_M(LFE)|_M(SL)|_M(SR)|_M(RL)|_M(RR)
static void
channelmix_f32_2_5p1(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples = n_bytes / sizeof(float);
int i, n;
float **d = (float **)dst;
float **s = (float **)src;
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++) {
@ -254,9 +254,9 @@ channelmix_f32_2_5p1(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR -> FL+FR */
static void
channelmix_f32_5p1_2(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int n, n_samples = n_bytes / sizeof(float);
int n;
float **d = (float **) dst;
float **s = (float **) src;
float *m = matrix;
@ -265,8 +265,8 @@ channelmix_f32_5p1_2(void *data, int n_dst, void *dst[n_dst],
const float slev = m[4];
if (v <= VOLUME_MIN) {
memset(d[0], 0, n_bytes);
memset(d[1], 0, n_bytes);
memset(d[0], 0, n_samples * sizeof(float));
memset(d[1], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++) {
@ -287,16 +287,15 @@ channelmix_f32_5p1_2(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR -> FL+FR+FC+LFE*/
static void
channelmix_f32_5p1_3p1(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples;
int i, n;
float **d = (float **) dst;
float **s = (float **) src;
n_samples = n_bytes / sizeof(float);
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else {
const float f1 = 0.5f * v;
@ -312,19 +311,18 @@ channelmix_f32_5p1_3p1(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR -> FL+FR+RL+RR*/
static void
channelmix_f32_5p1_4(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples;
int i, n;
float **d = (float **) dst;
float **s = (float **) src;
float *m = matrix;
const float clev = m[2];
const float llev = m[3];
n_samples = n_bytes / sizeof(float);
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++) {
@ -351,9 +349,9 @@ channelmix_f32_5p1_4(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR+RL+RR -> FL+FR */
static void
channelmix_f32_7p1_2(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int n, n_samples = n_bytes / sizeof(float);
int n;
float **d = (float **) dst;
float **s = (float **) src;
float *m = matrix;
@ -362,8 +360,8 @@ channelmix_f32_7p1_2(void *data, int n_dst, void *dst[n_dst],
const float slev = m[4];
if (v <= VOLUME_MIN) {
memset(d[0], 0, n_bytes);
memset(d[1], 0, n_bytes);
memset(d[0], 0, n_samples * sizeof(float));
memset(d[1], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++) {
@ -384,16 +382,15 @@ channelmix_f32_7p1_2(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR+RL+RR -> FL+FR+FC+LFE*/
static void
channelmix_f32_7p1_3p1(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples;
int i, n;
float **d = (float **) dst;
float **s = (float **) src;
n_samples = n_bytes / sizeof(float);
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else {
const float f1 = 0.5 * v;
@ -409,9 +406,9 @@ channelmix_f32_7p1_3p1(void *data, int n_dst, void *dst[n_dst],
/* FL+FR+FC+LFE+SL+SR+RL+RR -> FL+FR+RL+RR*/
static void
channelmix_f32_7p1_4(void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], void *matrix, float v, int n_bytes)
int n_src, const void *src[n_src], void *matrix, float v, int n_samples)
{
int i, n, n_samples;
int i, n;
float **d = (float **) dst;
float **s = (float **) src;
float *m = matrix;
@ -419,10 +416,9 @@ channelmix_f32_7p1_4(void *data, int n_dst, void *dst[n_dst],
const float llev = m[3];
const float slev = m[4];
n_samples = n_bytes / sizeof(float);
if (v <= VOLUME_MIN) {
for (i = 0; i < n_dst; i++)
memset(d[i], 0, n_bytes);
memset(d[i], 0, n_samples * sizeof(float));
}
else if (v == VOLUME_NORM) {
for (n = 0; n < n_samples; n++) {
@ -450,7 +446,7 @@ channelmix_f32_7p1_4(void *data, int n_dst, void *dst[n_dst],
typedef void (*channelmix_func_t) (void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src],
void *matrix, float v, int n_bytes);
void *matrix, float v, int n_samples);
#define ANY ((uint32_t)-1)

View file

@ -1132,27 +1132,26 @@ static int impl_node_process(struct spa_node *node)
sbuf = &inport->buffers[inio->buffer_id];
{
uint32_t i, n_bytes;
uint32_t i, n_samples;
struct spa_buffer *sb = sbuf->outbuf, *db = dbuf->outbuf;
uint32_t n_src_datas = sb->n_datas;
uint32_t n_dst_datas = db->n_datas;
const void *src_datas[n_src_datas];
void *dst_datas[n_dst_datas];
n_bytes = sb->datas[0].chunk->size;
n_samples = sb->datas[0].chunk->size / inport->stride;
for (i = 0; i < n_src_datas; i++)
src_datas[i] = sb->datas[i].data;
for (i = 0; i < n_dst_datas; i++) {
dst_datas[i] = db->datas[i].data;
db->datas[i].chunk->size =
(n_bytes / inport->stride) * outport->stride;
db->datas[i].chunk->size = n_samples * outport->stride;
}
this->convert(this, n_dst_datas, dst_datas,
n_src_datas, src_datas,
this->matrix, this->props.mute ? 0.0f : this->props.volume,
n_bytes);
n_samples);
}
outio->status = SPA_STATUS_HAVE_BUFFER;

View file

@ -30,142 +30,148 @@
#include <emmintrin.h>
static void
conv_s16_to_f32d_1_sse2(void *data, int n_dst, void *dst[n_dst], const void *src, int n_samples)
conv_s16_to_f32d_1_sse2(void *data, void *dst[], const void *src, int n_channels, int n_samples)
{
const int16_t *s = src;
float **d = (float **) dst;
float *d0 = d[0];
int n = 0, unrolled;
int n, unrolled;
__m128i in;
__m128 out, factor = _mm_set1_ps(1.0f / S16_SCALE);
unrolled = n_samples / 4;
n_samples = n_samples & 3;
if (SPA_IS_ALIGNED(d0, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for(; unrolled--; n += 4) {
in = _mm_insert_epi16(in, s[0*n_dst], 1);
in = _mm_insert_epi16(in, s[1*n_dst], 3);
in = _mm_insert_epi16(in, s[2*n_dst], 5);
in = _mm_insert_epi16(in, s[3*n_dst], 7);
for(n = 0; unrolled--; n += 4) {
in = _mm_insert_epi16(in, s[0*n_channels], 1);
in = _mm_insert_epi16(in, s[1*n_channels], 3);
in = _mm_insert_epi16(in, s[2*n_channels], 5);
in = _mm_insert_epi16(in, s[3*n_channels], 7);
in = _mm_srai_epi32(in, 16);
out = _mm_cvtepi32_ps(in);
out = _mm_mul_ps(out, factor);
_mm_storeu_ps(&d0[n], out);
s += 4*n_dst;
_mm_store_ps(&d0[n], out);
s += 4*n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
out = _mm_cvtsi32_ss(out, s[0]);
out = _mm_mul_ss(out, factor);
_mm_store_ss(&d0[n], out);
s += n_dst;
s += n_channels;
}
}
static void
conv_s16_to_f32d_2_sse2(void *data, int n_dst, void *dst[n_dst], const void *src, int n_samples)
conv_s16_to_f32d_2_sse2(void *data, void *dst[], const void *src, int n_channels, int n_samples)
{
const int16_t *s = src;
float **d = (float **) dst;
float *d0 = d[0], *d1 = d[1];
int n = 0, unrolled;
int n, unrolled;
__m128i in, t[2];
__m128 out[2], factor = _mm_set1_ps(1.0f / S16_SCALE);
if (n_dst == 2) {
if (n_channels == 2 &&
SPA_IS_ALIGNED(s, 16) &&
SPA_IS_ALIGNED(d0, 16) &&
SPA_IS_ALIGNED(d1, 16))
unrolled = n_samples / 4;
n_samples = n_samples & 3;
else
unrolled = 0;
for(; unrolled--; n += 4) {
in = _mm_loadu_si128((__m128i*)s);
for(n = 0; unrolled--; n += 4) {
in = _mm_load_si128((__m128i*)s);
t[0] = _mm_slli_epi32(in, 16);
t[0] = _mm_srai_epi32(t[0], 16);
t[1] = _mm_srai_epi32(in, 16);
t[0] = _mm_slli_epi32(in, 16);
t[0] = _mm_srai_epi32(t[0], 16);
t[1] = _mm_srai_epi32(in, 16);
out[0] = _mm_cvtepi32_ps(t[0]);
out[0] = _mm_mul_ps(out[0], factor);
out[1] = _mm_cvtepi32_ps(t[1]);
out[1] = _mm_mul_ps(out[1], factor);
out[0] = _mm_cvtepi32_ps(t[0]);
out[0] = _mm_mul_ps(out[0], factor);
out[1] = _mm_cvtepi32_ps(t[1]);
out[1] = _mm_mul_ps(out[1], factor);
_mm_storeu_ps(&d0[n], out[0]);
_mm_storeu_ps(&d1[n], out[1]);
_mm_store_ps(&d0[n], out[0]);
_mm_store_ps(&d1[n], out[1]);
s += 4*n_dst;
}
s += 4*n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
out[0] = _mm_cvtsi32_ss(out[0], s[0]);
out[0] = _mm_mul_ss(out[0], factor);
out[1] = _mm_cvtsi32_ss(out[1], s[1]);
out[1] = _mm_mul_ss(out[1], factor);
_mm_store_ss(&d0[n], out[0]);
_mm_store_ss(&d1[n], out[1]);
s += n_dst;
s += n_channels;
}
}
static void
conv_s16_to_f32d_sse2(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s16_to_f32d_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int16_t *s = src[0];
int i = 0;
for(; i + 1 < n_dst; i += 2)
conv_s16_to_f32d_2_sse2(data, n_dst, &dst[i], &s[i], n_samples);
for(; i < n_dst; i++)
conv_s16_to_f32d_1_sse2(data, n_dst, &dst[i], &s[i], n_samples);
for(; i + 1 < n_channels; i += 2)
conv_s16_to_f32d_2_sse2(data, &dst[i], &s[i], n_channels, n_samples);
for(; i < n_channels; i++)
conv_s16_to_f32d_1_sse2(data, &dst[i], &s[i], n_channels, n_samples);
}
static void
conv_s24_to_f32d_1_sse2(void *data, int n_dst, void *dst[n_dst], const void *src, int n_samples)
conv_s24_to_f32d_1_sse2(void *data, void *dst[], const void *src, int n_channels, int n_samples)
{
const uint8_t *s = src;
float **d = (float **) dst;
float *d0 = d[0];
int n = 0, unrolled;
int n, unrolled;
__m128i in;
__m128 out, factor = _mm_set1_ps(1.0f / S24_SCALE);
unrolled = n_samples / 4;
n_samples = n_samples & 3;
if (n_samples == 0) {
n_samples += 4;
unrolled--;
if (SPA_IS_ALIGNED(d0, 16) && n_samples > 4) {
unrolled = n_samples / 4;
if ((n_samples & 3) == 0)
unrolled--;
}
else
unrolled = 0;
for(; unrolled--; n += 4) {
for(n = 0; unrolled--; n += 4) {
in = _mm_setr_epi32(
*((uint32_t*)&s[0 * n_dst]),
*((uint32_t*)&s[3 * n_dst]),
*((uint32_t*)&s[6 * n_dst]),
*((uint32_t*)&s[9 * n_dst]));
*((uint32_t*)&s[0 * n_channels]),
*((uint32_t*)&s[3 * n_channels]),
*((uint32_t*)&s[6 * n_channels]),
*((uint32_t*)&s[9 * n_channels]));
in = _mm_slli_epi32(in, 8);
in = _mm_srai_epi32(in, 8);
out = _mm_cvtepi32_ps(in);
out = _mm_mul_ps(out, factor);
_mm_storeu_ps(&d0[n], out);
s += 12 * n_dst;
_mm_store_ps(&d0[n], out);
s += 12 * n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
out = _mm_cvtsi32_ss(out, read_s24(s));
out = _mm_mul_ss(out, factor);
_mm_store_ss(&d0[n], out);
s += 3 * n_dst;
s += 3 * n_channels;
}
}
static void
conv_s24_to_f32d_sse2(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s24_to_f32d_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int8_t *s = src[0];
int i = 0;
for(; i < n_dst; i++)
conv_s24_to_f32d_1_sse2(data, n_dst, &dst[i], &s[3*i], n_samples);
for(; i < n_channels; i++)
conv_s24_to_f32d_1_sse2(data, &dst[i], &s[3*i], n_channels, n_samples);
}
static void
conv_f32d_to_s32_1_sse2(void *data, void *dst, int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s32_1_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
const float *s0 = s[0];
@ -176,11 +182,13 @@ conv_f32d_to_s32_1_sse2(void *data, void *dst, int n_src, const void *src[n_src]
__m128 int_max = _mm_set1_ps(S24_MAX_F);
__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
unrolled = n_samples / 4;
n_samples = n_samples & 3;
if (SPA_IS_ALIGNED(s0, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for(n = 0; unrolled--; n += 4) {
in[0] = _mm_mul_ps(_mm_loadu_ps(&s0[n]), int_max);
in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_max);
in[0] = _mm_min_ps(int_max, _mm_max_ps(in[0], int_min));
out[0] = _mm_slli_epi32(_mm_cvtps_epi32(in[0]), 8);
@ -188,23 +196,23 @@ conv_f32d_to_s32_1_sse2(void *data, void *dst, int n_src, const void *src[n_src]
out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2));
out[3] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(2, 1, 0, 3));
d[0*n_src] = _mm_cvtsi128_si32(out[0]);
d[1*n_src] = _mm_cvtsi128_si32(out[1]);
d[2*n_src] = _mm_cvtsi128_si32(out[2]);
d[3*n_src] = _mm_cvtsi128_si32(out[3]);
d += 4*n_src;
d[0*n_channels] = _mm_cvtsi128_si32(out[0]);
d[1*n_channels] = _mm_cvtsi128_si32(out[1]);
d[2*n_channels] = _mm_cvtsi128_si32(out[2]);
d[3*n_channels] = _mm_cvtsi128_si32(out[3]);
d += 4*n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
in[0] = _mm_load_ss(&s0[n]);
in[0] = _mm_mul_ss(in[0], int_max);
in[0] = _mm_min_ss(int_max, _mm_max_ss(in[0], int_min));
*d = _mm_cvtss_si32(in[0]) << 8;
d += n_src;
d += n_channels;
}
}
static void
conv_f32d_to_s32_2_sse2(void *data, void *dst, int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s32_2_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
const float *s0 = s[0], *s1 = s[1];
@ -215,12 +223,15 @@ conv_f32d_to_s32_2_sse2(void *data, void *dst, int n_src, const void *src[n_src]
__m128 int_max = _mm_set1_ps(S24_MAX_F);
__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
unrolled = n_samples / 4;
n_samples = n_samples & 3;
if (SPA_IS_ALIGNED(s0, 16) &&
SPA_IS_ALIGNED(s1, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for(n = 0; unrolled--; n += 4) {
in[0] = _mm_mul_ps(_mm_loadu_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_loadu_ps(&s1[n]), int_max);
in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_load_ps(&s1[n]), int_max);
in[0] = _mm_min_ps(int_max, _mm_max_ps(in[0], int_min));
in[1] = _mm_min_ps(int_max, _mm_max_ps(in[1], int_min));
@ -233,13 +244,13 @@ conv_f32d_to_s32_2_sse2(void *data, void *dst, int n_src, const void *src[n_src]
t[2] = _mm_unpackhi_epi32(out[0], out[1]);
t[3] = _mm_shuffle_epi32(t[2], _MM_SHUFFLE(0, 0, 2, 2));
_mm_storel_epi64((__m128i*)(d + 0*n_src), t[0]);
_mm_storel_epi64((__m128i*)(d + 1*n_src), t[1]);
_mm_storel_epi64((__m128i*)(d + 2*n_src), t[2]);
_mm_storel_epi64((__m128i*)(d + 3*n_src), t[3]);
d += 4*n_src;
_mm_storel_epi64((__m128i*)(d + 0*n_channels), t[0]);
_mm_storel_epi64((__m128i*)(d + 1*n_channels), t[1]);
_mm_storel_epi64((__m128i*)(d + 2*n_channels), t[2]);
_mm_storel_epi64((__m128i*)(d + 3*n_channels), t[3]);
d += 4*n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
in[0] = _mm_load_ss(&s0[n]);
in[1] = _mm_load_ss(&s1[n]);
@ -249,12 +260,12 @@ conv_f32d_to_s32_2_sse2(void *data, void *dst, int n_src, const void *src[n_src]
in[0] = _mm_min_ps(int_max, _mm_max_ps(in[0], int_min));
out[0] = _mm_slli_epi32(_mm_cvtps_epi32(in[0]), 8);
_mm_storel_epi64((__m128i*)d, out[0]);
d += n_src;
d += n_channels;
}
}
static void
conv_f32d_to_s32_4_sse2(void *data, void *dst, int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s32_4_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
const float *s0 = s[0], *s1 = s[1], *s2 = s[2], *s3 = s[3];
@ -265,14 +276,19 @@ conv_f32d_to_s32_4_sse2(void *data, void *dst, int n_src, const void *src[n_src]
__m128 int_max = _mm_set1_ps(S24_MAX_F);
__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
unrolled = n_samples / 4;
n_samples = n_samples & 3;
if (SPA_IS_ALIGNED(s0, 16) &&
SPA_IS_ALIGNED(s1, 16) &&
SPA_IS_ALIGNED(s2, 16) &&
SPA_IS_ALIGNED(s3, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for(n = 0; unrolled--; n += 4) {
in[0] = _mm_mul_ps(_mm_loadu_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_loadu_ps(&s1[n]), int_max);
in[2] = _mm_mul_ps(_mm_loadu_ps(&s2[n]), int_max);
in[3] = _mm_mul_ps(_mm_loadu_ps(&s3[n]), int_max);
in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_load_ps(&s1[n]), int_max);
in[2] = _mm_mul_ps(_mm_load_ps(&s2[n]), int_max);
in[3] = _mm_mul_ps(_mm_load_ps(&s3[n]), int_max);
in[0] = _mm_min_ps(int_max, _mm_max_ps(in[0], int_min));
in[1] = _mm_min_ps(int_max, _mm_max_ps(in[1], int_min));
@ -294,13 +310,13 @@ conv_f32d_to_s32_4_sse2(void *data, void *dst, int n_src, const void *src[n_src]
out[2] = _mm_unpacklo_epi64(t[2], t[3]);
out[3] = _mm_unpackhi_epi64(t[2], t[3]);
_mm_storeu_si128((__m128i*)(d + 0*n_src), out[0]);
_mm_storeu_si128((__m128i*)(d + 1*n_src), out[1]);
_mm_storeu_si128((__m128i*)(d + 2*n_src), out[2]);
_mm_storeu_si128((__m128i*)(d + 3*n_src), out[3]);
d += 4*n_src;
_mm_storeu_si128((__m128i*)(d + 0*n_channels), out[0]);
_mm_storeu_si128((__m128i*)(d + 1*n_channels), out[1]);
_mm_storeu_si128((__m128i*)(d + 2*n_channels), out[2]);
_mm_storeu_si128((__m128i*)(d + 3*n_channels), out[3]);
d += 4*n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
in[0] = _mm_load_ss(&s0[n]);
in[1] = _mm_load_ss(&s1[n]);
in[2] = _mm_load_ss(&s2[n]);
@ -314,26 +330,26 @@ conv_f32d_to_s32_4_sse2(void *data, void *dst, int n_src, const void *src[n_src]
in[0] = _mm_min_ps(int_max, _mm_max_ps(in[0], int_min));
out[0] = _mm_slli_epi32(_mm_cvtps_epi32(in[0]), 8);
_mm_storeu_si128((__m128i*)d, out[0]);
d += n_src;
d += n_channels;
}
}
static void
conv_f32d_to_s32_sse2(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s32_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int32_t *d = dst[0];
int i = 0;
for(; i + 3 < n_src; i += 4)
conv_f32d_to_s32_4_sse2(data, &d[i], n_src, &src[i], n_samples);
for(; i + 1 < n_src; i += 2)
conv_f32d_to_s32_2_sse2(data, &d[i], n_src, &src[i], n_samples);
for(; i < n_src; i++)
conv_f32d_to_s32_1_sse2(data, &d[i], n_src, &src[i], n_samples);
for(; i + 3 < n_channels; i += 4)
conv_f32d_to_s32_4_sse2(data, &d[i], &src[i], n_channels, n_samples);
for(; i + 1 < n_channels; i += 2)
conv_f32d_to_s32_2_sse2(data, &d[i], &src[i], n_channels, n_samples);
for(; i < n_channels; i++)
conv_f32d_to_s32_1_sse2(data, &d[i], &src[i], n_channels, n_samples);
}
static void
conv_f32d_to_s16_1_sse2(void *data, void *dst, int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s16_1_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
const float *s0 = s[0];
@ -344,52 +360,59 @@ conv_f32d_to_s16_1_sse2(void *data, void *dst, int n_src, const void *src[n_src]
__m128 int_max = _mm_set1_ps(S16_MAX_F);
__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
unrolled = n_samples / 8;
n_samples = n_samples & 7;
if (SPA_IS_ALIGNED(s0, 16))
unrolled = n_samples / 8;
else
unrolled = 0;
for(n = 0; unrolled--; n += 8) {
in[0] = _mm_mul_ps(_mm_loadu_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_loadu_ps(&s0[n+4]), int_max);
in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_load_ps(&s0[n+4]), int_max);
out[0] = _mm_cvtps_epi32(in[0]);
out[1] = _mm_cvtps_epi32(in[1]);
out[0] = _mm_packs_epi32(out[0], out[1]);
d[0*n_src] = _mm_extract_epi16(out[0], 0);
d[1*n_src] = _mm_extract_epi16(out[0], 1);
d[2*n_src] = _mm_extract_epi16(out[0], 2);
d[3*n_src] = _mm_extract_epi16(out[0], 3);
d[4*n_src] = _mm_extract_epi16(out[0], 4);
d[5*n_src] = _mm_extract_epi16(out[0], 5);
d[6*n_src] = _mm_extract_epi16(out[0], 6);
d[7*n_src] = _mm_extract_epi16(out[0], 7);
d += 8*n_src;
d[0*n_channels] = _mm_extract_epi16(out[0], 0);
d[1*n_channels] = _mm_extract_epi16(out[0], 1);
d[2*n_channels] = _mm_extract_epi16(out[0], 2);
d[3*n_channels] = _mm_extract_epi16(out[0], 3);
d[4*n_channels] = _mm_extract_epi16(out[0], 4);
d[5*n_channels] = _mm_extract_epi16(out[0], 5);
d[6*n_channels] = _mm_extract_epi16(out[0], 6);
d[7*n_channels] = _mm_extract_epi16(out[0], 7);
d += 8*n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
fprintf(stderr, "%p %d %d %d\n", s0, n_samples, n, n_channels);
spa_assert_not_reached();
in[0] = _mm_mul_ss(_mm_load_ss(&s0[n]), int_max);
in[0] = _mm_min_ss(int_max, _mm_max_ss(in[0], int_min));
*d = _mm_cvtss_si32(in[0]);
d += n_src;
d += n_channels;
}
}
static void
conv_f32d_to_s16_2_sse2(void *data, void *dst, int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s16_2_sse2(void *data, void *dst, const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
const float *s0 = s[0], *s1 = s[1];
int16_t *d = dst;
int n = 0, unrolled;
int n, unrolled;
__m128 in[2];
__m128i out[4], t[2];
__m128 int_max = _mm_set1_ps(S16_MAX_F);
__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
unrolled = n_samples / 4;
n_samples = n_samples & 3;
if (SPA_IS_ALIGNED(s0, 16) &&
SPA_IS_ALIGNED(s1, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for(; unrolled--; n += 4) {
in[0] = _mm_mul_ps(_mm_loadu_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_loadu_ps(&s1[n]), int_max);
for(n = 0; unrolled--; n += 4) {
in[0] = _mm_mul_ps(_mm_load_ps(&s0[n]), int_max);
in[1] = _mm_mul_ps(_mm_load_ps(&s1[n]), int_max);
t[0] = _mm_cvtps_epi32(in[0]);
t[1] = _mm_cvtps_epi32(in[1]);
@ -402,31 +425,33 @@ conv_f32d_to_s16_2_sse2(void *data, void *dst, int n_src, const void *src[n_src]
out[2] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(1, 0, 3, 2));
out[3] = _mm_shuffle_epi32(out[0], _MM_SHUFFLE(2, 1, 0, 3));
*((uint32_t*)(d + 0*n_src)) = _mm_cvtsi128_si32(out[0]);
*((uint32_t*)(d + 1*n_src)) = _mm_cvtsi128_si32(out[1]);
*((uint32_t*)(d + 2*n_src)) = _mm_cvtsi128_si32(out[2]);
*((uint32_t*)(d + 3*n_src)) = _mm_cvtsi128_si32(out[3]);
d += 4*n_src;
*((int32_t*)(d + 0*n_channels)) = _mm_cvtsi128_si32(out[0]);
*((int32_t*)(d + 1*n_channels)) = _mm_cvtsi128_si32(out[1]);
*((int32_t*)(d + 2*n_channels)) = _mm_cvtsi128_si32(out[2]);
*((int32_t*)(d + 3*n_channels)) = _mm_cvtsi128_si32(out[3]);
d += 4*n_channels;
}
for(; n_samples--; n++) {
for(; n < n_samples; n++) {
fprintf(stderr, "%p %p %d %d %d\n", s0, s1, n_samples, n, n_channels);
spa_assert_not_reached();
in[0] = _mm_mul_ss(_mm_load_ss(&s0[n]), int_max);
in[1] = _mm_mul_ss(_mm_load_ss(&s1[n]), int_max);
in[0] = _mm_min_ss(int_max, _mm_max_ss(in[0], int_min));
in[1] = _mm_min_ss(int_max, _mm_max_ss(in[1], int_min));
d[0] = _mm_cvtss_si32(in[0]);
d[1] = _mm_cvtss_si32(in[1]);
d += n_src;
d += n_channels;
}
}
static void
conv_f32d_to_s16_sse2(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s16_sse2(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int16_t *d = dst[0];
int i = 0;
for(; i + 1 < n_src; i += 2)
conv_f32d_to_s16_2_sse2(data, &d[i], n_src, &src[i], n_samples);
for(; i < n_src; i++)
conv_f32d_to_s16_1_sse2(data, &d[i], n_src, &src[i], n_samples);
for(; i + 1 < n_channels; i += 2)
conv_f32d_to_s16_2_sse2(data, &d[i], &src[i], n_channels, n_samples);
for(; i < n_channels; i++)
conv_f32d_to_s16_1_sse2(data, &d[i], &src[i], n_channels, n_samples);
}

View file

@ -30,6 +30,8 @@
#include <spa/utils/defs.h>
#include <spa/param/audio/format-utils.h>
#include <xmmintrin.h>
#define U8_MIN 0
#define U8_MAX 255
#define U8_SCALE 127.5f
@ -85,43 +87,68 @@ static inline void write_s24(void *dst, int32_t val)
#endif
static void
conv_copy8(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_copy8d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i;
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples);
}
static void
conv_copy16(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_copy8(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
memcpy(dst[0], src[0], n_samples * n_channels);
}
static void
conv_copy16d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i;
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples * sizeof(int16_t));
}
static void
conv_copy24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_copy16(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
memcpy(dst[0], src[0], n_samples * sizeof(int16_t) * n_channels);
}
static void
conv_copy24d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i;
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples * 3);
}
static void
conv_copy32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_copy24(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
memcpy(dst[0], src[0], n_samples * 3 * n_channels);
}
static void
conv_copy32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i;
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
memcpy(dst[i], src[i], n_samples * sizeof(int32_t));
}
static void
conv_u8_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_copy32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
memcpy(dst[0], src[0], n_samples * sizeof(int32_t) * n_channels);
}
static void
conv_u8d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const uint8_t *s = src[i];
float *d = dst[i];
@ -131,37 +158,43 @@ conv_u8_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *s
}
static void
conv_u8_to_f32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_u8_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_u8d_to_f32d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_u8_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint8_t *s = src[0];
float **d = (float **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = U8_TO_F32(*s++);
}
}
static void
conv_u8d_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_u8d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint8_t **s = (const uint8_t **) src;
float *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = U8_TO_F32(s[i][j]);
}
}
static void
conv_s16_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s16d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const int16_t *s = src[i];
float *d = dst[i];
for (j = 0; j < n_samples; j++)
@ -170,37 +203,43 @@ conv_s16_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *
}
static void
conv_s16_to_f32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s16_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_s16d_to_f32d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_s16_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int16_t *s = src[0];
float **d = (float **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = S16_TO_F32(*s++);
}
}
static void
conv_s16d_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s16d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int16_t **s = (const int16_t **) src;
float *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = S16_TO_F32(s[i][j]);
}
}
static void
conv_s32_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s32d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const int32_t *s = src[i];
float *d = dst[i];
@ -210,38 +249,43 @@ conv_s32_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *
}
static void
conv_s32_to_f32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s32_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_s32d_to_f32d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_s32_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int32_t *s = src[0];
float **d = (float **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = S32_TO_F32(*s++);
}
}
static void
conv_s32d_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s32d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int32_t **s = (const int32_t **) src;
float *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = S32_TO_F32(s[i][j]);
}
}
static void
conv_s24_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s24d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const int8_t *s = src[i];
float *d = dst[i];
@ -253,14 +297,20 @@ conv_s24_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *
}
static void
conv_s24_to_f32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s24_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_s24d_to_f32d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_s24_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint8_t *s = src[0];
float **d = (float **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++) {
for (i = 0; i < n_channels; i++) {
d[i][j] = S24_TO_F32(read_s24(s));
s += 3;
}
@ -268,25 +318,25 @@ conv_s24_to_f32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void
}
static void
conv_s24d_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s24d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint8_t **s = (const uint8_t **) src;
float *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
*d++ = S24_TO_F32(read_s24(&s[i][j*3]));
}
}
}
static void
conv_s24_32_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s24_32d_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const int32_t *s = src[i];
float *d = dst[i];
@ -296,37 +346,43 @@ conv_s24_32_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const voi
}
static void
conv_s24_32_to_f32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s24_32_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_s24_32d_to_f32d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_s24_32_to_f32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int32_t *s = src[0];
float **d = (float **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = S24_TO_F32(*s++);
}
}
static void
conv_s24_32d_to_f32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_s24_32d_to_f32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int32_t **s = (const int32_t **) src;
float *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = S24_TO_F32(s[i][j]);
}
}
static void
conv_f32_to_u8(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_u8d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const float *s = src[i];
uint8_t *d = dst[i];
@ -336,37 +392,43 @@ conv_f32_to_u8(void *data, int n_dst, void *dst[n_dst], int n_src, const void *s
}
static void
conv_f32_to_u8d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32_to_u8(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_f32d_to_u8d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_f32_to_u8d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float *s = src[0];
uint8_t **d = (uint8_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = F32_TO_U8(*s++);
}
}
static void
conv_f32d_to_u8(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_u8(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
uint8_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = F32_TO_U8(s[i][j]);
}
}
static void
conv_f32_to_s16(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s16d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const float *s = src[i];
int16_t *d = dst[i];
@ -376,37 +438,43 @@ conv_f32_to_s16(void *data, int n_dst, void *dst[n_dst], int n_src, const void *
}
static void
conv_f32_to_s16d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32_to_s16(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_f32d_to_s16d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_f32_to_s16d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float *s = src[0];
int16_t **d = (int16_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = F32_TO_S16(*s++);
}
}
static void
conv_f32d_to_s16(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s16(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
int16_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = F32_TO_S16(s[i][j]);
}
}
static void
conv_f32_to_s32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const float *s = src[i];
int32_t *d = dst[i];
@ -416,27 +484,33 @@ conv_f32_to_s32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *
}
static void
conv_f32_to_s32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32_to_s32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_f32d_to_s32d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_f32_to_s32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float *s = src[0];
int32_t **d = (int32_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = F32_TO_S32(*s++);
}
}
static void
conv_f32d_to_s32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
int32_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = F32_TO_S32(s[i][j]);
}
}
@ -444,11 +518,11 @@ conv_f32d_to_s32(void *data, int n_dst, void *dst[n_dst], int n_src, const void
static void
conv_f32_to_s24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s24d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const float *s = src[i];
uint8_t *d = dst[i];
@ -460,28 +534,34 @@ conv_f32_to_s24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *
}
static void
conv_f32_to_s24d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32_to_s24(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_f32d_to_s24d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_f32_to_s24d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float *s = src[0];
uint8_t **d = (uint8_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++) {
for (i = 0; i < n_channels; i++) {
write_s24(&d[i][j*3], F32_TO_S24(*s++));
}
}
}
static void
conv_f32d_to_s24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s24(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
uint8_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
write_s24(d, F32_TO_S24(s[i][j]));
d += 3;
}
@ -490,11 +570,11 @@ conv_f32d_to_s24(void *data, int n_dst, void *dst[n_dst], int n_src, const void
static void
conv_f32_to_s24_32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s24_32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
int i, j;
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
const float *s = src[i];
int32_t *d = dst[i];
@ -504,66 +584,72 @@ conv_f32_to_s24_32(void *data, int n_dst, void *dst[n_dst], int n_src, const voi
}
static void
conv_f32_to_s24_32d(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32_to_s24_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
conv_f32d_to_s24_32d(data, dst, src, 1, n_samples * n_channels);
}
static void
conv_f32_to_s24_32d(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float *s = src[0];
int32_t **d = (int32_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = F32_TO_S24(*s++);
}
}
static void
conv_f32d_to_s24_32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
conv_f32d_to_s24_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const float **s = (const float **) src;
int32_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = F32_TO_S24(s[i][j]);
}
}
static void
deinterleave_8(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
deinterleave_8(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint8_t *s = src[0];
uint8_t **d = (uint8_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = *s++;
}
}
static void
deinterleave_16(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
deinterleave_16(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint16_t *s = src[0];
uint16_t **d = (uint16_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = *s++;
}
}
static void
deinterleave_24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
deinterleave_24(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint8_t *s = src[0];
uint8_t **d = (uint8_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++) {
for (i = 0; i < n_channels; i++) {
write_s24(&d[i][j*3], read_s24(s));
s += 3;
}
@ -571,53 +657,53 @@ deinterleave_24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *
}
static void
deinterleave_32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
deinterleave_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const uint32_t *s = src[0];
uint32_t **d = (uint32_t **) dst;
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_dst; i++)
for (i = 0; i < n_channels; i++)
d[i][j] = *s++;
}
}
static void
interleave_8(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
interleave_8(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int8_t **s = (const int8_t **) src;
uint8_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = s[i][j];
}
}
static void
interleave_16(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
interleave_16(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int16_t **s = (const int16_t **) src;
uint16_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = s[i][j];
}
}
static void
interleave_24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
interleave_24(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int8_t **s = (const int8_t **) src;
uint8_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++) {
for (i = 0; i < n_channels; i++) {
write_s24(d, read_s24(&s[i][j*3]));
d += 3;
}
@ -625,21 +711,21 @@ interleave_24(void *data, int n_dst, void *dst[n_dst], int n_src, const void *sr
}
static void
interleave_32(void *data, int n_dst, void *dst[n_dst], int n_src, const void *src[n_src], int n_samples)
interleave_32(void *data, void *dst[], const void *src[], int n_channels, int n_samples)
{
const int32_t **s = (const int32_t **) src;
uint32_t *d = dst[0];
int i, j;
for (j = 0; j < n_samples; j++) {
for (i = 0; i < n_src; i++)
for (i = 0; i < n_channels; i++)
*d++ = s[i][j];
}
}
typedef void (*convert_func_t) (void *data, int n_dst, void *dst[n_dst],
int n_src, const void *src[n_src], int n_samples);
typedef void (*convert_func_t) (void *data, void *dst[], const void *src[],
int n_channels, int n_samples);
static const struct conv_info {
uint32_t src_fmt;
@ -652,13 +738,13 @@ static const struct conv_info {
{
/* to f32 */
{ SPA_AUDIO_FORMAT_U8, SPA_AUDIO_FORMAT_F32, 0, conv_u8_to_f32 },
{ SPA_AUDIO_FORMAT_U8P, SPA_AUDIO_FORMAT_F32P, 0, conv_u8_to_f32 },
{ SPA_AUDIO_FORMAT_U8P, SPA_AUDIO_FORMAT_F32P, 0, conv_u8d_to_f32d },
{ SPA_AUDIO_FORMAT_U8, SPA_AUDIO_FORMAT_F32P, 0, conv_u8_to_f32d },
{ SPA_AUDIO_FORMAT_U8P, SPA_AUDIO_FORMAT_F32, 0, conv_u8d_to_f32 },
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32, 0, conv_s16_to_f32 },
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32P, 0, conv_s16_to_f32 },
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32P, 0, conv_s16d_to_f32d },
#if defined (__SSE2__)
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s16_to_f32d_sse2 },
#endif
@ -666,17 +752,17 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_F32, 0, conv_s16d_to_f32 },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_F32, 0, conv_copy32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_F32P, 0, conv_copy32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_F32P, 0, conv_copy32d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_F32P, 0, deinterleave_32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_F32, 0, interleave_32 },
{ SPA_AUDIO_FORMAT_S32, SPA_AUDIO_FORMAT_F32, 0, conv_s32_to_f32 },
{ SPA_AUDIO_FORMAT_S32P, SPA_AUDIO_FORMAT_F32P, 0, conv_s32_to_f32 },
{ SPA_AUDIO_FORMAT_S32P, SPA_AUDIO_FORMAT_F32P, 0, conv_s32d_to_f32d },
{ SPA_AUDIO_FORMAT_S32, SPA_AUDIO_FORMAT_F32P, 0, conv_s32_to_f32d },
{ SPA_AUDIO_FORMAT_S32P, SPA_AUDIO_FORMAT_F32, 0, conv_s32d_to_f32 },
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32, 0, conv_s24_to_f32 },
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_to_f32 },
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24d_to_f32d },
#if defined (__SSE2__)
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_F32P, FEATURE_SSE2, conv_s24_to_f32d_sse2 },
#endif
@ -684,18 +770,18 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_F32, 0, conv_s24d_to_f32 },
{ SPA_AUDIO_FORMAT_S24_32, SPA_AUDIO_FORMAT_F32, 0, conv_s24_32_to_f32 },
{ SPA_AUDIO_FORMAT_S24_32P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_32_to_f32 },
{ SPA_AUDIO_FORMAT_S24_32P, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_32d_to_f32d },
{ SPA_AUDIO_FORMAT_S24_32, SPA_AUDIO_FORMAT_F32P, 0, conv_s24_32_to_f32d },
{ SPA_AUDIO_FORMAT_S24_32P, SPA_AUDIO_FORMAT_F32, 0, conv_s24_32d_to_f32 },
/* from f32 */
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_U8, 0, conv_f32_to_u8 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_U8P, 0, conv_f32_to_u8 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_U8P, 0, conv_f32d_to_u8d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_U8P, 0, conv_f32_to_u8d },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_U8, 0, conv_f32d_to_u8 },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16, 0, conv_f32_to_s16 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16P, 0, conv_f32_to_s16 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16P, 0, conv_f32d_to_s16d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S16P, 0, conv_f32_to_s16d },
#if defined (__SSE2__)
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, FEATURE_SSE2, conv_f32d_to_s16_sse2 },
@ -703,7 +789,7 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S16, 0, conv_f32d_to_s16 },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32, 0, conv_f32_to_s32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32P, 0, conv_f32_to_s32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32P, 0, conv_f32d_to_s32d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S32P, 0, conv_f32_to_s32d },
#if defined (__SSE2__)
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, FEATURE_SSE2, conv_f32d_to_s32_sse2 },
@ -711,42 +797,42 @@ static const struct conv_info {
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S32, 0, conv_f32d_to_s32 },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S24, 0, conv_f32_to_s24 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S24P, 0, conv_f32_to_s24 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S24P, 0, conv_f32d_to_s24d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S24P, 0, conv_f32_to_s24d },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S24, 0, conv_f32d_to_s24 },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S24_32, 0, conv_f32_to_s24_32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S24_32P, 0, conv_f32_to_s24_32 },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S24_32P, 0, conv_f32d_to_s24_32d },
{ SPA_AUDIO_FORMAT_F32, SPA_AUDIO_FORMAT_S24_32P, 0, conv_f32_to_s24_32d },
{ SPA_AUDIO_FORMAT_F32P, SPA_AUDIO_FORMAT_S24_32, 0, conv_f32d_to_s24_32 },
/* u8 */
{ SPA_AUDIO_FORMAT_U8, SPA_AUDIO_FORMAT_U8, 0, conv_copy8 },
{ SPA_AUDIO_FORMAT_U8P, SPA_AUDIO_FORMAT_U8P, 0, conv_copy8 },
{ SPA_AUDIO_FORMAT_U8P, SPA_AUDIO_FORMAT_U8P, 0, conv_copy8d },
{ SPA_AUDIO_FORMAT_U8, SPA_AUDIO_FORMAT_U8P, 0, deinterleave_8 },
{ SPA_AUDIO_FORMAT_U8P, SPA_AUDIO_FORMAT_U8, 0, interleave_8 },
/* s16 */
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_S16, 0, conv_copy16 },
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_S16P, 0, conv_copy16 },
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_S16P, 0, conv_copy16d },
{ SPA_AUDIO_FORMAT_S16, SPA_AUDIO_FORMAT_S16P, 0, deinterleave_16 },
{ SPA_AUDIO_FORMAT_S16P, SPA_AUDIO_FORMAT_S16, 0, interleave_16 },
/* s32 */
{ SPA_AUDIO_FORMAT_S32, SPA_AUDIO_FORMAT_S32, 0, conv_copy32 },
{ SPA_AUDIO_FORMAT_S32P, SPA_AUDIO_FORMAT_S32P, 0, conv_copy32 },
{ SPA_AUDIO_FORMAT_S32P, SPA_AUDIO_FORMAT_S32P, 0, conv_copy32d },
{ SPA_AUDIO_FORMAT_S32, SPA_AUDIO_FORMAT_S32P, 0, deinterleave_32 },
{ SPA_AUDIO_FORMAT_S32P, SPA_AUDIO_FORMAT_S32, 0, interleave_32 },
/* s24 */
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_S24, 0, conv_copy24 },
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_S24P, 0, conv_copy24 },
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_S24P, 0, conv_copy24d },
{ SPA_AUDIO_FORMAT_S24, SPA_AUDIO_FORMAT_S24P, 0, deinterleave_24 },
{ SPA_AUDIO_FORMAT_S24P, SPA_AUDIO_FORMAT_S24, 0, interleave_24 },
/* s24_32 */
{ SPA_AUDIO_FORMAT_S24_32, SPA_AUDIO_FORMAT_S24_32, 0, conv_copy32 },
{ SPA_AUDIO_FORMAT_S24_32P, SPA_AUDIO_FORMAT_S24_32P, 0, conv_copy32 },
{ SPA_AUDIO_FORMAT_S24_32P, SPA_AUDIO_FORMAT_S24_32P, 0, conv_copy32d },
{ SPA_AUDIO_FORMAT_S24_32, SPA_AUDIO_FORMAT_S24_32P, 0, deinterleave_32 },
{ SPA_AUDIO_FORMAT_S24_32P, SPA_AUDIO_FORMAT_S24_32, 0, interleave_32 },
};

View file

@ -115,8 +115,6 @@ struct impl {
uint32_t cpu_flags;
convert_func_t convert;
float empty[4096];
};
#define CHECK_PORT(this,d,id) (id == 0)
@ -656,7 +654,7 @@ impl_node_port_use_buffers(struct spa_node *node,
{
struct impl *this;
struct port *port;
uint32_t i, size = SPA_ID_INVALID;
uint32_t i, size = SPA_ID_INVALID, j;
spa_return_val_if_fail(node != NULL, -EINVAL);
@ -674,6 +672,7 @@ impl_node_port_use_buffers(struct spa_node *node,
for (i = 0; i < n_buffers; i++) {
struct buffer *b;
uint32_t n_datas = buffers[i]->n_datas;
struct spa_data *d = buffers[i]->datas;
b = &port->buffers[i];
@ -682,19 +681,35 @@ impl_node_port_use_buffers(struct spa_node *node,
b->outbuf = buffers[i];
b->h = spa_buffer_find_meta_data(buffers[i], SPA_META_Header, sizeof(*b->h));
if (n_datas != port->blocks) {
spa_log_error(this->log, NAME " %p: expected %d blocks on buffer %d", this,
port->blocks, i);
return -EINVAL;
}
if (size == SPA_ID_INVALID)
size = d[0].maxsize;
else
if (size != d[0].maxsize)
if (size != d[0].maxsize) {
spa_log_error(this->log, NAME " %p: expected size %d on buffer %d", this,
size, i);
return -EINVAL;
}
if (!((d[0].type == SPA_DATA_MemPtr ||
d[0].type == SPA_DATA_MemFd ||
d[0].type == SPA_DATA_DmaBuf) && d[0].data != NULL)) {
spa_log_error(this->log, NAME " %p: invalid memory on buffer %p", this,
buffers[i]);
return -EINVAL;
for (j = 0; j < n_datas; j++) {
if (!((d[j].type == SPA_DATA_MemPtr ||
d[j].type == SPA_DATA_MemFd ||
d[j].type == SPA_DATA_DmaBuf) && d[j].data != NULL)) {
spa_log_error(this->log, NAME " %p: invalid memory %d on buffer %d",
this, j, i);
return -EINVAL;
}
if (!SPA_IS_ALIGNED(d[j].data, 16)) {
spa_log_warn(this->log, NAME " %p: memory %d on buffer %d not aligned",
this, j, i);
}
}
if (direction == SPA_DIRECTION_OUTPUT)
spa_list_append(&port->queue, &b->link);
else
@ -878,7 +893,7 @@ static int impl_node_process(struct spa_node *node)
spa_log_trace(this->log, NAME " %p: n_src:%d n_dst:%d size:%d maxsize:%d n_samples:%d",
this, n_src_datas, n_dst_datas, size, maxsize, n_samples);
this->convert(this, n_dst_datas, dst_datas, n_src_datas, src_datas, n_samples);
this->convert(this, dst_datas, src_datas, SPA_MAX(n_src_datas, n_dst_datas), n_samples);
inio->status = SPA_STATUS_NEED_BUFFER;
res |= SPA_STATUS_NEED_BUFFER;

View file

@ -42,7 +42,7 @@
#define DEFAULT_RATE 48000
#define DEFAULT_CHANNELS 2
#define MAX_SAMPLES 1024
#define MAX_SAMPLES 2048
#define MAX_BUFFERS 64
#define MAX_PORTS 128
@ -100,7 +100,7 @@ struct impl {
bool monitor;
bool have_profile;
float empty[MAX_SAMPLES];
float empty[MAX_SAMPLES + 15];
};
#define CHECK_IN_PORT(this,d,p) ((d) == SPA_DIRECTION_INPUT && (p) < this->port_count)
@ -750,7 +750,7 @@ impl_node_port_use_buffers(struct spa_node *node,
{
struct impl *this;
struct port *port;
uint32_t i;
uint32_t i, j;
spa_return_val_if_fail(node != NULL, -EINVAL);
@ -769,6 +769,7 @@ impl_node_port_use_buffers(struct spa_node *node,
for (i = 0; i < n_buffers; i++) {
struct buffer *b;
uint32_t n_datas = buffers[i]->n_datas;
struct spa_data *d = buffers[i]->datas;
b = &port->buffers[i];
@ -776,13 +777,25 @@ impl_node_port_use_buffers(struct spa_node *node,
b->flags = 0;
b->buf = buffers[i];
if (!((d[0].type == SPA_DATA_MemPtr ||
d[0].type == SPA_DATA_MemFd ||
d[0].type == SPA_DATA_DmaBuf) && d[0].data != NULL)) {
spa_log_error(this->log, NAME " %p: invalid memory on buffer %p %d %p", this,
buffers[i], d[0].type, d[0].data);
if (n_datas != port->blocks) {
spa_log_error(this->log, NAME " %p: invalid blocks %d on buffer %d",
this, n_datas, i);
return -EINVAL;
}
for (j = 0; j < n_datas; j++) {
if (!((d[j].type == SPA_DATA_MemPtr ||
d[j].type == SPA_DATA_MemFd ||
d[j].type == SPA_DATA_DmaBuf) && d[j].data != NULL)) {
spa_log_error(this->log, NAME " %p: invalid memory %d on buffer %d %d %p",
this, j, i, d[j].type, d[j].data);
return -EINVAL;
}
if (!SPA_IS_ALIGNED(d[j].data, 16))
spa_log_warn(this->log, NAME " %p: memory %d on buffer %d not aligned",
this, j, i);
}
if (direction == SPA_DIRECTION_OUTPUT)
queue_buffer(this, port, i);
}
@ -960,7 +973,7 @@ static int impl_node_process(struct spa_node *node)
struct port *inport = GET_IN_PORT(this, i);
if (get_in_buffer(this, inport, &sbuf) < 0) {
src_datas[n_src_datas++] = this->empty;
src_datas[n_src_datas++] = SPA_PTR_ALIGN(this->empty, 16, void);
continue;
}
@ -987,7 +1000,7 @@ static int impl_node_process(struct spa_node *node)
n_samples * outport->stride);
}
this->convert(this, n_dst_datas, dst_datas, n_src_datas, src_datas, n_samples);
this->convert(this, dst_datas, src_datas, SPA_MAX(n_dst_datas, n_src_datas), n_samples);
return res | SPA_STATUS_HAVE_BUFFER;
}

View file

@ -44,7 +44,7 @@
#define DEFAULT_CHANNELS 2
#define DEFAULT_MASK (1LL << SPA_AUDIO_CHANNEL_FL) | (1LL << SPA_AUDIO_CHANNEL_FR)
#define MAX_SAMPLES 1024
#define MAX_SAMPLES 2048
#define MAX_BUFFERS 64
#define MAX_PORTS 128
@ -100,7 +100,7 @@ struct impl {
bool have_profile;
float empty[MAX_SAMPLES];
float empty[MAX_SAMPLES + 15];
};
#define CHECK_OUT_PORT(this,d,p) ((d) == SPA_DIRECTION_OUTPUT && (p) < this->port_count)
@ -754,10 +754,13 @@ impl_node_port_use_buffers(struct spa_node *node,
if (!((d[0].type == SPA_DATA_MemPtr ||
d[0].type == SPA_DATA_MemFd ||
d[0].type == SPA_DATA_DmaBuf) && d[0].data != NULL)) {
spa_log_error(this->log, NAME " %p: invalid memory on buffer %p %d %p", this,
buffers[i], d[0].type, d[0].data);
spa_log_error(this->log, NAME " %p: invalid memory on buffer %d %d %p", this,
i, d[0].type, d[0].data);
return -EINVAL;
}
if (!SPA_IS_ALIGNED(d[0].data, 16))
spa_log_warn(this->log, NAME " %p: memory on buffer %d not aligned", this, i);
if (direction == SPA_DIRECTION_OUTPUT)
queue_buffer(this, port, i);
}
@ -903,7 +906,7 @@ static int impl_node_process(struct spa_node *node)
if ((dbuf = dequeue_buffer(this, outport)) == NULL) {
outio->status = -EPIPE;
empty:
dst_datas[n_dst_datas++] = this->empty;
dst_datas[n_dst_datas++] = SPA_PTR_ALIGN(this->empty, 16, void);
continue;
}
@ -927,7 +930,7 @@ static int impl_node_process(struct spa_node *node)
spa_log_trace(this->log, NAME " %p: %d %d %d %d %d", this,
n_src_datas, n_dst_datas, n_samples, maxsize, inport->stride);
this->convert(this, n_dst_datas, dst_datas, n_src_datas, src_datas, n_samples);
this->convert(this, dst_datas, src_datas, SPA_MAX(n_dst_datas, n_src_datas), n_samples);
inio->status = SPA_STATUS_NEED_BUFFER;
res |= SPA_STATUS_NEED_BUFFER;

View file

@ -33,7 +33,7 @@
#include "fmt-ops.c"
#define N_SAMPLES 29
#define N_SAMPLES 253
#define N_CHANNELS 11
static uint8_t samp_in[N_SAMPLES * 4];
@ -47,7 +47,7 @@ static void run_test(const char *name,
{
const void *ip[N_CHANNELS];
void *tp[N_CHANNELS];
int i, j, ic, oc, ns;
int i, j;
const uint8_t *in8 = in, *out8 = out;
for (j = 0; j < N_SAMPLES; j++) {
@ -62,16 +62,16 @@ static void run_test(const char *name,
tp[0] = temp_in;
switch(in_size) {
case 1:
interleave_8(NULL, 1, tp, N_CHANNELS, ip, N_SAMPLES);
interleave_8(NULL, tp, ip, N_CHANNELS, N_SAMPLES);
break;
case 2:
interleave_16(NULL, 1, tp, N_CHANNELS, ip, N_SAMPLES);
interleave_16(NULL, tp, ip, N_CHANNELS, N_SAMPLES);
break;
case 3:
interleave_24(NULL, 1, tp, N_CHANNELS, ip, N_SAMPLES);
interleave_24(NULL, tp, ip, N_CHANNELS, N_SAMPLES);
break;
case 4:
interleave_32(NULL, 1, tp, N_CHANNELS, ip, N_SAMPLES);
interleave_32(NULL, tp, ip, N_CHANNELS, N_SAMPLES);
break;
default:
fprintf(stderr, "unknown size %zd\n", in_size);
@ -84,16 +84,11 @@ static void run_test(const char *name,
for (j = 0; j < N_CHANNELS; j++)
tp[j] = &temp_out[j * N_SAMPLES * out_size];
ic = in_packed ? 1 : N_CHANNELS;
oc = out_packed ? 1 : N_CHANNELS;
ns = (in_packed && out_packed) ? N_SAMPLES * N_CHANNELS : N_SAMPLES;
func(NULL, oc, tp, ic, ip, ns);
func(NULL, tp, ip, N_CHANNELS, N_SAMPLES);
fprintf(stderr, "test %s:\n", name);
if (out_packed) {
const uint8_t *d = tp[0], *s = samp_out;
spa_debug_mem(0, d, N_SAMPLES * N_CHANNELS * out_size);
for (i = 0; i < N_SAMPLES; i++) {
for (j = 0; j < N_CHANNELS; j++) {
spa_assert(memcmp(d, s, out_size) == 0);
@ -119,6 +114,8 @@ static void test_f32_u8(void)
false, true, conv_f32d_to_u8);
run_test("test_f32_u8d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, false, conv_f32_to_u8d);
run_test("test_f32d_u8d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_f32d_to_u8d);
}
static void test_u8_f32(void)
@ -132,6 +129,8 @@ static void test_u8_f32(void)
false, true, conv_u8d_to_f32);
run_test("test_u8_f32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, false, conv_u8_to_f32d);
run_test("test_u8d_f32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_u8d_to_f32d);
}
static void test_f32_s16(void)
@ -145,6 +144,8 @@ static void test_f32_s16(void)
false, true, conv_f32d_to_s16);
run_test("test_f32_s16d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, false, conv_f32_to_s16d);
run_test("test_f32d_s16d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_f32d_to_s16d);
}
static void test_s16_f32(void)
@ -158,6 +159,8 @@ static void test_s16_f32(void)
false, true, conv_s16d_to_f32);
run_test("test_s16_f32", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, true, conv_s16_to_f32);
run_test("test_s16d_f32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_s16d_to_f32d);
}
static void test_f32_s32(void)
@ -172,6 +175,8 @@ static void test_f32_s32(void)
false, true, conv_f32d_to_s32);
run_test("test_f32_s32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, false, conv_f32_to_s32d);
run_test("test_f32d_s32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_f32d_to_s32d);
}
static void test_s32_f32(void)
@ -185,6 +190,8 @@ static void test_s32_f32(void)
false, true, conv_s32d_to_f32);
run_test("test_s32_f32", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, true, conv_s32_to_f32);
run_test("test_s32d_f32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_s32d_to_f32d);
}
static void test_f32_s24(void)
@ -193,9 +200,14 @@ static void test_f32_s24(void)
const uint8_t out[] = { 0x00, 0x00, 0x00, 0xff, 0xff, 0x7f, 0x01, 0x00, 0x80,
0xff, 0xff, 0x3f, 0x01, 0x00, 0xc0, 0xff, 0xff, 0x7f, 0x01, 0x00, 0x80 };
run_test("test_f32_s24", in, sizeof(in[0]), out, 3, SPA_N_ELEMENTS(in), true, true, conv_f32_to_s24);
run_test("test_f32d_s24", in, sizeof(in[0]), out, 3, SPA_N_ELEMENTS(in), false, true, conv_f32d_to_s24);
run_test("test_f32_s24d", in, sizeof(in[0]), out, 3, SPA_N_ELEMENTS(in), true, false, conv_f32_to_s24d);
run_test("test_f32_s24", in, sizeof(in[0]), out, 3, SPA_N_ELEMENTS(in),
true, true, conv_f32_to_s24);
run_test("test_f32d_s24", in, sizeof(in[0]), out, 3, SPA_N_ELEMENTS(in),
false, true, conv_f32d_to_s24);
run_test("test_f32_s24d", in, sizeof(in[0]), out, 3, SPA_N_ELEMENTS(in),
true, false, conv_f32_to_s24d);
run_test("test_f32d_s24d", in, sizeof(in[0]), out, 3, SPA_N_ELEMENTS(in),
false, false, conv_f32d_to_s24d);
}
static void test_s24_f32(void)
@ -204,9 +216,14 @@ static void test_s24_f32(void)
0xff, 0xff, 0x3f, 0x01, 0x00, 0xc0, };
const float out[] = { 0.0f, 1.0f, -1.0f, 0.4999999404f, -0.4999999404f, };
run_test("test_s24_f32d", in, 3, out, sizeof(out[0]), SPA_N_ELEMENTS(out), true, false, conv_s24_to_f32d);
run_test("test_s24d_f32", in, 3, out, sizeof(out[0]), SPA_N_ELEMENTS(out), false, true, conv_s24d_to_f32);
run_test("test_s24_f32", in, 3, out, sizeof(out[0]), SPA_N_ELEMENTS(out), true, true, conv_s24_to_f32);
run_test("test_s24_f32d", in, 3, out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, false, conv_s24_to_f32d);
run_test("test_s24d_f32", in, 3, out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, true, conv_s24d_to_f32);
run_test("test_s24_f32", in, 3, out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, true, conv_s24_to_f32);
run_test("test_s24d_f32d", in, 3, out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_s24d_to_f32d);
}
static void test_f32_s24_32(void)
@ -221,6 +238,8 @@ static void test_f32_s24_32(void)
false, true, conv_f32d_to_s24_32);
run_test("test_f32_s24_32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, false, conv_f32_to_s24_32d);
run_test("test_f32d_s24_32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_f32d_to_s24_32d);
}
static void test_s24_32_f32(void)
@ -234,6 +253,8 @@ static void test_s24_32_f32(void)
false, true, conv_s24_32d_to_f32);
run_test("test_s24_32_f32", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
true, true, conv_s24_32_to_f32);
run_test("test_s24_32d_f32d", in, sizeof(in[0]), out, sizeof(out[0]), SPA_N_ELEMENTS(out),
false, false, conv_s24_32d_to_f32d);
}
int main(int argc, char *argv[])

View file

@ -69,7 +69,7 @@ struct port {
struct spa_handle *spa_handle;
struct spa_node *spa_node;
float empty[MAX_BUFFER_SIZE];
float empty[MAX_BUFFER_SIZE + 15];
};
struct node {
@ -101,14 +101,15 @@ static void init_buffer(struct port *port, uint32_t id)
b->datas[0].flags = 0;
b->datas[0].fd = -1;
b->datas[0].mapoffset = 0;
b->datas[0].maxsize = sizeof(port->empty);
b->datas[0].data = port->empty;
b->datas[0].maxsize = SPA_ROUND_DOWN_N(sizeof(port->empty), 16);
b->datas[0].data = SPA_PTR_ALIGN(port->empty, 16, void);
b->datas[0].chunk = b->chunk;
b->datas[0].chunk->offset = 0;
b->datas[0].chunk->size = 0;
b->datas[0].chunk->stride = 0;
port->bufs[id] = &b->buf;
memset(port->empty, 0, sizeof(port->empty));
pw_log_debug("%p %d", b->datas[0].data, b->datas[0].maxsize);
}
static void init_port(struct port *p, enum spa_direction direction)

View file

@ -109,7 +109,7 @@ struct impl {
uint32_t stride;
bool started;
float empty[MAX_SAMPLES];
float empty[MAX_SAMPLES + 15];
};
#define CHECK_FREE_IN_PORT(this,d,p) ((d) == SPA_DIRECTION_INPUT && (p) < MAX_PORTS && !this->in_ports[(p)].valid)
@ -632,10 +632,12 @@ impl_node_port_use_buffers(struct spa_node *node,
if (!((d[0].type == SPA_DATA_MemPtr ||
d[0].type == SPA_DATA_MemFd ||
d[0].type == SPA_DATA_DmaBuf) && d[0].data != NULL)) {
spa_log_error(this->log, NAME " %p: invalid memory on buffer %p", this,
buffers[i]);
spa_log_error(this->log, NAME " %p: invalid memory on buffer %d", this, i);
return -EINVAL;
}
if (!SPA_IS_ALIGNED(d[0].data, 16)) {
spa_log_warn(this->log, NAME " %p: memory on buffer %d not aligned", this, i);
}
if (direction == SPA_DIRECTION_OUTPUT)
queue_buffer(this, port, b);
}
@ -717,23 +719,27 @@ impl_node_port_send_command(struct spa_node *node,
#include <xmmintrin.h>
static void mix_2(float *dst, float *src1, float *src2, int n_samples)
{
int i, unrolled;
int n, unrolled;
__m128 in[2];
unrolled = n_samples / 4;
n_samples &= 3;
if (SPA_IS_ALIGNED(src1, 16) &&
SPA_IS_ALIGNED(src2, 16) &&
SPA_IS_ALIGNED(dst, 16))
unrolled = n_samples / 4;
else
unrolled = 0;
for (i = 0; unrolled--; i += 4) {
in[0] = _mm_loadu_ps(&src1[i]),
in[1] = _mm_loadu_ps(&src2[i]),
for (n = 0; unrolled--; n += 4) {
in[0] = _mm_load_ps(&src1[n]),
in[1] = _mm_load_ps(&src2[n]),
in[0] = _mm_add_ps(in[0], in[1]);
_mm_storeu_ps(&dst[i], in[0]);
_mm_store_ps(&dst[n], in[0]);
}
for (; n_samples--; i++) {
in[0] = _mm_load_ss(&src1[i]),
in[1] = _mm_load_ss(&src2[i]),
for (; n < n_samples; n++) {
in[0] = _mm_load_ss(&src1[n]),
in[1] = _mm_load_ss(&src2[n]),
in[0] = _mm_add_ss(in[0], in[1]);
_mm_store_ss(&dst[i], in[0]);
_mm_store_ss(&dst[n], in[0]);
}
}
#else
@ -825,13 +831,13 @@ static int impl_node_process(struct spa_node *node)
outb->buffer->n_datas = 1;
outb->buffer->datas = outb->datas;
outb->datas[0].data = this->empty;
outb->datas[0].data = SPA_PTR_ALIGN(this->empty, 16, void);
outb->datas[0].chunk = outb->chunk;
outb->datas[0].chunk->offset = 0;
outb->datas[0].chunk->size = n_samples * sizeof(float);
outb->datas[0].chunk->stride = sizeof(float);
dst = this->empty;
dst = outb->datas[0].data;
if (n_buffers == 0) {
memset(dst, 0, n_samples * sizeof(float));
}

View file

@ -837,7 +837,7 @@ do_port_use_buffers(struct impl *impl,
data_size = 0;
for (j = 0; j < buffers[i]->n_metas; j++) {
data_size += buffers[i]->metas[j].size;
data_size += SPA_ROUND_UP_N(buffers[i]->metas[j].size, 8);
}
for (j = 0; j < buffers[i]->n_datas; j++) {
struct spa_data *d = buffers[i]->datas;

View file

@ -419,6 +419,7 @@ static int alloc_buffers(struct pw_link *this,
uint32_t n_datas,
size_t *data_sizes,
ssize_t *data_strides,
size_t *data_aligns,
struct allocation *allocation)
{
int res;
@ -452,12 +453,13 @@ static int alloc_buffers(struct pw_link *this,
metas[n_metas].type = type;
metas[n_metas].size = size;
meta_size += metas[n_metas].size;
meta_size += SPA_ROUND_UP_N(metas[n_metas].size, 8);
n_metas++;
skel_size += sizeof(struct spa_meta);
}
}
data_size += meta_size;
data_size = SPA_ROUND_UP_N(data_size, data_aligns[0]);
/* data */
for (i = 0; i < n_datas; i++) {
@ -492,7 +494,7 @@ static int alloc_buffers(struct pw_link *this,
m->type = metas[j].type;
m->size = metas[j].size;
m->data = p;
p = SPA_MEMBER(p, m->size, void);
p = SPA_MEMBER(p, SPA_ROUND_UP_N(m->size, 8), void);
}
/* pointer to data structure */
b->n_datas = n_datas;
@ -509,7 +511,7 @@ static int alloc_buffers(struct pw_link *this,
d->type = SPA_DATA_MemFd;
d->flags = 0;
d->fd = m->fd;
d->mapoffset = SPA_PTRDIFF(ddp, m->ptr);
d->mapoffset = SPA_ROUND_UP_N(SPA_PTRDIFF(ddp, m->ptr), data_aligns[i]);
d->maxsize = data_sizes[j];
d->data = SPA_MEMBER(m->ptr, d->mapoffset, void);
d->chunk->offset = 0;
@ -701,9 +703,10 @@ static int do_allocation(struct pw_link *this, uint32_t in_state, uint32_t out_s
struct spa_pod_builder b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer));
uint32_t i, offset, n_params;
uint32_t max_buffers;
size_t minsize = 8192, stride = 0;
size_t minsize = 8192, stride = 0, align;
size_t data_sizes[1];
ssize_t data_strides[1];
size_t data_aligns[1];
n_params = param_filter(this, input, output, SPA_PARAM_Buffers, &b);
n_params += param_filter(this, input, output, SPA_PARAM_Meta, &b);
@ -720,25 +723,29 @@ static int do_allocation(struct pw_link *this, uint32_t in_state, uint32_t out_s
max_buffers = MAX_BUFFERS;
minsize = stride = 0;
align = 8;
param = find_param(params, n_params, SPA_TYPE_OBJECT_ParamBuffers);
if (param) {
uint32_t qmax_buffers = max_buffers,
qminsize = minsize, qstride = stride;
qminsize = minsize, qstride = stride, qalign = align;
spa_pod_parse_object(param,
SPA_TYPE_OBJECT_ParamBuffers, NULL,
SPA_PARAM_BUFFERS_buffers, SPA_POD_Int(&qmax_buffers),
SPA_PARAM_BUFFERS_size, SPA_POD_Int(&qminsize),
SPA_PARAM_BUFFERS_stride, SPA_POD_Int(&qstride));
SPA_PARAM_BUFFERS_stride, SPA_POD_Int(&qstride),
SPA_PARAM_BUFFERS_align, SPA_POD_Int(&qalign));
max_buffers =
qmax_buffers == 0 ? max_buffers : SPA_MIN(qmax_buffers,
max_buffers);
minsize = SPA_MAX(minsize, qminsize);
stride = SPA_MAX(stride, qstride);
align = SPA_MAX(align, qalign);
pw_log_debug("%d %d %d -> %zd %zd %d", qminsize, qstride, qmax_buffers,
minsize, stride, max_buffers);
pw_log_debug("%d %d %d %d -> %zd %zd %d %zd",
qminsize, qstride, qmax_buffers, qalign,
minsize, stride, max_buffers, align);
} else {
pw_log_warn("no buffers param");
minsize = 8192;
@ -754,6 +761,7 @@ static int do_allocation(struct pw_link *this, uint32_t in_state, uint32_t out_s
data_sizes[0] = minsize;
data_strides[0] = stride;
data_aligns[0] = align;
if ((res = alloc_buffers(this,
max_buffers,
@ -761,6 +769,7 @@ static int do_allocation(struct pw_link *this, uint32_t in_state, uint32_t out_s
params,
1,
data_sizes, data_strides,
data_aligns,
&allocation)) < 0) {
asprintf(&error, "error alloc buffers: %d", res);
goto error;

View file

@ -1075,7 +1075,7 @@ client_node_port_use_buffers(void *object,
struct spa_meta *m = &b->metas[j];
memcpy(m, &buffers[i].buffer->metas[j], sizeof(struct spa_meta));
m->data = SPA_MEMBER(bmem.map.ptr, offset, void);
offset += m->size;
offset += SPA_ROUND_UP_N(m->size, 8);
}
for (j = 0; j < b->n_datas; j++) {