resample: Let the resampler object handle all channels

Move the code to loop over all channels in the resampler itself.
This is better because the resampler can reuse its state for
each channel.
This commit is contained in:
Wim Taymans 2019-03-22 16:48:35 +01:00
parent 5a2ccee1ff
commit f29d14fcc8
5 changed files with 97 additions and 64 deletions

View file

@ -35,47 +35,57 @@ static inline float hmax_ps(__m128 val)
return _mm_cvtss_f32(val);
}
static void impl_peaks_process_sse(struct resample *r, int channel,
void *src, uint32_t *in_len, void *dst, uint32_t *out_len)
static void impl_peaks_process_sse(struct resample *r,
const void * SPA_RESTRICT src[], uint32_t *in_len,
void * SPA_RESTRICT dst[], uint32_t *out_len)
{
struct peaks_data *pd = r->data;
float *s = src, *d = dst, m;
uint32_t i, o, end, chunk, unrolled;
uint32_t c, i, o, end, chunk, unrolled, i_count, o_count;
__m128 in, max, mask = _mm_andnot_ps(_mm_set_ps1(-0.0f),
_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
o = i = 0;
if (r->channels == 0)
return;
m = pd->max_f[channel];
max = _mm_set1_ps(m);
for (c = 0; c < r->channels; c++) {
const float *s = src[c];
float *d = dst[c], m = pd->max_f[c];
while (i < *in_len && o < *out_len) {
end = ((uint64_t) (pd->o_count + 1) * r->i_rate) / r->o_rate;
end = end > pd->i_count ? end - pd->i_count : 0;
chunk = SPA_MIN(end, *in_len);
o_count = pd->o_count;
i_count = pd->i_count;
o = i = 0;
unrolled = chunk - ((chunk - i) & 3);
max = _mm_set1_ps(m);
for (; i < unrolled; i+=4) {
in = _mm_loadu_ps(&s[i]);
in = _mm_and_ps(mask, in);
max = _mm_max_ps(in, max);
}
for (; i < chunk; i++)
m = SPA_MAX(fabsf(s[i]), m);
if (i == end) {
d[o++] = SPA_MAX(hmax_ps(max), m);
m = 0.0f;
max = _mm_set1_ps(m);
pd->o_count++;
while (i < *in_len && o < *out_len) {
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
end = end > i_count ? end - i_count : 0;
chunk = SPA_MIN(end, *in_len);
unrolled = chunk - ((chunk - i) & 3);
for (; i < unrolled; i+=4) {
in = _mm_loadu_ps(&s[i]);
in = _mm_and_ps(mask, in);
max = _mm_max_ps(in, max);
}
for (; i < chunk; i++)
m = SPA_MAX(fabsf(s[i]), m);
if (i == end) {
d[o++] = SPA_MAX(hmax_ps(max), m);
m = 0.0f;
max = _mm_set1_ps(m);
o_count++;
}
}
pd->max_f[c] = SPA_MAX(hmax_ps(max), m);
}
pd->max_f[channel] = SPA_MAX(hmax_ps(max), m);
*out_len = o;
*in_len = i;
pd->i_count += i;
pd->o_count = o_count;
pd->i_count = i_count + i;
while (pd->i_count >= r->i_rate) {
pd->i_count -= r->i_rate;

View file

@ -44,35 +44,45 @@ static void impl_peaks_update_rate(struct resample *r, double rate)
{
}
static void impl_peaks_process(struct resample *r, int channel,
void *src, uint32_t *in_len, void *dst, uint32_t *out_len)
static void impl_peaks_process(struct resample *r,
const void * SPA_RESTRICT src[], uint32_t *in_len,
void * SPA_RESTRICT dst[], uint32_t *out_len)
{
struct peaks_data *pd = r->data;
float *s = src, *d = dst, m;
uint32_t i, o, end, chunk;
uint32_t c, i, o, end, chunk, o_count, i_count;
o = i = 0;
m = pd->max_f[channel];
if (r->channels == 0)
return;
while (i < *in_len && o < *out_len) {
end = ((uint64_t) (pd->o_count + 1) * r->i_rate) / r->o_rate;
end = end > pd->i_count ? end - pd->i_count : 0;
chunk = SPA_MIN(end, *in_len);
for (c = 0; c < r->channels; c++) {
const float *s = src[c];
float *d = dst[c], m = pd->max_f[c];
for (; i < chunk; i++)
m = SPA_MAX(fabsf(s[i]), m);
o_count = pd->o_count;
i_count = pd->i_count;
o = i = 0;
if (i == end) {
d[o++] = m;
m = 0.0f;
pd->o_count++;
while (i < *in_len && o < *out_len) {
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
end = end > i_count ? end - i_count : 0;
chunk = SPA_MIN(end, *in_len);
for (; i < chunk; i++)
m = SPA_MAX(fabsf(s[i]), m);
if (i == end) {
d[o++] = m;
m = 0.0f;
o_count++;
}
}
pd->max_f[c] = m;
}
pd->max_f[channel] = m;
*out_len = o;
*in_len = i;
pd->i_count += i;
pd->o_count = o_count;
pd->i_count = i_count + i;
while (pd->i_count >= r->i_rate) {
pd->i_count -= r->i_rate;

View file

@ -37,16 +37,26 @@ static void impl_speex_update_rate(struct resample *r, double rate)
r->i_rate * rate, r->o_rate, r->i_rate, r->o_rate);
}
/*
 * NOTE(review): this span looks like a rendered diff whose +/- markers were
 * stripped, so two versions of impl_speex_process appear interleaved below.
 * Confirm against the real source file before editing any of it.
 *
 * First signature: takes one channel index plus a single src and a single
 * dst buffer (presumably the version this commit removes).
 */
static void impl_speex_process(struct resample *r, int channel,
void *src, uint32_t *in_len, void *dst, uint32_t *out_len)
/*
 * Second signature: takes arrays of per-channel src and dst pointers and
 * loops over r->channels inside the function. Before returning, *in_len and
 * *out_len are overwritten with the locals i and o.
 */
static void impl_speex_process(struct resample *r,
const void *SPA_RESTRICT src[], uint32_t *in_len,
void * SPA_RESTRICT dst[], uint32_t *out_len)
{
uint32_t c, i, o;
if (r->i_rate == r->o_rate) {
/* Equal input and output rates: the speex resampler is not called. */
/* Single-buffer form (uses dst/src as plain buffers): clamp both lengths
 * to their minimum and hand that many floats to spa_memcpy. */
*out_len = *in_len = SPA_MIN(*in_len, *out_len);
spa_memcpy(dst, src, *out_len * sizeof(float));
/* Per-channel form: the same clamped count o is used for every channel. */
o = i = SPA_MIN(*in_len, *out_len);
for (c = 0; c < r->channels; c++)
spa_memcpy(dst[c], src[c], o * sizeof(float));
}
else {
/* Single-channel form: the caller's length pointers are passed straight
 * through to speex (presumably updated in place by the library — see the
 * speex resampler API to confirm). */
speex_resampler_process_float(r->data, channel, src, in_len, dst, out_len);
/* Per-channel form: every channel starts again from the caller's *in_len
 * and *out_len; after the loop i and o hold whatever the call for the
 * last channel left in them. */
for (c = 0; c < r->channels; c++) {
i = *in_len;
o = *out_len;
speex_resampler_process_float(r->data, c, src[c], &i, dst[c], &o);
}
}
/* NOTE(review): on the non-equal-rate path with r->channels == 0 the loop
 * body never runs, so i and o are still uninitialized when stored here. */
*in_len = i;
*out_len = o;
}
static void impl_speex_reset (struct resample *r)

View file

@ -730,6 +730,8 @@ static int impl_node_process(struct spa_node *node)
struct spa_buffer *sb, *db;
uint32_t i, size, in_len, out_len, pin_len, pout_len, maxsize;
int res = 0;
const void **src_datas;
void **dst_datas;
spa_return_val_if_fail(node != NULL, -EINVAL);
@ -781,23 +783,23 @@ static int impl_node_process(struct spa_node *node)
pin_len = in_len = (size - inport->offset) / sizeof(float);
pout_len = out_len = (maxsize - outport->offset) / sizeof(float);
for (i = 0; i < sb->n_datas; i++) {
void *src, *dst;
src_datas = alloca(sizeof(void*) * this->resample.channels);
dst_datas = alloca(sizeof(void*) * this->resample.channels);
in_len = pin_len;
out_len = pout_len;
for (i = 0; i < sb->n_datas; i++)
src_datas[i] = SPA_MEMBER(sb->datas[i].data, inport->offset, void);
for (i = 0; i < db->n_datas; i++)
dst_datas[i] = SPA_MEMBER(db->datas[i].data, outport->offset, void);
src = SPA_MEMBER(sb->datas[i].data, inport->offset, void);
dst = SPA_MEMBER(db->datas[i].data, outport->offset, void);
resample_process(&this->resample, src_datas, &in_len, dst_datas, &out_len);
resample_process(&this->resample, i, src, &in_len, dst, &out_len);
spa_log_trace_fp(this->log, NAME " %p: in %d/%d %ld %d out %d/%d %ld %d",
this, pin_len, in_len, size / sizeof(float), inport->offset,
pout_len, out_len, maxsize / sizeof(float), outport->offset);
spa_log_trace_fp(this->log, NAME " %p: in %d/%d %ld %d out %d/%d %ld %d",
this, pin_len, in_len, size / sizeof(float), inport->offset,
pout_len, out_len, maxsize / sizeof(float), outport->offset);
db->datas[i].chunk->offset = 0;
for (i = 0; i < db->n_datas; i++) {
db->datas[i].chunk->size = outport->offset + (out_len * sizeof(float));
db->datas[i].chunk->offset = 0;
}
inport->offset += in_len * sizeof(float);

View file

@ -32,8 +32,9 @@ struct resample {
void (*free) (struct resample *r);
void (*update_rate) (struct resample *r, double rate);
void (*process) (struct resample *r, int channel,
void *src, uint32_t *in_len, void *dst, uint32_t *out_len);
void (*process) (struct resample *r,
const void * SPA_RESTRICT src[], uint32_t *in_len,
void * SPA_RESTRICT dst[], uint32_t *out_len);
void (*reset) (struct resample *r);
void *data;
};