resample: Let the resampler object handle all channels

Move the loop over all channels into the resampler itself.
This is better because the resampler can reuse its state for
each channel.
This commit is contained in:
Wim Taymans 2019-03-22 16:48:35 +01:00
parent 5a2ccee1ff
commit f29d14fcc8
5 changed files with 97 additions and 64 deletions

View file

@ -35,47 +35,57 @@ static inline float hmax_ps(__m128 val)
return _mm_cvtss_f32(val);
}
static void impl_peaks_process_sse(struct resample *r, int channel,
void *src, uint32_t *in_len, void *dst, uint32_t *out_len)
static void impl_peaks_process_sse(struct resample *r,
const void * SPA_RESTRICT src[], uint32_t *in_len,
void * SPA_RESTRICT dst[], uint32_t *out_len)
{
struct peaks_data *pd = r->data;
float *s = src, *d = dst, m;
uint32_t i, o, end, chunk, unrolled;
uint32_t c, i, o, end, chunk, unrolled, i_count, o_count;
__m128 in, max, mask = _mm_andnot_ps(_mm_set_ps1(-0.0f),
_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
o = i = 0;
if (r->channels == 0)
return;
m = pd->max_f[channel];
max = _mm_set1_ps(m);
for (c = 0; c < r->channels; c++) {
const float *s = src[c];
float *d = dst[c], m = pd->max_f[c];
while (i < *in_len && o < *out_len) {
end = ((uint64_t) (pd->o_count + 1) * r->i_rate) / r->o_rate;
end = end > pd->i_count ? end - pd->i_count : 0;
chunk = SPA_MIN(end, *in_len);
o_count = pd->o_count;
i_count = pd->i_count;
o = i = 0;
unrolled = chunk - ((chunk - i) & 3);
max = _mm_set1_ps(m);
for (; i < unrolled; i+=4) {
in = _mm_loadu_ps(&s[i]);
in = _mm_and_ps(mask, in);
max = _mm_max_ps(in, max);
}
for (; i < chunk; i++)
m = SPA_MAX(fabsf(s[i]), m);
if (i == end) {
d[o++] = SPA_MAX(hmax_ps(max), m);
m = 0.0f;
max = _mm_set1_ps(m);
pd->o_count++;
while (i < *in_len && o < *out_len) {
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
end = end > i_count ? end - i_count : 0;
chunk = SPA_MIN(end, *in_len);
unrolled = chunk - ((chunk - i) & 3);
for (; i < unrolled; i+=4) {
in = _mm_loadu_ps(&s[i]);
in = _mm_and_ps(mask, in);
max = _mm_max_ps(in, max);
}
for (; i < chunk; i++)
m = SPA_MAX(fabsf(s[i]), m);
if (i == end) {
d[o++] = SPA_MAX(hmax_ps(max), m);
m = 0.0f;
max = _mm_set1_ps(m);
o_count++;
}
}
pd->max_f[c] = SPA_MAX(hmax_ps(max), m);
}
pd->max_f[channel] = SPA_MAX(hmax_ps(max), m);
*out_len = o;
*in_len = i;
pd->i_count += i;
pd->o_count = o_count;
pd->i_count = i_count + i;
while (pd->i_count >= r->i_rate) {
pd->i_count -= r->i_rate;