mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-11-06 13:30:01 -05:00
audioconvert: refactor peaks resampler
Use common code in a macro and generate each arch-specific version from it. Compile with -Ofast to optimize some fmaxf calls.
This commit is contained in:
parent
a79b5c86ea
commit
6e9e02b420
9 changed files with 89 additions and 110 deletions
|
|
@ -37,60 +37,27 @@ static inline float hmax_ps(__m128 val)
|
|||
return _mm_cvtss_f32(val);
|
||||
}
|
||||
|
||||
void resample_peaks_process_sse(struct resample *r,
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
||||
void * SPA_RESTRICT dst[], uint32_t *out_len)
|
||||
static inline float find_abs_max_sse(const float *s, uint32_t n_samples, float m)
|
||||
{
|
||||
struct peaks_data *pd = r->data;
|
||||
uint32_t c, i, o, end, chunk, unrolled, i_count, o_count;
|
||||
__m128 in, max, mask = _mm_andnot_ps(_mm_set_ps1(-0.0f),
|
||||
_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
|
||||
__m128 in, max;
|
||||
uint32_t n, unrolled;
|
||||
const __m128 mask = _mm_andnot_ps(
|
||||
_mm_set_ps1(-0.0f),
|
||||
_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
|
||||
|
||||
if (r->channels == 0)
|
||||
return;
|
||||
max = _mm_set1_ps(m);
|
||||
|
||||
for (c = 0; c < r->channels; c++) {
|
||||
const float *s = src[c];
|
||||
float *d = dst[c], m = pd->max_f[c];
|
||||
unrolled = n_samples & ~3;
|
||||
|
||||
o_count = pd->o_count;
|
||||
i_count = pd->i_count;
|
||||
o = i = 0;
|
||||
|
||||
max = _mm_set1_ps(m);
|
||||
|
||||
while (i < *in_len && o < *out_len) {
|
||||
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
|
||||
end = end > i_count ? end - i_count : 0;
|
||||
chunk = SPA_MIN(end, *in_len);
|
||||
|
||||
unrolled = chunk - ((chunk - i) & 3);
|
||||
|
||||
for (; i < unrolled; i+=4) {
|
||||
in = _mm_loadu_ps(&s[i]);
|
||||
in = _mm_and_ps(mask, in);
|
||||
max = _mm_max_ps(in, max);
|
||||
}
|
||||
for (; i < chunk; i++)
|
||||
m = SPA_MAX(fabsf(s[i]), m);
|
||||
|
||||
if (i == end) {
|
||||
d[o++] = SPA_MAX(hmax_ps(max), m);
|
||||
m = 0.0f;
|
||||
max = _mm_set1_ps(m);
|
||||
o_count++;
|
||||
}
|
||||
}
|
||||
pd->max_f[c] = SPA_MAX(hmax_ps(max), m);
|
||||
for (n = 0; n < unrolled; n+=4) {
|
||||
in = _mm_loadu_ps(&s[n]);
|
||||
in = _mm_and_ps(mask, in);
|
||||
max = _mm_max_ps(in, max);
|
||||
}
|
||||
for (; n < n_samples; n++)
|
||||
m = fmaxf(fabsf(s[n]), m);
|
||||
|
||||
*out_len = o;
|
||||
*in_len = i;
|
||||
pd->o_count = o_count;
|
||||
pd->i_count = i_count + i;
|
||||
|
||||
while (pd->i_count >= r->i_rate) {
|
||||
pd->i_count -= r->i_rate;
|
||||
pd->o_count -= r->o_rate;
|
||||
}
|
||||
return fmaxf(hmax_ps(max), m);
|
||||
}
|
||||
|
||||
MAKE_PEAKS(sse);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue