mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-10-29 05:40:27 -04:00
audioconvert: refactor peaks resampler
Use common code in a macro and generate arch-specific versions. Compile with -Ofast to optimize some fmaxf calls.
This commit is contained in:
parent
a79b5c86ea
commit
6e9e02b420
9 changed files with 89 additions and 110 deletions
|
|
@ -27,7 +27,7 @@ if have_sse
|
|||
'resample-peaks-sse.c',
|
||||
'volume-ops-sse.c',
|
||||
'channelmix-ops-sse.c' ],
|
||||
c_args : [sse_args, '-O3', '-DHAVE_SSE'],
|
||||
c_args : [sse_args, '-Ofast', '-DHAVE_SSE'],
|
||||
dependencies : [ spa_dep ],
|
||||
install : false
|
||||
)
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@
|
|||
#include <assert.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
static void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||
{
|
||||
__m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty;
|
||||
|
|
@ -56,7 +56,7 @@ static void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
|||
_mm_store_ss(d, sx[0]);
|
||||
}
|
||||
|
||||
static void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||
uint32_t n_taps)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
#include "resample-native-impl.h"
|
||||
|
||||
static void inner_product_c(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_c(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||
{
|
||||
float sum = 0.0f;
|
||||
|
|
@ -40,7 +40,7 @@ static void inner_product_c(float *d, const float * SPA_RESTRICT s,
|
|||
*d = sum;
|
||||
}
|
||||
|
||||
static void inner_product_ip_c(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_ip_c(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||
uint32_t n_taps)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_neon(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||
{
|
||||
unsigned int remainder = n_taps % 16;
|
||||
|
|
@ -137,7 +137,7 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
|
|||
#endif
|
||||
}
|
||||
|
||||
static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||
uint32_t n_taps)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
static void inner_product_sse(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_sse(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||
{
|
||||
__m128 sum = _mm_setzero_ps();
|
||||
|
|
@ -68,7 +68,7 @@ static void inner_product_sse(float *d, const float * SPA_RESTRICT s,
|
|||
_mm_store_ss(d, sum);
|
||||
}
|
||||
|
||||
static void inner_product_ip_sse(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_ip_sse(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||
uint32_t n_taps)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||
{
|
||||
__m128 sum = _mm_setzero_ps();
|
||||
|
|
@ -97,7 +97,7 @@ static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
|||
_mm_store_ss(d, sum);
|
||||
}
|
||||
|
||||
static void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||
uint32_t n_taps)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -26,48 +26,12 @@
|
|||
|
||||
#include "resample-peaks-impl.h"
|
||||
|
||||
void resample_peaks_process_c(struct resample *r,
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
||||
void * SPA_RESTRICT dst[], uint32_t *out_len)
|
||||
static inline float find_abs_max_c(const float *s, uint32_t n_samples, float m)
|
||||
{
|
||||
struct peaks_data *pd = r->data;
|
||||
uint32_t c, i, o, end, chunk, o_count, i_count;
|
||||
|
||||
if (SPA_UNLIKELY(r->channels == 0))
|
||||
return;
|
||||
|
||||
for (c = 0; c < r->channels; c++) {
|
||||
const float *s = src[c];
|
||||
float *d = dst[c], m = pd->max_f[c];
|
||||
|
||||
o_count = pd->o_count;
|
||||
i_count = pd->i_count;
|
||||
o = i = 0;
|
||||
|
||||
while (i < *in_len && o < *out_len) {
|
||||
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
|
||||
end = end > i_count ? end - i_count : 0;
|
||||
chunk = SPA_MIN(end, *in_len);
|
||||
|
||||
for (; i < chunk; i++)
|
||||
m = SPA_MAX(fabsf(s[i]), m);
|
||||
|
||||
if (i == end) {
|
||||
d[o++] = m;
|
||||
m = 0.0f;
|
||||
o_count++;
|
||||
}
|
||||
}
|
||||
pd->max_f[c] = m;
|
||||
uint32_t n;
|
||||
for (n = 0; n < n_samples; n++)
|
||||
m = fmaxf(fabsf(s[n]), m);
|
||||
return m;
|
||||
}
|
||||
|
||||
*out_len = o;
|
||||
*in_len = i;
|
||||
pd->o_count = o_count;
|
||||
pd->i_count = i_count + i;
|
||||
|
||||
while (pd->i_count >= r->i_rate) {
|
||||
pd->i_count -= r->i_rate;
|
||||
pd->o_count -= r->o_rate;
|
||||
}
|
||||
}
|
||||
MAKE_PEAKS(c);
|
||||
|
|
|
|||
|
|
@ -34,11 +34,59 @@ struct peaks_data {
|
|||
float max_f[];
|
||||
};
|
||||
|
||||
void resample_peaks_process_c(struct resample *r,
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
||||
void * SPA_RESTRICT dst[], uint32_t *out_len);
|
||||
/*
 * DEFINE_PEAKS(arch) expands to the signature of
 * resample_peaks_process_<arch>(). Follow it with ';' to get a prototype,
 * or with a function body (as MAKE_PEAKS does) to get a definition.
 */
#define DEFINE_PEAKS(arch)							\
void resample_peaks_process_##arch(struct resample *r,				\
	const void * SPA_RESTRICT src[], uint32_t *in_len,			\
	void * SPA_RESTRICT dst[], uint32_t *out_len)

/*
 * MAKE_PEAKS(arch) defines resample_peaks_process_<arch>().
 *
 * The including file must provide, before using this macro:
 *   float find_abs_max_<arch>(const float *s, uint32_t n_samples, float m)
 * which is called with a start pointer, a sample count and the running
 * maximum, and whose return value becomes the new running maximum.
 *
 * For every channel c the generated function walks src[c], carries the
 * running maximum in pd->max_f[c] across calls, and writes one value to
 * dst[c] each time the input index reaches `end`.
 *
 * On entry *in_len / *out_len bound the number of input samples read and
 * output samples written. On return they are overwritten with the input
 * index and output count reached by the channel loop. Nothing is touched
 * when r->channels is 0.
 */
#define MAKE_PEAKS(arch)							\
DEFINE_PEAKS(arch)								\
{										\
	struct peaks_data *pd = r->data;					\
	uint32_t c, i, o, end, chunk, i_count, o_count;				\
										\
	/* i, o, i_count and o_count are only assigned inside the channel */	\
	/* loop, so bail out before they would be read unset. */		\
	if (SPA_UNLIKELY(r->channels == 0))					\
		return;								\
										\
	for (c = 0; c < r->channels; c++) {					\
		const float *s = src[c];					\
		float *d = dst[c], m = pd->max_f[c];				\
										\
		/* every channel restarts from the same saved counters */	\
		o_count = pd->o_count;						\
		i_count = pd->i_count;						\
		o = i = 0;							\
										\
		while (i < *in_len && o < *out_len) {				\
			/* index in this input buffer at which the next */	\
			/* output value is emitted, clamped to 0 */		\
			end = ((uint64_t) (o_count + 1)				\
				* r->i_rate) / r->o_rate;			\
			end = end > i_count ? end - i_count : 0;		\
			chunk = SPA_MIN(end, *in_len);				\
										\
			/* chunk is an absolute index into s, not a length: */	\
			/* scan s[i..chunk) and move i to chunk. Adding */	\
			/* chunk to i would overshoot on every iteration */	\
			/* after the first. */					\
			if (chunk > i) {					\
				m = find_abs_max_##arch(&s[i], chunk - i, m);	\
				i = chunk;					\
			}							\
										\
			if (i == end) {						\
				d[o++] = m;					\
				m = 0.0f;					\
				o_count++;					\
			}							\
		}								\
		/* keep the partial maximum for the next call */		\
		pd->max_f[c] = m;						\
	}									\
	*out_len = o;								\
	*in_len = i;								\
	pd->o_count = o_count;							\
	pd->i_count = i_count + i;						\
										\
	/* step both counters back by one rate period at a time */		\
	while (pd->i_count >= r->i_rate) {					\
		pd->i_count -= r->i_rate;					\
		pd->o_count -= r->o_rate;					\
	}									\
}
|
||||
|
||||
|
||||
DEFINE_PEAKS(c);
|
||||
#if defined (HAVE_SSE)
|
||||
void resample_peaks_process_sse(struct resample *r,
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
||||
void * SPA_RESTRICT dst[], uint32_t *out_len);
|
||||
DEFINE_PEAKS(sse);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -37,60 +37,27 @@ static inline float hmax_ps(__m128 val)
|
|||
return _mm_cvtss_f32(val);
|
||||
}
|
||||
|
||||
void resample_peaks_process_sse(struct resample *r,
|
||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
||||
void * SPA_RESTRICT dst[], uint32_t *out_len)
|
||||
static inline float find_abs_max_sse(const float *s, uint32_t n_samples, float m)
|
||||
{
|
||||
struct peaks_data *pd = r->data;
|
||||
uint32_t c, i, o, end, chunk, unrolled, i_count, o_count;
|
||||
__m128 in, max, mask = _mm_andnot_ps(_mm_set_ps1(-0.0f),
|
||||
__m128 in, max;
|
||||
uint32_t n, unrolled;
|
||||
const __m128 mask = _mm_andnot_ps(
|
||||
_mm_set_ps1(-0.0f),
|
||||
_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
|
||||
|
||||
if (r->channels == 0)
|
||||
return;
|
||||
|
||||
for (c = 0; c < r->channels; c++) {
|
||||
const float *s = src[c];
|
||||
float *d = dst[c], m = pd->max_f[c];
|
||||
|
||||
o_count = pd->o_count;
|
||||
i_count = pd->i_count;
|
||||
o = i = 0;
|
||||
|
||||
max = _mm_set1_ps(m);
|
||||
|
||||
while (i < *in_len && o < *out_len) {
|
||||
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
|
||||
end = end > i_count ? end - i_count : 0;
|
||||
chunk = SPA_MIN(end, *in_len);
|
||||
unrolled = n_samples & ~3;
|
||||
|
||||
unrolled = chunk - ((chunk - i) & 3);
|
||||
|
||||
for (; i < unrolled; i+=4) {
|
||||
in = _mm_loadu_ps(&s[i]);
|
||||
for (n = 0; n < unrolled; n+=4) {
|
||||
in = _mm_loadu_ps(&s[n]);
|
||||
in = _mm_and_ps(mask, in);
|
||||
max = _mm_max_ps(in, max);
|
||||
}
|
||||
for (; i < chunk; i++)
|
||||
m = SPA_MAX(fabsf(s[i]), m);
|
||||
for (; n < n_samples; n++)
|
||||
m = fmaxf(fabsf(s[n]), m);
|
||||
|
||||
if (i == end) {
|
||||
d[o++] = SPA_MAX(hmax_ps(max), m);
|
||||
m = 0.0f;
|
||||
max = _mm_set1_ps(m);
|
||||
o_count++;
|
||||
}
|
||||
}
|
||||
pd->max_f[c] = SPA_MAX(hmax_ps(max), m);
|
||||
return fmaxf(hmax_ps(max), m);
|
||||
}
|
||||
|
||||
*out_len = o;
|
||||
*in_len = i;
|
||||
pd->o_count = o_count;
|
||||
pd->i_count = i_count + i;
|
||||
|
||||
while (pd->i_count >= r->i_rate) {
|
||||
pd->i_count -= r->i_rate;
|
||||
pd->o_count -= r->o_rate;
|
||||
}
|
||||
}
|
||||
MAKE_PEAKS(sse);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue