mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2025-10-29 05:40:27 -04:00
audioconvert: refactor peaks resampler
Move the common code into a macro and generate the arch-specific versions from it. Compile with -Ofast so that some fmaxf calls can be optimized.
This commit is contained in:
parent
a79b5c86ea
commit
6e9e02b420
9 changed files with 89 additions and 110 deletions
|
|
@ -27,7 +27,7 @@ if have_sse
|
||||||
'resample-peaks-sse.c',
|
'resample-peaks-sse.c',
|
||||||
'volume-ops-sse.c',
|
'volume-ops-sse.c',
|
||||||
'channelmix-ops-sse.c' ],
|
'channelmix-ops-sse.c' ],
|
||||||
c_args : [sse_args, '-O3', '-DHAVE_SSE'],
|
c_args : [sse_args, '-Ofast', '-DHAVE_SSE'],
|
||||||
dependencies : [ spa_dep ],
|
dependencies : [ spa_dep ],
|
||||||
install : false
|
install : false
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
static void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||||
{
|
{
|
||||||
__m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty;
|
__m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty;
|
||||||
|
|
@ -56,7 +56,7 @@ static void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
||||||
_mm_store_ss(d, sx[0]);
|
_mm_store_ss(d, sx[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||||
uint32_t n_taps)
|
uint32_t n_taps)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@
|
||||||
|
|
||||||
#include "resample-native-impl.h"
|
#include "resample-native-impl.h"
|
||||||
|
|
||||||
static void inner_product_c(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_c(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||||
{
|
{
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
|
|
@ -40,7 +40,7 @@ static void inner_product_c(float *d, const float * SPA_RESTRICT s,
|
||||||
*d = sum;
|
*d = sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inner_product_ip_c(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_ip_c(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||||
uint32_t n_taps)
|
uint32_t n_taps)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
|
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_neon(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||||
{
|
{
|
||||||
unsigned int remainder = n_taps % 16;
|
unsigned int remainder = n_taps % 16;
|
||||||
|
|
@ -137,7 +137,7 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||||
uint32_t n_taps)
|
uint32_t n_taps)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
|
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
|
|
||||||
static void inner_product_sse(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_sse(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||||
{
|
{
|
||||||
__m128 sum = _mm_setzero_ps();
|
__m128 sum = _mm_setzero_ps();
|
||||||
|
|
@ -68,7 +68,7 @@ static void inner_product_sse(float *d, const float * SPA_RESTRICT s,
|
||||||
_mm_store_ss(d, sum);
|
_mm_store_ss(d, sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inner_product_ip_sse(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_ip_sse(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||||
uint32_t n_taps)
|
uint32_t n_taps)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@
|
||||||
|
|
||||||
#include <tmmintrin.h>
|
#include <tmmintrin.h>
|
||||||
|
|
||||||
static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||||
{
|
{
|
||||||
__m128 sum = _mm_setzero_ps();
|
__m128 sum = _mm_setzero_ps();
|
||||||
|
|
@ -97,7 +97,7 @@ static void inner_product_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||||
_mm_store_ss(d, sum);
|
_mm_store_ss(d, sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
|
static inline void inner_product_ip_ssse3(float *d, const float * SPA_RESTRICT s,
|
||||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||||
uint32_t n_taps)
|
uint32_t n_taps)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -26,48 +26,12 @@
|
||||||
|
|
||||||
#include "resample-peaks-impl.h"
|
#include "resample-peaks-impl.h"
|
||||||
|
|
||||||
void resample_peaks_process_c(struct resample *r,
|
static inline float find_abs_max_c(const float *s, uint32_t n_samples, float m)
|
||||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
|
||||||
void * SPA_RESTRICT dst[], uint32_t *out_len)
|
|
||||||
{
|
{
|
||||||
struct peaks_data *pd = r->data;
|
uint32_t n;
|
||||||
uint32_t c, i, o, end, chunk, o_count, i_count;
|
for (n = 0; n < n_samples; n++)
|
||||||
|
m = fmaxf(fabsf(s[n]), m);
|
||||||
if (SPA_UNLIKELY(r->channels == 0))
|
return m;
|
||||||
return;
|
|
||||||
|
|
||||||
for (c = 0; c < r->channels; c++) {
|
|
||||||
const float *s = src[c];
|
|
||||||
float *d = dst[c], m = pd->max_f[c];
|
|
||||||
|
|
||||||
o_count = pd->o_count;
|
|
||||||
i_count = pd->i_count;
|
|
||||||
o = i = 0;
|
|
||||||
|
|
||||||
while (i < *in_len && o < *out_len) {
|
|
||||||
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
|
|
||||||
end = end > i_count ? end - i_count : 0;
|
|
||||||
chunk = SPA_MIN(end, *in_len);
|
|
||||||
|
|
||||||
for (; i < chunk; i++)
|
|
||||||
m = SPA_MAX(fabsf(s[i]), m);
|
|
||||||
|
|
||||||
if (i == end) {
|
|
||||||
d[o++] = m;
|
|
||||||
m = 0.0f;
|
|
||||||
o_count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pd->max_f[c] = m;
|
|
||||||
}
|
|
||||||
|
|
||||||
*out_len = o;
|
|
||||||
*in_len = i;
|
|
||||||
pd->o_count = o_count;
|
|
||||||
pd->i_count = i_count + i;
|
|
||||||
|
|
||||||
while (pd->i_count >= r->i_rate) {
|
|
||||||
pd->i_count -= r->i_rate;
|
|
||||||
pd->o_count -= r->o_rate;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MAKE_PEAKS(c);
|
||||||
|
|
|
||||||
|
|
@ -34,11 +34,59 @@ struct peaks_data {
|
||||||
float max_f[];
|
float max_f[];
|
||||||
};
|
};
|
||||||
|
|
||||||
void resample_peaks_process_c(struct resample *r,
|
#define DEFINE_PEAKS(arch) \
|
||||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
void resample_peaks_process_##arch(struct resample *r, \
|
||||||
void * SPA_RESTRICT dst[], uint32_t *out_len);
|
const void * SPA_RESTRICT src[], uint32_t *in_len, \
|
||||||
|
void * SPA_RESTRICT dst[], uint32_t *out_len)
|
||||||
|
|
||||||
|
/*
 * MAKE_PEAKS(arch) - emit the definition of resample_peaks_process_<arch>().
 *
 * The generated function takes its signature from DEFINE_PEAKS(arch) and
 * relies on a find_abs_max_<arch>(const float *s, uint32_t n_samples, float m)
 * helper being visible at the point of expansion.
 *
 * For every channel c it walks src[c] and writes one value to dst[c] each
 * time the input position reaches the next output boundary `end`, computed
 * from r->i_rate / r->o_rate and the counters stored in struct peaks_data.
 * The value written is the running absolute maximum returned by
 * find_abs_max_<arch>(); the maximum of a period that is still incomplete
 * when the input runs out is kept in pd->max_f[c] for the next call.
 *
 * On return *in_len holds the number of input samples consumed and *out_len
 * the number of output samples produced. pd->i_count / pd->o_count are
 * advanced and then reduced by r->i_rate / r->o_rate while i_count is at
 * least r->i_rate.
 */
#define MAKE_PEAKS(arch) \
DEFINE_PEAKS(arch) \
{ \
	struct peaks_data *pd = r->data; \
	uint32_t c, i, o, end, chunk, i_count, o_count; \
 \
	/* nothing to do; also keeps i/o/counters from being read unset below */ \
	if (SPA_UNLIKELY(r->channels == 0)) \
		return; \
 \
	for (c = 0; c < r->channels; c++) { \
		const float *s = src[c]; \
		float *d = dst[c], m = pd->max_f[c]; \
 \
		/* every channel restarts from the same saved position */ \
		o_count = pd->o_count; \
		i_count = pd->i_count; \
		o = i = 0; \
 \
		while (i < *in_len && o < *out_len) { \
			/* index (relative to this buffer) of the next output boundary */ \
			end = ((uint64_t) (o_count + 1) \
				* r->i_rate) / r->o_rate; \
			end = end > i_count ? end - i_count : 0; \
			/* absolute stop index for this pass, clamped to the input */ \
			chunk = SPA_MIN(end, *in_len); \
 \
			m = find_abs_max_##arch(&s[i], chunk - i, m); \
 \
			/* chunk is an index, not a length: s[i..chunk) was just */ \
			/* scanned, so the new position is chunk itself. Adding   */ \
			/* it to i would overshoot on every pass after the first. */ \
			i = chunk; \
 \
			if (i == end) { \
				d[o++] = m; \
				m = 0.0f; \
				o_count++; \
			} \
		} \
		pd->max_f[c] = m; \
	} \
	*out_len = o; \
	*in_len = i; \
	pd->o_count = o_count; \
	pd->i_count = i_count + i; \
 \
	/* keep the counters bounded by removing whole rate periods */ \
	while (pd->i_count >= r->i_rate) { \
		pd->i_count -= r->i_rate; \
		pd->o_count -= r->o_rate; \
	} \
}
|
||||||
|
|
||||||
|
|
||||||
|
DEFINE_PEAKS(c);
|
||||||
#if defined (HAVE_SSE)
|
#if defined (HAVE_SSE)
|
||||||
void resample_peaks_process_sse(struct resample *r,
|
DEFINE_PEAKS(sse);
|
||||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
|
||||||
void * SPA_RESTRICT dst[], uint32_t *out_len);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -37,60 +37,27 @@ static inline float hmax_ps(__m128 val)
|
||||||
return _mm_cvtss_f32(val);
|
return _mm_cvtss_f32(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void resample_peaks_process_sse(struct resample *r,
|
static inline float find_abs_max_sse(const float *s, uint32_t n_samples, float m)
|
||||||
const void * SPA_RESTRICT src[], uint32_t *in_len,
|
|
||||||
void * SPA_RESTRICT dst[], uint32_t *out_len)
|
|
||||||
{
|
{
|
||||||
struct peaks_data *pd = r->data;
|
__m128 in, max;
|
||||||
uint32_t c, i, o, end, chunk, unrolled, i_count, o_count;
|
uint32_t n, unrolled;
|
||||||
__m128 in, max, mask = _mm_andnot_ps(_mm_set_ps1(-0.0f),
|
const __m128 mask = _mm_andnot_ps(
|
||||||
_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
|
_mm_set_ps1(-0.0f),
|
||||||
|
_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()));
|
||||||
|
|
||||||
if (r->channels == 0)
|
max = _mm_set1_ps(m);
|
||||||
return;
|
|
||||||
|
|
||||||
for (c = 0; c < r->channels; c++) {
|
unrolled = n_samples & ~3;
|
||||||
const float *s = src[c];
|
|
||||||
float *d = dst[c], m = pd->max_f[c];
|
|
||||||
|
|
||||||
o_count = pd->o_count;
|
for (n = 0; n < unrolled; n+=4) {
|
||||||
i_count = pd->i_count;
|
in = _mm_loadu_ps(&s[n]);
|
||||||
o = i = 0;
|
in = _mm_and_ps(mask, in);
|
||||||
|
max = _mm_max_ps(in, max);
|
||||||
max = _mm_set1_ps(m);
|
|
||||||
|
|
||||||
while (i < *in_len && o < *out_len) {
|
|
||||||
end = ((uint64_t) (o_count + 1) * r->i_rate) / r->o_rate;
|
|
||||||
end = end > i_count ? end - i_count : 0;
|
|
||||||
chunk = SPA_MIN(end, *in_len);
|
|
||||||
|
|
||||||
unrolled = chunk - ((chunk - i) & 3);
|
|
||||||
|
|
||||||
for (; i < unrolled; i+=4) {
|
|
||||||
in = _mm_loadu_ps(&s[i]);
|
|
||||||
in = _mm_and_ps(mask, in);
|
|
||||||
max = _mm_max_ps(in, max);
|
|
||||||
}
|
|
||||||
for (; i < chunk; i++)
|
|
||||||
m = SPA_MAX(fabsf(s[i]), m);
|
|
||||||
|
|
||||||
if (i == end) {
|
|
||||||
d[o++] = SPA_MAX(hmax_ps(max), m);
|
|
||||||
m = 0.0f;
|
|
||||||
max = _mm_set1_ps(m);
|
|
||||||
o_count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pd->max_f[c] = SPA_MAX(hmax_ps(max), m);
|
|
||||||
}
|
}
|
||||||
|
for (; n < n_samples; n++)
|
||||||
|
m = fmaxf(fabsf(s[n]), m);
|
||||||
|
|
||||||
*out_len = o;
|
return fmaxf(hmax_ps(max), m);
|
||||||
*in_len = i;
|
|
||||||
pd->o_count = o_count;
|
|
||||||
pd->i_count = i_count + i;
|
|
||||||
|
|
||||||
while (pd->i_count >= r->i_rate) {
|
|
||||||
pd->i_count -= r->i_rate;
|
|
||||||
pd->o_count -= r->o_rate;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MAKE_PEAKS(sse);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue