mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-10-29 05:40:23 -04:00
echo-cancel: Add SSE optimisation to the adrian module
Optimises the core inner-product function, which takes the most CPU. The SSE-optimised bits of the adrian echo canceller are used only if the CPU that PA is running on actually supports SSE.
This commit is contained in:
parent
ab4223e9cf
commit
963250abb9
7 changed files with 61 additions and 12 deletions
|
|
@ -17,6 +17,10 @@
|
|||
|
||||
#include "adrian-aec.h"
|
||||
|
||||
#ifdef __SSE__
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
/* Vector Dot Product */
|
||||
static REAL dotp(REAL a[], REAL b[])
|
||||
{
|
||||
|
|
@ -31,8 +35,32 @@ static REAL dotp(REAL a[], REAL b[])
|
|||
return sum0 + sum1;
|
||||
}
|
||||
|
||||
/* Forward declaration carrying the noinline attribute, so the SSE variant
 * stays a separate function that is only reached through a function pointer. */
static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));

/* Vector Dot Product, SSE variant.
 *
 * Computes the inner product of the first NLMS_LEN elements of a[] and b[].
 * When the translation unit is built without SSE support (__SSE__ undefined)
 * this simply forwards to the scalar dotp().
 *
 * Both operands are read with unaligned loads: neither pointer is guaranteed
 * to be 16-byte aligned by anything in this function, and an aligned load
 * (_mm_load_ps) on a misaligned address faults.
 *
 * NOTE(review): the __m128 intrinsics operate on single-precision floats, so
 * this path presumably requires REAL to be float -- confirm against the
 * REAL typedef in the header.
 */
static REAL dotp_sse(REAL a[], REAL b[])
{
#ifdef __SSE__
    /* This is taken from speex's inner product implementation */
    int j;
    REAL sum;
    __m128 acc = _mm_setzero_ps();

    /* Main loop: two 4-wide multiply-accumulates per iteration. The bound
     * is written as j + 8 <= NLMS_LEN so a length that is not a multiple of
     * 8 never causes a read past the end of the arrays. */
    for (j = 0; j + 8 <= NLMS_LEN; j += 8) {
        acc = _mm_add_ps(acc, _mm_mul_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j)));
        acc = _mm_add_ps(acc, _mm_mul_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4)));
    }

    /* Horizontal sum: fold the upper two lanes onto the lower two, then add
     * lane 1 onto lane 0 and extract lane 0. */
    acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
    acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
    _mm_store_ss(&sum, acc);

    /* Scalar tail for any elements left over when NLMS_LEN is not a
     * multiple of 8; this loop does not execute otherwise. */
    for (; j < NLMS_LEN; j++)
        sum += a[j] * b[j];

    return sum;
#else
    return dotp(a, b);
#endif
}
|
||||
|
||||
|
||||
AEC* AEC_init(int RATE, int have_vector)
|
||||
{
|
||||
AEC *a = pa_xnew(AEC, 1);
|
||||
a->hangover = 0;
|
||||
|
|
@ -57,6 +85,11 @@ AEC* AEC_init(int RATE)
|
|||
a->dumpcnt = 0;
|
||||
memset(a->ws, 0, sizeof(a->ws));
|
||||
|
||||
if (have_vector)
|
||||
a->dotp = dotp_sse;
|
||||
else
|
||||
a->dotp = dotp;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
|
|
@ -146,7 +179,7 @@ static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
|
|||
// (mic signal - estimated mic signal from spk signal)
|
||||
e = d;
|
||||
if (a->hangover > 0) {
|
||||
e -= dotp(a->w, a->x + a->j);
|
||||
e -= a->dotp(a->w, a->x + a->j);
|
||||
}
|
||||
ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,13 @@
|
|||
|
||||
#ifndef _AEC_H /* include only once */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <pulsecore/macro.h>
|
||||
#include <pulse/xmalloc.h>
|
||||
|
||||
#define WIDEB 2
|
||||
|
||||
// use double if your CPU does software-emulation of float
|
||||
|
|
@ -315,6 +322,9 @@ struct AEC {
|
|||
// variables are public for visualization
|
||||
int hangover;
|
||||
float stepsize;
|
||||
|
||||
// vfuncs that are picked based on processor features available
|
||||
REAL (*dotp) (REAL[], REAL[]);
|
||||
};
|
||||
|
||||
/* Double-Talk Detector
|
||||
|
|
@ -338,7 +348,7 @@ static void AEC_leaky(AEC *a);
|
|||
*/
|
||||
static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize);
|
||||
|
||||
AEC* AEC_init(int RATE);
|
||||
AEC* AEC_init(int RATE, int have_vector);
|
||||
|
||||
/* Acoustic Echo Cancellation and Suppression of one sample
|
||||
* in d: microphone signal with echo
|
||||
|
|
|
|||
|
|
@ -51,12 +51,12 @@ static void pa_adrian_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *
|
|||
*sink_map = *source_map;
|
||||
}
|
||||
|
||||
pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
|
||||
pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||
uint32_t *blocksize, const char *args)
|
||||
{
|
||||
int framelen, rate;
|
||||
int framelen, rate, have_vector = 0;
|
||||
uint32_t frame_size_ms;
|
||||
pa_modargs *ma;
|
||||
|
||||
|
|
@ -80,7 +80,11 @@ pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
|
|||
|
||||
pa_log_debug ("Using framelen %d, blocksize %u, channels %d, rate %d", framelen, ec->params.priv.adrian.blocksize, source_ss->channels, source_ss->rate);
|
||||
|
||||
ec->params.priv.adrian.aec = AEC_init(rate);
|
||||
/* For now we only support SSE */
|
||||
if (c->cpu_info.cpu_type == PA_CPU_X86 && (c->cpu_info.flags.x86 & PA_CPU_X86_SSE))
|
||||
have_vector = 1;
|
||||
|
||||
ec->params.priv.adrian.aec = AEC_init(rate, have_vector);
|
||||
if (!ec->params.priv.adrian.aec)
|
||||
goto fail;
|
||||
|
||||
|
|
|
|||
|
|
@ -27,5 +27,5 @@
|
|||
|
||||
typedef struct AEC AEC;
|
||||
|
||||
AEC* AEC_init(int RATE);
|
||||
AEC* AEC_init(int RATE, int have_vector);
|
||||
int AEC_doAEC(AEC *a, int d_, int x_);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include <pulse/sample.h>
|
||||
#include <pulse/channelmap.h>
|
||||
#include <pulsecore/core.h>
|
||||
#include <pulsecore/macro.h>
|
||||
|
||||
#include <speex/speex_echo.h>
|
||||
|
|
@ -50,7 +51,8 @@ struct pa_echo_canceller_params {
|
|||
typedef struct pa_echo_canceller pa_echo_canceller;
|
||||
|
||||
struct pa_echo_canceller {
|
||||
pa_bool_t (*init) (pa_echo_canceller *ec,
|
||||
pa_bool_t (*init) (pa_core *c,
|
||||
pa_echo_canceller *ec,
|
||||
pa_sample_spec *source_ss,
|
||||
pa_channel_map *source_map,
|
||||
pa_sample_spec *sink_ss,
|
||||
|
|
@ -64,7 +66,7 @@ struct pa_echo_canceller {
|
|||
};
|
||||
|
||||
/* Speex canceller functions */
|
||||
pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
|
||||
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||
uint32_t *blocksize, const char *args);
|
||||
|
|
@ -72,7 +74,7 @@ void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *p
|
|||
void pa_speex_ec_done(pa_echo_canceller *ec);
|
||||
|
||||
/* Adrian Andre's echo canceller */
|
||||
pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
|
||||
pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||
uint32_t *blocksize, const char *args);
|
||||
|
|
|
|||
|
|
@ -1398,7 +1398,7 @@ int pa__init(pa_module*m) {
|
|||
u->asyncmsgq = pa_asyncmsgq_new(0);
|
||||
u->need_realign = TRUE;
|
||||
if (u->ec->init) {
|
||||
if (!u->ec->init(u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
|
||||
if (!u->ec->init(u->core, u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
|
||||
pa_log("Failed to init AEC engine");
|
||||
goto fail;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ static void pa_speex_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *s
|
|||
*sink_map = *source_map;
|
||||
}
|
||||
|
||||
pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
|
||||
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||
uint32_t *blocksize, const char *args)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue