echo-cancel: Add SSE optimisation to the adrian module

Optimises the core inner-product function, which takes the most CPU. The
SSE-optimised bits of the adrian echo canceller are used only if the CPU
that PA is running on actually supports SSE.
This commit is contained in:
Arun Raghavan 2010-09-21 20:42:32 +05:30
parent ab4223e9cf
commit 963250abb9
7 changed files with 61 additions and 12 deletions

View file

@ -17,6 +17,10 @@
#include "adrian-aec.h"
#ifdef __SSE__
#include <xmmintrin.h>
#endif
/* Vector Dot Product */
static REAL dotp(REAL a[], REAL b[])
{
@ -31,8 +35,32 @@ static REAL dotp(REAL a[], REAL b[])
return sum0 + sum1;
}
/* SSE variant of dotp(): inner product of a[] and b[] over NLMS_LEN elements.
 *
 * a: read with _mm_load_ps (the aligned load), so it must be 16-byte aligned.
 * b: read with _mm_loadu_ps, so it may be unaligned.
 *
 * NOTE(review): the _ps intrinsics operate on float, so this assumes REAL is
 * float -- confirm against the REAL definition in adrian-aec.h.
 * NOTE(review): the loop consumes 8 elements per iteration, so NLMS_LEN is
 * presumably a multiple of 8 -- confirm, otherwise the last iteration reads
 * past the intended range.
 *
 * When built without __SSE__ this simply forwards to the scalar dotp().
 */
static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));
static REAL dotp_sse(REAL a[], REAL b[])
{
#ifdef __SSE__
    /* This is taken from speex's inner product implementation */
    int j;
    REAL sum;
    __m128 acc = _mm_setzero_ps();

    /* Accumulate four partial sums in parallel, two 4-wide steps per pass. */
    for (j=0;j<NLMS_LEN;j+=8)
    {
        acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
        acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
    }

    /* Horizontal reduction: fold the upper two lanes onto the lower two... */
    acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
    /* ...then add lane 1 (selected by shuffle mask 0x55) into lane 0. */
    acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
    _mm_store_ss(&sum, acc);

    return sum;
#else
    return dotp(a, b);
#endif
}
AEC* AEC_init(int RATE, int have_vector)
{
AEC *a = pa_xnew(AEC, 1);
a->hangover = 0;
@ -57,6 +85,11 @@ AEC* AEC_init(int RATE)
a->dumpcnt = 0;
memset(a->ws, 0, sizeof(a->ws));
if (have_vector)
a->dotp = dotp_sse;
else
a->dotp = dotp;
return a;
}
@ -146,7 +179,7 @@ static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
// (mic signal - estimated mic signal from spk signal)
e = d;
if (a->hangover > 0) {
e -= dotp(a->w, a->x + a->j);
e -= a->dotp(a->w, a->x + a->j);
}
ef = IIR1_highpass(a->Fe, e); // pre-whitening of e

View file

@ -13,6 +13,13 @@
#ifndef _AEC_H /* include only once */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <pulsecore/macro.h>
#include <pulse/xmalloc.h>
#define WIDEB 2
// use double if your CPU does software-emulation of float
@ -315,6 +322,9 @@ struct AEC {
// variables are public for visualization
int hangover;
float stepsize;
// vfuncs that are picked based on processor features available
REAL (*dotp) (REAL[], REAL[]);
};
/* Double-Talk Detector
@ -338,7 +348,7 @@ static void AEC_leaky(AEC *a);
*/
static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize);
AEC* AEC_init(int RATE);
AEC* AEC_init(int RATE, int have_vector);
/* Acoustic Echo Cancellation and Suppression of one sample
* in d: microphone signal with echo

View file

@ -51,12 +51,12 @@ static void pa_adrian_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *
*sink_map = *source_map;
}
pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args)
{
int framelen, rate;
int framelen, rate, have_vector = 0;
uint32_t frame_size_ms;
pa_modargs *ma;
@ -80,7 +80,11 @@ pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
pa_log_debug ("Using framelen %d, blocksize %u, channels %d, rate %d", framelen, ec->params.priv.adrian.blocksize, source_ss->channels, source_ss->rate);
ec->params.priv.adrian.aec = AEC_init(rate);
/* For now we only support SSE */
if (c->cpu_info.cpu_type == PA_CPU_X86 && (c->cpu_info.flags.x86 & PA_CPU_X86_SSE))
have_vector = 1;
ec->params.priv.adrian.aec = AEC_init(rate, have_vector);
if (!ec->params.priv.adrian.aec)
goto fail;

View file

@ -27,5 +27,5 @@
typedef struct AEC AEC;
AEC* AEC_init(int RATE);
AEC* AEC_init(int RATE, int have_vector);
int AEC_doAEC(AEC *a, int d_, int x_);

View file

@ -25,6 +25,7 @@
#include <pulse/sample.h>
#include <pulse/channelmap.h>
#include <pulsecore/core.h>
#include <pulsecore/macro.h>
#include <speex/speex_echo.h>
@ -50,7 +51,8 @@ struct pa_echo_canceller_params {
typedef struct pa_echo_canceller pa_echo_canceller;
struct pa_echo_canceller {
pa_bool_t (*init) (pa_echo_canceller *ec,
pa_bool_t (*init) (pa_core *c,
pa_echo_canceller *ec,
pa_sample_spec *source_ss,
pa_channel_map *source_map,
pa_sample_spec *sink_ss,
@ -64,7 +66,7 @@ struct pa_echo_canceller {
};
/* Speex canceller functions */
pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args);
@ -72,7 +74,7 @@ void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *p
void pa_speex_ec_done(pa_echo_canceller *ec);
/* Adrian Andre's echo canceller */
pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args);

View file

@ -1398,7 +1398,7 @@ int pa__init(pa_module*m) {
u->asyncmsgq = pa_asyncmsgq_new(0);
u->need_realign = TRUE;
if (u->ec->init) {
if (!u->ec->init(u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
if (!u->ec->init(u->core, u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
pa_log("Failed to init AEC engine");
goto fail;
}

View file

@ -48,7 +48,7 @@ static void pa_speex_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *s
*sink_map = *source_map;
}
pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args)