mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-10-31 22:25:33 -04:00
echo-cancel: Add SSE optimisation to the adrian module
Optimises the core inner-product function, which takes the most CPU. The SSE-optimised bits of the adrian echo canceller are used only if the CPU that PA is running on actually supports SSE.
This commit is contained in:
parent
ab4223e9cf
commit
963250abb9
7 changed files with 61 additions and 12 deletions
|
|
@ -17,6 +17,10 @@
|
||||||
|
|
||||||
#include "adrian-aec.h"
|
#include "adrian-aec.h"
|
||||||
|
|
||||||
|
#ifdef __SSE__
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Vector Dot Product */
|
/* Vector Dot Product */
|
||||||
static REAL dotp(REAL a[], REAL b[])
|
static REAL dotp(REAL a[], REAL b[])
|
||||||
{
|
{
|
||||||
|
|
@ -31,8 +35,32 @@ static REAL dotp(REAL a[], REAL b[])
|
||||||
return sum0 + sum1;
|
return sum0 + sum1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));
|
||||||
|
static REAL dotp_sse(REAL a[], REAL b[])
|
||||||
|
{
|
||||||
|
#ifdef __SSE__
|
||||||
|
/* This is taken from speex's inner product implementation */
|
||||||
|
int j;
|
||||||
|
REAL sum;
|
||||||
|
__m128 acc = _mm_setzero_ps();
|
||||||
|
|
||||||
AEC* AEC_init(int RATE)
|
for (j=0;j<NLMS_LEN;j+=8)
|
||||||
|
{
|
||||||
|
acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
|
||||||
|
acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
|
||||||
|
}
|
||||||
|
acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
|
||||||
|
acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
|
||||||
|
_mm_store_ss(&sum, acc);
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
#else
|
||||||
|
return dotp(a, b);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
AEC* AEC_init(int RATE, int have_vector)
|
||||||
{
|
{
|
||||||
AEC *a = pa_xnew(AEC, 1);
|
AEC *a = pa_xnew(AEC, 1);
|
||||||
a->hangover = 0;
|
a->hangover = 0;
|
||||||
|
|
@ -57,6 +85,11 @@ AEC* AEC_init(int RATE)
|
||||||
a->dumpcnt = 0;
|
a->dumpcnt = 0;
|
||||||
memset(a->ws, 0, sizeof(a->ws));
|
memset(a->ws, 0, sizeof(a->ws));
|
||||||
|
|
||||||
|
if (have_vector)
|
||||||
|
a->dotp = dotp_sse;
|
||||||
|
else
|
||||||
|
a->dotp = dotp;
|
||||||
|
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -146,7 +179,7 @@ static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
|
||||||
// (mic signal - estimated mic signal from spk signal)
|
// (mic signal - estimated mic signal from spk signal)
|
||||||
e = d;
|
e = d;
|
||||||
if (a->hangover > 0) {
|
if (a->hangover > 0) {
|
||||||
e -= dotp(a->w, a->x + a->j);
|
e -= a->dotp(a->w, a->x + a->j);
|
||||||
}
|
}
|
||||||
ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
|
ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,13 @@
|
||||||
|
|
||||||
#ifndef _AEC_H /* include only once */
|
#ifndef _AEC_H /* include only once */
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include <config.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <pulsecore/macro.h>
|
||||||
|
#include <pulse/xmalloc.h>
|
||||||
|
|
||||||
#define WIDEB 2
|
#define WIDEB 2
|
||||||
|
|
||||||
// use double if your CPU does software-emulation of float
|
// use double if your CPU does software-emulation of float
|
||||||
|
|
@ -315,6 +322,9 @@ struct AEC {
|
||||||
// variables are public for visualization
|
// variables are public for visualization
|
||||||
int hangover;
|
int hangover;
|
||||||
float stepsize;
|
float stepsize;
|
||||||
|
|
||||||
|
// vfuncs that are picked based on processor features available
|
||||||
|
REAL (*dotp) (REAL[], REAL[]);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Double-Talk Detector
|
/* Double-Talk Detector
|
||||||
|
|
@ -338,7 +348,7 @@ static void AEC_leaky(AEC *a);
|
||||||
*/
|
*/
|
||||||
static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize);
|
static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize);
|
||||||
|
|
||||||
AEC* AEC_init(int RATE);
|
AEC* AEC_init(int RATE, int have_vector);
|
||||||
|
|
||||||
/* Acoustic Echo Cancellation and Suppression of one sample
|
/* Acoustic Echo Cancellation and Suppression of one sample
|
||||||
* in d: microphone signal with echo
|
* in d: microphone signal with echo
|
||||||
|
|
|
||||||
|
|
@ -51,12 +51,12 @@ static void pa_adrian_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *
|
||||||
*sink_map = *source_map;
|
*sink_map = *source_map;
|
||||||
}
|
}
|
||||||
|
|
||||||
pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
|
pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||||
uint32_t *blocksize, const char *args)
|
uint32_t *blocksize, const char *args)
|
||||||
{
|
{
|
||||||
int framelen, rate;
|
int framelen, rate, have_vector = 0;
|
||||||
uint32_t frame_size_ms;
|
uint32_t frame_size_ms;
|
||||||
pa_modargs *ma;
|
pa_modargs *ma;
|
||||||
|
|
||||||
|
|
@ -80,7 +80,11 @@ pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
|
||||||
|
|
||||||
pa_log_debug ("Using framelen %d, blocksize %u, channels %d, rate %d", framelen, ec->params.priv.adrian.blocksize, source_ss->channels, source_ss->rate);
|
pa_log_debug ("Using framelen %d, blocksize %u, channels %d, rate %d", framelen, ec->params.priv.adrian.blocksize, source_ss->channels, source_ss->rate);
|
||||||
|
|
||||||
ec->params.priv.adrian.aec = AEC_init(rate);
|
/* For now we only support SSE */
|
||||||
|
if (c->cpu_info.cpu_type == PA_CPU_X86 && (c->cpu_info.flags.x86 & PA_CPU_X86_SSE))
|
||||||
|
have_vector = 1;
|
||||||
|
|
||||||
|
ec->params.priv.adrian.aec = AEC_init(rate, have_vector);
|
||||||
if (!ec->params.priv.adrian.aec)
|
if (!ec->params.priv.adrian.aec)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,5 +27,5 @@
|
||||||
|
|
||||||
typedef struct AEC AEC;
|
typedef struct AEC AEC;
|
||||||
|
|
||||||
AEC* AEC_init(int RATE);
|
AEC* AEC_init(int RATE, int have_vector);
|
||||||
int AEC_doAEC(AEC *a, int d_, int x_);
|
int AEC_doAEC(AEC *a, int d_, int x_);
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
#include <pulse/sample.h>
|
#include <pulse/sample.h>
|
||||||
#include <pulse/channelmap.h>
|
#include <pulse/channelmap.h>
|
||||||
|
#include <pulsecore/core.h>
|
||||||
#include <pulsecore/macro.h>
|
#include <pulsecore/macro.h>
|
||||||
|
|
||||||
#include <speex/speex_echo.h>
|
#include <speex/speex_echo.h>
|
||||||
|
|
@ -50,7 +51,8 @@ struct pa_echo_canceller_params {
|
||||||
typedef struct pa_echo_canceller pa_echo_canceller;
|
typedef struct pa_echo_canceller pa_echo_canceller;
|
||||||
|
|
||||||
struct pa_echo_canceller {
|
struct pa_echo_canceller {
|
||||||
pa_bool_t (*init) (pa_echo_canceller *ec,
|
pa_bool_t (*init) (pa_core *c,
|
||||||
|
pa_echo_canceller *ec,
|
||||||
pa_sample_spec *source_ss,
|
pa_sample_spec *source_ss,
|
||||||
pa_channel_map *source_map,
|
pa_channel_map *source_map,
|
||||||
pa_sample_spec *sink_ss,
|
pa_sample_spec *sink_ss,
|
||||||
|
|
@ -64,7 +66,7 @@ struct pa_echo_canceller {
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Speex canceller functions */
|
/* Speex canceller functions */
|
||||||
pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
|
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||||
uint32_t *blocksize, const char *args);
|
uint32_t *blocksize, const char *args);
|
||||||
|
|
@ -72,7 +74,7 @@ void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *p
|
||||||
void pa_speex_ec_done(pa_echo_canceller *ec);
|
void pa_speex_ec_done(pa_echo_canceller *ec);
|
||||||
|
|
||||||
/* Adrian Andre's echo canceller */
|
/* Adrian Andre's echo canceller */
|
||||||
pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
|
pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||||
uint32_t *blocksize, const char *args);
|
uint32_t *blocksize, const char *args);
|
||||||
|
|
|
||||||
|
|
@ -1398,7 +1398,7 @@ int pa__init(pa_module*m) {
|
||||||
u->asyncmsgq = pa_asyncmsgq_new(0);
|
u->asyncmsgq = pa_asyncmsgq_new(0);
|
||||||
u->need_realign = TRUE;
|
u->need_realign = TRUE;
|
||||||
if (u->ec->init) {
|
if (u->ec->init) {
|
||||||
if (!u->ec->init(u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
|
if (!u->ec->init(u->core, u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
|
||||||
pa_log("Failed to init AEC engine");
|
pa_log("Failed to init AEC engine");
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ static void pa_speex_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *s
|
||||||
*sink_map = *source_map;
|
*sink_map = *source_map;
|
||||||
}
|
}
|
||||||
|
|
||||||
pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
|
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
|
||||||
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
pa_sample_spec *source_ss, pa_channel_map *source_map,
|
||||||
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
|
||||||
uint32_t *blocksize, const char *args)
|
uint32_t *blocksize, const char *args)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue