mirror of
https://gitlab.freedesktop.org/pulseaudio/pulseaudio.git
synced 2025-10-29 05:40:23 -04:00
This uses Orc to optimise an inner loop in the core NLMS function of the Adrian echo canceller.
274 lines
6.5 KiB
C
274 lines
6.5 KiB
C
/* aec.cpp
|
|
*
|
|
* Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
|
|
* All Rights Reserved.
|
|
*
|
|
* Acoustic Echo Cancellation NLMS-pw algorithm
|
|
*
|
|
* Version 0.3 filter created with www.dsptutor.freeuk.com
|
|
* Version 0.3.1 Allow change of stability parameter delta
|
|
* Version 0.4 Leaky Normalized LMS - pre whitening algorithm
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include <string.h>
|
|
|
|
#include <pulse/xmalloc.h>
|
|
|
|
#include "adrian-aec.h"
|
|
|
|
#ifndef DISABLE_ORC
|
|
#include "adrian-aec-orc.h"
|
|
#endif
|
|
|
|
#ifdef __SSE__
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
|
|
/* Vector Dot Product */
|
|
static REAL dotp(REAL a[], REAL b[])
|
|
{
|
|
REAL sum0 = 0.0, sum1 = 0.0;
|
|
int j;
|
|
|
|
for (j = 0; j < NLMS_LEN; j += 2) {
|
|
// optimize: partial loop unrolling
|
|
sum0 += a[j] * b[j];
|
|
sum1 += a[j + 1] * b[j + 1];
|
|
}
|
|
return sum0 + sum1;
|
|
}
|
|
|
|
static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));
|
|
static REAL dotp_sse(REAL a[], REAL b[])
|
|
{
|
|
#ifdef __SSE__
|
|
/* This is taken from speex's inner product implementation */
|
|
int j;
|
|
REAL sum;
|
|
__m128 acc = _mm_setzero_ps();
|
|
|
|
for (j=0;j<NLMS_LEN;j+=8)
|
|
{
|
|
acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
|
|
acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
|
|
}
|
|
acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
|
|
acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
|
|
_mm_store_ss(&sum, acc);
|
|
|
|
return sum;
|
|
#else
|
|
return dotp(a, b);
|
|
#endif
|
|
}
|
|
|
|
|
|
AEC* AEC_init(int RATE, int have_vector)
|
|
{
|
|
AEC *a = pa_xnew(AEC, 1);
|
|
a->hangover = 0;
|
|
memset(a->x, 0, sizeof(a->x));
|
|
memset(a->xf, 0, sizeof(a->xf));
|
|
memset(a->w, 0, sizeof(a->w));
|
|
a->j = NLMS_EXT;
|
|
a->delta = 0.0f;
|
|
AEC_setambient(a, NoiseFloor);
|
|
a->dfast = a->dslow = M75dB_PCM;
|
|
a->xfast = a->xslow = M80dB_PCM;
|
|
a->gain = 1.0f;
|
|
a->Fx = IIR1_init(2000.0f/RATE);
|
|
a->Fe = IIR1_init(2000.0f/RATE);
|
|
a->cutoff = FIR_HP_300Hz_init();
|
|
a->acMic = IIR_HP_init();
|
|
a->acSpk = IIR_HP_init();
|
|
|
|
a->aes_y2 = M0dB;
|
|
|
|
a->fdwdisplay = -1;
|
|
a->dumpcnt = 0;
|
|
memset(a->ws, 0, sizeof(a->ws));
|
|
|
|
if (have_vector)
|
|
a->dotp = dotp_sse;
|
|
else
|
|
a->dotp = dotp;
|
|
|
|
return a;
|
|
}
|
|
|
|
// Adrian soft decision DTD
|
|
// (Dual Average Near-End to Far-End signal Ratio DTD)
|
|
// This algorithm uses exponential smoothing with differnt
|
|
// ageing parameters to get fast and slow near-end and far-end
|
|
// signal averages. The ratio of NFRs term
|
|
// (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
|
|
// A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
|
|
// mapped to 1.0 with a limited linear function.
|
|
static float AEC_dtd(AEC *a, REAL d, REAL x)
|
|
{
|
|
float stepsize;
|
|
float ratio, M;
|
|
|
|
// fast near-end and far-end average
|
|
a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
|
|
a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
|
|
|
|
// slow near-end and far-end average
|
|
a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
|
|
a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
|
|
|
|
if (a->xfast < M70dB_PCM) {
|
|
return 0.0; // no Spk signal
|
|
}
|
|
|
|
if (a->dfast < M70dB_PCM) {
|
|
return 0.0; // no Mic signal
|
|
}
|
|
|
|
// ratio of NFRs
|
|
ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
|
|
|
|
// begrenzte lineare Kennlinie
|
|
M = (STEPY2 - STEPY1) / (STEPX2 - STEPX1);
|
|
if (ratio < STEPX1) {
|
|
stepsize = STEPY1;
|
|
} else if (ratio > STEPX2) {
|
|
stepsize = STEPY2;
|
|
} else {
|
|
// Punktrichtungsform einer Geraden
|
|
stepsize = M * (ratio - STEPX1) + STEPY1;
|
|
}
|
|
|
|
return stepsize;
|
|
}
|
|
|
|
|
|
static void AEC_leaky(AEC *a)
|
|
// The xfast signal is used to charge the hangover timer to Thold.
|
|
// When hangover expires (no Spk signal for some time) the vector w
|
|
// is erased. This is my implementation of Leaky NLMS.
|
|
{
|
|
if (a->xfast >= M70dB_PCM) {
|
|
// vector w is valid for hangover Thold time
|
|
a->hangover = Thold;
|
|
} else {
|
|
if (a->hangover > 1) {
|
|
--(a->hangover);
|
|
} else if (1 == a->hangover) {
|
|
--(a->hangover);
|
|
// My Leaky NLMS is to erase vector w when hangover expires
|
|
memset(a->w, 0, sizeof(a->w));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
#if 0
|
|
void AEC::openwdisplay() {
|
|
// open TCP connection to program wdisplay.tcl
|
|
fdwdisplay = socket_async("127.0.0.1", 50999);
|
|
};
|
|
#endif
|
|
|
|
|
|
static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
|
|
{
|
|
REAL e;
|
|
REAL ef;
|
|
a->x[a->j] = x_;
|
|
a->xf[a->j] = IIR1_highpass(a->Fx, x_); // pre-whitening of x
|
|
|
|
// calculate error value
|
|
// (mic signal - estimated mic signal from spk signal)
|
|
e = d;
|
|
if (a->hangover > 0) {
|
|
e -= a->dotp(a->w, a->x + a->j);
|
|
}
|
|
ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
|
|
|
|
// optimize: iterative dotp(xf, xf)
|
|
a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
|
|
|
|
if (stepsize > 0.0) {
|
|
// calculate variable step size
|
|
REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
|
|
|
|
#ifdef DISABLE_ORC
|
|
// update tap weights (filter learning)
|
|
int i;
|
|
for (i = 0; i < NLMS_LEN; i += 2) {
|
|
// optimize: partial loop unrolling
|
|
a->w[i] += mikro_ef * a->xf[i + a->j];
|
|
a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
|
|
}
|
|
#else
|
|
update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN);
|
|
#endif
|
|
}
|
|
|
|
if (--(a->j) < 0) {
|
|
// optimize: decrease number of memory copies
|
|
a->j = NLMS_EXT;
|
|
memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
|
|
memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
|
|
}
|
|
|
|
// Saturation
|
|
if (e > MAXPCM) {
|
|
return MAXPCM;
|
|
} else if (e < -MAXPCM) {
|
|
return -MAXPCM;
|
|
} else {
|
|
return e;
|
|
}
|
|
}
|
|
|
|
|
|
int AEC_doAEC(AEC *a, int d_, int x_)
|
|
{
|
|
REAL d = (REAL) d_;
|
|
REAL x = (REAL) x_;
|
|
|
|
// Mic Highpass Filter - to remove DC
|
|
d = IIR_HP_highpass(a->acMic, d);
|
|
|
|
// Mic Highpass Filter - cut-off below 300Hz
|
|
d = FIR_HP_300Hz_highpass(a->cutoff, d);
|
|
|
|
// Amplify, for e.g. Soundcards with -6dB max. volume
|
|
d *= a->gain;
|
|
|
|
// Spk Highpass Filter - to remove DC
|
|
x = IIR_HP_highpass(a->acSpk, x);
|
|
|
|
// Double Talk Detector
|
|
a->stepsize = AEC_dtd(a, d, x);
|
|
|
|
// Leaky (ageing of vector w)
|
|
AEC_leaky(a);
|
|
|
|
// Acoustic Echo Cancellation
|
|
d = AEC_nlms_pw(a, d, x, a->stepsize);
|
|
|
|
#if 0
|
|
if (fdwdisplay >= 0) {
|
|
if (++dumpcnt >= (WIDEB*RATE/10)) {
|
|
// wdisplay creates 10 dumps per seconds = large CPU load!
|
|
dumpcnt = 0;
|
|
write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
|
|
// we don't check return value. This is not production quality!!!
|
|
memset(ws, 0, sizeof(ws));
|
|
} else {
|
|
int i;
|
|
for (i = 0; i < DUMP_LEN; i += 2) {
|
|
// optimize: partial loop unrolling
|
|
ws[i] += w[i];
|
|
ws[i + 1] += w[i + 1];
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return (int) d;
|
|
}
|