pulseaudio/src/modules/echo-cancel/webrtc.cc
Arun Raghavan 23ce9a4f79 echo-cancel: Plug in WebRTC drift compensation
This adds the ability for echo cancellers to provide their own drift
compensation, and hooks in the appropriate bits to implement this in the
WebRTC canceller.

We do this by introducing an alternative model for the canceller. So
far, the core engine just provided a run() method which was given
blocksize-sized chunks of playback and record samples. The new model has
the engine provide play() and record() methods that can (in theory) be
called by the playback and capture threads. The latter would actually do
the processing required.

In addition to this a set_drift() method may be provided by the
implementation. PA will provide periodic samples of the drift to the
engine. These values need to be aggregated and processed over some time,
since the point values vary quite a bit (but generally fit a linear
regression reasonably accurately). At some point of time, we might move
the actual drift calculation into PA and change the semantics of this
function.

NOTE: This needs further testing before being deemed ready for wider use.
2011-11-01 18:20:32 +05:30

272 lines
9.2 KiB
C++

/***
This file is part of PulseAudio.
Copyright 2011 Collabora Ltd.
Contributor: Arun Raghavan <arun.raghavan@collabora.co.uk>
PulseAudio is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License,
or (at your option) any later version.
PulseAudio is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with PulseAudio; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA.
***/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <pulse/cdecl.h>
PA_C_DECL_BEGIN
#include <pulsecore/core-util.h>
#include <pulsecore/modargs.h>
#include <pulse/timeval.h>
#include "echo-cancel.h"
PA_C_DECL_END
#include <audio_processing.h>
#include <module_common_types.h>
/* Length of one processing block; the WebRTC AudioProcessing engine works on
 * 10 ms chunks. */
#define BLOCK_SIZE_US 10000
/* Default values for the submodule arguments parsed in pa_webrtc_ec_init(). */
#define DEFAULT_HIGH_PASS_FILTER TRUE
#define DEFAULT_NOISE_SUPPRESSION TRUE
#define DEFAULT_ANALOG_GAIN_CONTROL FALSE
#define DEFAULT_DIGITAL_GAIN_CONTROL TRUE
#define DEFAULT_MOBILE FALSE
#define DEFAULT_ROUTING_MODE "speakerphone"
#define DEFAULT_COMFORT_NOISE TRUE
#define DEFAULT_DRIFT_COMPENSATION FALSE
/* Submodule arguments accepted by this canceller implementation; the list is
 * NULL-terminated as required by pa_modargs_new(). */
static const char* const valid_modargs[] = {
    "high_pass_filter",
    "noise_suppression",
    "analog_gain_control",
    "digital_gain_control",
    "mobile",
    "routing_mode",
    "comfort_noise",
    "drift_compensation",
    NULL
};
/* Translate the "routing_mode" module argument into the corresponding
 * webrtc::EchoControlMobile::RoutingMode value. Returns -1 if the string
 * matches no known mode. */
static int routing_mode_from_string(const char *rmode) {
    static const struct {
        const char *name;
        int mode;
    } mode_table[] = {
        { "quiet-earpiece-or-headset", webrtc::EchoControlMobile::kQuietEarpieceOrHeadset },
        { "earpiece",                  webrtc::EchoControlMobile::kEarpiece },
        { "loud-earpiece",             webrtc::EchoControlMobile::kLoudEarpiece },
        { "speakerphone",              webrtc::EchoControlMobile::kSpeakerphone },
        { "loud-speakerphone",         webrtc::EchoControlMobile::kLoudSpeakerphone },
    };

    for (unsigned i = 0; i < sizeof(mode_table) / sizeof(mode_table[0]); i++) {
        if (pa_streq(rmode, mode_table[i].name))
            return mode_table[i].mode;
    }

    return -1;
}
/* Parse the submodule arguments, configure a webrtc::AudioProcessing instance
 * accordingly, and store it (plus the negotiated sample spec and block size)
 * in ec->params.priv.webrtc.
 *
 * On success, source/sink specs and maps are forced to S16NE with identical
 * channel layouts, *blocksize is set to one 10 ms block in bytes, and TRUE is
 * returned. On any parse or validation failure, everything is cleaned up and
 * FALSE is returned. */
pa_bool_t pa_webrtc_ec_init(pa_core *c, pa_echo_canceller *ec,
                            pa_sample_spec *source_ss, pa_channel_map *source_map,
                            pa_sample_spec *sink_ss, pa_channel_map *sink_map,
                            uint32_t *blocksize, const char *args)
{
    webrtc::AudioProcessing *apm = NULL;
    pa_bool_t hpf, ns, agc, dgc, mobile;
    /* cn and rm are only parsed in mobile mode; initialize them here so the
     * gotos below don't jump over uninitialized reads (and to silence
     * -Wmaybe-uninitialized). */
    pa_bool_t cn = DEFAULT_COMFORT_NOISE;
    int rm = -1;
    pa_modargs *ma;

    if (!(ma = pa_modargs_new(args, valid_modargs))) {
        pa_log("Failed to parse submodule arguments.");
        goto fail;
    }

    hpf = DEFAULT_HIGH_PASS_FILTER;
    if (pa_modargs_get_value_boolean(ma, "high_pass_filter", &hpf) < 0) {
        pa_log("Failed to parse high_pass_filter value");
        goto fail;
    }

    ns = DEFAULT_NOISE_SUPPRESSION;
    if (pa_modargs_get_value_boolean(ma, "noise_suppression", &ns) < 0) {
        pa_log("Failed to parse noise_suppression value");
        goto fail;
    }

    agc = DEFAULT_ANALOG_GAIN_CONTROL;
    if (pa_modargs_get_value_boolean(ma, "analog_gain_control", &agc) < 0) {
        pa_log("Failed to parse analog_gain_control value");
        goto fail;
    }

    dgc = DEFAULT_DIGITAL_GAIN_CONTROL;
    /* Fixed: this previously re-parsed "analog_gain_control" into dgc, so the
     * digital_gain_control modarg was silently ignored. */
    if (pa_modargs_get_value_boolean(ma, "digital_gain_control", &dgc) < 0) {
        pa_log("Failed to parse digital_gain_control value");
        goto fail;
    }

    if (agc && dgc) {
        pa_log("You must pick only one between analog and digital gain control");
        goto fail;
    }

    mobile = DEFAULT_MOBILE;
    if (pa_modargs_get_value_boolean(ma, "mobile", &mobile) < 0) {
        pa_log("Failed to parse mobile value");
        goto fail;
    }

    ec->params.drift_compensation = DEFAULT_DRIFT_COMPENSATION;
    if (pa_modargs_get_value_boolean(ma, "drift_compensation", &ec->params.drift_compensation) < 0) {
        pa_log("Failed to parse drift_compensation value");
        goto fail;
    }

    if (mobile) {
        /* The mobile (AECM) path does not support drift compensation, and is
         * the only path that honours routing_mode/comfort_noise. */
        if (ec->params.drift_compensation) {
            pa_log("Can't use drift_compensation in mobile mode");
            goto fail;
        }

        if ((rm = routing_mode_from_string(pa_modargs_get_value(ma, "routing_mode", DEFAULT_ROUTING_MODE))) < 0) {
            pa_log("Failed to parse routing_mode value");
            goto fail;
        }

        if (pa_modargs_get_value_boolean(ma, "comfort_noise", &cn) < 0) {
            pa_log("Failed to parse cn value");
            goto fail;
        }
    } else {
        if (pa_modargs_get_value(ma, "comfort_noise", NULL) || pa_modargs_get_value(ma, "routing_mode", NULL)) {
            pa_log("The routing_mode and comfort_noise options are only valid with mobile=true");
            goto fail;
        }
    }

    apm = webrtc::AudioProcessing::Create(0);

    /* The engine works on signed 16-bit native-endian samples, and source and
     * sink must share rate/channels. */
    source_ss->format = PA_SAMPLE_S16NE;
    *sink_ss = *source_ss;
    /* FIXME: the implementation actually allows a different number of
     * source/sink channels. Do we want to support that? */
    *sink_map = *source_map;

    apm->set_sample_rate_hz(source_ss->rate);
    apm->set_num_channels(source_ss->channels, source_ss->channels);
    apm->set_num_reverse_channels(sink_ss->channels);

    if (hpf)
        apm->high_pass_filter()->Enable(true);

    if (!mobile) {
        if (ec->params.drift_compensation) {
            apm->echo_cancellation()->set_device_sample_rate_hz(source_ss->rate);
            apm->echo_cancellation()->enable_drift_compensation(true);
        } else {
            apm->echo_cancellation()->enable_drift_compensation(false);
        }
        apm->echo_cancellation()->Enable(true);
    } else {
        apm->echo_control_mobile()->set_routing_mode(static_cast<webrtc::EchoControlMobile::RoutingMode>(rm));
        apm->echo_control_mobile()->enable_comfort_noise(cn);
        apm->echo_control_mobile()->Enable(true);
    }

    if (ns) {
        apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kHigh);
        apm->noise_suppression()->Enable(true);
    }

    if (agc || dgc) {
        if (mobile && rm <= webrtc::EchoControlMobile::kEarpiece) {
            /* Maybe this should be a knob, but we've got a lot of knobs already */
            apm->gain_control()->set_mode(webrtc::GainControl::kFixedDigital);
        } else if (dgc) {
            apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveDigital);
        } else {
            /* FIXME: Hook up for analog AGC */
            pa_log("Analog gain control isn't implemented yet -- using digital gain control.");
            apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveDigital);
        }
    }

    apm->voice_detection()->Enable(true);

    ec->params.priv.webrtc.apm = apm;
    ec->params.priv.webrtc.sample_spec = *source_ss;
    /* One 10 ms block, expressed in bytes of the negotiated sample spec. */
    ec->params.priv.webrtc.blocksize = *blocksize = (uint64_t)pa_bytes_per_second(source_ss) * BLOCK_SIZE_US / PA_USEC_PER_SEC;

    pa_modargs_free(ma);
    return TRUE;

fail:
    if (ma)
        pa_modargs_free(ma);
    if (apm)
        webrtc::AudioProcessing::Destroy(apm);

    return FALSE;
}
/* Hand one block (ec->params.priv.webrtc.blocksize bytes) of playback audio
 * to the canceller's reverse stream for analysis. */
void pa_webrtc_ec_play(pa_echo_canceller *ec, const uint8_t *play) {
    const pa_sample_spec *spec = &ec->params.priv.webrtc.sample_spec;
    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.priv.webrtc.apm;
    webrtc::AudioFrame frame;

    /* Describe the frame to match our negotiated spec and block size. */
    frame._audioChannel = spec->channels;
    frame._frequencyInHz = spec->rate;
    frame._payloadDataLengthInSamples = ec->params.priv.webrtc.blocksize / pa_frame_size(spec);
    memcpy(frame._payloadData, play, ec->params.priv.webrtc.blocksize);

    apm->AnalyzeReverseStream(&frame);
}
/* Run one block of captured audio through the canceller and write the
 * processed samples to out. rec and out are both
 * ec->params.priv.webrtc.blocksize bytes. */
void pa_webrtc_ec_record(pa_echo_canceller *ec, const uint8_t *rec, uint8_t *out) {
    const pa_sample_spec *spec = &ec->params.priv.webrtc.sample_spec;
    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.priv.webrtc.apm;
    webrtc::AudioFrame frame;

    frame._audioChannel = spec->channels;
    frame._frequencyInHz = spec->rate;
    frame._payloadDataLengthInSamples = ec->params.priv.webrtc.blocksize / pa_frame_size(spec);
    memcpy(frame._payloadData, rec, ec->params.priv.webrtc.blocksize);

    /* NOTE(review): delay is hard-coded to 0 here — presumably a placeholder
     * until real stream-delay reporting is hooked up; confirm. */
    apm->set_stream_delay_ms(0);
    apm->ProcessStream(&frame);

    memcpy(out, frame._payloadData, ec->params.priv.webrtc.blocksize);
}
/* Report the measured clock drift to the canceller, converted from a
 * per-block fraction into a sample count for one block. */
void pa_webrtc_ec_set_drift(pa_echo_canceller *ec, float drift) {
    const pa_sample_spec *ss = &ec->params.priv.webrtc.sample_spec;
    webrtc::AudioProcessing *apm = (webrtc::AudioProcessing*)ec->params.priv.webrtc.apm;

    /* drift * blocksize is computed in float first, then divided by the frame
     * size — same evaluation order as the original expression. */
    float drift_bytes = drift * ec->params.priv.webrtc.blocksize;
    apm->echo_cancellation()->set_stream_drift_samples(drift_bytes / pa_frame_size(ss));
}
/* Legacy single-call entry point: feed the playback block to the reverse
 * stream, then process the capture block into out. */
void pa_webrtc_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out) {
    pa_webrtc_ec_play(ec, play);
    pa_webrtc_ec_record(ec, rec, out);
}
/* Tear down the AudioProcessing instance, if one was created. Safe to call
 * more than once: the pointer is cleared after destruction. */
void pa_webrtc_ec_done(pa_echo_canceller *ec) {
    if (!ec->params.priv.webrtc.apm)
        return;

    webrtc::AudioProcessing::Destroy((webrtc::AudioProcessing*)ec->params.priv.webrtc.apm);
    ec->params.priv.webrtc.apm = NULL;
}