From 6a4e9d6f2c3ab05817db254074e1a525e72f226e Mon Sep 17 00:00:00 2001
From: Eniz Vukovic <eniz_vukovic@hotmail.com>
Date: Sun, 31 Jan 2016 18:45:46 +0100
Subject: [PATCH] Fix example and make echo canceller adjust delay dynamically

---
 README              |  11 +-
 filter_audio.c      |   6 +-
 test/playback_mic.c | 262 ++++++++++++++++++++++++++++----------------
 3 files changed, 182 insertions(+), 97 deletions(-)

diff --git a/README b/README
index 0f660d3..93eaaa5 100644
--- a/README
+++ b/README
@@ -4,5 +4,12 @@ Build and install using make (`sudo make install`).
 
 My code in here is licenced under the same BSD 3-clause license as the code I took from: https://code.google.com/p/webrtc/
 
-To build the test program (you need openal):
-gcc -g3 -Wall -o playback_mic test/playback_mic.c -lopenal -I /usr/include/AL/ *.c agc/*.c ns/*.c aec/*.c other/*.c zam/*.c vad/*.c -lpthread -lm
+To build the test program, you need portaudio (version 19 from their website) and libsndfile (you can get it from your distro repositories):
+
+gcc -g3 -Wall -o playback_mic test/playback_mic.c *.c agc/*.c ns/*.c aec/*.c other/*.c zam/*.c vad/*.c -lpthread -lm -lportaudio -lsndfile
+
+To run the test you will need a sample input file in .wav format (like this: https://www.opus-codec.org/examples/samples/speech_orig.wav).
+The program will exit after the file is played. You should also try to talk while the file is playing to see how it removes the echoes but
+not your voice.
+
+./playback_mic speech_orig.wav [output.wav] # if no output file is given, the default is echoes_removed.wav
diff --git a/filter_audio.c b/filter_audio.c
index 8c1d0b7..d345f88 100644
--- a/filter_audio.c
+++ b/filter_audio.c
@@ -5,6 +5,7 @@
 #include "agc/include/gain_control.h"
 #include "ns/include/noise_suppression_x.h"
 #include "aec/include/echo_cancellation.h"
+#include "aec/aec_core.h"
 #include "vad/include/webrtc_vad.h"
 #include "other/signal_processing_library.h"
 #include "other/speex_resampler.h"
@@ -119,6 +120,9 @@ Filter_Audio *new_filter_audio(uint32_t fs)
         return NULL;
     }
 
+    WebRtcAec_enable_delay_correction(WebRtcAec_aec_core(f_a->echo_cancellation), kAecTrue);
+    WebRtcAec_enable_reported_delay(WebRtcAec_aec_core(f_a->echo_cancellation), kAecTrue);
+    
     WebRtcAgc_config_t gain_config;
 
     gain_config.targetLevelDbfs = 1;
@@ -284,13 +288,11 @@ int set_echo_delay_ms(Filter_Audio *f_a, int16_t msInSndCardBuf)
 
     f_a->msInSndCardBuf = msInSndCardBuf;
 
-
     return 0;
 }
 
 int filter_audio(Filter_Audio *f_a, int16_t *data, unsigned int samples)
 {
-
     if (!f_a) {
         return -1;
     }
diff --git a/test/playback_mic.c b/test/playback_mic.c
index 313d77c..bd702f1 100644
--- a/test/playback_mic.c
+++ b/test/playback_mic.c
@@ -1,106 +1,182 @@
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <string.h> 
-#include <unistd.h>
-#include <math.h>
-
-#include <AL/al.h>
-#include <AL/alc.h> 
-
-#include "../filter_audio.h"
-
-
-static void sourceplaybuffer(ALuint source, int16_t *data, int samples, _Bool channels, unsigned int sample_rate)
+/**
+ *  Licence: 3-clause BSD
+ */
+
+/* Playing and recording audio data to and from audio devices */
+#include <portaudio.h>
+/* Reading and writing .wav files */
+#include <sndfile.h>
+/* Audio filtering */
+#include <filter_audio.h>
+
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include <string.h>
+#include <stdbool.h>
+
+Filter_Audio *filteraudio;
+bool filterenabled = true;
+
+/* Input callback. The 'input' buffer contains 'fcount' frames captured from the input device.
+ * We filter that data and write it to our output file. (You can think of the input and output
+ * files as the peer's audio feed.)
+ */
+int in_cb(const void *input, void *o, unsigned long fcount, const PaStreamCallbackTimeInfo* c, PaStreamCallbackFlags f, void *user_data)
 {
-    if(!channels || channels > 2) {
-        return;
-    }
-
-    ALuint bufid;
-    ALint processed = 0, queued = 16;
-    alGetSourcei(source, AL_BUFFERS_PROCESSED, &processed);
-    alGetSourcei(source, AL_BUFFERS_QUEUED, &queued);
-    alSourcei(source, AL_LOOPING, AL_FALSE);
-
-    if(processed) {
-        ALuint bufids[processed];
-        alSourceUnqueueBuffers(source, processed, bufids);
-        alDeleteBuffers(processed - 1, bufids + 1);
-        bufid = bufids[0];
-    } else if(queued < 16) {
-        alGenBuffers(1, &bufid);
-    } else {
-        printf("dropped audio frame\n");
-        return;
-    }
-
-    alBufferData(bufid, (channels == 1) ? AL_FORMAT_MONO16 : AL_FORMAT_STEREO16, data, samples * 2 * channels, sample_rate);
-    alSourceQueueBuffers(source, 1, &bufid);
-
-    ALint state;
-    alGetSourcei(source, AL_SOURCE_STATE, &state);
-    if(state != AL_PLAYING) {
-        alSourcePlay(source);
-        printf("Starting source\n");
-    }
+    (void) o;
+    (void) c;
+    (void) f;
+    
+    SNDFILE *af_handle = user_data; // af_handle_out
+    
+    /* We copy data to the mutable buffer */
+    int16_t PCM [fcount]; /* WARNING I'm not quite sure how this works with 2 channels but you'd copy the buffer anyway */
+    memcpy (PCM, input, sizeof(PCM));
+    
+    /* Now we filter it. After the process is completed you'll get buffer filled with echo-less PCM data. */
+    if (filterenabled && filter_audio(filteraudio, PCM, fcount) == -1)
+        puts("Filtering failed");
+    
+    /* Write filtered data to the file. (or send it to the peer) */
+    sf_write_short(af_handle, PCM, fcount);
+    
+    return 0;
 }
-
-int main()
+/* Output callback. The 'output' buffer has space for 'fcount' frames to be played by the output device.
+ * We have to feed filter_audio with that data, as it is the reference for which sounds should be treated as echo in the input.
+ * We read the output data from the input file given to us as the first argument. (You can think of the input and
+ * output files as the peer's audio feed.)
+ */
+int out_cb(const void *i, void *output, unsigned long fcount, const PaStreamCallbackTimeInfo* c, PaStreamCallbackFlags f, void *user_data)
 {
-    unsigned int sample_rate = 48000;
-    unsigned int samples_perframe = sample_rate/50;
-    _Bool filter = 1;
-
-    const char *in_device_list = alcGetString(NULL, ALC_CAPTURE_DEVICE_SPECIFIER);
+    (void) i;
+    (void) c;
+    (void) f;
+    
+    SNDFILE *af_handle = user_data; // af_handle_in
+    
+    /* Read PCM from the file */
+    int64_t count = sf_read_short(af_handle, output, fcount);
+
+    /* If some frames are read, pass them to filter_audio */
+    if (filterenabled && count > 0)
+        pass_audio_output(filteraudio, output, count);
+    
+    return 0;
+}
 
-    const char *temp_d = in_device_list;
-    while (*temp_d) {
-        printf("%s\n", temp_d);
-        temp_d += strlen(temp_d) + 1;
+int main (int argc, char** argv)
+{
+    if (argc < 2) {
+        puts("Required input .wav file path");
+        return 1;
     }
-
-    ALCdevice *device_in = alcCaptureOpenDevice(in_device_list, sample_rate, AL_FORMAT_MONO16, samples_perframe);
-    if (!device_in) {
-        printf("open in dev failed\n");
-        return 0;
+    
+    const char* output_path = "echoes_removed.wav";
+    
+    if (argc > 2)
+        output_path = argv[2];
+    
+    Pa_Initialize();
+    
+    /* list audio IO devices */
+    for (int i = 0; i < Pa_GetDeviceCount(); i ++)
+        puts(Pa_GetDeviceInfo(i)->name);
+    
+    SNDFILE *af_handle_in, *af_handle_out;
+    SF_INFO af_info_in, af_info_out;
+
+    /* Open input audio file */
+    af_handle_in = sf_open(argv[1], SFM_READ, &af_info_in);
+
+    if (af_handle_in == NULL) {
+        puts("Failed to open input file");
+        return 1;
     }
 
-    const char *out_device_list = alcGetString(NULL, ALC_ALL_DEVICES_SPECIFIER);
-    ALCdevice *device_out = alcOpenDevice(out_device_list);
+    /* Open output audio file */
+    af_info_out = af_info_in;
+    af_handle_out = sf_open(output_path, SFM_WRITE, &af_info_out);
 
-    ALCcontext *context = alcCreateContext(device_out, NULL);
-    if(!alcMakeContextCurrent(context)) {
-        printf("alcMakeContextCurrent() failed\n");
-        alcCloseDevice(device_out);
-        return 0;
+    if (af_handle_out == NULL) {
+        puts("Failed to open output file");
+        return 1;
     }
 
-    Filter_Audio *f_a = new_filter_audio(sample_rate);
-
-    ALuint source;
-    alGenSources(1, &source);
-    alcCaptureStart(device_in);
-
-    printf("Starting\n");
-
-    while (1) {
-        ALint samples;
-        alcGetIntegerv(device_in, ALC_CAPTURE_SAMPLES, sizeof(samples), &samples);
-        //printf("%u\n", samples);
-        if(samples >= samples_perframe) {
-            int16_t buf[samples_perframe];
-            alcCaptureSamples(device_in, buf, samples_perframe);
-            if (filter && filter_audio(f_a, buf, samples_perframe) == -1) {
-                printf("filter_audio fail\n");
-                return 0;
-            }
-
-            sourceplaybuffer(source, buf, samples_perframe, 1, sample_rate);
-        }
-
-        usleep(1000);
+    /* Prepare filter_audio */
+    filteraudio = new_filter_audio(af_info_in.samplerate);
+    
+    /* Prepare portaudio streams */
+    PaStream *adout = NULL;
+    PaStream *adin = NULL;
+    
+    /* Choose devices */
+    int in_dev = Pa_GetDefaultInputDevice();
+    int out_dev = Pa_GetDefaultOutputDevice();
+    
+    /* High latency works the best but low latency will also work */
+    double inlat = Pa_GetDeviceInfo(in_dev)->defaultHighInputLatency;
+    double outlat = Pa_GetDeviceInfo(out_dev)->defaultHighOutputLatency;
+    
+    PaStreamParameters output;
+    output.device = out_dev;
+    output.channelCount = af_info_in.channels;
+    output.sampleFormat = paInt16;
+    output.suggestedLatency = outlat;
+    output.hostApiSpecificStreamInfo = NULL;
+
+    PaStreamParameters input;
+    input.device = in_dev;
+    input.channelCount = af_info_in.channels;
+    input.sampleFormat = paInt16;
+    input.suggestedLatency = inlat;
+    input.hostApiSpecificStreamInfo = NULL;
+    
+    int frame_duration = 20;
+    int frame_size = (af_info_in.samplerate * frame_duration / 1000) * af_info_in.channels;
+    
+    PaError err = Pa_OpenStream(&adout, NULL, &output, af_info_in.samplerate, frame_size, paNoFlag, out_cb, af_handle_in);
+    assert(err == paNoError);
+
+    err = Pa_OpenStream(&adin, &input, NULL, af_info_in.samplerate, frame_size, paNoFlag, in_cb, af_handle_out);
+    assert(err == paNoError);
+    
+    /* It's essential that echo delay is set correctly; it's the most important part of the
+     * echo cancellation process. If the delay is not set to the acceptable values the AEC
+     * will not be able to recover. Given that it's not that easy to figure out the exact
+     * time it takes for a signal to get from Output to the Input, setting it to the suggested
+     * input device latency + frame duration works really well and gives the filter the ability
+     * to adjust it internally after some time (usually up to 6-7 seconds in my tests when
+     * the error is about 20%).
+     */
+    set_echo_delay_ms(filteraudio, (inlat * 1000) + frame_duration);
+    /*
+     */
+    
+    /* Start the streams */
+    err = Pa_StartStream(adout);
+    assert(err == paNoError);
+    
+    err = Pa_StartStream(adin);
+    assert(err == paNoError);
+    
+    /* In case you want to repeat, set reps to > 1 */
+    int reps = 1;
+    for (int i = 0; i < reps; i ++)
+    {
+        /* Sleep until the whole file is read */
+        Pa_Sleep((af_info_in.frames * 1000) / (af_info_in.samplerate + 2));
+        sf_seek(af_handle_in, 0, SEEK_SET);
     }
-
+    
+    /* Clear everything */
+    Pa_StopStream(adout);
+    Pa_StopStream(adin);
+    kill_filter_audio(filteraudio);
+    sf_close(af_handle_in);
+    sf_close(af_handle_out);
+    Pa_Terminate();
+    
     return 0;
 }
-