1 /* Copyright (C) Jean-Marc Valin
4 Echo cancelling based on the MDF algorithm described in:
6 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
7 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are
14 1. Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 3. The name of the author may not be used to endorse or promote products
22 derived from this software without specific prior written permission.
24 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
28 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
44 #include "speex/speex_echo.h"
49 #define M_PI 3.14159265358979323846
55 #define min(a,b) ((a)<(b) ? (a) : (b))
56 #define max(a,b) ((a)>(b) ? (a) : (b))
59 /** Compute inner product of two real vectors */
60 static inline float inner_prod(float *x, float *y, int N)
69 /** Compute power spectrum of a half-complex (packed) vector */
70 static inline void power_spectrum(float *X, float *ps, int N)
74 for (i=1,j=1;i<N-1;i+=2,j++)
76 ps[j] = X[i]*X[i] + X[i+1]*X[i+1];
81 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
82 static inline void spectral_mul_accum(float *X, float *Y, float *acc, int N)
88 acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
89 acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
94 #if 0 // unused so removing a warning
95 /** Compute cross-power spectrum of a half-complex (packed) vector with conjugate */
96 static inline void spectral_mul_conj(float *X, float *Y, float *prod, int N)
102 prod[i] = (X[i]*Y[i] + X[i+1]*Y[i+1]);
103 prod[i+1] = (-X[i+1]*Y[i] + X[i]*Y[i+1]);
109 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
110 static inline void weighted_spectral_mul_conj(float *w, float *X, float *Y, float *prod, int N)
113 prod[0] = w[0]*X[0]*Y[0];
114 for (i=1,j=1;i<N-1;i+=2,j++)
116 prod[i] = w[j]*(X[i]*Y[i] + X[i+1]*Y[i+1]);
117 prod[i+1] = w[j]*(-X[i+1]*Y[i] + X[i]*Y[i+1]);
119 prod[i] = w[j]*X[i]*Y[i];
123 /** Creates a new echo canceller state */
124 SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
127 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
129 st->frame_size = frame_size;
130 st->window_size = 2*frame_size;
132 M = st->M = (filter_length+st->frame_size-1)/frame_size;
134 st->adapt_rate = .01f;
140 st->fft_lookup = (struct drft_lookup*)speex_alloc(sizeof(struct drft_lookup));
141 spx_drft_init(st->fft_lookup, N);
143 st->x = (float*)speex_alloc(N*sizeof(float));
144 st->d = (float*)speex_alloc(N*sizeof(float));
145 st->y = (float*)speex_alloc(N*sizeof(float));
146 st->y2 = (float*)speex_alloc(N*sizeof(float));
147 st->Yps = (float*)speex_alloc(N*sizeof(float));
148 st->last_y = (float*)speex_alloc(N*sizeof(float));
149 st->Yf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
150 st->Rf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
151 st->Xf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
152 st->fratio = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
153 st->regul = (float*)speex_alloc(N*sizeof(float));
155 st->X = (float*)speex_alloc(M*N*sizeof(float));
156 st->D = (float*)speex_alloc(N*sizeof(float));
157 st->Y = (float*)speex_alloc(N*sizeof(float));
158 st->Y2 = (float*)speex_alloc(N*sizeof(float));
159 st->E = (float*)speex_alloc(N*sizeof(float));
160 st->W = (float*)speex_alloc(M*N*sizeof(float));
161 st->PHI = (float*)speex_alloc(M*N*sizeof(float));
162 st->power = (float*)speex_alloc((frame_size+1)*sizeof(float));
163 st->power_1 = (float*)speex_alloc((frame_size+1)*sizeof(float));
164 st->grad = (float*)speex_alloc(N*M*sizeof(float));
168 st->W[i] = st->PHI[i] = 0;
171 st->regul[0] = (.01+(10.)/((4.)*(4.)))/M;
172 for (i=1,j=1;i<N-1;i+=2,j++)
174 st->regul[i] = .01+((10.)/((j+4.)*(j+4.)))/M;
175 st->regul[i+1] = .01+((10.)/((j+4.)*(j+4.)))/M;
177 st->regul[i] = .01+((10.)/((j+4.)*(j+4.)))/M;
183 /** Resets echo canceller state */
184 void speex_echo_state_reset(SpeexEchoState *st)
188 st->adapt_rate = .01f;
196 for (i=0;i<=st->frame_size;i++)
200 st->adapt_rate = .01f;
208 /** Destroys an echo canceller state */
209 void speex_echo_state_destroy(SpeexEchoState *st)
211 spx_drft_clear(st->fft_lookup);
212 speex_free(st->fft_lookup);
216 speex_free(st->last_y);
221 speex_free(st->fratio);
222 speex_free(st->regul);
230 speex_free(st->power);
231 speex_free(st->power_1);
232 speex_free(st->grad);
238 /** Performs echo cancellation on a frame */
239 void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, float *Yout)
247 float Srr=0,Syy=0,Sey=0,See=0,Sxx=0;
250 leak_estimate = .1+(.9/(1+2*st->sum_adapt));
257 /* Copy input data to buffer */
258 for (i=0;i<st->frame_size;i++)
260 st->x[i] = st->x[i+st->frame_size];
261 st->x[i+st->frame_size] = echo[i];
263 st->d[i] = st->d[i+st->frame_size];
264 st->d[i+st->frame_size] = ref[i];
267 /* Shift memory: this could be optimized eventually*/
268 for (i=0;i<N*(M-1);i++)
271 /* Copy new echo frame */
273 st->X[(M-1)*N+i]=st->x[i];
275 /* Convert x (echo input) to frequency domain */
276 spx_drft_forward(st->fft_lookup, &st->X[(M-1)*N]);
278 /* Compute filter response Y */
282 spectral_mul_accum(&st->X[j*N], &st->W[j*N], st->Y, N);
284 /* Convert Y (filter response) to time domain */
287 spx_drft_backward(st->fft_lookup, st->y);
291 /* Transform d (reference signal) to frequency domain */
294 spx_drft_forward(st->fft_lookup, st->D);
296 /* Compute error signal (signal with echo removed) */
297 for (i=0;i<st->frame_size;i++)
300 tmp_out = (float)ref[i] - st->y[i+st->frame_size];
303 st->E[i+st->frame_size] = tmp_out;
308 else if (tmp_out<-32768)
313 /* This bit of code provides faster adaptation by doing a projection of the previous gradient on the
319 Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
323 spectral_mul_accum(&st->X[j*N], &st->PHI[j*N], st->Y2, N);
325 st->y2[i] = st->Y2[i];
326 spx_drft_backward(st->fft_lookup, st->y2);
329 Sge = inner_prod(st->y2+st->frame_size, st->E+st->frame_size, st->frame_size);
330 Sgg = inner_prod(st->y2+st->frame_size, st->y2+st->frame_size, st->frame_size);
331 /* Compute projection gain */
332 gain = Sge/(N+.03*Syy+Sgg);
338 /* Apply gain to weights, echo estimates, output */
340 st->Y[i] += gain*st->Y2[i];
341 for (i=0;i<st->frame_size;i++)
343 st->y[i+st->frame_size] += gain*st->y2[i+st->frame_size];
344 st->E[i+st->frame_size] -= gain*st->y2[i+st->frame_size];
347 st->W[i] += gain*st->PHI[i];
350 /* Compute power spectrum of output (D-Y) and filter response (Y) */
352 st->D[i] -= st->Y[i];
353 power_spectrum(st->D, st->Rf, N);
354 power_spectrum(st->Y, st->Yf, N);
356 /* Compute frequency-domain adaptation mask */
357 for (j=0;j<=st->frame_size;j++)
360 r = leak_estimate*st->Yf[j] / (1+st->Rf[j]);
366 /* Compute a bunch of correlations */
367 //Sry = inner_prod(st->y+st->frame_size, st->d+st->frame_size, st->frame_size);
368 Sey = inner_prod(st->y+st->frame_size, st->E+st->frame_size, st->frame_size);
369 See = inner_prod(st->E+st->frame_size, st->E+st->frame_size, st->frame_size);
370 Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
371 Srr = inner_prod(st->d+st->frame_size, st->d+st->frame_size, st->frame_size);
372 Sxx = inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
374 /* Compute smoothed cross-correlation and energy */
375 st->Sey = .98*st->Sey + .02*Sey;
376 st->Syy = .98*st->Syy + .02*Syy;
377 st->See = .98*st->See + .02*See;
379 /* Check if filter is completely mis-adapted (if so, reset filter) */
380 if (st->Sey/(1+st->Syy + .01*st->See) < -1)
382 /*fprintf (stderr, "reset at %d\n", st->cancel_count);*/
383 speex_echo_state_reset(st);
388 ESR = leak_estimate*Syy / (1+See);
392 /* If over-cancellation (creating echo with 180 phase) damp filter */
393 if (st->Sey/(1+st->Syy) < -.1 && (ESR > .3))
398 /*fprintf (stderr, "corrected down\n");*/
402 /* If under-cancellation (leaving echo with 0 phase) scale filter up */
403 if (st->Sey/(1+st->Syy) > .1 && (ESR > .1 || SER < 10))
408 /*fprintf (stderr, "corrected up %d\n", st->cancel_count);*/
412 /* We consider that the filter is adapted if the following is true*/
413 if (ESR>.6 && st->sum_adapt > 1)
416 fprintf(stderr, "Adapted at %d %f\n", st->cancel_count, st->sum_adapt);*/
420 /* Update frequency-dependent energy ratio with the total energy ratio */
421 for (i=0;i<=st->frame_size;i++)
423 st->fratio[i] = (.2*ESR+.8*min(.005+ESR,st->fratio[i]));
428 st->adapt_rate = .95f/(2+M);
430 /* Temporary adaption rate if filter is not adapted correctly */
432 st->adapt_rate =.8/(2+M);
434 st->adapt_rate =.4/(2+M);
436 st->adapt_rate =.2/(2+M);
438 st->adapt_rate =.08/(2+M);
443 /* How much have we adapted so far? */
444 st->sum_adapt += st->adapt_rate;
446 /* Compute echo power in each frequency bin */
448 float ss = 1.0f/st->cancel_count;
451 power_spectrum(&st->X[(M-1)*N], st->Xf, N);
452 /* Smooth echo energy estimate over time */
453 for (j=0;j<=st->frame_size;j++)
454 st->power[j] = (1-ss)*st->power[j] + ss*st->Xf[j];
457 /* Combine adaptation rate to the the inverse energy estimate */
460 /* If filter is adapted, include the frequency-dependent ratio too */
461 for (i=0;i<=st->frame_size;i++)
462 st->power_1[i] = st->adapt_rate*st->fratio[i] /(1.f+st->power[i]);
464 for (i=0;i<=st->frame_size;i++)
465 st->power_1[i] = st->adapt_rate/(1.f+st->power[i]);
470 /* Convert error to frequency domain */
471 spx_drft_forward(st->fft_lookup, st->E);
473 /* Do some regularization (prevents problems when system is ill-conditoned) */
476 st->W[m*N+i] *= 1-st->regul[i]*ESR;
478 /* Compute weight gradient */
481 weighted_spectral_mul_conj(st->power_1, &st->X[j*N], st->E, st->PHI+N*j, N);
484 /* Gradient descent */
486 st->W[i] += st->PHI[i];
488 /* AUMDF weight constraint */
491 /* Remove the "if" to make this an MDF filter */
492 if (st->cancel_count%M == j)
494 spx_drft_backward(st->fft_lookup, &st->W[j*N]);
497 for (i=st->frame_size;i<N;i++)
501 spx_drft_forward(st->fft_lookup, &st->W[j*N]);
505 /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/
510 /* If the filter is adapted, take the filtered echo */
511 for (i=0;i<st->frame_size;i++)
512 st->last_y[i] = st->last_y[st->frame_size+i];
513 for (i=0;i<st->frame_size;i++)
514 st->last_y[st->frame_size+i] = st->y[st->frame_size+i];
516 /* If filter isn't adapted yet, all we can do is take the echo signal directly */
518 st->last_y[i] = st->x[i];
521 /* Apply hanning window (should pre-compute it)*/
523 st->Yps[i] = (.5-.5*cos(2*M_PI*i/N))*st->last_y[i];
525 /* Compute power spectrum of the echo */
526 spx_drft_forward(st->fft_lookup, st->Yps);
527 power_spectrum(st->Yps, st->Yps, N);
529 /* Estimate residual echo */
530 for (i=0;i<=st->frame_size;i++)
531 Yout[i] = 2*leak_estimate*st->Yps[i];