3rdparty/iaxclient/lib/libspeex/mdf.c

   1 /* Copyright (C) Jean-Marc Valin
   2
   3    File: speex_echo.c
   4    Echo cancelling based on the MDF algorithm described in:
   5
   6    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
   7    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
   8    February 1990.
   9
  10    Redistribution and use in source and binary forms, with or without
  11    modification, are permitted provided that the following conditions are
  12    met:
  13
  14    1. Redistributions of source code must retain the above copyright notice,
  15    this list of conditions and the following disclaimer.
  16
  17    2. Redistributions in binary form must reproduce the above copyright
  18    notice, this list of conditions and the following disclaimer in the
  19    documentation and/or other materials provided with the distribution.
  20
  21    3. The name of the author may not be used to endorse or promote products
  22    derived from this software without specific prior written permission.
  23
  24    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  25    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  27    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  28    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  29    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  32    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  34    POSSIBILITY OF SUCH DAMAGE.
  35 */
  36
  37 #ifdef HAVE_CONFIG_H
  38 #include "config.h"
  39 #endif
  40 #ifdef _MSC_VER
  41 #include "winpoop.h"
  42 #endif
  43 #include "misc.h"
  44 #include "speex/speex_echo.h"
  45 #include "smallft.h"
  46 #include <math.h>
  47
  48 #ifndef M_PI
  49 #define M_PI 3.14159265358979323846
  50 #endif
  51
  52 #define BETA .65
  53
  54 #ifndef min
  55 #define min(a,b) ((a)<(b) ? (a) : (b))
  56 #define max(a,b) ((a)>(b) ? (a) : (b))
  57 #endif
  58
  59 /** Compute inner product of two real vectors */
  60 static inline float inner_prod(float *x, float *y, int N)
  61 {
  62    int i;
  63    float ret=0;
  64    for (i=0;i<N;i++)
  65       ret += x[i]*y[i];
  66    return ret;
  67 }
  68
  69 /** Compute power spectrum of a half-complex (packed) vector */
  70 static inline void power_spectrum(float *X, float *ps, int N)
  71 {
  72    int i, j;
  73    ps[0]=X[0]*X[0];
  74    for (i=1,j=1;i<N-1;i+=2,j++)
  75    {
  76       ps[j] =  X[i]*X[i] + X[i+1]*X[i+1];
  77    }
  78    ps[j]=X[i]*X[i];
  79 }
  80
  81 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
  82 static inline void spectral_mul_accum(float *X, float *Y, float *acc, int N)
  83 {
  84    int i;
  85    acc[0] += X[0]*Y[0];
  86    for (i=1;i<N-1;i+=2)
  87    {
  88       acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
  89       acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
  90    }
  91    acc[i] += X[i]*Y[i];
  92 }
  93
  94 #if 0 // unused so removing a warning
  95 /** Compute cross-power spectrum of a half-complex (packed) vector with conjugate */
  96 static inline void spectral_mul_conj(float *X, float *Y, float *prod, int N)
  97 {
  98    int i;
  99    prod[0] = X[0]*Y[0];
 100    for (i=1;i<N-1;i+=2)
 101    {
 102       prod[i] = (X[i]*Y[i] + X[i+1]*Y[i+1]);
 103       prod[i+1] = (-X[i+1]*Y[i] + X[i]*Y[i+1]);
 104    }
 105    prod[i] = X[i]*Y[i];
 106 }
 107 #endif
 108
 109 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
 110 static inline void weighted_spectral_mul_conj(float *w, float *X, float *Y, float *prod, int N)
 111 {
 112    int i, j;
 113    prod[0] = w[0]*X[0]*Y[0];
 114    for (i=1,j=1;i<N-1;i+=2,j++)
 115    {
 116       prod[i] = w[j]*(X[i]*Y[i] + X[i+1]*Y[i+1]);
 117       prod[i+1] = w[j]*(-X[i+1]*Y[i] + X[i]*Y[i+1]);
 118    }
 119    prod[i] = w[j]*X[i]*Y[i];
 120 }
 121
 122
 123 /** Creates a new echo canceller state */
 124 SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
 125 {
 126    int i,j,N,M;
 127    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
 128
 129    st->frame_size = frame_size;
 130    st->window_size = 2*frame_size;
 131    N = st->window_size;
 132    M = st->M = (filter_length+st->frame_size-1)/frame_size;
 133    st->cancel_count=0;
 134    st->adapt_rate = .01f;
 135    st->sum_adapt = 0;
 136    st->Sey = 0;
 137    st->Syy = 0;
 138    st->See = 0;
 139
 140    st->fft_lookup = (struct drft_lookup*)speex_alloc(sizeof(struct drft_lookup));
 141    spx_drft_init(st->fft_lookup, N);
 142
 143    st->x = (float*)speex_alloc(N*sizeof(float));
 144    st->d = (float*)speex_alloc(N*sizeof(float));
 145    st->y = (float*)speex_alloc(N*sizeof(float));
 146    st->y2 = (float*)speex_alloc(N*sizeof(float));
 147    st->Yps = (float*)speex_alloc(N*sizeof(float));
 148    st->last_y = (float*)speex_alloc(N*sizeof(float));
 149    st->Yf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
 150    st->Rf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
 151    st->Xf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
 152    st->fratio = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
 153    st->regul = (float*)speex_alloc(N*sizeof(float));
 154
 155    st->X = (float*)speex_alloc(M*N*sizeof(float));
 156    st->D = (float*)speex_alloc(N*sizeof(float));
 157    st->Y = (float*)speex_alloc(N*sizeof(float));
 158    st->Y2 = (float*)speex_alloc(N*sizeof(float));
 159    st->E = (float*)speex_alloc(N*sizeof(float));
 160    st->W = (float*)speex_alloc(M*N*sizeof(float));
 161    st->PHI = (float*)speex_alloc(M*N*sizeof(float));
 162    st->power = (float*)speex_alloc((frame_size+1)*sizeof(float));
 163    st->power_1 = (float*)speex_alloc((frame_size+1)*sizeof(float));
 164    st->grad = (float*)speex_alloc(N*M*sizeof(float));
 165
 166    for (i=0;i<N*M;i++)
 167    {
 168       st->W[i] = st->PHI[i] = 0;
 169    }
 170
 171    st->regul[0] = (.01+(10.)/((4.)*(4.)))/M;
 172    for (i=1,j=1;i<N-1;i+=2,j++)
 173    {
 174       st->regul[i] = .01+((10.)/((j+4.)*(j+4.)))/M;
 175       st->regul[i+1] = .01+((10.)/((j+4.)*(j+4.)))/M;
 176    }
 177    st->regul[i] = .01+((10.)/((j+4.)*(j+4.)))/M;
 178
 179    st->adapted = 0;
 180    return st;
 181 }
 182
 183 /** Resets echo canceller state */
 184 void speex_echo_state_reset(SpeexEchoState *st)
 185 {
 186    int i, M, N;
 187    st->cancel_count=0;
 188    st->adapt_rate = .01f;
 189    N = st->window_size;
 190    M = st->M;
 191    for (i=0;i<N*M;i++)
 192    {
 193       st->W[i] = 0;
 194       st->X[i] = 0;
 195    }
 196    for (i=0;i<=st->frame_size;i++)
 197       st->power[i] = 0;
 198
 199    st->adapted = 0;
 200    st->adapt_rate = .01f;
 201    st->sum_adapt = 0;
 202    st->Sey = 0;
 203    st->Syy = 0;
 204    st->See = 0;
 205
 206 }
 207
 208 /** Destroys an echo canceller state */
 209 void speex_echo_state_destroy(SpeexEchoState *st)
 210 {
 211    spx_drft_clear(st->fft_lookup);
 212    speex_free(st->fft_lookup);
 213    speex_free(st->x);
 214    speex_free(st->d);
 215    speex_free(st->y);
 216    speex_free(st->last_y);
 217    speex_free(st->Yps);
 218    speex_free(st->Yf);
 219    speex_free(st->Rf);
 220    speex_free(st->Xf);
 221    speex_free(st->fratio);
 222    speex_free(st->regul);
 223
 224    speex_free(st->X);
 225    speex_free(st->D);
 226    speex_free(st->Y);
 227    speex_free(st->E);
 228    speex_free(st->W);
 229    speex_free(st->PHI);
 230    speex_free(st->power);
 231    speex_free(st->power_1);
 232    speex_free(st->grad);
 233
 234    speex_free(st);
 235 }
 236
 237
 238 /** Performs echo cancellation on a frame */
 239 void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, float *Yout)
 240 {
 241    int i,j,m;
 242    int N,M;
 243    float scale;
 244    float ESR;
 245    float SER;
 246    //float Sry=0
 247    float Srr=0,Syy=0,Sey=0,See=0,Sxx=0;
 248    float leak_estimate;
 249
 250    leak_estimate = .1+(.9/(1+2*st->sum_adapt));
 251
 252    N = st->window_size;
 253    M = st->M;
 254    scale = 1.0f/N;
 255    st->cancel_count++;
 256
 257    /* Copy input data to buffer */
 258    for (i=0;i<st->frame_size;i++)
 259    {
 260       st->x[i] = st->x[i+st->frame_size];
 261       st->x[i+st->frame_size] = echo[i];
 262
 263       st->d[i] = st->d[i+st->frame_size];
 264       st->d[i+st->frame_size] = ref[i];
 265    }
 266
 267    /* Shift memory: this could be optimized eventually*/
 268    for (i=0;i<N*(M-1);i++)
 269       st->X[i]=st->X[i+N];
 270
 271    /* Copy new echo frame */
 272    for (i=0;i<N;i++)
 273       st->X[(M-1)*N+i]=st->x[i];
 274
 275    /* Convert x (echo input) to frequency domain */
 276    spx_drft_forward(st->fft_lookup, &st->X[(M-1)*N]);
 277
 278    /* Compute filter response Y */
 279    for (i=0;i<N;i++)
 280       st->Y[i] = 0;
 281    for (j=0;j<M;j++)
 282       spectral_mul_accum(&st->X[j*N], &st->W[j*N], st->Y, N);
 283
 284    /* Convert Y (filter response) to time domain */
 285    for (i=0;i<N;i++)
 286       st->y[i] = st->Y[i];
 287    spx_drft_backward(st->fft_lookup, st->y);
 288    for (i=0;i<N;i++)
 289       st->y[i] *= scale;
 290
 291    /* Transform d (reference signal) to frequency domain */
 292    for (i=0;i<N;i++)
 293       st->D[i]=st->d[i];
 294    spx_drft_forward(st->fft_lookup, st->D);
 295
 296    /* Compute error signal (signal with echo removed) */
 297    for (i=0;i<st->frame_size;i++)
 298    {
 299       float tmp_out;
 300       tmp_out = (float)ref[i] - st->y[i+st->frame_size];
 301
 302       st->E[i] = 0;
 303       st->E[i+st->frame_size] = tmp_out;
 304
 305       /* Saturation */
 306       if (tmp_out>32767)
 307          tmp_out = 32767;
 308       else if (tmp_out<-32768)
 309          tmp_out = -32768;
 310       out[i] = tmp_out;
 311    }
 312
 313    /* This bit of code provides faster adaptation by doing a projection of the previous gradient on the
 314       "MMSE surface" */
 315    if (1)
 316    {
 317       float Sge, Sgg, Syy;
 318       float gain;
 319       Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
 320       for (i=0;i<N;i++)
 321          st->Y2[i] = 0;
 322       for (j=0;j<M;j++)
 323          spectral_mul_accum(&st->X[j*N], &st->PHI[j*N], st->Y2, N);
 324       for (i=0;i<N;i++)
 325          st->y2[i] = st->Y2[i];
 326       spx_drft_backward(st->fft_lookup, st->y2);
 327       for (i=0;i<N;i++)
 328          st->y2[i] *= scale;
 329       Sge = inner_prod(st->y2+st->frame_size, st->E+st->frame_size, st->frame_size);
 330       Sgg = inner_prod(st->y2+st->frame_size, st->y2+st->frame_size, st->frame_size);
 331       /* Compute projection gain */
 332       gain = Sge/(N+.03*Syy+Sgg);
 333       if (gain>2)
 334          gain = 2;
 335       if (gain < -2)
 336          gain = -2;
 337
 338       /* Apply gain to weights, echo estimates, output */
 339       for (i=0;i<N;i++)
 340          st->Y[i] += gain*st->Y2[i];
 341       for (i=0;i<st->frame_size;i++)
 342       {
 343          st->y[i+st->frame_size] += gain*st->y2[i+st->frame_size];
 344          st->E[i+st->frame_size] -= gain*st->y2[i+st->frame_size];
 345       }
 346       for (i=0;i<M*N;i++)
 347          st->W[i] += gain*st->PHI[i];
 348    }
 349
 350    /* Compute power spectrum of output (D-Y) and filter response (Y) */
 351    for (i=0;i<N;i++)
 352       st->D[i] -= st->Y[i];
 353    power_spectrum(st->D, st->Rf, N);
 354    power_spectrum(st->Y, st->Yf, N);
 355
 356    /* Compute frequency-domain adaptation mask */
 357    for (j=0;j<=st->frame_size;j++)
 358    {
 359       float r;
 360       r = leak_estimate*st->Yf[j] / (1+st->Rf[j]);
 361       if (r>1)
 362          r = 1;
 363       st->fratio[j] = r;
 364    }
 365
 366    /* Compute a bunch of correlations */
 367    //Sry = inner_prod(st->y+st->frame_size, st->d+st->frame_size, st->frame_size);
 368    Sey = inner_prod(st->y+st->frame_size, st->E+st->frame_size, st->frame_size);
 369    See = inner_prod(st->E+st->frame_size, st->E+st->frame_size, st->frame_size);
 370    Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
 371    Srr = inner_prod(st->d+st->frame_size, st->d+st->frame_size, st->frame_size);
 372    Sxx = inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
 373
 374    /* Compute smoothed cross-correlation and energy */
 375    st->Sey = .98*st->Sey + .02*Sey;
 376    st->Syy = .98*st->Syy + .02*Syy;
 377    st->See = .98*st->See + .02*See;
 378
 379    /* Check if filter is completely mis-adapted (if so, reset filter) */
 380    if (st->Sey/(1+st->Syy + .01*st->See) < -1)
 381    {
 382       /*fprintf (stderr, "reset at %d\n", st->cancel_count);*/
 383       speex_echo_state_reset(st);
 384       return;
 385    }
 386
 387    SER = Srr / (1+Sxx);
 388    ESR = leak_estimate*Syy / (1+See);
 389    if (ESR>1)
 390       ESR = 1;
 391 #if 1
 392    /* If over-cancellation (creating echo with 180 phase) damp filter */
 393    if (st->Sey/(1+st->Syy) < -.1 && (ESR > .3))
 394    {
 395       for (i=0;i<M*N;i++)
 396          st->W[i] *= .95;
 397       st->Sey *= .5;
 398       /*fprintf (stderr, "corrected down\n");*/
 399    }
 400 #endif
 401 #if 1
 402    /* If under-cancellation (leaving echo with 0 phase) scale filter up */
 403    if (st->Sey/(1+st->Syy) > .1 && (ESR > .1 || SER < 10))
 404    {
 405       for (i=0;i<M*N;i++)
 406          st->W[i] *= 1.05;
 407       st->Sey *= .5;
 408       /*fprintf (stderr, "corrected up %d\n", st->cancel_count);*/
 409    }
 410 #endif
 411
 412    /* We consider that the filter is adapted if the following is true*/
 413    if (ESR>.6 && st->sum_adapt > 1)
 414    {
 415       /*if (!st->adapted)
 416          fprintf(stderr, "Adapted at %d %f\n", st->cancel_count, st->sum_adapt);*/
 417       st->adapted = 1;
 418    }
 419
 420    /* Update frequency-dependent energy ratio with the total energy ratio */
 421    for (i=0;i<=st->frame_size;i++)
 422    {
 423       st->fratio[i]  = (.2*ESR+.8*min(.005+ESR,st->fratio[i]));
 424    }
 425
 426    if (st->adapted)
 427    {
 428       st->adapt_rate = .95f/(2+M);
 429    } else {
 430       /* Temporary adaption rate if filter is not adapted correctly */
 431       if (SER<.1)
 432          st->adapt_rate =.8/(2+M);
 433       else if (SER<1)
 434          st->adapt_rate =.4/(2+M);
 435       else if (SER<10)
 436          st->adapt_rate =.2/(2+M);
 437       else if (SER<30)
 438          st->adapt_rate =.08/(2+M);
 439       else
 440          st->adapt_rate = 0;
 441    }
 442
 443    /* How much have we adapted so far? */
 444    st->sum_adapt += st->adapt_rate;
 445
 446    /* Compute echo power in each frequency bin */
 447    {
 448       float ss = 1.0f/st->cancel_count;
 449       if (ss < .3/M)
 450          ss=.3/M;
 451       power_spectrum(&st->X[(M-1)*N], st->Xf, N);
 452       /* Smooth echo energy estimate over time */
 453       for (j=0;j<=st->frame_size;j++)
 454          st->power[j] = (1-ss)*st->power[j] + ss*st->Xf[j];
 455
 456
 457       /* Combine adaptation rate to the the inverse energy estimate */
 458       if (st->adapted)
 459       {
 460          /* If filter is adapted, include the frequency-dependent ratio too */
 461          for (i=0;i<=st->frame_size;i++)
 462             st->power_1[i] = st->adapt_rate*st->fratio[i] /(1.f+st->power[i]);
 463       } else {
 464          for (i=0;i<=st->frame_size;i++)
 465             st->power_1[i] = st->adapt_rate/(1.f+st->power[i]);
 466       }
 467    }
 468
 469
 470    /* Convert error to frequency domain */
 471    spx_drft_forward(st->fft_lookup, st->E);
 472
 473    /* Do some regularization (prevents problems when system is ill-conditoned) */
 474    for (m=0;m<M;m++)
 475       for (i=0;i<N;i++)
 476          st->W[m*N+i] *= 1-st->regul[i]*ESR;
 477
 478    /* Compute weight gradient */
 479    for (j=0;j<M;j++)
 480    {
 481       weighted_spectral_mul_conj(st->power_1, &st->X[j*N], st->E, st->PHI+N*j, N);
 482    }
 483
 484    /* Gradient descent */
 485    for (i=0;i<M*N;i++)
 486       st->W[i] += st->PHI[i];
 487
 488    /* AUMDF weight constraint */
 489    for (j=0;j<M;j++)
 490    {
 491       /* Remove the "if" to make this an MDF filter */
 492       if (st->cancel_count%M == j)
 493       {
 494          spx_drft_backward(st->fft_lookup, &st->W[j*N]);
 495          for (i=0;i<N;i++)
 496             st->W[j*N+i]*=scale;
 497          for (i=st->frame_size;i<N;i++)
 498          {
 499             st->W[j*N+i]=0;
 500          }
 501          spx_drft_forward(st->fft_lookup, &st->W[j*N]);
 502       }
 503    }
 504
 505    /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/
 506    if (Yout)
 507    {
 508       if (st->adapted)
 509       {
 510          /* If the filter is adapted, take the filtered echo */
 511          for (i=0;i<st->frame_size;i++)
 512             st->last_y[i] = st->last_y[st->frame_size+i];
 513          for (i=0;i<st->frame_size;i++)
 514             st->last_y[st->frame_size+i] = st->y[st->frame_size+i];
 515       } else {
 516          /* If filter isn't adapted yet, all we can do is take the echo signal directly */
 517          for (i=0;i<N;i++)
 518             st->last_y[i] = st->x[i];
 519       }
 520
 521       /* Apply hanning window (should pre-compute it)*/
 522       for (i=0;i<N;i++)
 523          st->Yps[i] = (.5-.5*cos(2*M_PI*i/N))*st->last_y[i];
 524
 525       /* Compute power spectrum of the echo */
 526       spx_drft_forward(st->fft_lookup, st->Yps);
 527       power_spectrum(st->Yps, st->Yps, N);
 528
 529       /* Estimate residual echo */
 530       for (i=0;i<=st->frame_size;i++)
 531          Yout[i] = 2*leak_estimate*st->Yps[i];
 532    }
 533
 534 }
 535