src/lib/dl/ext/speex/mdf.c

   1 /* Copyright (C) 2003-2008 Jean-Marc Valin
   2
   3    File: mdf.c
   4    Echo canceller based on the MDF algorithm (see below)
   5
   6    Redistribution and use in source and binary forms, with or without
   7    modification, are permitted provided that the following conditions are
   8    met:
   9
  10    1. Redistributions of source code must retain the above copyright notice,
  11    this list of conditions and the following disclaimer.
  12
  13    2. Redistributions in binary form must reproduce the above copyright
  14    notice, this list of conditions and the following disclaimer in the
  15    documentation and/or other materials provided with the distribution.
  16
  17    3. The name of the author may not be used to endorse or promote products
  18    derived from this software without specific prior written permission.
  19
  20    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  21    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  22    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  24    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  25    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  26    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  28    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30    POSSIBILITY OF SUCH DAMAGE.
  31 */
  32
  33 /*
  34    The echo canceller is based on the MDF algorithm described in:
  35
  36    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
  37    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
  38    February 1990.
  39
  40    We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
  41    double-talk is achieved using a variable learning rate as described in:
  42
  43    Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
  44    Cancellation With Double-Talk. IEEE Transactions on Audio,
  45    Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
  46    http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
  47
  48    There is no explicit double-talk detection, but a continuous variation
  49    in the learning rate based on residual echo, double-talk and background
  50    noise.
  51
  52    About the fixed-point version:
  53    All the signals are represented with 16-bit words. The filter weights
  54    are represented with 32-bit words, but only the top 16 bits are used
  55    in most cases. The lower 16 bits are completely unreliable (due to the
  56    fact that the update is done only on the top bits), but help in the
  57    adaptation -- probably by removing a "threshold effect" due to
  58    quantization (rounding going to zero) when the gradient is small.
  59
  60    Another kludge that seems to work good: when performing the weight
  61    update, we only move half the way toward the "goal" this seems to
  62    reduce the effect of quantization noise in the update phase. This
  63    can be seen as applying a gradient descent on a "soft constraint"
  64    instead of having a hard constraint.
  65
  66 */
  67
  68 #ifdef HAVE_CONFIG_H
  69 #include "config.h"
  70 #endif
  71
  72 #include "arch.h"
  73 #include "speex/speex_echo.h"
  74 #include "fftwrap.h"
  75 #include "pseudofloat.h"
  76 #include "math_approx.h"
  77 #include "os_support.h"
  78
  79 #ifndef M_PI
  80 #define M_PI 3.14159265358979323846
  81 #endif
  82
  83 #ifdef FIXED_POINT
  84 #define WEIGHT_SHIFT 11
  85 #define NORMALIZE_SCALEDOWN 5
  86 #define NORMALIZE_SCALEUP 3
  87 #else
  88 #define WEIGHT_SHIFT 0
  89 #endif
  90
  91 #ifdef FIXED_POINT
  92 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
  93 #else
  94 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
  95 #endif
  96
  97 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
  98    and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
  99 #define TWO_PATH
 100
 101 #ifdef FIXED_POINT
 102 static const spx_float_t MIN_LEAK = {20972, -22};
 103
 104 /* Constants for the two-path filter */
 105 static const spx_float_t VAR1_SMOOTH = {23593, -16};
 106 static const spx_float_t VAR2_SMOOTH = {23675, -15};
 107 static const spx_float_t VAR1_UPDATE = {16384, -15};
 108 static const spx_float_t VAR2_UPDATE = {16384, -16};
 109 static const spx_float_t VAR_BACKTRACK = {16384, -12};
 110 #define TOP16(x) ((x)>>16)
 111
 112 #else
 113
 114 static const spx_float_t MIN_LEAK = .005f;
 115
 116 /* Constants for the two-path filter */
 117 static const spx_float_t VAR1_SMOOTH = .36f;
 118 static const spx_float_t VAR2_SMOOTH = .7225f;
 119 static const spx_float_t VAR1_UPDATE = .5f;
 120 static const spx_float_t VAR2_UPDATE = .25f;
 121 static const spx_float_t VAR_BACKTRACK = 4.f;
 122 #define TOP16(x) (x)
 123 #endif
 124
 125
 126 #define PLAYBACK_DELAY 2
 127
 128 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
 129
 130
 131 /** Speex echo cancellation state. */
 132 struct SpeexEchoState_ {
 133    int frame_size;           /**< Number of samples processed each time */
 134    int window_size;
 135    int M;
 136    int cancel_count;
 137    int adapted;
 138    int saturated;
 139    int screwed_up;
 140    int C;                    /** Number of input channels (microphones) */
 141    int K;                    /** Number of output channels (loudspeakers) */
 142    spx_int32_t sampling_rate;
 143    spx_word16_t spec_average;
 144    spx_word16_t beta0;
 145    spx_word16_t beta_max;
 146    spx_word32_t sum_adapt;
 147    spx_word16_t leak_estimate;
 148
 149    spx_word16_t *e;      /* scratch */
 150    spx_word16_t *x;      /* Far-end input buffer (2N) */
 151    spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
 152    spx_word16_t *input;  /* scratch */
 153    spx_word16_t *y;      /* scratch */
 154    spx_word16_t *last_y;
 155    spx_word16_t *Y;      /* scratch */
 156    spx_word16_t *E;
 157    spx_word32_t *PHI;    /* scratch */
 158    spx_word32_t *W;      /* (Background) filter weights */
 159 #ifdef TWO_PATH
 160    spx_word16_t *foreground; /* Foreground filter weights */
 161    spx_word32_t  Davg1;  /* 1st recursive average of the residual power difference */
 162    spx_word32_t  Davg2;  /* 2nd recursive average of the residual power difference */
 163    spx_float_t   Dvar1;  /* Estimated variance of 1st estimator */
 164    spx_float_t   Dvar2;  /* Estimated variance of 2nd estimator */
 165 #endif
 166    spx_word32_t *power;  /* Power of the far-end signal */
 167    spx_float_t  *power_1;/* Inverse power of far-end */
 168    spx_word16_t *wtmp;   /* scratch */
 169 #ifdef FIXED_POINT
 170    spx_word16_t *wtmp2;  /* scratch */
 171 #endif
 172    spx_word32_t *Rf;     /* scratch */
 173    spx_word32_t *Yf;     /* scratch */
 174    spx_word32_t *Xf;     /* scratch */
 175    spx_word32_t *Eh;
 176    spx_word32_t *Yh;
 177    spx_float_t   Pey;
 178    spx_float_t   Pyy;
 179    spx_word16_t *window;
 180    spx_word16_t *prop;
 181    void *fft_table;
 182    spx_word16_t *memX, *memD, *memE;
 183    spx_word16_t preemph;
 184    spx_word16_t notch_radius;
 185    spx_mem_t *notch_mem;
 186
 187    /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
 188    spx_int16_t *play_buf;
 189    int play_buf_pos;
 190    int play_buf_started;
 191 };
 192
 193 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
 194 {
 195    int i;
 196    spx_word16_t den2;
 197 #ifdef FIXED_POINT
 198    den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
 199 #else
 200    den2 = radius*radius + .7*(1-radius)*(1-radius);
 201 #endif
 202    /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
 203    for (i=0;i<len;i++)
 204    {
 205       spx_word16_t vin = in[i*stride];
 206       spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
 207 #ifdef FIXED_POINT
 208       mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
 209 #else
 210       mem[0] = mem[1] + 2*(-vin + radius*vout);
 211 #endif
 212       mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
 213       out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
 214    }
 215 }
 216
 217 /* This inner product is slightly different from the codec version because of fixed-point */
 218 static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
 219 {
 220    spx_word32_t sum=0;
 221    len >>= 1;
 222    while(len--)
 223    {
 224       spx_word32_t part=0;
 225       part = MAC16_16(part,*x++,*y++);
 226       part = MAC16_16(part,*x++,*y++);
 227       /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
 228       sum = ADD32(sum,SHR32(part,6));
 229    }
 230    return sum;
 231 }
 232
 233 /** Compute power spectrum of a half-complex (packed) vector */
 234 static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
 235 {
 236    int i, j;
 237    ps[0]=MULT16_16(X[0],X[0]);
 238    for (i=1,j=1;i<N-1;i+=2,j++)
 239    {
 240       ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
 241    }
 242    ps[j]=MULT16_16(X[i],X[i]);
 243 }
 244
 245 /** Compute power spectrum of a half-complex (packed) vector and accumulate */
 246 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
 247 {
 248    int i, j;
 249    ps[0]+=MULT16_16(X[0],X[0]);
 250    for (i=1,j=1;i<N-1;i+=2,j++)
 251    {
 252       ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
 253    }
 254    ps[j]+=MULT16_16(X[i],X[i]);
 255 }
 256
 257 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
 258 #ifdef FIXED_POINT
 259 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
 260 {
 261    int i,j;
 262    spx_word32_t tmp1=0,tmp2=0;
 263    for (j=0;j<M;j++)
 264    {
 265       tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
 266    }
 267    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
 268    for (i=1;i<N-1;i+=2)
 269    {
 270       tmp1 = tmp2 = 0;
 271       for (j=0;j<M;j++)
 272       {
 273          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
 274          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
 275       }
 276       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
 277       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
 278    }
 279    tmp1 = tmp2 = 0;
 280    for (j=0;j<M;j++)
 281    {
 282       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
 283    }
 284    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
 285 }
 286 static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
 287 {
 288    int i,j;
 289    spx_word32_t tmp1=0,tmp2=0;
 290    for (j=0;j<M;j++)
 291    {
 292       tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
 293    }
 294    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
 295    for (i=1;i<N-1;i+=2)
 296    {
 297       tmp1 = tmp2 = 0;
 298       for (j=0;j<M;j++)
 299       {
 300          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
 301          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
 302       }
 303       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
 304       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
 305    }
 306    tmp1 = tmp2 = 0;
 307    for (j=0;j<M;j++)
 308    {
 309       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
 310    }
 311    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
 312 }
 313
 314 #else
 315 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
 316 {
 317    int i,j;
 318    for (i=0;i<N;i++)
 319       acc[i] = 0;
 320    for (j=0;j<M;j++)
 321    {
 322       acc[0] += X[0]*Y[0];
 323       for (i=1;i<N-1;i+=2)
 324       {
 325          acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
 326          acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
 327       }
 328       acc[i] += X[i]*Y[i];
 329       X += N;
 330       Y += N;
 331    }
 332 }
 333 #define spectral_mul_accum16 spectral_mul_accum
 334 #endif
 335
 336 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
 337 static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
 338 {
 339    int i, j;
 340    spx_float_t W;
 341    W = FLOAT_AMULT(p, w[0]);
 342    prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
 343    for (i=1,j=1;i<N-1;i+=2,j++)
 344    {
 345       W = FLOAT_AMULT(p, w[j]);
 346       prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
 347       prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
 348    }
 349    W = FLOAT_AMULT(p, w[j]);
 350    prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
 351 }
 352
 353 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
 354 {
 355    int i, j, p;
 356    spx_word16_t max_sum = 1;
 357    spx_word32_t prop_sum = 1;
 358    for (i=0;i<M;i++)
 359    {
 360       spx_word32_t tmp = 1;
 361       for (p=0;p<P;p++)
 362          for (j=0;j<N;j++)
 363             tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
 364 #ifdef FIXED_POINT
 365       /* Just a security in case an overflow were to occur */
 366       tmp = MIN32(ABS32(tmp), 536870912);
 367 #endif
 368       prop[i] = spx_sqrt(tmp);
 369       if (prop[i] > max_sum)
 370          max_sum = prop[i];
 371    }
 372    for (i=0;i<M;i++)
 373    {
 374       prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
 375       prop_sum += EXTEND32(prop[i]);
 376    }
 377    for (i=0;i<M;i++)
 378    {
 379       prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
 380       /*printf ("%f ", prop[i]);*/
 381    }
 382    /*printf ("\n");*/
 383 }
 384
 385 #ifdef DUMP_ECHO_CANCEL_DATA
 386 #include <stdio.h>
 387 static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
 388
 389 static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
 390 {
 391    if (!(rFile && pFile && oFile))
 392    {
 393       speex_fatal("Dump files not open");
 394    }
 395    fwrite(rec, sizeof(spx_int16_t), len, rFile);
 396    fwrite(play, sizeof(spx_int16_t), len, pFile);
 397    fwrite(out, sizeof(spx_int16_t), len, oFile);
 398 }
 399 #endif
 400
 401 /** Creates a new echo canceller state */
 402 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
 403 {
 404    return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
 405 }
 406
 407 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
 408 {
 409    int i,N,M, C, K;
 410    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
 411
 412    st->K = nb_speakers;
 413    st->C = nb_mic;
 414    C=st->C;
 415    K=st->K;
 416 #ifdef DUMP_ECHO_CANCEL_DATA
 417    if (rFile || pFile || oFile)
 418       speex_fatal("Opening dump files twice");
 419    rFile = fopen("aec_rec.sw", "wb");
 420    pFile = fopen("aec_play.sw", "wb");
 421    oFile = fopen("aec_out.sw", "wb");
 422 #endif
 423
 424    st->frame_size = frame_size;
 425    st->window_size = 2*frame_size;
 426    N = st->window_size;
 427    M = st->M = (filter_length+st->frame_size-1)/frame_size;
 428    st->cancel_count=0;
 429    st->sum_adapt = 0;
 430    st->saturated = 0;
 431    st->screwed_up = 0;
 432    /* This is the default sampling rate */
 433    st->sampling_rate = 8000;
 434    st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
 435 #ifdef FIXED_POINT
 436    st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
 437    st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
 438 #else
 439    st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
 440    st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
 441 #endif
 442    st->leak_estimate = 0;
 443
 444    st->fft_table = spx_fft_init(N);
 445
 446    st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
 447    st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
 448    st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
 449    st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
 450    st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
 451    st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 452    st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 453    st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 454    st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 455    st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 456
 457    st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
 458    st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
 459    st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
 460    st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
 461 #ifdef TWO_PATH
 462    st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
 463 #endif
 464    st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
 465    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
 466    st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
 467    st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 468    st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
 469    st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 470 #ifdef FIXED_POINT
 471    st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 472    for (i=0;i<N>>1;i++)
 473    {
 474       st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
 475       st->window[N-i-1] = st->window[i];
 476    }
 477 #else
 478    for (i=0;i<N;i++)
 479       st->window[i] = .5-.5*cos(2*M_PI*i/N);
 480 #endif
 481    for (i=0;i<=st->frame_size;i++)
 482       st->power_1[i] = FLOAT_ONE;
 483    for (i=0;i<N*M*K*C;i++)
 484       st->W[i] = 0;
 485    {
 486       spx_word32_t sum = 0;
 487       /* Ratio of ~10 between adaptation rate of first and last block */
 488       spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
 489       st->prop[0] = QCONST16(.7, 15);
 490       sum = EXTEND32(st->prop[0]);
 491       for (i=1;i<M;i++)
 492       {
 493          st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
 494          sum = ADD32(sum, EXTEND32(st->prop[i]));
 495       }
 496       for (i=M-1;i>=0;i--)
 497       {
 498          st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
 499       }
 500    }
 501
 502    st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
 503    st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
 504    st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
 505    st->preemph = QCONST16(.9,15);
 506    if (st->sampling_rate<12000)
 507       st->notch_radius = QCONST16(.9, 15);
 508    else if (st->sampling_rate<24000)
 509       st->notch_radius = QCONST16(.982, 15);
 510    else
 511       st->notch_radius = QCONST16(.992, 15);
 512
 513    st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
 514    st->adapted = 0;
 515    st->Pey = st->Pyy = FLOAT_ONE;
 516
 517 #ifdef TWO_PATH
 518    st->Davg1 = st->Davg2 = 0;
 519    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 520 #endif
 521
 522    st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
 523    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
 524    st->play_buf_started = 0;
 525
 526    return st;
 527 }
 528
 529 /** Resets echo canceller state */
 530 EXPORT void speex_echo_state_reset(SpeexEchoState *st)
 531 {
 532    int i, M, N, C, K;
 533    st->cancel_count=0;
 534    st->screwed_up = 0;
 535    N = st->window_size;
 536    M = st->M;
 537    C=st->C;
 538    K=st->K;
 539    for (i=0;i<N*M;i++)
 540       st->W[i] = 0;
 541 #ifdef TWO_PATH
 542    for (i=0;i<N*M;i++)
 543       st->foreground[i] = 0;
 544 #endif
 545    for (i=0;i<N*(M+1);i++)
 546       st->X[i] = 0;
 547    for (i=0;i<=st->frame_size;i++)
 548    {
 549       st->power[i] = 0;
 550       st->power_1[i] = FLOAT_ONE;
 551       st->Eh[i] = 0;
 552       st->Yh[i] = 0;
 553    }
 554    for (i=0;i<st->frame_size;i++)
 555    {
 556       st->last_y[i] = 0;
 557    }
 558    for (i=0;i<N*C;i++)
 559    {
 560       st->E[i] = 0;
 561    }
 562    for (i=0;i<N*K;i++)
 563    {
 564       st->x[i] = 0;
 565    }
 566    for (i=0;i<2*C;i++)
 567       st->notch_mem[i] = 0;
 568    for (i=0;i<C;i++)
 569       st->memD[i]=st->memE[i]=0;
 570    for (i=0;i<K;i++)
 571       st->memX[i]=0;
 572
 573    st->saturated = 0;
 574    st->adapted = 0;
 575    st->sum_adapt = 0;
 576    st->Pey = st->Pyy = FLOAT_ONE;
 577 #ifdef TWO_PATH
 578    st->Davg1 = st->Davg2 = 0;
 579    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 580 #endif
 581    for (i=0;i<3*st->frame_size;i++)
 582       st->play_buf[i] = 0;
 583    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
 584    st->play_buf_started = 0;
 585
 586 }
 587
 588 /** Destroys an echo canceller state */
 589 EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
 590 {
 591    spx_fft_destroy(st->fft_table);
 592
 593    speex_free(st->e);
 594    speex_free(st->x);
 595    speex_free(st->input);
 596    speex_free(st->y);
 597    speex_free(st->last_y);
 598    speex_free(st->Yf);
 599    speex_free(st->Rf);
 600    speex_free(st->Xf);
 601    speex_free(st->Yh);
 602    speex_free(st->Eh);
 603
 604    speex_free(st->X);
 605    speex_free(st->Y);
 606    speex_free(st->E);
 607    speex_free(st->W);
 608 #ifdef TWO_PATH
 609    speex_free(st->foreground);
 610 #endif
 611    speex_free(st->PHI);
 612    speex_free(st->power);
 613    speex_free(st->power_1);
 614    speex_free(st->window);
 615    speex_free(st->prop);
 616    speex_free(st->wtmp);
 617 #ifdef FIXED_POINT
 618    speex_free(st->wtmp2);
 619 #endif
 620    speex_free(st->memX);
 621    speex_free(st->memD);
 622    speex_free(st->memE);
 623    speex_free(st->notch_mem);
 624
 625    speex_free(st->play_buf);
 626    speex_free(st);
 627
 628 #ifdef DUMP_ECHO_CANCEL_DATA
 629    fclose(rFile);
 630    fclose(pFile);
 631    fclose(oFile);
 632    rFile = pFile = oFile = NULL;
 633 #endif
 634 }
 635
 636 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
 637 {
 638    int i;
 639    /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
 640    st->play_buf_started = 1;
 641    if (st->play_buf_pos>=st->frame_size)
 642    {
 643       speex_echo_cancellation(st, rec, st->play_buf, out);
 644       st->play_buf_pos -= st->frame_size;
 645       for (i=0;i<st->play_buf_pos;i++)
 646          st->play_buf[i] = st->play_buf[i+st->frame_size];
 647    } else {
 648       speex_warning("No playback frame available (your application is buggy and/or got xruns)");
 649       if (st->play_buf_pos!=0)
 650       {
 651          speex_warning("internal playback buffer corruption?");
 652          st->play_buf_pos = 0;
 653       }
 654       for (i=0;i<st->frame_size;i++)
 655          out[i] = rec[i];
 656    }
 657 }
 658
 659 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
 660 {
 661    /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
 662    if (!st->play_buf_started)
 663    {
 664       speex_warning("discarded first playback frame");
 665       return;
 666    }
 667    if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
 668    {
 669       int i;
 670       for (i=0;i<st->frame_size;i++)
 671          st->play_buf[st->play_buf_pos+i] = play[i];
 672       st->play_buf_pos += st->frame_size;
 673       if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
 674       {
 675          speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
 676          for (i=0;i<st->frame_size;i++)
 677             st->play_buf[st->play_buf_pos+i] = play[i];
 678          st->play_buf_pos += st->frame_size;
 679       }
 680    } else {
 681       speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
 682    }
 683 }
 684
 685 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
 686 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
 687 {
 688    speex_echo_cancellation(st, in, far_end, out);
 689 }
 690
 691 /** Performs echo cancellation on a frame */
 692 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
 693 {
 694    int i,j, chan, speak;
 695    int N,M, C, K;
 696    spx_word32_t Syy,See,Sxx,Sdd, Sff;
 697 #ifdef TWO_PATH
 698    spx_word32_t Dbf;
 699    int update_foreground;
 700 #endif
 701    spx_word32_t Sey;
 702    spx_word16_t ss, ss_1;
 703    spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
 704    spx_float_t alpha, alpha_1;
 705    spx_word16_t RER;
 706    spx_word32_t tmp32;
 707
 708    N = st->window_size;
 709    M = st->M;
 710    C = st->C;
 711    K = st->K;
 712
 713    st->cancel_count++;
 714 #ifdef FIXED_POINT
 715    ss=DIV32_16(11469,M);
 716    ss_1 = SUB16(32767,ss);
 717 #else
 718    ss=.35/M;
 719    ss_1 = 1-ss;
 720 #endif
 721
 722    for (chan = 0; chan < C; chan++)
 723    {
 724       /* Apply a notch filter to make sure DC doesn't end up causing problems */
 725       filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
 726       /* Copy input data to buffer and apply pre-emphasis */
 727       /* Copy input data to buffer */
 728       for (i=0;i<st->frame_size;i++)
 729       {
 730          spx_word32_t tmp32;
 731          /* FIXME: This core has changed a bit, need to merge properly */
 732          tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
 733 #ifdef FIXED_POINT
 734          if (tmp32 > 32767)
 735          {
 736             tmp32 = 32767;
 737             if (st->saturated == 0)
 738                st->saturated = 1;
 739          }
 740          if (tmp32 < -32767)
 741          {
 742             tmp32 = -32767;
 743             if (st->saturated == 0)
 744                st->saturated = 1;
 745          }
 746 #endif
 747          st->memD[chan] = st->input[chan*st->frame_size+i];
 748          st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
 749       }
 750    }
 751
 752    for (speak = 0; speak < K; speak++)
 753    {
 754       for (i=0;i<st->frame_size;i++)
 755       {
 756          spx_word32_t tmp32;
 757          st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
 758          tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
 759 #ifdef FIXED_POINT
 760          /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
 761          if (tmp32 > 32767)
 762          {
 763             tmp32 = 32767;
 764             st->saturated = M+1;
 765          }
 766          if (tmp32 < -32767)
 767          {
 768             tmp32 = -32767;
 769             st->saturated = M+1;
 770          }
 771 #endif
 772          st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
 773          st->memX[speak] = far_end[i*K+speak];
 774       }
 775    }
 776
 777    for (speak = 0; speak < K; speak++)
 778    {
 779       /* Shift memory: this could be optimized eventually*/
 780       for (j=M-1;j>=0;j--)
 781       {
 782          for (i=0;i<N;i++)
 783             st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
 784       }
 785       /* Convert x (echo input) to frequency domain */
 786       spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
 787    }
 788
 789    Sxx = 0;
 790    for (speak = 0; speak < K; speak++)
 791    {
 792       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
 793       power_spectrum_accum(st->X+speak*N, st->Xf, N);
 794    }
 795
 796    Sff = 0;
 797    for (chan = 0; chan < C; chan++)
 798    {
 799 #ifdef TWO_PATH
 800       /* Compute foreground filter */
 801       spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
 802       spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
 803       for (i=0;i<st->frame_size;i++)
 804          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
 805       Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
 806 #endif
 807    }
 808
 809    /* Adjust proportional adaption rate */
 810    /* FIXME: Adjust that for C, K*/
 811    if (st->adapted)
 812       mdf_adjust_prop (st->W, N, M, C*K, st->prop);
 813    /* Compute weight gradient */
 814    if (st->saturated == 0)
 815    {
 816       for (chan = 0; chan < C; chan++)
 817       {
 818          for (speak = 0; speak < K; speak++)
 819          {
 820             for (j=M-1;j>=0;j--)
 821             {
 822                weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
 823                for (i=0;i<N;i++)
 824                   st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
 825             }
 826          }
 827       }
 828    } else {
 829       st->saturated--;
 830    }
 831
 832    /* FIXME: MC conversion required */
 833    /* Update weight to prevent circular convolution (MDF / AUMDF) */
 834    for (chan = 0; chan < C; chan++)
 835    {
 836       for (speak = 0; speak < K; speak++)
 837       {
 838          for (j=0;j<M;j++)
 839          {
 840             /* This is a variant of the Alternatively Updated MDF (AUMDF) */
 841             /* Remove the "if" to make this an MDF filter */
 842             if (j==0 || st->cancel_count%(M-1) == j-1)
 843             {
 844 #ifdef FIXED_POINT
 845                for (i=0;i<N;i++)
 846                   st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
 847                spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
 848                for (i=0;i<st->frame_size;i++)
 849                {
 850                   st->wtmp[i]=0;
 851                }
 852                for (i=st->frame_size;i<N;i++)
 853                {
 854                   st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
 855                }
 856                spx_fft(st->fft_table, st->wtmp, st->wtmp2);
 857                /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
 858                for (i=0;i<N;i++)
 859                   st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
 860 #else
 861                spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
 862                for (i=st->frame_size;i<N;i++)
 863                {
 864                   st->wtmp[i]=0;
 865                }
 866                spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
 867 #endif
 868             }
 869          }
 870       }
 871    }
 872
 873    /* So we can use power_spectrum_accum */
 874    for (i=0;i<=st->frame_size;i++)
 875       st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
 876
 877    Dbf = 0;
 878    See = 0;
 879 #ifdef TWO_PATH
 880    /* Difference in response, this is used to estimate the variance of our residual power estimate */
 881    for (chan = 0; chan < C; chan++)
 882    {
 883       spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
 884       spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
 885       for (i=0;i<st->frame_size;i++)
 886          st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
 887       Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
 888       for (i=0;i<st->frame_size;i++)
 889          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
 890       See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
 891    }
 892 #endif
 893
 894 #ifndef TWO_PATH
 895    Sff = See;
 896 #endif
 897
 898 #ifdef TWO_PATH
 899    /* Logic for updating the foreground filter */
 900
 901    /* For two time windows, compute the mean of the energy difference, as well as the variance */
 902    st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
 903    st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
 904    st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
 905    st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
 906
 907    /* Equivalent float code:
 908    st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
 909    st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
 910    st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
 911    st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
 912    */
 913
 914    update_foreground = 0;
 915    /* Check if we have a statistically significant reduction in the residual echo */
 916    /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
 917    if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
 918       update_foreground = 1;
 919    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
 920       update_foreground = 1;
 921    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
 922       update_foreground = 1;
 923
 924    /* Do we update? */
 925    if (update_foreground)
 926    {
 927       st->Davg1 = st->Davg2 = 0;
 928       st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 929       /* Copy background filter to foreground filter */
 930       for (i=0;i<N*M*C*K;i++)
 931          st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
 932       /* Apply a smooth transition so as to not introduce blocking artifacts */
 933       for (chan = 0; chan < C; chan++)
 934          for (i=0;i<st->frame_size;i++)
 935             st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
 936    } else {
 937       int reset_background=0;
 938       /* Otherwise, check if the background filter is significantly worse */
 939       if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
 940          reset_background = 1;
 941       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
 942          reset_background = 1;
 943       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
 944          reset_background = 1;
 945       if (reset_background)
 946       {
 947          /* Copy foreground filter to background filter */
 948          for (i=0;i<N*M*C*K;i++)
 949             st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
 950          /* We also need to copy the output so as to get correct adaptation */
 951          for (chan = 0; chan < C; chan++)
 952          {
 953             for (i=0;i<st->frame_size;i++)
 954                st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
 955             for (i=0;i<st->frame_size;i++)
 956                st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
 957          }
 958          See = Sff;
 959          st->Davg1 = st->Davg2 = 0;
 960          st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 961       }
 962    }
 963 #endif
 964
 965    Sey = Syy = Sdd = 0;
 966    for (chan = 0; chan < C; chan++)
 967    {
 968       /* Compute error signal (for the output with de-emphasis) */
 969       for (i=0;i<st->frame_size;i++)
 970       {
 971          spx_word32_t tmp_out;
 972 #ifdef TWO_PATH
 973          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
 974 #else
 975          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
 976 #endif
 977          tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
 978       /* This is an arbitrary test for saturation in the microphone signal */
 979          if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
 980          {
 981          if (st->saturated == 0)
 982             st->saturated = 1;
 983          }
 984          out[i*C+chan] = WORD2INT(tmp_out);
 985          st->memE[chan] = tmp_out;
 986       }
 987
 988 #ifdef DUMP_ECHO_CANCEL_DATA
 989       dump_audio(in, far_end, out, st->frame_size);
 990 #endif
 991
 992       /* Compute error signal (filter update version) */
 993       for (i=0;i<st->frame_size;i++)
 994       {
 995          st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
 996          st->e[chan*N+i] = 0;
 997       }
 998
 999       /* Compute a bunch of correlations */
1000       /* FIXME: bad merge */
1001       Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1002       Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1003       Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
1004
1005       /* Convert error to frequency domain */
1006       spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1007       for (i=0;i<st->frame_size;i++)
1008          st->y[i+chan*N] = 0;
1009       spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1010
1011       /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1012       power_spectrum_accum(st->E+chan*N, st->Rf, N);
1013       power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1014
1015    }
1016
1017    /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1018
1019    /* Do some sanity check */
1020    if (!(Syy>=0 && Sxx>=0 && See >= 0)
1021 #ifndef FIXED_POINT
1022        || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1023 #endif
1024       )
1025    {
1026       /* Things have gone really bad */
1027       st->screwed_up += 50;
1028       for (i=0;i<st->frame_size*C;i++)
1029          out[i] = 0;
1030    } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1031    {
1032       /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1033       st->screwed_up++;
1034    } else {
1035       /* Everything's fine */
1036       st->screwed_up=0;
1037    }
1038    if (st->screwed_up>=50)
1039    {
1040       speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1041       speex_echo_state_reset(st);
1042       return;
1043    }
1044
1045    /* Add a small noise floor to make sure not to have problems when dividing */
1046    See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1047
1048    for (speak = 0; speak < K; speak++)
1049    {
1050       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1051       power_spectrum_accum(st->X+speak*N, st->Xf, N);
1052    }
1053
1054
1055    /* Smooth far end energy estimate over time */
1056    for (j=0;j<=st->frame_size;j++)
1057       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1058
1059    /* Compute filtered spectra and (cross-)correlations */
1060    for (j=st->frame_size;j>=0;j--)
1061    {
1062       spx_float_t Eh, Yh;
1063       Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1064       Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1065       Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1066       Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1067 #ifdef FIXED_POINT
1068       st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1069       st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1070 #else
1071       st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1072       st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1073 #endif
1074    }
1075
1076    Pyy = FLOAT_SQRT(Pyy);
1077    Pey = FLOAT_DIVU(Pey,Pyy);
1078
1079    /* Compute correlation updatete rate */
1080    tmp32 = MULT16_32_Q15(st->beta0,Syy);
1081    if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1082       tmp32 = MULT16_32_Q15(st->beta_max,See);
1083    alpha = FLOAT_DIV32(tmp32, See);
1084    alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1085    /* Update correlations (recursive average) */
1086    st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1087    st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1088    if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1089       st->Pyy = FLOAT_ONE;
1090    /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1091    if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1092       st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1093    if (FLOAT_GT(st->Pey, st->Pyy))
1094       st->Pey = st->Pyy;
1095    /* leak_estimate is the linear regression result */
1096    st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1097    /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1098    if (st->leak_estimate > 16383)
1099       st->leak_estimate = 32767;
1100    else
1101       st->leak_estimate = SHL16(st->leak_estimate,1);
1102    /*printf ("%f\n", st->leak_estimate);*/
1103
1104    /* Compute Residual to Error Ratio */
1105 #ifdef FIXED_POINT
1106    tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1107    tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1108    /* Check for y in e (lower bound on RER) */
1109    {
1110       spx_float_t bound = PSEUDOFLOAT(Sey);
1111       bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1112       if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1113          tmp32 = See;
1114       else if (tmp32 < FLOAT_EXTRACT32(bound))
1115          tmp32 = FLOAT_EXTRACT32(bound);
1116    }
1117    if (tmp32 > SHR32(See,1))
1118       tmp32 = SHR32(See,1);
1119    RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1120 #else
1121    RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1122    /* Check for y in e (lower bound on RER) */
1123    if (RER < Sey*Sey/(1+See*Syy))
1124       RER = Sey*Sey/(1+See*Syy);
1125    if (RER > .5)
1126       RER = .5;
1127 #endif
1128
1129    /* We consider that the filter has had minimal adaptation if the following is true*/
1130    if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1131    {
1132       st->adapted = 1;
1133    }
1134
1135    if (st->adapted)
1136    {
1137       /* Normal learning rate calculation once we're past the minimal adaptation phase */
1138       for (i=0;i<=st->frame_size;i++)
1139       {
1140          spx_word32_t r, e;
1141          /* Compute frequency-domain adaptation mask */
1142          r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1143          e = SHL32(st->Rf[i],3)+1;
1144 #ifdef FIXED_POINT
1145          if (r>SHR32(e,1))
1146             r = SHR32(e,1);
1147 #else
1148          if (r>.5*e)
1149             r = .5*e;
1150 #endif
1151          r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1152          /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1153          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1154       }
1155    } else {
1156       /* Temporary adaption rate if filter is not yet adapted enough */
1157       spx_word16_t adapt_rate=0;
1158
1159       if (Sxx > SHR32(MULT16_16(N, 1000),6))
1160       {
1161          tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1162 #ifdef FIXED_POINT
1163          if (tmp32 > SHR32(See,2))
1164             tmp32 = SHR32(See,2);
1165 #else
1166          if (tmp32 > .25*See)
1167             tmp32 = .25*See;
1168 #endif
1169          adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1170       }
1171       for (i=0;i<=st->frame_size;i++)
1172          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1173
1174
1175       /* How much have we adapted so far? */
1176       st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1177    }
1178
1179    /* FIXME: MC conversion required */
1180       for (i=0;i<st->frame_size;i++)
1181          st->last_y[i] = st->last_y[st->frame_size+i];
1182    if (st->adapted)
1183    {
1184       /* If the filter is adapted, take the filtered echo */
1185       for (i=0;i<st->frame_size;i++)
1186          st->last_y[st->frame_size+i] = in[i]-out[i];
1187    } else {
1188       /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1189       /* moved earlier: for (i=0;i<N;i++)
1190       st->last_y[i] = st->x[i];*/
1191    }
1192
1193 }
1194
1195 /* Compute spectrum of estimated echo for use in an echo post-filter */
1196 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1197 {
1198    int i;
1199    spx_word16_t leak2;
1200    int N;
1201
1202    N = st->window_size;
1203
1204    /* Apply hanning window (should pre-compute it)*/
1205    for (i=0;i<N;i++)
1206       st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1207
1208    /* Compute power spectrum of the echo */
1209    spx_fft(st->fft_table, st->y, st->Y);
1210    power_spectrum(st->Y, residual_echo, N);
1211
1212 #ifdef FIXED_POINT
1213    if (st->leak_estimate > 16383)
1214       leak2 = 32767;
1215    else
1216       leak2 = SHL16(st->leak_estimate, 1);
1217 #else
1218    if (st->leak_estimate>.5)
1219       leak2 = 1;
1220    else
1221       leak2 = 2*st->leak_estimate;
1222 #endif
1223    /* Estimate residual echo */
1224    for (i=0;i<=st->frame_size;i++)
1225       residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1226
1227 }
1228
1229 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1230 {
1231    switch(request)
1232    {
1233
1234       case SPEEX_ECHO_GET_FRAME_SIZE:
1235          (*(int*)ptr) = st->frame_size;
1236          break;
1237       case SPEEX_ECHO_SET_SAMPLING_RATE:
1238          st->sampling_rate = (*(int*)ptr);
1239          st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1240 #ifdef FIXED_POINT
1241          st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1242          st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1243 #else
1244          st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1245          st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1246 #endif
1247          if (st->sampling_rate<12000)
1248             st->notch_radius = QCONST16(.9, 15);
1249          else if (st->sampling_rate<24000)
1250             st->notch_radius = QCONST16(.982, 15);
1251          else
1252             st->notch_radius = QCONST16(.992, 15);
1253          break;
1254       case SPEEX_ECHO_GET_SAMPLING_RATE:
1255          (*(int*)ptr) = st->sampling_rate;
1256          break;
1257       case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1258          /*FIXME: Implement this for multiple channels */
1259          *((spx_int32_t *)ptr) = st->M * st->frame_size;
1260          break;
1261       case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1262       {
1263          int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1264          spx_int32_t *filt = (spx_int32_t *) ptr;
1265          for(j=0;j<M;j++)
1266          {
1267             /*FIXME: Implement this for multiple channels */
1268 #ifdef FIXED_POINT
1269             for (i=0;i<N;i++)
1270                st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1271             spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1272 #else
1273             spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1274 #endif
1275             for(i=0;i<n;i++)
1276                filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1277          }
1278       }
1279          break;
1280       default:
1281          speex_warning_int("Unknown speex_echo_ctl request: ", request);
1282          return -1;
1283    }
1284    return 0;
1285 }