1 /* Copyright (C) 2002-2006 Jean-Marc Valin
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include "quant_lsp.h"
42 #include "cb_search.h"
44 #include "stack_alloc.h"
46 #include <speex/speex_bits.h>
49 #include "math_approx.h"
50 #include "os_support.h"
51 #include <speex/speex_callbacks.h>
54 #include "vorbis_psy.h"
/* File-level macros and constant tables used by the narrowband encoder and decoder below. */
58 #define M_PI 3.14159265358979323846 /* pi */
/* Reads member x of the currently selected sub-mode. Expands to an expression on a
   variable named st, so it can only be used where such a variable is in scope. */
65 #define SUBMODE(x) st->submodes[st->submodeID]->x
67 /* Default size for the encoder and decoder stack (can be changed at compile time).
68 This does not apply when using variable-size arrays or alloca. */
70 #define NB_ENC_STACK (8000*sizeof(spx_sig_t))
74 #define NB_DEC_STACK (4000*sizeof(spx_sig_t))
/* NOTE(review): the gain tables and LSP_DELTA macros below appear twice, once with integer
   values and once with float values. Presumably the two sets are alternatives chosen by a
   preprocessor conditional that is not visible here -- confirm before editing either set. */
/* 32 reconstruction levels for the open-loop gain; indexed by the 5-bit index qe in nb_encode and nb_decode. */
79 const spx_word32_t ol_gain_table[32]={18900, 25150, 33468, 44536, 59265, 78865, 104946, 139653, 185838, 247297, 329081, 437913, 582736, 775454, 1031906, 1373169, 1827293, 2431601, 3235761, 4305867, 5729870, 7624808, 10146425, 13501971, 17967238, 23909222, 31816294, 42338330, 56340132, 74972501, 99766822, 132760927};
/* Decision bounds and reconstruction values for the sub-frame gain correction:
   the scal3 pair is used with a 3-bit index, the scal1 pair with a 1-bit index (see nb_encode). */
80 const spx_word16_t exc_gain_quant_scal3_bound[7]={1841, 3883, 6051, 8062, 10444, 13580, 18560};
81 const spx_word16_t exc_gain_quant_scal3[8]={1002, 2680, 5086, 7016, 9108, 11781, 15380, 21740};
82 const spx_word16_t exc_gain_quant_scal1_bound[1]={14385};
83 const spx_word16_t exc_gain_quant_scal1[2]={11546, 17224};
86 #define LSP_DELTA1 6553
87 #define LSP_DELTA2 1638
/* Float-valued counterparts of the tables above (see the review note). */
91 const float exc_gain_quant_scal3_bound[7]={0.112338f, 0.236980f, 0.369316f, 0.492054f, 0.637471f, 0.828874f, 1.132784f};
92 const float exc_gain_quant_scal3[8]={0.061130f, 0.163546f, 0.310413f, 0.428220f, 0.555887f, 0.719055f, 0.938694f, 1.326874f};
93 const float exc_gain_quant_scal1_bound[1]={0.87798f};
94 const float exc_gain_quant_scal1[2]={0.70469f, 1.05127f};
/* LSP_MARGIN is the margin passed to lsp_enforce_margin; LSP_DELTA1 is passed to lpc_to_lsp in nb_encode. */
96 #define LSP_MARGIN .002f
97 #define LSP_DELTA1 .2f
98 #define LSP_DELTA2 .05f
/* NOTE(review): EXTRA_BUFFER is defined twice (100 and 0); presumably the selecting
   conditional is not visible here -- confirm. */
103 #define EXTRA_BUFFER 100
105 #define EXTRA_BUFFER 0
/* Square of x. The argument is expanded twice, so avoid arguments with side effects. */
109 #define sqr(x) ((x)*(x))
/* Window tables defined elsewhere; nb_encoder_init stores them in st->lagWindow and st->window. */
111 extern const spx_word16_t lag_window[];
112 extern const spx_word16_t lpc_window[];
/* Allocates an encoder state (EncState) and initializes it from the SpeexNBMode
   reached through m->mode: frame geometry, LPC order, weighting factors, pitch range,
   sub-mode table, and all per-encoder work buffers.
   NOTE(review): the declaration of st and the return statement are not visible in this
   excerpt; presumably the function returns st -- confirm. */
114 void *nb_encoder_init(const SpeexMode *m)
117 const SpeexNBMode *mode;
120 mode=(const SpeexNBMode *)m->mode;
121 st = (EncState*)speex_alloc(sizeof(EncState));
/* NOTE(review): the lines between this #if and the scratch allocation are not visible;
   presumably the scratch stack is only allocated when neither macro is defined
   (nb_encoder_destroy frees it under the negated condition) -- confirm. */
124 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
127 st->stack = (char*)speex_alloc_scratch(NB_ENC_STACK);
/* Copy the mode parameters into the state. */
132 st->frameSize = mode->frameSize;
133 st->nbSubframes=mode->frameSize/mode->subframeSize;
134 st->subframeSize=mode->subframeSize;
/* The analysis window is one frame plus one sub-frame long. */
135 st->windowSize = st->frameSize+st->subframeSize;
136 st->lpcSize = mode->lpcSize;
137 st->gamma1=mode->gamma1;
138 st->gamma2=mode->gamma2;
139 st->min_pitch=mode->pitchStart;
140 st->max_pitch=mode->pitchEnd;
141 st->lpc_floor = mode->lpc_floor;
143 st->submodes=mode->submodes;
144 st->submodeID=st->submodeSelect=mode->defaultSubmode;
145 st->bounded_pitch = 1;
147 st->encode_submode = 1;
/* State for the Vorbis-style psychoacoustic curve: two 128-float curves and a 256-float window. */
150 st->psy = vorbis_psy_init(8000, 256);
151 st->curve = (float*)speex_alloc(128*sizeof(float));
152 st->old_curve = (float*)speex_alloc(128*sizeof(float));
153 st->psy_window = (float*)speex_alloc(256*sizeof(float));
156 st->cumul_gain = 1024;
158 /* Allocating input buffer */
/* Holds windowSize-frameSize samples, i.e. one sub-frame given the windowSize set above. */
159 st->winBuf = (spx_word16_t*)speex_alloc((st->windowSize-st->frameSize)*sizeof(spx_word16_t));
160 /* Allocating excitation buffer */
/* exc and sw point pitchEnd+2 elements into their buffers, leaving that many samples
   of history addressable at negative indices. */
161 st->excBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
162 st->exc = st->excBuf + mode->pitchEnd + 2;
163 st->swBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
164 st->sw = st->swBuf + mode->pitchEnd + 2;
166 st->window= lpc_window;
168 /* Create the window for autocorrelation (lag-windowing) */
169 st->lagWindow = lag_window;
171 st->old_lsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
172 st->old_qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
/* Seed the previous-frame LSPs with evenly spaced values: pi*(i+1)/(lpcSize+1), with pi scaled by LSP_SHIFT. */
174 for (i=0;i<st->lpcSize;i++)
175 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
/* Filter memories, one entry per LPC coefficient. */
177 st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
178 st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
179 st->mem_sw_whole = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
180 st->mem_exc = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
181 st->mem_exc2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
/* Per-sub-frame outputs: one pi_gain and one pitch value per sub-frame. */
183 st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
184 st->innov_rms_save = NULL;
186 st->pitch = (int*)speex_alloc((st->nbSubframes)*sizeof(int));
189 st->vbr = (VBRState*)speex_alloc(sizeof(VBRState));
200 #endif /* #ifndef DISABLE_VBR */
204 st->sampling_rate=8000;
206 st->highpass_enabled = 1;
/* NOTE(review): the size passed here is NB_ENC_STACK while the address is st (allocated
   with sizeof(EncState)); this looks suspicious -- confirm the intended address/size pair. */
208 #ifdef ENABLE_VALGRIND
209 VALGRIND_MAKE_READABLE(st, NB_ENC_STACK);
/* Releases the buffers that nb_encoder_init allocated for the encoder state passed in
   `state` (cast to EncState). */
214 void nb_encoder_destroy(void *state)
216 EncState *st=(EncState *)state;
217 /* Free all allocated memory */
/* The scratch stack is only freed when neither VAR_ARRAYS nor USE_ALLOCA is defined. */
218 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
219 speex_free_scratch(st->stack);
222 speex_free (st->winBuf);
223 speex_free (st->excBuf);
224 speex_free (st->old_qlsp);
225 speex_free (st->swBuf);
227 speex_free (st->old_lsp);
228 speex_free (st->mem_sp);
229 speex_free (st->mem_sw);
230 speex_free (st->mem_sw_whole);
231 speex_free (st->mem_exc);
232 speex_free (st->mem_exc2);
233 speex_free (st->pi_gain);
234 speex_free (st->pitch);
/* VBR state: vbr_destroy is called first, then the VBRState allocation itself is freed. */
237 vbr_destroy(st->vbr);
238 speex_free (st->vbr);
239 #endif /* #ifndef DISABLE_VBR */
/* Psychoacoustic model state and its three float buffers. */
242 vorbis_psy_destroy(st->psy);
243 speex_free (st->curve);
244 speex_free (st->old_curve);
245 speex_free (st->psy_window);
/* NOTE(review): the statement that frees st itself is not visible in this excerpt. */
248 /*Free state memory... should be last*/
/* Encodes one frame.
   state: encoder state, cast to EncState.
   vin:   input samples, cast to spx_word16_t; frameSize samples are read.
   bits:  destination for the packed parameters (written with speex_bits_pack).
   Visible stages: LPC analysis and LSP conversion, open-loop pitch and gain estimation,
   optional VBR/VAD/DTX mode decision, LSP and gain quantization, then a per-sub-frame
   loop doing pitch search, innovation search and synthesis.
   NOTE(review): several declarations, braces, preprocessor conditionals and the return
   statements are not visible in this excerpt; the return value is not documented here. */
252 int nb_encode(void *state, void *vin, SpeexBits *bits)
257 spx_word16_t ol_pitch_coef;
258 spx_word32_t ol_gain;
259 VARDECL(spx_word16_t *ringing);
260 VARDECL(spx_word16_t *target);
261 VARDECL(spx_sig_t *innov);
262 VARDECL(spx_word32_t *exc32);
263 VARDECL(spx_mem_t *mem);
264 VARDECL(spx_coef_t *bw_lpc1);
265 VARDECL(spx_coef_t *bw_lpc2);
266 VARDECL(spx_coef_t *lpc);
267 VARDECL(spx_lsp_t *lsp);
268 VARDECL(spx_lsp_t *qlsp);
269 VARDECL(spx_lsp_t *interp_lsp);
270 VARDECL(spx_lsp_t *interp_qlsp);
271 VARDECL(spx_coef_t *interp_lpc);
272 VARDECL(spx_coef_t *interp_qlpc);
274 VARDECL(spx_word16_t *syn_resp);
275 VARDECL(spx_word16_t *real_exc);
278 spx_word16_t fine_gain;
279 spx_word16_t *in = (spx_word16_t*)vin;
281 st=(EncState *)state;
/* Per-frame work arrays, each lpcSize long. */
284 ALLOC(lpc, st->lpcSize, spx_coef_t);
285 ALLOC(bw_lpc1, st->lpcSize, spx_coef_t);
286 ALLOC(bw_lpc2, st->lpcSize, spx_coef_t);
287 ALLOC(lsp, st->lpcSize, spx_lsp_t);
288 ALLOC(qlsp, st->lpcSize, spx_lsp_t);
289 ALLOC(interp_lsp, st->lpcSize, spx_lsp_t);
290 ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
291 ALLOC(interp_lpc, st->lpcSize, spx_coef_t);
292 ALLOC(interp_qlpc, st->lpcSize, spx_coef_t);
294 /* Move signals 1 frame towards the past */
295 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, st->max_pitch+2);
296 SPEEX_MOVE(st->swBuf, st->swBuf+st->frameSize, st->max_pitch+2);
/* The high-pass filter is given `in` as both input and output, so the caller's
   sample buffer is presumably modified in place. */
298 if (st->highpass_enabled)
299 highpass(in, in, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_INPUT, st->mem_hp);
302 VARDECL(spx_word16_t *w_sig);
303 VARDECL(spx_word16_t *autocorr);
304 ALLOC(w_sig, st->windowSize, spx_word16_t);
305 ALLOC(autocorr, st->lpcSize+1, spx_word16_t);
306 /* Window for analysis */
/* The first windowSize-frameSize windowed samples come from winBuf (saved from the
   previous call); the remainder come from the start of `in`. */
307 for (i=0;i<st->windowSize-st->frameSize;i++)
308 w_sig[i] = EXTRACT16(SHR32(MULT16_16(st->winBuf[i],st->window[i]),SIG_SHIFT));
309 for (;i<st->windowSize;i++)
310 w_sig[i] = EXTRACT16(SHR32(MULT16_16(in[i-st->windowSize+st->frameSize],st->window[i]),SIG_SHIFT));
311 /* Compute auto-correlation */
312 _spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize);
313 autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */
315 /* Lag windowing: equivalent to filtering in the power-spectrum domain */
316 for (i=0;i<st->lpcSize+1;i++)
317 autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]);
319 /* Levinson-Durbin */
320 _spx_lpc(lpc, autocorr, st->lpcSize);
321 /* LPC to LSPs (x-domain) transform */
322 roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack);
323 /* Check if we found all the roots */
324 if (roots!=st->lpcSize)
326 /*If we can't find all LSP's, do some damage control and use previous filter*/
327 for (i=0;i<st->lpcSize;i++)
329 lsp[i]=st->old_lsp[i];
337 /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
/* diff is the number of samples held over in winBuf from the previous call. */
339 int diff = st->windowSize-st->frameSize;
/* NOTE(review): a direct copy and an interpolation both write interp_lsp here;
   presumably they are alternatives chosen by a condition that is not visible -- confirm. */
341 for (i=0;i<st->lpcSize;i++)
342 interp_lsp[i] = lsp[i];
344 lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1);
346 lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
348 /* Compute interpolated LPCs (unquantized) for whole frame*/
349 lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
/* Open-loop pitch analysis is gated on this condition: null sub-mode, complexity>2 with
   have_subframe_gain<3, forced pitch gain, low-bit-rate pitch, or VBR/VAD enabled. */
353 if (!st->submodes[st->submodeID] || (st->complexity>2 && SUBMODE(have_subframe_gain)<3) || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1
355 || st->vbr_enabled || st->vad_enabled
360 spx_word16_t nol_pitch_coef[6];
362 bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
363 bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
/* Assemble one frame of signal in st->sw (held-over samples followed by new input),
   then filter it in place with the two bandwidth-expanded LPC sets. */
365 SPEEX_COPY(st->sw, st->winBuf, diff);
366 SPEEX_COPY(st->sw+diff, in, st->frameSize-diff);
367 filter_mem16(st->sw, bw_lpc1, bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole, stack);
/* Request 6 pitch candidates; the first one is taken as the initial estimate. */
369 open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize,
370 nol_pitch, nol_pitch_coef, 6, stack);
371 ol_pitch=nol_pitch[0];
372 ol_pitch_coef = nol_pitch_coef[0];
373 /*Try to remove pitch multiples*/
/* NOTE(review): two forms of the same threshold test follow (27853 in the first, .85 in
   the second); presumably fixed-point and float alternatives -- confirm. A candidate
   replaces ol_pitch when 2x..5x its lag is within a small tolerance of ol_pitch. */
377 if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) &&
379 if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) &&
381 (ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 ||
382 ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5))
384 /*ol_pitch_coef=nol_pitch_coef[i];*/
385 ol_pitch = nol_pitch[i];
390 /*ol_pitch_coef = sqrt(ol_pitch_coef);*/
397 /*Compute "real" excitation*/
398 SPEEX_COPY(st->exc, st->winBuf, diff);
399 SPEEX_COPY(st->exc+diff, in, st->frameSize-diff);
400 fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack);
402 /* Compute open-loop excitation gain */
/* g is the RMS of the frame excitation; the first assignment below scales it by a factor
   depending on ol_pitch_coef squared, the second uses g shifted by SIG_SHIFT. */
404 spx_word16_t g = compute_rms16(st->exc, st->frameSize);
405 if (st->submodeID!=1 && ol_pitch>0)
406 ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14),
407 spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16)))));
409 ol_gain = SHL32(EXTEND32(g),SIG_SHIFT);
/* Slide the 256-sample psychoacoustic window by one frame, append the new input and
   recompute the curve. */
414 SPEEX_MOVE(st->psy_window, st->psy_window+st->frameSize, 256-st->frameSize);
415 SPEEX_COPY(&st->psy_window[256-st->frameSize], in, st->frameSize);
416 compute_curve(st->psy, st->psy_window, st->curve);
417 /*print_vec(st->curve, 128, "curve");*/
419 SPEEX_COPY(st->old_curve, st->curve, 128);
/* VBR / VAD / DTX mode decision; only entered when a VBR state exists and VBR or VAD is on. */
424 if (st->vbr && (st->vbr_enabled||st->vad_enabled))
/* lsp_dist accumulates the squared difference between previous and current LSPs. */
427 for (i=0;i<st->lpcSize;i++)
428 lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]);
429 lsp_dist /= LSP_SCALING*LSP_SCALING;
434 if (st->abr_drift2 * st->abr_drift > 0)
436 /* Only adapt if long-term and short-term drift are the same sign */
437 qual_change = -.00001*st->abr_drift/(1+st->abr_count);
440 if (qual_change<-.05)
443 st->vbr_quality += qual_change;
444 if (st->vbr_quality>10)
446 if (st->vbr_quality<0)
450 st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, GAIN_SCALING_1*ol_pitch_coef);
451 /*if (delta_qual<0)*/
452 /* delta_qual*=.1*(3+st->vbr_quality);*/
/* Threshold lookup in vbr_nb_thresh: either the entry at floor(vbr_quality) or a linear
   interpolation between that entry and the next one. */
463 v1=(int)floor(st->vbr_quality);
465 thresh = vbr_nb_thresh[mode][v1];
467 thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
468 if (st->relative_quality > thresh &&
469 st->relative_quality-thresh<min_diff)
472 min_diff = st->relative_quality-thresh;
479 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
491 speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
495 speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate);
496 if (rate > st->vbr_max)
499 speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate);
/* Update the drift accumulators from the difference between the current bitrate and
   st->abr_enabled (which is used here as a numeric target, not just a flag). */
506 speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate);
507 st->abr_drift+=(bitrate-st->abr_enabled);
508 st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled);
509 st->abr_count += 1.0;
515 if (st->relative_quality<2)
517 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20)
527 mode=st->submodeSelect;
529 /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
533 st->relative_quality = -1;
535 #endif /* #ifndef DISABLE_VBR */
537 if (st->encode_submode)
539 /* First, transmit a zero for narrowband */
540 speex_bits_pack(bits, 0, 1);
542 /* Transmit the sub-mode we use for this frame */
543 speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);
547 /* If null mode (no transmission), just set a couple things to zero*/
548 if (st->submodes[st->submodeID] == NULL)
550 for (i=0;i<st->frameSize;i++)
551 st->exc[i]=st->sw[i]=VERY_SMALL;
553 for (i=0;i<st->lpcSize;i++)
556 st->bounded_pitch = 1;
/* Keep the last windowSize-frameSize input samples for the next call's analysis window. */
558 SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);
560 /* Clear memory (no need to really compute it) */
561 for (i=0;i<st->lpcSize;i++)
567 /* LSP Quantization */
570 for (i=0;i<st->lpcSize;i++)
571 st->old_lsp[i] = lsp[i];
576 #if 1 /*0 for unquantized*/
/* The sub-mode's LSP quantizer fills qlsp and writes its indices to bits. */
577 SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits);
579 for (i=0;i<st->lpcSize;i++)
583 /*If we use low bit-rate pitch mode, transmit open-loop pitch*/
584 if (SUBMODE(lbr_pitch)!=-1)
/* Sent as an offset from min_pitch in 7 bits. */
586 speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
589 if (SUBMODE(forced_pitch_gain))
592 /* This just damps the pitch a bit, because it tends to be too aggressive when forced */
593 ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef);
/* NOTE(review): two computations of quant follow; presumably fixed-point and float
   alternatives -- confirm. The index is sent in 4 bits and ol_pitch_coef is then
   replaced by its reconstructed value. */
595 quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT);
597 quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);
603 speex_bits_pack(bits, quant, 4);
604 ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
608 /*Quantize and transmit open-loop excitation gain*/
/* NOTE(review): a table-based and a log/exp-based quantizer both appear below; presumably
   fixed-point and float alternatives -- confirm. Either way qe is sent in 5 bits and
   ol_gain is replaced by its reconstructed value. */
611 int qe = scal_quant32(ol_gain, ol_gain_table, 32);
612 /*ol_gain = exp(qe/3.5)*SIG_SCALING;*/
613 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
614 speex_bits_pack(bits, qe, 5);
618 int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING)));
623 ol_gain = exp(qe/3.5)*SIG_SCALING;
624 speex_bits_pack(bits, qe, 5);
630 /* Special case for first frame */
633 for (i=0;i<st->lpcSize;i++)
634 st->old_qlsp[i] = qlsp[i];
/* Per-sub-frame work arrays. */
638 ALLOC(target, st->subframeSize, spx_word16_t);
639 ALLOC(innov, st->subframeSize, spx_sig_t);
640 ALLOC(exc32, st->subframeSize, spx_word32_t);
641 ALLOC(ringing, st->subframeSize, spx_word16_t);
642 ALLOC(syn_resp, st->subframeSize, spx_word16_t);
643 ALLOC(real_exc, st->subframeSize, spx_word16_t);
644 ALLOC(mem, st->lpcSize, spx_mem_t);
646 /* Loop on sub-frames */
647 for (sub=0;sub<st->nbSubframes;sub++)
653 int response_bound = st->subframeSize;
655 /* Offset relative to start of frame */
656 offset = st->subframeSize*sub;
659 /* Weighted signal */
662 /* LSP interpolation (quantized and unquantized) */
663 lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes);
664 lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
666 /* Make sure the filters are stable */
667 lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
668 lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
670 /* Compute interpolated LPCs (quantized and unquantized) */
671 lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
673 lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack);
675 /* Compute analysis filter gain at w=pi (for use in SB-CELP) */
677 spx_word32_t pi_g=LPC_SCALING;
678 for (i=0;i<st->lpcSize;i+=2)
680 /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/
681 pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i])));
683 st->pi_gain[sub] = pi_g;
/* Psychoacoustic path: blend the old and new curves with weight (sub+1)/nbSubframes and
   derive bw_lpc1/bw_lpc2 from the blended curve. */
688 float curr_curve[128];
689 float fact = ((float)sub+1.0f)/st->nbSubframes;
691 curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i];
692 curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10);
695 /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
696 bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
698 bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
701 for (i=0;i<st->lpcSize;i++)
704 /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/
707 /*FIXME: This will break if we change the window size */
708 speex_assert(st->windowSize-st->frameSize == st->subframeSize);
/* The sub-frame's samples come either from winBuf or from `in` at (sub-1)*subframeSize.
   NOTE(review): the test choosing between the two loops is not visible; presumably the
   first sub-frame uses winBuf -- confirm. */
711 for (i=0;i<st->subframeSize;i++)
712 real_exc[i] = sw[i] = st->winBuf[i];
714 for (i=0;i<st->subframeSize;i++)
715 real_exc[i] = sw[i] = in[i+((sub-1)*st->subframeSize)];
717 fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack);
/* At complexity 0 only the first half of the impulse response is computed; the rest is
   filled with VERY_SMALL. */
719 if (st->complexity==0)
720 response_bound >>= 1;
721 compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack);
722 for (i=response_bound;i<st->subframeSize;i++)
723 syn_resp[i]=VERY_SMALL;
725 /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
726 for (i=0;i<st->lpcSize;i++)
727 mem[i]=SHL32(st->mem_sp[i],1);
728 for (i=0;i<st->subframeSize;i++)
729 ringing[i] = VERY_SMALL;
/* NOTE(review): two versions of the ringing computation follow, one bounded by
   response_bound and one over the full sub-frame; presumably alternatives under a
   preprocessor conditional that is not visible. The bounded version reads
   st->bw_lpc1 / st->bw_lpc2 whereas the rest of this function uses the local
   bw_lpc1 / bw_lpc2 arrays -- confirm those members exist or that the path is unused. */
731 iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack);
732 for (i=0;i<st->lpcSize;i++)
733 mem[i]=SHL32(st->mem_sw[i],1);
734 filter_mem16(ringing, st->bw_lpc1, st->bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack);
735 SPEEX_MEMSET(&ringing[response_bound], 0, st->subframeSize-response_bound);
737 iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack);
738 for (i=0;i<st->lpcSize;i++)
739 mem[i]=SHL32(st->mem_sw[i],1);
740 filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack);
743 /* Compute weighted signal */
/* Filtering runs on a copy of mem_sw; the copy is written back only at complexity 0. */
744 for (i=0;i<st->lpcSize;i++)
745 mem[i]=st->mem_sw[i];
746 filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack);
748 if (st->complexity==0)
749 for (i=0;i<st->lpcSize;i++)
750 st->mem_sw[i]=mem[i];
752 /* Compute target signal (saturation prevents overflows on clipped input speech) */
753 for (i=0;i<st->subframeSize;i++)
754 target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767));
756 /* Reset excitation */
757 SPEEX_MEMSET(exc, 0, st->subframeSize);
759 /* If we have a long-term predictor (otherwise, something's wrong) */
760 speex_assert (SUBMODE(ltp_quant));
762 int pit_min, pit_max;
763 /* Long-term prediction */
764 if (SUBMODE(lbr_pitch) != -1)
766 /* Low bit-rate pitch handling */
/* With a margin, ol_pitch is clamped so that [ol_pitch-margin+1, ol_pitch+margin] stays
   inside [min_pitch, max_pitch]; the search range is then that interval, a single lag,
   or the full pitch range (the selecting conditions are only partly visible). */
768 margin = SUBMODE(lbr_pitch);
771 if (ol_pitch < st->min_pitch+margin-1)
772 ol_pitch=st->min_pitch+margin-1;
773 if (ol_pitch > st->max_pitch-margin)
774 ol_pitch=st->max_pitch-margin;
775 pit_min = ol_pitch-margin+1;
776 pit_max = ol_pitch+margin;
778 pit_min=pit_max=ol_pitch;
781 pit_min = st->min_pitch;
782 pit_max = st->max_pitch;
785 /* Force pitch to use only the current frame if needed */
786 if (st->bounded_pitch && pit_max>offset)
789 /* Perform pitch search */
790 pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
791 exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
792 st->lpcSize, st->subframeSize, bits, stack,
793 exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain);
795 st->pitch[sub]=pitch;
797 /* Quantization of innovation */
798 SPEEX_MEMSET(innov, 0, st->subframeSize);
800 /* FIXME: Make sure this is save from overflows (so far so good) */
801 for (i=0;i<st->subframeSize;i++)
802 real_exc[i] = EXTRACT16(SUB32(EXTEND32(real_exc[i]), PSHR32(exc32[i],SIG_SHIFT-1)));
/* ener is the RMS of what remains of the excitation after the subtraction above. */
804 ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT);
806 /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
809 spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
816 fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
818 /* Calculate gain correction for the sub-frame (if any) */
819 if (SUBMODE(have_subframe_gain))
/* have_subframe_gain==3 sends a 3-bit index into the scal3 table; the other visible
   branch sends a 1-bit index into the scal1 table. ener becomes the table value times ol_gain. */
822 if (SUBMODE(have_subframe_gain)==3)
824 qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8);
825 speex_bits_pack(bits, qe, 3);
826 ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain);
828 qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2);
829 speex_bits_pack(bits, qe, 1);
830 ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain);
836 /*printf ("%f %f\n", ener, ol_gain);*/
838 /* Normalize innovation */
839 signal_div(target, target, ener, st->subframeSize);
841 /* Quantize innovation */
842 speex_assert (SUBMODE(innovation_quant));
844 /* Codebook search */
845 SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
846 SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
847 innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));
849 /* De-normalize innovation and update excitation */
850 signal_mul(innov, innov, ener, st->subframeSize);
852 for (i=0;i<st->subframeSize;i++)
853 exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
855 /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
856 if (SUBMODE(double_codebook)) {
857 char *tmp_stack=stack;
858 VARDECL(spx_sig_t *innov2);
859 ALLOC(innov2, st->subframeSize, spx_sig_t);
860 SPEEX_MEMSET(innov2, 0, st->subframeSize);
/* The target is scaled by 2.2 before the second search and the result by 0.454545*ener
   afterwards, then added to the first innovation. */
861 for (i=0;i<st->subframeSize;i++)
862 target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]);
863 SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
864 SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
865 innov2, syn_resp, bits, stack, st->complexity, 0);
866 signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
867 for (i=0;i<st->subframeSize;i++)
868 innov[i] = ADD32(innov[i],innov2[i]);
871 for (i=0;i<st->subframeSize;i++)
872 exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
/* innov_rms_save is optional (nb_encoder_init sets it to NULL); when set, the
   innovation RMS of each sub-frame is stored in it. */
873 if (st->innov_rms_save)
875 st->innov_rms_save[sub] = compute_rms(innov, st->subframeSize);
879 /* Final signal synthesis from excitation */
880 iir_mem16(exc, interp_qlpc, sw, st->subframeSize, st->lpcSize, st->mem_sp, stack);
882 /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
883 if (st->complexity!=0)
884 filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack);
888 /* Store the LSPs for interpolation in the next frame */
889 if (st->submodeID>=1)
891 for (i=0;i<st->lpcSize;i++)
892 st->old_lsp[i] = lsp[i];
893 for (i=0;i<st->lpcSize;i++)
894 st->old_qlsp[i] = qlsp[i];
898 if (st->submodeID>=1)
899 SPEEX_COPY(st->old_curve, st->curve, 128);
/* Sub-mode 1 appends a 4-bit field that is either 15 or 0.
   NOTE(review): the condition choosing between the two values is not visible here. */
902 if (st->submodeID==1)
906 speex_bits_pack(bits, 15, 4);
909 speex_bits_pack(bits, 0, 4);
912 /* The next frame will not be the first (Duh!) */
/* Keep the last windowSize-frameSize input samples for the next call's analysis window. */
914 SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize);
/* bounded_pitch is set when the innovation quantizer is noise_codebook_quant or the
   sub-mode is 0; the clearing assignment presumably sits in an else branch that is not visible. */
916 if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
917 st->bounded_pitch = 1;
919 st->bounded_pitch = 0;
/* Allocates a decoder state (DecState) and initializes it from the SpeexNBMode reached
   through m->mode: frame geometry, LPC order, pitch range, sub-mode table, buffers and
   callback defaults.
   NOTE(review): the declaration of st and the return statement are not visible in this
   excerpt; presumably the function returns st -- confirm. */
924 void *nb_decoder_init(const SpeexMode *m)
927 const SpeexNBMode *mode;
930 mode=(const SpeexNBMode*)m->mode;
931 st = (DecState *)speex_alloc(sizeof(DecState));
/* NOTE(review): the lines between this #if and the scratch allocation are not visible;
   presumably the scratch stack is only allocated when neither macro is defined
   (nb_decoder_destroy frees it under the negated condition) -- confirm. */
934 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA)
937 st->stack = (char*)speex_alloc_scratch(NB_DEC_STACK);
943 st->encode_submode = 1;
946 /* Codec parameters, should eventually have several "modes"*/
947 st->frameSize = mode->frameSize;
948 st->nbSubframes=mode->frameSize/mode->subframeSize;
949 st->subframeSize=mode->subframeSize;
950 st->lpcSize = mode->lpcSize;
951 st->min_pitch=mode->pitchStart;
952 st->max_pitch=mode->pitchEnd;
954 st->submodes=mode->submodes;
955 st->submodeID=mode->defaultSubmode;
957 st->lpc_enh_enabled=1;
/* Excitation buffer: exc points 2*max_pitch+subframeSize+6 elements into excBuf, so that
   much history is addressable at negative indices.
   NOTE(review): the SPEEX_MEMSET count (frameSize+max_pitch) is smaller than the
   allocated element count; whether speex_alloc already zero-fills is not visible here -- confirm. */
959 st->excBuf = (spx_word16_t*)speex_alloc((st->frameSize + 2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t));
960 st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6;
961 SPEEX_MEMSET(st->excBuf, 0, st->frameSize + st->max_pitch);
/* Per-coefficient arrays (lpcSize entries) and the per-sub-frame pi_gain array. */
963 st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t));
964 st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t));
965 st->mem_sp = (spx_mem_t*)speex_alloc(st->lpcSize*sizeof(spx_mem_t));
966 st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
/* Three-entry history of pitch gains, used by nb_decode_lost via median3. */
969 st->pitch_gain_buf[0] = st->pitch_gain_buf[1] = st->pitch_gain_buf[2] = 0;
970 st->pitch_gain_buf_idx = 0;
973 st->sampling_rate=8000;
974 st->last_ol_gain = 0;
/* Default user callback with no data; the Speex in-band callback slots are cleared. */
976 st->user_callback.func = &speex_default_user_handler;
977 st->user_callback.data = NULL;
979 st->speex_callbacks[i].func = NULL;
981 st->voc_m1=st->voc_m2=st->voc_mean=0;
985 st->highpass_enabled = 1;
/* NOTE(review): the size passed here is NB_DEC_STACK while the address is st (allocated
   with sizeof(DecState)); this looks suspicious -- confirm the intended address/size pair. */
987 #ifdef ENABLE_VALGRIND
988 VALGRIND_MAKE_READABLE(st, NB_DEC_STACK);
/* Releases the buffers that nb_decoder_init allocated for the decoder state.
   NOTE(review): the declaration of st and the statement freeing st itself are not
   visible in this excerpt. */
993 void nb_decoder_destroy(void *state)
/* The scratch stack is only freed when neither VAR_ARRAYS nor USE_ALLOCA is defined. */
998 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA))
999 speex_free_scratch(st->stack);
1002 speex_free (st->excBuf);
1003 speex_free (st->interp_qlpc);
1004 speex_free (st->old_qlsp);
1005 speex_free (st->mem_sp);
1006 speex_free (st->pi_gain);
/* Evaluates to the middle value of a, b and c. Arguments are expanded several times,
   so avoid arguments with side effects. */
1011 #define median3(a, b, c) ((a) < (b) ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
/* Decreasing gain factors indexed by st->count_lost in nb_decode_lost (used while it is below 10).
   NOTE(review): the table is defined twice, with integer and with fractional values;
   presumably alternatives chosen by a preprocessor conditional that is not visible -- confirm. */
1014 const spx_word16_t attenuation[10] = {32767, 31483, 27923, 22861, 17278, 12055, 7764, 4616, 2533, 1283};
1016 const spx_word16_t attenuation[10] = {1., 0.961, 0.852, 0.698, 0.527, 0.368, 0.237, 0.141, 0.077, 0.039};
/* Synthesizes one frame into `out` without reading any bits (called from nb_decode for
   the lost-packet case). The excitation is rebuilt from the decoder's own history: the
   previous excitation repeated at a lag near last_pitch, scaled by an attenuated pitch
   gain, plus random noise.
   st:    decoder state (history buffers, last pitch and gain, random seed).
   out:   receives frameSize output samples.
   stack: scratch stack handed to the filter routine. */
1020 static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack)
1024 spx_word16_t pitch_gain;
1026 spx_word16_t gain_med;
1027 spx_word16_t innov_gain;
1028 spx_word16_t noise_gain;
/* Attenuation factor from the table while fewer than 10 frames have been lost; the
   assignment used otherwise is not visible in this excerpt. */
1030 if (st->count_lost<10)
1031 fact = attenuation[st->count_lost];
/* Cap last_pitch_gain at the median of the three most recent pitch gains. */
1035 gain_med = median3(st->pitch_gain_buf[0], st->pitch_gain_buf[1], st->pitch_gain_buf[2]);
1036 if (gain_med < st->last_pitch_gain)
1037 st->last_pitch_gain = gain_med;
1040 pitch_gain = st->last_pitch_gain;
/* NOTE(review): a shift-based and a GAIN_SCALING_1-based rescaling both appear below;
   presumably fixed-point and float alternatives, with surrounding lines not visible -- confirm. */
1043 pitch_gain = SHL16(pitch_gain, 9);
1045 pitch_gain = GAIN_SCALING_1*st->last_pitch_gain;
1049 pitch_gain = MULT16_16_Q15(fact,pitch_gain) + VERY_SMALL;
1050 /* FIXME: This was rms of innovation (not exc) */
1051 innov_gain = compute_rms16(st->exc, st->frameSize);
/* Noise level: excitation RMS scaled by fact and by (1 - pitch_gain squared). */
1052 noise_gain = MULT16_16_Q15(innov_gain, MULT16_16_Q15(fact, SUB16(Q15ONE,MULT16_16_Q15(pitch_gain,pitch_gain))));
1053 /* Shift all buffers by one frame */
1054 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);
/* Perturb the last pitch with a random offset whose range grows with count_lost, then
   clamp it to [min_pitch, max_pitch]. */
1057 pitch_val = st->last_pitch + SHR32((spx_int32_t)speex_rand(1+st->count_lost, &st->seed),SIG_SHIFT);
1058 if (pitch_val > st->max_pitch)
1059 pitch_val = st->max_pitch;
1060 if (pitch_val < st->min_pitch)
1061 pitch_val = st->min_pitch;
/* New excitation = pitch_gain times the excitation pitch_val samples earlier, plus noise. */
1062 for (i=0;i<st->frameSize;i++)
1064 st->exc[i]= MULT16_16_Q15(pitch_gain, (st->exc[i-pitch_val]+VERY_SMALL)) +
1065 speex_rand(noise_gain, &st->seed);
/* interp_qlpc is bandwidth-expanded in place with factor .98, so the stored coefficients
   change on every lost frame. Synthesis starts subframeSize samples before exc[0]. */
1068 bw_lpc(QCONST16(.98,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize);
1069 iir_mem16(&st->exc[-st->subframeSize], st->interp_qlpc, out, st->frameSize,
1070 st->lpcSize, st->mem_sp, stack);
1071 highpass(out, out, st->frameSize, HIGHPASS_NARROWBAND|HIGHPASS_OUTPUT, st->mem_hp);
/* Record the gain used in the three-entry circular history. */
1075 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR16(pitch_gain,9);
1076 if (st->pitch_gain_buf_idx > 2) /* rollover */
1077 st->pitch_gain_buf_idx = 0;
1080 /* Just so we don't need to carry the complete wideband mode information */
/* Indexed by the wideband sub-mode ID that nb_decode reads from the stream. nb_decode
   subtracts SB_SUBMODE_BITS+1 from the entry and advances the bit reader by the result
   to skip the wideband block. */
1081 static const int wb_skip_table[8] = {0, 36, 112, 192, 352, 0, 0, 0};
1083 int nb_decode(void *state, SpeexBits *bits, void *vout)
1088 spx_word16_t pitch_gain[3];
1089 spx_word32_t ol_gain=0;
1091 spx_word16_t ol_pitch_coef=0;
1093 spx_word16_t best_pitch_gain=0;
1097 VARDECL(spx_sig_t *innov);
1098 VARDECL(spx_word32_t *exc32);
1099 VARDECL(spx_coef_t *ak);
1100 VARDECL(spx_lsp_t *qlsp);
1101 spx_word16_t pitch_average=0;
1103 spx_word16_t *out = (spx_word16_t*)vout;
1104 VARDECL(spx_lsp_t *interp_qlsp);
1106 st=(DecState*)state;
1109 /* Check if we're in DTX mode*/
1110 if (!bits && st->dtx_enabled)
1115 /* If bits is NULL, consider the packet to be lost (what could we do anyway) */
1118 nb_decode_lost(st, out, stack);
1122 if (st->encode_submode)
1125 /* Search for next narrowband block (handle requests, skip wideband blocks) */
1127 if (speex_bits_remaining(bits)<5)
1129 wideband = speex_bits_unpack_unsigned(bits, 1);
1130 if (wideband) /* Skip wideband block (for compatibility) */
1134 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1135 /*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/
1136 advance = wb_skip_table[submode];
1139 speex_notify("Invalid mode encountered. The stream is corrupted.");
1142 advance -= (SB_SUBMODE_BITS+1);
1143 speex_bits_advance(bits, advance);
1145 if (speex_bits_remaining(bits)<5)
1147 wideband = speex_bits_unpack_unsigned(bits, 1);
1150 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS);
1151 /*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/
1152 advance = wb_skip_table[submode];
1155 speex_notify("Invalid mode encountered. The stream is corrupted.");
1158 advance -= (SB_SUBMODE_BITS+1);
1159 speex_bits_advance(bits, advance);
1160 wideband = speex_bits_unpack_unsigned(bits, 1);
1163 speex_notify("More than two wideband layers found. The stream is corrupted.");
1169 if (speex_bits_remaining(bits)<4)
1171 /* FIXME: Check for overflow */
1172 m = speex_bits_unpack_unsigned(bits, 4);
1173 if (m==15) /* We found a terminator */
1176 } else if (m==14) /* Speex in-band request */
1178 int ret = speex_inband_handler(bits, st->speex_callbacks, state);
1181 } else if (m==13) /* User in-band request */
1183 int ret = st->user_callback.func(bits, state, st->user_callback.data);
1186 } else if (m>8) /* Invalid mode */
1188 speex_notify("Invalid mode encountered. The stream is corrupted.");
1194 /* Get the sub-mode that was used */
1200 /* Shift all buffers by one frame */
1201 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12);
1203 /* If null mode (no transmission), just set a couple things to zero*/
1204 if (st->submodes[st->submodeID] == NULL)
1206 VARDECL(spx_coef_t *lpc);
1207 ALLOC(lpc, st->lpcSize, spx_coef_t);
1208 bw_lpc(QCONST16(0.93f,15), st->interp_qlpc, lpc, st->lpcSize);
1210 spx_word16_t innov_gain=0;
1211 /* FIXME: This was innov, not exc */
1212 innov_gain = compute_rms16(st->exc, st->frameSize);
1213 for (i=0;i<st->frameSize;i++)
1214 st->exc[i]=speex_rand(innov_gain, &st->seed);
1220 /* Final signal synthesis from excitation */
1221 iir_mem16(st->exc, lpc, out, st->frameSize, st->lpcSize, st->mem_sp, stack);
1227 ALLOC(qlsp, st->lpcSize, spx_lsp_t);
1229 /* Unquantize LSPs */
1230 SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits);
1232 /*Damp memory if a frame was lost and the LSP changed too much*/
1236 spx_word32_t lsp_dist=0;
1237 for (i=0;i<st->lpcSize;i++)
1238 lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - qlsp[i])));
1240 fact = SHR16(19661,SHR32(lsp_dist,LSP_SHIFT+2));
1242 fact = .6*exp(-.2*lsp_dist);
1244 for (i=0;i<st->lpcSize;i++)
1245 st->mem_sp[i] = MULT16_32_Q15(fact,st->mem_sp[i]);
1249 /* Handle first frame and lost-packet case */
1250 if (st->first || st->count_lost)
1252 for (i=0;i<st->lpcSize;i++)
1253 st->old_qlsp[i] = qlsp[i];
1256 /* Get open-loop pitch estimation for low bit-rate pitch coding */
1257 if (SUBMODE(lbr_pitch)!=-1)
1259 ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
1262 if (SUBMODE(forced_pitch_gain))
1265 quant = speex_bits_unpack_unsigned(bits, 4);
1266 ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT));
1269 /* Get global excitation gain */
1272 qe = speex_bits_unpack_unsigned(bits, 5);
1274 /* FIXME: Perhaps we could slightly lower the gain here when the output is going to saturate? */
1275 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]);
1277 ol_gain = SIG_SCALING*exp(qe/3.5);
1281 ALLOC(ak, st->lpcSize, spx_coef_t);
1282 ALLOC(innov, st->subframeSize, spx_sig_t);
1283 ALLOC(exc32, st->subframeSize, spx_word32_t);
1285 if (st->submodeID==1)
1288 extra = speex_bits_unpack_unsigned(bits, 4);
1295 if (st->submodeID>1)
1298 /*Loop on subframes */
1299 for (sub=0;sub<st->nbSubframes;sub++)
1304 spx_word16_t *innov_save = NULL;
1307 /* Offset relative to start of frame */
1308 offset = st->subframeSize*sub;
1311 /* Original signal */
1314 innov_save = st->innov_save+offset;
1317 /* Reset excitation */
1318 SPEEX_MEMSET(exc, 0, st->subframeSize);
1320 /*Adaptive codebook contribution*/
1321 speex_assert (SUBMODE(ltp_unquant));
1323 int pit_min, pit_max;
1324 /* Handle pitch constraints if any */
1325 if (SUBMODE(lbr_pitch) != -1)
1328 margin = SUBMODE(lbr_pitch);
1331 /* GT - need optimization?
1332 if (ol_pitch < st->min_pitch+margin-1)
1333 ol_pitch=st->min_pitch+margin-1;
1334 if (ol_pitch > st->max_pitch-margin)
1335 ol_pitch=st->max_pitch-margin;
1336 pit_min = ol_pitch-margin+1;
1337 pit_max = ol_pitch+margin;
1339 pit_min = ol_pitch-margin+1;
1340 if (pit_min < st->min_pitch)
1341 pit_min = st->min_pitch;
1342 pit_max = ol_pitch+margin;
1343 if (pit_max > st->max_pitch)
1344 pit_max = st->max_pitch;
1346 pit_min = pit_max = ol_pitch;
1349 pit_min = st->min_pitch;
1350 pit_max = st->max_pitch;
1355 SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
1356 st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
1357 st->count_lost, offset, st->last_pitch_gain, 0);
1359 /* Ensuring that things aren't blowing up as would happen if e.g. an encoder is
1360 crafting packets to make us produce NaNs and slow down the decoder (vague DoS threat).
1361 We can probably be even more aggressive and limit to 15000 or so. */
1362 sanitize_values32(exc32, NEG32(QCONST32(32000,SIG_SHIFT-1)), QCONST32(32000,SIG_SHIFT-1), st->subframeSize);
1364 tmp = gain_3tap_to_1tap(pitch_gain);
1366 pitch_average += tmp;
1367 if ((tmp>best_pitch_gain&&ABS(2*best_pitch-pitch)>=3&&ABS(3*best_pitch-pitch)>=4&&ABS(4*best_pitch-pitch)>=5)
1368 || (tmp>MULT16_16_Q15(QCONST16(.6,15),best_pitch_gain)&&(ABS(best_pitch-2*pitch)<3||ABS(best_pitch-3*pitch)<4||ABS(best_pitch-4*pitch)<5))
1369 || (MULT16_16_Q15(QCONST16(.67,15),tmp)>best_pitch_gain&&(ABS(2*best_pitch-pitch)<3||ABS(3*best_pitch-pitch)<4||ABS(4*best_pitch-pitch)<5)) )
1372 if (tmp > best_pitch_gain)
1373 best_pitch_gain = tmp;
1377 /* Unquantize the innovation */
1382 SPEEX_MEMSET(innov, 0, st->subframeSize);
1384 /* Decode sub-frame gain correction */
1385 if (SUBMODE(have_subframe_gain)==3)
1387 q_energy = speex_bits_unpack_unsigned(bits, 3);
1388 ener = MULT16_32_Q14(exc_gain_quant_scal3[q_energy],ol_gain);
1389 } else if (SUBMODE(have_subframe_gain)==1)
1391 q_energy = speex_bits_unpack_unsigned(bits, 1);
1392 ener = MULT16_32_Q14(exc_gain_quant_scal1[q_energy],ol_gain);
1397 speex_assert (SUBMODE(innovation_unquant));
1399 /*Fixed codebook contribution*/
1400 SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
1401 /* De-normalize innovation and update excitation */
1403 signal_mul(innov, innov, ener, st->subframeSize);
1405 /* Decode second codebook (only for some modes) */
1406 if (SUBMODE(double_codebook))
1408 char *tmp_stack=stack;
1409 VARDECL(spx_sig_t *innov2);
1410 ALLOC(innov2, st->subframeSize, spx_sig_t);
1411 SPEEX_MEMSET(innov2, 0, st->subframeSize);
1412 SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed);
1413 signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize);
1414 for (i=0;i<st->subframeSize;i++)
1415 innov[i] = ADD32(innov[i], innov2[i]);
1418 for (i=0;i<st->subframeSize;i++)
1419 exc[i]=EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767));
1420 /*print_vec(exc, 40, "innov");*/
1423 for (i=0;i<st->subframeSize;i++)
1424 innov_save[i] = EXTRACT16(PSHR32(innov[i], SIG_SHIFT));
1429 if (st->submodeID==1)
1431 spx_word16_t g=ol_pitch_coef;
1432 g=MULT16_16_P14(QCONST16(1.5f,14),(g-QCONST16(.2f,6)));
1438 SPEEX_MEMSET(exc, 0, st->subframeSize);
1439 while (st->voc_offset<st->subframeSize)
1441 /* exc[st->voc_offset]= g*sqrt(2*ol_pitch)*ol_gain;
1442 Not quite sure why we need the factor of two in the sqrt */
1443 if (st->voc_offset>=0)
1444 exc[st->voc_offset]=MULT16_16(spx_sqrt(MULT16_16_16(2,ol_pitch)),EXTRACT16(PSHR32(MULT16_16(g,PSHR32(ol_gain,SIG_SHIFT)),6)));
1445 st->voc_offset+=ol_pitch;
1447 st->voc_offset -= st->subframeSize;
1449 for (i=0;i<st->subframeSize;i++)
1451 spx_word16_t exci=exc[i];
1452 exc[i]= ADD16(ADD16(MULT16_16_Q15(QCONST16(.7f,15),exc[i]) , MULT16_16_Q15(QCONST16(.3f,15),st->voc_m1)),
1453 SUB16(MULT16_16_Q15(Q15_ONE-MULT16_16_16(QCONST16(.85f,9),g),EXTRACT16(PSHR32(innov[i],SIG_SHIFT))),
1454 MULT16_16_Q15(MULT16_16_16(QCONST16(.15f,9),g),EXTRACT16(PSHR32(st->voc_m2,SIG_SHIFT)))
1457 st->voc_m2=innov[i];
1458 st->voc_mean = EXTRACT16(PSHR32(ADD32(MULT16_16(QCONST16(.8f,15),st->voc_mean), MULT16_16(QCONST16(.2f,15),exc[i])), 15));
1459 exc[i]-=st->voc_mean;
1466 ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
1468 if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0 && !st->count_lost)
1470 multicomb(st->exc-st->subframeSize, out, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
1471 multicomb(st->exc+st->subframeSize, out+2*st->subframeSize, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
1473 SPEEX_COPY(out, &st->exc[-st->subframeSize], st->frameSize);
1476 /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */
1479 spx_word16_t exc_ener;
1480 spx_word32_t gain32;
1482 exc_ener = compute_rms16 (st->exc, st->frameSize);
1483 gain32 = PDIV32(ol_gain, ADD16(exc_ener,1));
1487 gain = EXTRACT16(gain32);
1493 for (i=0;i<st->frameSize;i++)
1495 st->exc[i] = MULT16_16_Q14(gain, st->exc[i]);
1496 out[i]=st->exc[i-st->subframeSize];
1500 /*Loop on subframes */
1501 for (sub=0;sub<st->nbSubframes;sub++)
1506 /* Offset relative to start of frame */
1507 offset = st->subframeSize*sub;
1508 /* Original signal */
1513 /* LSP interpolation (quantized and unquantized) */
1514 lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
1516 /* Make sure the LSP's are stable */
1517 lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
1519 /* Compute interpolated LPCs (unquantized) */
1520 lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack);
1522 /* Compute analysis filter at w=pi */
1524 spx_word32_t pi_g=LPC_SCALING;
1525 for (i=0;i<st->lpcSize;i+=2)
1527 /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/
1528 pi_g = ADD32(pi_g, SUB32(EXTEND32(ak[i+1]),EXTEND32(ak[i])));
1530 st->pi_gain[sub] = pi_g;
1533 iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
1536 for (i=0;i<st->lpcSize;i++)
1537 st->interp_qlpc[i] = ak[i];
1541 if (st->highpass_enabled)
1542 highpass(out, out, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_OUTPUT, st->mem_hp);
1543 /*for (i=0;i<st->frameSize;i++)
1544 printf ("%d\n", (int)st->frame[i]);*/
1546 /* Tracking output level */
1547 st->level = 1+PSHR32(ol_gain,SIG_SHIFT);
1548 st->max_level = MAX16(MULT16_16_Q15(QCONST16(.99f,15), st->max_level), st->level);
1549 st->min_level = MIN16(ADD16(1,MULT16_16_Q14(QCONST16(1.01f,14), st->min_level)), st->level);
1550 if (st->max_level < st->min_level+1)
1551 st->max_level = st->min_level+1;
1552 /*printf ("%f %f %f %d\n", og, st->min_level, st->max_level, update);*/
1554 /* Store the LSPs for interpolation in the next frame */
1555 for (i=0;i<st->lpcSize;i++)
1556 st->old_qlsp[i] = qlsp[i];
1558 /* The next frame will not be the first (Duh!) */
1561 st->last_pitch = best_pitch;
1563 st->last_pitch_gain = PSHR16(pitch_average,2);
1565 st->last_pitch_gain = .25*pitch_average;
1567 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = st->last_pitch_gain;
1568 if (st->pitch_gain_buf_idx > 2) /* rollover */
1569 st->pitch_gain_buf_idx = 0;
1571 st->last_ol_gain = ol_gain;
/* Narrowband encoder control entry point.
   state   : opaque encoder state; cast to EncState* below.
   request : one of the SPEEX_* request codes handled by the case labels.
   ptr     : in/out argument. Most requests read or write it as spx_int32_t*;
             the VBR-quality and relative-quality requests use float*, the
             PI-gain / excitation requests treat it as an array, and
             SPEEX_GET_STACK treats it as char**.
   Requests that match no case are reported through speex_warning_int(). */
1576 int nb_encoder_ctl(void *state, int request, void *ptr)
1579 st=(EncState*)state;
1582 case SPEEX_GET_FRAME_SIZE:
1583 (*(spx_int32_t*)ptr) = st->frameSize;
/* SET_LOW_MODE and SET_MODE share one handler: the caller's value goes into
   both submodeSelect and submodeID.
   NOTE(review): no range check is visible here, yet submodeID is later used to
   index st->submodes[] -- confirm callers only pass valid sub-mode indices. */
1585 case SPEEX_SET_LOW_MODE:
1586 case SPEEX_SET_MODE:
1587 st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr);
1589 case SPEEX_GET_LOW_MODE:
1590 case SPEEX_GET_MODE:
1591 (*(spx_int32_t*)ptr) = st->submodeID;
/* The next assignments store or report the vbr_enabled, vad_enabled and
   dtx_enabled flags through ptr as spx_int32_t. */
1595 st->vbr_enabled = (*(spx_int32_t*)ptr);
1598 (*(spx_int32_t*)ptr) = st->vbr_enabled;
1601 st->vad_enabled = (*(spx_int32_t*)ptr);
1604 (*(spx_int32_t*)ptr) = st->vad_enabled;
1607 st->dtx_enabled = (*(spx_int32_t*)ptr);
1610 (*(spx_int32_t*)ptr) = st->dtx_enabled;
/* Storing the ABR value also drives vbr_enabled: non-zero switches VBR on,
   zero switches it off. */
1613 st->abr_enabled = (*(spx_int32_t*)ptr);
1614 st->vbr_enabled = st->abr_enabled!=0;
1615 if (st->vbr_enabled)
1618 spx_int32_t rate, target;
/* target is the value supplied by the caller. */
1620 target = (*(spx_int32_t*)ptr);
/* Apply quality level i to this same state through the public ctl call, then
   read back the bitrate that level produces into rate. */
1623 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1624 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
/* Hand vbr_qual to the encoder through the regular SET_VBR_QUALITY request. */
1632 speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual);
1640 (*(spx_int32_t*)ptr) = st->abr_enabled;
1642 #endif /* #ifndef DISABLE_VBR */
1643 #if !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API)
/* VBR quality is exchanged as a float, unlike most other requests. */
1644 case SPEEX_SET_VBR_QUALITY:
1645 st->vbr_quality = (*(float*)ptr);
1647 case SPEEX_GET_VBR_QUALITY:
1648 (*(float*)ptr) = st->vbr_quality;
1650 #endif /* !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) */
1651 case SPEEX_SET_QUALITY:
1653 int quality = (*(spx_int32_t*)ptr);
/* The quality level is translated to a sub-mode through the mode's
   quality_map table; both submodeSelect and submodeID receive the result.
   NOTE(review): quality indexes quality_map[] directly -- confirm it is
   range-limited before this line. */
1658 st->submodeSelect = st->submodeID = ((const SpeexNBMode*)(st->mode->mode))->quality_map[quality];
1661 case SPEEX_SET_COMPLEXITY:
1662 st->complexity = (*(spx_int32_t*)ptr);
/* Negative complexity gets special treatment.
   NOTE(review): presumably replaced by a non-negative value -- confirm. */
1663 if (st->complexity<0)
1666 case SPEEX_GET_COMPLEXITY:
1667 (*(spx_int32_t*)ptr) = st->complexity;
1669 case SPEEX_SET_BITRATE:
1672 spx_int32_t rate, target;
1673 target = (*(spx_int32_t*)ptr);
/* Same probe as in the ABR handler: set quality i, then query the bitrate it
   yields so it can be compared against target. */
1676 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
1677 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
/* Bitrate = sampling_rate * bits-per-frame / frameSize. With a non-NULL
   sub-mode entry the bit count is the sub-mode's bits_per_frame; the second
   assignment uses NB_SUBMODE_BITS+1 instead.
   NOTE(review): presumably the second assignment is the NULL-sub-mode
   fallback -- confirm. */
1684 case SPEEX_GET_BITRATE:
1685 if (st->submodes[st->submodeID])
1686 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1688 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1690 case SPEEX_SET_SAMPLING_RATE:
1691 st->sampling_rate = (*(spx_int32_t*)ptr);
1693 case SPEEX_GET_SAMPLING_RATE:
1694 (*(spx_int32_t*)ptr)=st->sampling_rate;
1696 case SPEEX_RESET_STATE:
1699 st->bounded_pitch = 1;
/* old_lsp[i] is reset to QCONST16(3.1415927f, LSP_SHIFT)*(i+1)/(lpcSize+1),
   i.e. values that grow linearly with the index. */
1701 for (i=0;i<st->lpcSize;i++)
1702 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1);
/* Zero the four per-coefficient memories (mem_sw, mem_sw_whole, mem_sp,
   mem_exc). */
1703 for (i=0;i<st->lpcSize;i++)
1704 st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
/* Zero the first frameSize+max_pitch+1 entries of excBuf and swBuf. */
1705 for (i=0;i<st->frameSize+st->max_pitch+1;i++)
1706 st->excBuf[i]=st->swBuf[i]=0;
/* Runs over windowSize-frameSize entries, the same quantity that
   SPEEX_GET_LOOKAHEAD reports below. */
1707 for (i=0;i<st->windowSize-st->frameSize;i++)
1711 case SPEEX_SET_SUBMODE_ENCODING:
1712 st->encode_submode = (*(spx_int32_t*)ptr);
1714 case SPEEX_GET_SUBMODE_ENCODING:
1715 (*(spx_int32_t*)ptr) = st->encode_submode;
/* Encoder look-ahead is reported as windowSize - frameSize. */
1717 case SPEEX_GET_LOOKAHEAD:
1718 (*(spx_int32_t*)ptr)=(st->windowSize-st->frameSize);
1720 case SPEEX_SET_PLC_TUNING:
1721 st->plc_tuning = (*(spx_int32_t*)ptr);
/* Values above 100 get special treatment.
   NOTE(review): presumably limited to 100 -- confirm. */
1722 if (st->plc_tuning>100)
1725 case SPEEX_GET_PLC_TUNING:
1726 (*(spx_int32_t*)ptr)=(st->plc_tuning);
1729 case SPEEX_SET_VBR_MAX_BITRATE:
1730 st->vbr_max = (*(spx_int32_t*)ptr);
1732 case SPEEX_GET_VBR_MAX_BITRATE:
1733 (*(spx_int32_t*)ptr) = st->vbr_max;
1735 #endif /* #ifndef DISABLE_VBR */
1736 case SPEEX_SET_HIGHPASS:
1737 st->highpass_enabled = (*(spx_int32_t*)ptr);
1739 case SPEEX_GET_HIGHPASS:
1740 (*(spx_int32_t*)ptr) = st->highpass_enabled;
1743 /* This is all internal stuff past this point */
/* Copies st->pi_gain[0..nbSubframes-1] into the caller's spx_word32_t array;
   the caller must provide room for nbSubframes entries. */
1744 case SPEEX_GET_PI_GAIN:
1747 spx_word32_t *g = (spx_word32_t*)ptr;
1748 for (i=0;i<st->nbSubframes;i++)
1749 g[i]=st->pi_gain[i];
/* Writes one compute_rms16() value per sub-frame of st->exc into ptr,
   interpreted as an array of spx_word16_t. */
1755 for (i=0;i<st->nbSubframes;i++)
1756 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
/* Relative quality is returned as a float. */
1760 case SPEEX_GET_RELATIVE_QUALITY:
1761 (*(float*)ptr)=st->relative_quality;
1763 #endif /* #ifndef DISABLE_VBR */
/* Only the caller's pointer is stored; nothing is copied here. */
1764 case SPEEX_SET_INNOVATION_SAVE:
1765 st->innov_rms_save = (spx_word16_t*)ptr;
1767 case SPEEX_SET_WIDEBAND:
1768 st->isWideband = *((spx_int32_t*)ptr);
/* Returns the state's stack pointer through ptr, treated as char**. */
1770 case SPEEX_GET_STACK:
1771 *((char**)ptr) = st->stack;
/* Unrecognised request codes are logged together with the offending value. */
1774 speex_warning_int("Unknown nb_ctl request: ", request);
/* Narrowband decoder control entry point.
   state   : opaque decoder state; cast to DecState* below.
   request : one of the SPEEX_* request codes handled by the case labels.
   ptr     : in/out argument. Most requests read or write it as spx_int32_t*;
             the handler requests read a SpeexCallback, the PI-gain /
             excitation requests treat it as an array, and SPEEX_GET_STACK
             treats it as char**.
   Requests that match no case are reported through speex_warning_int(). */
1780 int nb_decoder_ctl(void *state, int request, void *ptr)
1783 st=(DecState*)state;
/* NOTE(review): the sub-mode index is stored as given; no range check is
   visible here although submodeID later indexes st->submodes[] -- confirm. */
1786 case SPEEX_SET_LOW_MODE:
1787 case SPEEX_SET_MODE:
1788 st->submodeID = (*(spx_int32_t*)ptr);
1790 case SPEEX_GET_LOW_MODE:
1791 case SPEEX_GET_MODE:
1792 (*(spx_int32_t*)ptr) = st->submodeID;
/* Store, then report, the lpc_enh_enabled flag. */
1795 st->lpc_enh_enabled = *((spx_int32_t*)ptr);
1798 *((spx_int32_t*)ptr) = st->lpc_enh_enabled;
1800 case SPEEX_GET_FRAME_SIZE:
1801 (*(spx_int32_t*)ptr) = st->frameSize;
/* Bitrate = sampling_rate * bits-per-frame / frameSize. With a non-NULL
   sub-mode entry the bit count is the sub-mode's bits_per_frame; the second
   assignment uses NB_SUBMODE_BITS+1 instead.
   NOTE(review): presumably the second assignment is the NULL-sub-mode
   fallback -- confirm. */
1803 case SPEEX_GET_BITRATE:
1804 if (st->submodes[st->submodeID])
1805 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
1807 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
1809 case SPEEX_SET_SAMPLING_RATE:
1810 st->sampling_rate = (*(spx_int32_t*)ptr);
1812 case SPEEX_GET_SAMPLING_RATE:
1813 (*(spx_int32_t*)ptr)=st->sampling_rate;
/* Registers an in-band callback: func, data and callback_id are copied into
   the st->speex_callbacks[] slot selected by callback_id.
   NOTE(review): callback_id comes straight from the caller's struct and is
   used as an array index with no visible bounds check -- confirm it cannot
   exceed the size of speex_callbacks[]. */
1815 case SPEEX_SET_HANDLER:
1817 SpeexCallback *c = (SpeexCallback*)ptr;
1818 st->speex_callbacks[c->callback_id].func=c->func;
1819 st->speex_callbacks[c->callback_id].data=c->data;
1820 st->speex_callbacks[c->callback_id].callback_id=c->callback_id;
/* The single user callback is overwritten field by field from the caller's
   SpeexCallback. */
1823 case SPEEX_SET_USER_HANDLER:
1825 SpeexCallback *c = (SpeexCallback*)ptr;
1826 st->user_callback.func=c->func;
1827 st->user_callback.data=c->data;
1828 st->user_callback.callback_id=c->callback_id;
/* Reset makes one pass over lpcSize entries and one over
   frameSize+max_pitch+1 entries. */
1831 case SPEEX_RESET_STATE:
1834 for (i=0;i<st->lpcSize;i++)
1836 for (i=0;i<st->frameSize + st->max_pitch + 1;i++)
1840 case SPEEX_SET_SUBMODE_ENCODING:
1841 st->encode_submode = (*(spx_int32_t*)ptr);
1843 case SPEEX_GET_SUBMODE_ENCODING:
1844 (*(spx_int32_t*)ptr) = st->encode_submode;
/* Decoder look-ahead is reported as one sub-frame. */
1846 case SPEEX_GET_LOOKAHEAD:
1847 (*(spx_int32_t*)ptr)=st->subframeSize;
1849 case SPEEX_SET_HIGHPASS:
1850 st->highpass_enabled = (*(spx_int32_t*)ptr);
1852 case SPEEX_GET_HIGHPASS:
1853 (*(spx_int32_t*)ptr) = st->highpass_enabled;
1855 /* FIXME: Convert to fixed-point and re-enable even when float API is disabled */
1856 #ifndef DISABLE_FLOAT_API
1857 case SPEEX_GET_ACTIVITY:
/* ret = log(level/min_level) / log(max_level/min_level): where the current
   level sits between the tracked minimum and maximum on a log scale.
   NOTE(review): if the level fields are integer-typed in this build, both
   divisions are integer divisions performed before log() -- confirm the
   intended precision. */
1860 ret = log(st->level/st->min_level)/log(st->max_level/st->min_level);
1863 /* Done in a strange way to catch NaNs as well */
1866 /*printf ("%f %f %f %f\n", st->level, st->min_level, st->max_level, ret);*/
/* The result is multiplied by 100 and truncated to an integer. */
1867 (*(spx_int32_t*)ptr) = (int)(100*ret);
/* Copies st->pi_gain[0..nbSubframes-1] into the caller's spx_word32_t array;
   the caller must provide room for nbSubframes entries. */
1871 case SPEEX_GET_PI_GAIN:
1874 spx_word32_t *g = (spx_word32_t*)ptr;
1875 for (i=0;i<st->nbSubframes;i++)
1876 g[i]=st->pi_gain[i];
/* Writes one compute_rms16() value per sub-frame of st->exc into ptr,
   interpreted as an array of spx_word16_t. */
1882 for (i=0;i<st->nbSubframes;i++)
1883 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize);
/* DTX status is reported straight from st->dtx_enabled. */
1886 case SPEEX_GET_DTX_STATUS:
1887 *((spx_int32_t*)ptr) = st->dtx_enabled;
/* Only the caller's pointer is stored; nothing is copied here. */
1889 case SPEEX_SET_INNOVATION_SAVE:
1890 st->innov_save = (spx_word16_t*)ptr;
1892 case SPEEX_SET_WIDEBAND:
1893 st->isWideband = *((spx_int32_t*)ptr);
/* Returns the state's stack pointer through ptr, treated as char**. */
1895 case SPEEX_GET_STACK:
1896 *((char**)ptr) = st->stack;
/* Unrecognised request codes are logged together with the offending value. */
1899 speex_warning_int("Unknown nb_ctl request: ", request);