1 /* Copyright (C) 2002-2006 Jean-Marc Valin
3 Long-Term Prediction functions
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 - Neither the name of the Xiph.org Foundation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include "stack_alloc.h"
41 #include <speex/speex_bits.h>
42 #include "math_approx.h"
43 #include "os_support.h"
52 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
54 #elif defined (BFIN_ASM)
58 #ifndef OVERRIDE_INNER_PROD
59 spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
66 part = MAC16_16(part,*x++,*y++);
67 part = MAC16_16(part,*x++,*y++);
68 part = MAC16_16(part,*x++,*y++);
69 part = MAC16_16(part,*x++,*y++);
70 /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
71 sum = ADD32(sum,SHR32(part,6));
77 #ifndef OVERRIDE_PITCH_XCORR
78 #if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
79 void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
82 for (i=0;i<nb_pitch;i+=4)
84 /* Compute correlation*/
85 /*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/
90 const spx_word16_t *y = _y+i;
91 const spx_word16_t *x = _x;
92 spx_word16_t y0, y1, y2, y3;
93 /*y0=y[0];y1=y[1];y2=y[2];y3=y[3];*/
104 part1 = MULT16_16(*x,y0);
105 part2 = MULT16_16(*x,y1);
106 part3 = MULT16_16(*x,y2);
107 part4 = MULT16_16(*x,y3);
110 part1 = MAC16_16(part1,*x,y1);
111 part2 = MAC16_16(part2,*x,y2);
112 part3 = MAC16_16(part3,*x,y3);
113 part4 = MAC16_16(part4,*x,y0);
116 part1 = MAC16_16(part1,*x,y2);
117 part2 = MAC16_16(part2,*x,y3);
118 part3 = MAC16_16(part3,*x,y0);
119 part4 = MAC16_16(part4,*x,y1);
122 part1 = MAC16_16(part1,*x,y3);
123 part2 = MAC16_16(part2,*x,y0);
124 part3 = MAC16_16(part3,*x,y1);
125 part4 = MAC16_16(part4,*x,y2);
129 sum1 = ADD32(sum1,SHR32(part1,6));
130 sum2 = ADD32(sum2,SHR32(part2,6));
131 sum3 = ADD32(sum3,SHR32(part3,6));
132 sum4 = ADD32(sum4,SHR32(part4,6));
134 corr[nb_pitch-1-i]=sum1;
135 corr[nb_pitch-2-i]=sum2;
136 corr[nb_pitch-3-i]=sum3;
137 corr[nb_pitch-4-i]=sum4;
142 void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
145 for (i=0;i<nb_pitch;i++)
147 /* Compute correlation*/
148 corr[nb_pitch-1-i]=inner_prod(_x, _y+i, len);
155 #ifndef OVERRIDE_COMPUTE_PITCH_ERROR
156 static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control)
158 spx_word32_t sum = 0;
159 sum = ADD32(sum,MULT16_16(MULT16_16_16(g[0],pitch_control),C[0]));
160 sum = ADD32(sum,MULT16_16(MULT16_16_16(g[1],pitch_control),C[1]));
161 sum = ADD32(sum,MULT16_16(MULT16_16_16(g[2],pitch_control),C[2]));
162 sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[1]),C[3]));
163 sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[1]),C[4]));
164 sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[0]),C[5]));
165 sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[0]),C[6]));
166 sum = SUB32(sum,MULT16_16(MULT16_16_16(g[1],g[1]),C[7]));
167 sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[2]),C[8]));
172 #ifndef OVERRIDE_OPEN_LOOP_NBEST_PITCH
173 void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
176 VARDECL(spx_word32_t *best_score);
177 VARDECL(spx_word32_t *best_ener);
179 VARDECL(spx_word32_t *corr);
181 /* In fixed-point, we need only one (temporary) array of 32-bit values and two (corr16, ener16)
182 arrays for (normalized) 16-bit values */
183 VARDECL(spx_word16_t *corr16);
184 VARDECL(spx_word16_t *ener16);
185 spx_word32_t *energy;
186 int cshift=0, eshift=0;
188 ALLOC(corr16, end-start+1, spx_word16_t);
189 ALLOC(ener16, end-start+1, spx_word16_t);
190 ALLOC(corr, end-start+1, spx_word32_t);
193 /* In floating-point, we need to float arrays and no normalized copies */
194 VARDECL(spx_word32_t *energy);
195 spx_word16_t *corr16;
196 spx_word16_t *ener16;
197 ALLOC(energy, end-start+2, spx_word32_t);
198 ALLOC(corr, end-start+1, spx_word32_t);
203 ALLOC(best_score, N, spx_word32_t);
204 ALLOC(best_ener, N, spx_word32_t);
213 for (i=-end;i<len;i++)
215 if (ABS16(sw[i])>16383)
221 /* If the weighted input is close to saturation, then we scale it down */
224 for (i=-end;i<len;i++)
226 sw[i]=SHR16(sw[i],1);
230 energy[0]=inner_prod(sw-start, sw-start, len);
231 e0=inner_prod(sw, sw, len);
232 for (i=start;i<end;i++)
234 /* Update energy for next pitch*/
235 energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(sw[-i-1],sw[-i-1]),6)), SHR32(MULT16_16(sw[-i+len-1],sw[-i+len-1]),6));
236 if (energy[i-start+1] < 0)
237 energy[i-start+1] = 0;
241 eshift = normalize16(energy, ener16, 32766, end-start+1);
244 /* In fixed-point, this actually overrites the energy array (aliased to corr) */
245 pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
248 /* Normalize to 180 so we can square it and it still fits in 16 bits */
249 cshift = normalize16(corr, corr16, 180, end-start+1);
250 /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */
253 for (i=-end;i<len;i++)
255 sw[i]=SHL16(sw[i],1);
260 /* Search for the best pitch prediction gain */
261 for (i=start;i<=end;i++)
263 spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
264 /* Instead of dividing the tmp by the energy, we multiply on the other side */
265 if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
267 /* We can safely put it last and then check */
269 best_ener[N-1]=ener16[i-start]+1;
271 /* Check if it comes in front of others */
274 if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
278 best_score[k]=best_score[k-1];
279 best_ener[k]=best_ener[k-1];
283 best_ener[j]=ener16[i-start]+1;
291 /* Compute open-loop gain if necessary */
298 g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));
299 /* FIXME: g = max(g,corr/energy) */
310 #ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
311 static int pitch_gain_search_3tap_vq(
312 const signed char *gain_cdbk,
315 spx_word16_t max_gain
318 const signed char *ptr=gain_cdbk;
320 spx_word32_t best_sum=-VERY_LARGE32;
323 spx_word16_t pitch_control=64;
324 spx_word16_t gain_sum;
327 for (i=0;i<gain_cdbk_size;i++) {
330 g[0]=ADD16((spx_word16_t)ptr[0],32);
331 g[1]=ADD16((spx_word16_t)ptr[1],32);
332 g[2]=ADD16((spx_word16_t)ptr[2],32);
333 gain_sum = (spx_word16_t)ptr[3];
335 sum = compute_pitch_error(C16, g, pitch_control);
337 if (sum>best_sum && gain_sum<=max_gain) {
347 /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
348 static spx_word32_t pitch_gain_search_3tap(
349 const spx_word16_t target[], /* Target vector */
350 const spx_coef_t ak[], /* LPCs for this subframe */
351 const spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */
352 const spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */
353 spx_sig_t exc[], /* Excitation */
354 const signed char *gain_cdbk,
356 int pitch, /* Pitch value */
357 int p, /* Number of LPC coeffs */
358 int nsf, /* Number of samples in subframe */
361 const spx_word16_t *exc2,
362 const spx_word16_t *r,
363 spx_word16_t *new_target,
366 spx_word32_t cumul_gain,
371 VARDECL(spx_word16_t *tmp1);
372 VARDECL(spx_word16_t *e);
374 spx_word32_t corr[3];
375 spx_word32_t A[3][3];
376 spx_word16_t gain[3];
378 spx_word16_t max_gain=128;
381 ALLOC(tmp1, 3*nsf, spx_word16_t);
382 ALLOC(e, nsf, spx_word16_t);
384 if (cumul_gain > 262144)
392 new_target[j] = target[j];
395 VARDECL(spx_mem_t *mm);
397 ALLOC(mm, p, spx_mem_t);
402 else if (j-pp-pitch<0)
403 e[j]=exc2[j-pp-pitch];
408 /* Scale target and excitation down if needed (avoiding overflow) */
412 e[j] = SHR16(e[j],1);
414 new_target[j] = SHR16(new_target[j],1);
419 iir_mem16(e, ak, e, nsf, p, mm, stack);
422 filter_mem16(e, awk1, awk2, e, nsf, p, mm, stack);
428 spx_word16_t e0=exc2[-pitch-1+i];
430 /* Scale excitation down if needed (avoiding overflow) */
434 x[i][0]=MULT16_16_Q14(r[0], e0);
435 for (j=0;j<nsf-1;j++)
436 x[i][j+1]=ADD32(x[i+1][j],MULT16_16_P14(r[j+1], e0));
440 corr[i]=inner_prod(x[i],new_target,nsf);
443 A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
468 C[0] = SHL32(C[0],1);
469 C[1] = SHL32(C[1],1);
470 C[2] = SHL32(C[2],1);
471 C[3] = SHL32(C[3],1);
472 C[4] = SHL32(C[4],1);
473 C[5] = SHL32(C[5],1);
474 C[6] = MAC16_32_Q15(C[6],MULT16_16_16(plc_tuning,655),C[6]);
475 C[7] = MAC16_32_Q15(C[7],MULT16_16_16(plc_tuning,655),C[7]);
476 C[8] = MAC16_32_Q15(C[8],MULT16_16_16(plc_tuning,655),C[8]);
477 normalize16(C, C16, 32767, 9);
479 C[6]*=.5*(1+.02*plc_tuning);
480 C[7]*=.5*(1+.02*plc_tuning);
481 C[8]*=.5*(1+.02*plc_tuning);
484 best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain);
487 gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4]);
488 gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+1]);
489 gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+2]);
490 /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
492 gain[0] = 0.015625*gain_cdbk[best_cdbk*4] + .5;
493 gain[1] = 0.015625*gain_cdbk[best_cdbk*4+1]+ .5;
494 gain[2] = 0.015625*gain_cdbk[best_cdbk*4+2]+ .5;
496 *cdbk_index=best_cdbk;
499 SPEEX_MEMSET(exc, 0, nsf);
509 exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp]);
513 for (j=tmp1;j<tmp3;j++)
514 exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp-pitch]);
518 spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0],x[2][i]),MULT16_16(gain[1],x[1][i])),
519 MULT16_16(gain[2],x[0][i]));
520 new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(tmp,6)));
522 err = inner_prod(new_target, new_target, nsf);
527 /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
528 int pitch_search_3tap(
529 spx_word16_t target[], /* Target vector */
531 spx_coef_t ak[], /* LPCs for this subframe */
532 spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */
533 spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */
534 spx_sig_t exc[], /* Excitation */
536 int start, /* Smallest pitch value allowed */
537 int end, /* Largest pitch value allowed */
538 spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
539 int p, /* Number of LPC coeffs */
540 int nsf, /* Number of samples in subframe */
548 spx_word32_t *cumul_gain
552 int cdbk_index, pitch=0, best_gain_index=0;
553 VARDECL(spx_sig_t *best_exc);
554 VARDECL(spx_word16_t *new_target);
555 VARDECL(spx_word16_t *best_target);
557 spx_word32_t err, best_err=-1;
559 const ltp_params *params;
560 const signed char *gain_cdbk;
566 params = (const ltp_params*) par;
567 gain_cdbk_size = 1<<params->gain_bits;
568 gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
576 ALLOC(nbest, N, int);
577 params = (const ltp_params*) par;
581 speex_bits_pack(bits, 0, params->pitch_bits);
582 speex_bits_pack(bits, 0, params->gain_bits);
583 SPEEX_MEMSET(exc, 0, nsf);
588 /* Check if we need to scale everything down in the pitch search to avoid overflows */
591 if (ABS16(target[i])>16383)
597 for (i=-end;i<nsf;i++)
599 if (ABS16(exc2[i])>16383)
609 open_loop_nbest_pitch(sw, start, end, nsf, nbest, NULL, N, stack);
613 ALLOC(best_exc, nsf, spx_sig_t);
614 ALLOC(new_target, nsf, spx_word16_t);
615 ALLOC(best_target, nsf, spx_word16_t);
620 SPEEX_MEMSET(exc, 0, nsf);
621 err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, gain_cdbk, gain_cdbk_size, pitch, p, nsf,
622 bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain, scaledown);
623 if (err<best_err || best_err<0)
625 SPEEX_COPY(best_exc, exc, nsf);
626 SPEEX_COPY(best_target, new_target, nsf);
629 best_gain_index=cdbk_index;
632 /*printf ("pitch: %d %d\n", best_pitch, best_gain_index);*/
633 speex_bits_pack(bits, best_pitch-start, params->pitch_bits);
634 speex_bits_pack(bits, best_gain_index, params->gain_bits);
636 *cumul_gain = MULT16_32_Q13(SHL16(params->gain_cdbk[4*best_gain_index+3],8), MAX32(1024,*cumul_gain));
638 *cumul_gain = 0.03125*MAX32(1024,*cumul_gain)*params->gain_cdbk[4*best_gain_index+3];
640 /*printf ("%f\n", cumul_gain);*/
641 /*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/
642 SPEEX_COPY(exc, best_exc, nsf);
643 SPEEX_COPY(target, best_target, nsf);
645 /* Scale target back up if needed */
649 target[i]=SHL16(target[i],1);
655 void pitch_unquant_3tap(
656 spx_word16_t exc[], /* Input excitation */
657 spx_word32_t exc_out[], /* Output excitation */
658 int start, /* Smallest pitch value allowed */
659 int end, /* Largest pitch value allowed */
660 spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
662 int nsf, /* Number of samples in subframe */
664 spx_word16_t *gain_val,
669 spx_word16_t last_pitch_gain,
676 spx_word16_t gain[3];
677 const signed char *gain_cdbk;
679 const ltp_params *params;
681 params = (const ltp_params*) par;
682 gain_cdbk_size = 1<<params->gain_bits;
683 gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
685 pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits);
687 gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits);
688 /*printf ("decode pitch: %d %d\n", pitch, gain_index);*/
690 gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4]);
691 gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+1]);
692 gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+2]);
694 gain[0] = 0.015625*gain_cdbk[gain_index*4]+.5;
695 gain[1] = 0.015625*gain_cdbk[gain_index*4+1]+.5;
696 gain[2] = 0.015625*gain_cdbk[gain_index*4+2]+.5;
699 if (count_lost && pitch > subframe_offset)
701 spx_word16_t gain_sum;
704 spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : SHR16(last_pitch_gain,1);
708 spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : 0.5 * last_pitch_gain;
712 gain_sum = gain_3tap_to_1tap(gain);
716 spx_word16_t fact = DIV32_16(SHL32(EXTEND32(tmp),14),gain_sum);
718 gain[i]=MULT16_16_Q14(fact,gain[i]);
729 gain[0] = SHL16(gain[0],7);
730 gain[1] = SHL16(gain[1],7);
731 gain[2] = SHL16(gain[2],7);
732 SPEEX_MEMSET(exc_out, 0, nsf);
742 exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp]);
746 for (j=tmp1;j<tmp3;j++)
747 exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp-pitch]);
749 /*for (i=0;i<nsf;i++)
750 exc[i]=PSHR32(exc32[i],13);*/
754 /** Forced pitch delay and gain */
755 int forced_pitch_quant(
756 spx_word16_t target[], /* Target vector */
758 spx_coef_t ak[], /* LPCs for this subframe */
759 spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */
760 spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */
761 spx_sig_t exc[], /* Excitation */
763 int start, /* Smallest pitch value allowed */
764 int end, /* Largest pitch value allowed */
765 spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
766 int p, /* Number of LPC coeffs */
767 int nsf, /* Number of samples in subframe */
775 spx_word32_t *cumul_gain
779 VARDECL(spx_word16_t *res);
780 ALLOC(res, nsf, spx_word16_t);
788 for (i=0;i<nsf&&i<start;i++)
790 exc[i]=MULT16_16(SHL16(pitch_coef, 7),exc2[i-start]);
794 exc[i]=MULT16_32_Q15(SHL16(pitch_coef, 9),exc[i-start]);
797 res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT-1));
798 syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack);
800 target[i]=EXTRACT16(SATURATE(SUB32(EXTEND32(target[i]),EXTEND32(res[i])),32700));
804 /** Unquantize forced pitch delay and gain */
805 void forced_pitch_unquant(
806 spx_word16_t exc[], /* Input excitation */
807 spx_word32_t exc_out[], /* Output excitation */
808 int start, /* Smallest pitch value allowed */
809 int end, /* Largest pitch value allowed */
810 spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */
812 int nsf, /* Number of samples in subframe */
814 spx_word16_t *gain_val,
819 spx_word16_t last_pitch_gain,
833 exc_out[i]=MULT16_16(exc[i-start],SHL16(pitch_coef,7));
834 exc[i] = EXTRACT16(PSHR32(exc_out[i],13));
837 gain_val[0]=gain_val[2]=0;
838 gain_val[1] = pitch_coef;