4 * Copyright (c) 1999-2000 Mark Taylor
5 * Copyright (c) 2000-2011 Robert Hegemann
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
23 /* $Id: vbrquantize.c,v 1.141 2011/05/07 16:05:17 rbrito Exp $ */
34 #include "vbrquantize.h"
35 #include "quantize_pvt.h"
/* Opaque per-granule/channel search context; full struct body is partly
 * outside this view. */
41 typedef struct algo_s algo_t;
/* Strategy callback: distributes scalefactors/global gain for a granule. */
43 typedef void (*alloc_sf_f) (const algo_t *, const int *, const int *, int);
/* Strategy callback: finds a scalefactor for one band (xr, xr34, allowed
 * noise, band width, minimum sf). */
44 typedef uint8_t (*find_sf_f) (const FLOAT *, const FLOAT *, FLOAT, unsigned int, uint8_t);
/* struct algo_s members (fragment): spectrum raised to 3/4 power, and the
 * encoder state this search operates on. */
49 const FLOAT *xr34orig;
50 lame_internal_flags *gfc;
58 /* Remarks on optimizing compilers:
60  * the MSVC compiler may get into aliasing problems when accessing
61  * memory through the fi_union. declaring it volatile does the trick here
63  * the calc_sfb_noise_* functions are not inlined because the intel compiler
64  * optimized executables won't work as expected anymore
/* volatile defeats MSVC's aliasing assumptions on fi_union (see remark above) */
69 # define VOLATILE volatile
73 # define FORCEINLINE __forceinline
/* float/int pun used by the IEEE754 fast float->int conversion trick */
84 typedef VOLATILE union {
91 #ifdef TAKEHIRO_IEEE754_HACK
92 #define DOUBLEX double
/* bias constants for the IEEE754 float->int hack: adding MAGIC_FLOAT to a
 * small positive float places its integer part in the low mantissa bits;
 * MAGIC_INT is the bit pattern of that bias as an int. */
97 #define MAGIC_FLOAT_def (65536*(128))
98 #define MAGIC_INT_def 0x4b000000
102 /*********************************************************************
103 * XRPOW_FTOI is a macro to convert floats to ints.
104 * if XRPOW_FTOI(x) = nearest_int(x), then QUANTFAC(x)=adj43asm[x]
107 * if XRPOW_FTOI(x) = floor(x), then QUANTFAC(x)=asj43[x]
109 *********************************************************************/
110 # define QUANTFAC(rx) adj43[rx]
111 # define ROUNDFAC_def 0.4054f
112 # define XRPOW_FTOI(src,dest) ((dest) = (int)(src))
115 static int const MAGIC_INT = MAGIC_INT_def;
116 #ifndef TAKEHIRO_IEEE754_HACK
117 static DOUBLEX const ROUNDFAC = ROUNDFAC_def;
119 static DOUBLEX const MAGIC_FLOAT = MAGIC_FLOAT_def;
/* Return the maximum value of xr34[0..bw-1].  Main loop handles four
 * coefficients per iteration; the switch mops up the 1..3 remaining. */
122 FORCEINLINE static float
123 vec_max_c(const float * xr34, unsigned int bw)
126     unsigned int i = bw >> 2u;
127     unsigned int const remaining = (bw & 0x03u);
130         if (xfsf < xr34[0]) {
133         if (xfsf < xr34[1]) {
136         if (xfsf < xr34[2]) {
139         if (xfsf < xr34[3]) {
144     switch( remaining ) {
145     case 3: if (xfsf < xr34[2]) xfsf = xr34[2]; /* fall through */
146     case 2: if (xfsf < xr34[1]) xfsf = xr34[1]; /* fall through */
147     case 1: if (xfsf < xr34[0]) xfsf = xr34[0];
/* Return the sum of squares (energy) of xr[0..bw-1], unrolled by four with
 * a switch for the 1..3 leftover coefficients. */
153 FORCEINLINE static float
154 vec_sum_sq_c(const float * xr, unsigned int bw)
157     unsigned int i = bw >> 2u;
158     unsigned int const remaining = bw & 0x03u;
160         sum += xr[0] * xr[0];
161         sum += xr[1] * xr[1];
162         sum += xr[2] * xr[2];
163         sum += xr[3] * xr[3];
166     switch( remaining ) {
167     case 3: sum += xr[2] * xr[2]; /* fall through */
168     case 2: sum += xr[1] * xr[1]; /* fall through */
169     case 1: sum += xr[0] * xr[0];
/* Binary search (8 iterations over delsf = 64..1) for the lowest scalefactor
 * such that ipow20[sf] * xr34 still fits into the quantizer's IXMAX_VAL. */
176 FORCEINLINE static uint8_t
177 find_lowest_scalefac(const FLOAT xr34)
180     uint8_t sf = 128, delsf = 64;
182     FLOAT const ixmax_val = IXMAX_VAL;
183     for (i = 0; i < 8; ++i) {
184         FLOAT const xfsf = ipow20[sf] * xr34;
185         if (xfsf <= ixmax_val) {
/* Return 1 if the band energy `sum` is at or below the allowed masking
 * threshold l3xmin (within a tiny epsilon), i.e. the band can be zeroed. */
198 FORCEINLINE static int
199 below_noise_floor(FLOAT sum, FLOAT l3xmin)
201     FLOAT const d = -1E-20;
202     return (l3xmin - sum) >= d ? 1 : 0;
/* Quantize four scaled magnitudes x[0..3] into integer codes l3[0..3].
 * Two paths: the IEEE754 bit-trick (adj43asm lookup via the fi_union bias)
 * or the portable XRPOW_FTOI/QUANTFAC sequence.  Inputs must satisfy
 * x[i] <= IXMAX_VAL (see the asserts). */
206 FORCEINLINE static void
207 k_34_4(DOUBLEX x[4], int l3[4])
209 #ifdef TAKEHIRO_IEEE754_HACK
212     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL && x[2] <= IXMAX_VAL && x[3] <= IXMAX_VAL);
/* add the rounding correction looked up from the first truncation */
221     fi[0].f = x[0] + adj43asm[fi[0].i - MAGIC_INT];
222     fi[1].f = x[1] + adj43asm[fi[1].i - MAGIC_INT];
223     fi[2].f = x[2] + adj43asm[fi[2].i - MAGIC_INT];
224     fi[3].f = x[3] + adj43asm[fi[3].i - MAGIC_INT];
/* strip the magic bias to recover the integer quantized values */
225     l3[0] = fi[0].i - MAGIC_INT;
226     l3[1] = fi[1].i - MAGIC_INT;
227     l3[2] = fi[2].i - MAGIC_INT;
228     l3[3] = fi[3].i - MAGIC_INT;
230     assert(x[0] <= IXMAX_VAL && x[1] <= IXMAX_VAL && x[2] <= IXMAX_VAL && x[3] <= IXMAX_VAL);
/* portable path: truncate, add rounding correction, truncate again */
231     XRPOW_FTOI(x[0], l3[0]);
232     XRPOW_FTOI(x[1], l3[1]);
233     XRPOW_FTOI(x[2], l3[2]);
234     XRPOW_FTOI(x[3], l3[3]);
235     x[0] += QUANTFAC(l3[0]);
236     x[1] += QUANTFAC(l3[1]);
237     x[2] += QUANTFAC(l3[2]);
238     x[3] += QUANTFAC(l3[3]);
239     XRPOW_FTOI(x[0], l3[0]);
240     XRPOW_FTOI(x[1], l3[1]);
241     XRPOW_FTOI(x[2], l3[2]);
242     XRPOW_FTOI(x[3], l3[3]);
250 /* do call the calc_sfb_noise_* functions only with sf values
251  * for which holds: sfpow34*xr34 <= IXMAX_VAL
/* Compute the quantization noise (sum of squared reconstruction errors) of
 * one scalefactor band of width bw at scalefactor sf.  Quantizes four
 * coefficients at a time via k_34_4; the tail switch handles 1..3 leftovers. */
255 calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, unsigned int bw, uint8_t sf)
259     const FLOAT sfpow = pow20[sf + Q_MAX2]; /*pow(2.0,sf/4.0); */
260     const FLOAT sfpow34 = ipow20[sf]; /*pow(sfpow,-3.0/4.0); */
263     unsigned int i = bw >> 2u;
264     unsigned int const remaining = (bw & 0x03u);
267         x[0] = sfpow34 * xr34[0];
268         x[1] = sfpow34 * xr34[1];
269         x[2] = sfpow34 * xr34[2];
270         x[3] = sfpow34 * xr34[3];
/* reconstruction error: |xr| minus the dequantized value sfpow*pow43[l3] */
274         x[0] = fabsf(xr[0]) - sfpow * pow43[l3[0]];
275         x[1] = fabsf(xr[1]) - sfpow * pow43[l3[1]];
276         x[2] = fabsf(xr[2]) - sfpow * pow43[l3[2]];
277         x[3] = fabsf(xr[3]) - sfpow * pow43[l3[3]];
278         xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
/* tail: zero-pad to a full group of four, then quantize once more */
284         x[0] = x[1] = x[2] = x[3] = 0;
285         switch( remaining ) {
286         case 3: x[2] = sfpow34 * xr34[2]; /* fall through */
287         case 2: x[1] = sfpow34 * xr34[1]; /* fall through */
288         case 1: x[0] = sfpow34 * xr34[0];
292         x[0] = x[1] = x[2] = x[3] = 0;
294         switch( remaining ) {
295         case 3: x[2] = fabsf(xr[2]) - sfpow * pow43[l3[2]]; /* fall through */
296         case 2: x[1] = fabsf(xr[1]) - sfpow * pow43[l3[1]]; /* fall through */
297         case 1: x[0] = fabsf(xr[0]) - sfpow * pow43[l3[0]];
299         xfsf += (x[0] * x[0] + x[1] * x[1]) + (x[2] * x[2] + x[3] * x[3]);
/* Memoization slot for calc_sfb_noise_x34 results, indexed by scalefactor. */
306 struct calc_noise_cache {
311 typedef struct calc_noise_cache calc_noise_cache_t;
/* Evaluate noise at sf and its two neighbors (sf-1, sf+1), caching results
 * in did_it[].  Presumably returns non-zero when the allowed noise l3_xmin
 * is exceeded at any of the three -- TODO confirm against the hidden
 * return statements. */
315 tri_calc_sfb_noise_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw,
316                        uint8_t sf, calc_noise_cache_t * did_it)
318     if (did_it[sf].valid == 0) {
319         did_it[sf].valid = 1;
320         did_it[sf].value = calc_sfb_noise_x34(xr, xr34, bw, sf);
322     if (l3_xmin < did_it[sf].value) {
/* neighbor above */
326         uint8_t const sf_x = sf + 1;
327         if (did_it[sf_x].valid == 0) {
328             did_it[sf_x].valid = 1;
329             did_it[sf_x].value = calc_sfb_noise_x34(xr, xr34, bw, sf_x);
331         if (l3_xmin < did_it[sf_x].value) {
/* neighbor below */
336         uint8_t const sf_x = sf - 1;
337         if (did_it[sf_x].valid == 0) {
338             did_it[sf_x].valid = 1;
339             did_it[sf_x].value = calc_sfb_noise_x34(xr, xr34, bw, sf_x);
341         if (l3_xmin < did_it[sf_x].value) {
350  * Robert Hegemann 2001-05-01
351  * calculates quantization step size determined by allowed masking
/* Analytic first guess: maps average allowed noise per line (l3_xmin/bw)
 * to a scalefactor around the 210 reference gain. */
354 calc_scalefac(FLOAT l3_xmin, int bw)
356     FLOAT const c = 5.799142446; /* 10 * 10^(2/3) * log10(4/3) */
357     return 210 + (int) (c * log10f(l3_xmin / bw) - .5f);
/* Cheap variant of the scalefactor search: use the analytic guess, clamped
 * to [sf_min, 255].  xr/xr34 are unused by the guess itself but keep the
 * find_sf_f signature. */
361 guess_scalefac_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw, uint8_t sf_min)
363     int const guess = calc_scalefac(l3_xmin, bw);
364     if (guess < sf_min) return sf_min;
365     if (guess >= 255) return 255;
372 /* the find_scalefac* routines calculate
373  * a quantization step size which would
374  * introduce as much noise as is allowed.
375  * The larger the step size the more
376  * quantization noise we'll get. The
377  * scalefactors are there to lower the
378  * global step size, allowing limited
379  * differences in quantization step sizes
380  * per band (shaping the noise).
/* Exact search: 8-step binary search over sf (delsf halves from 128),
 * using the per-sf noise cache, returning the best distortion-free
 * scalefactor found (sf_ok) when one was seen. */
384 find_scalefac_x34(const FLOAT * xr, const FLOAT * xr34, FLOAT l3_xmin, unsigned int bw,
387     calc_noise_cache_t did_it[256];
388     uint8_t sf = 128, sf_ok = 255, delsf = 128, seen_good_one = 0, i;
389     memset(did_it, 0, sizeof(did_it));
390     for (i = 0; i < 8; ++i) {
396         uint8_t const bad = tri_calc_sfb_noise_x34(xr, xr34, l3_xmin, bw, sf, did_it);
397         if (bad) {      /* distortion. try a smaller scalefactor */
407     /* returning a scalefac without distortion, if possible
409     if (seen_good_one > 0) {
420 /***********************************************************************
422 * calc_short_block_vbr_sf()
423 * calc_long_block_vbr_sf()
425 * Mark Taylor 2000-??-??
426 * Robert Hegemann 2000-10-25 made functions of it
428 ***********************************************************************/
430 /* a variation for vbr-mtrh */
/* For every scalefactor band up to max_nonzero_coeff: compute the minimum
 * usable scalefactor (vbrsfmin, from the magnitude limit) and the desired
 * scalefactor (vbrsf, from allowed noise via that->find).  Also tracks the
 * per-window minimum gains (mingain_l / mingain_s). */
432 block_sf(algo_t * that, const FLOAT l3_xmin[SFBMAX], int vbrsf[SFBMAX], int vbrsfmin[SFBMAX])
435     const FLOAT *const xr = &that->cod_info->xr[0];
436     const FLOAT *const xr34_orig = &that->xr34orig[0];
437     const int *const width = &that->cod_info->width[0];
438     unsigned int const max_nonzero_coeff = (unsigned int) that->cod_info->max_nonzero_coeff;
441     unsigned int j = 0, i = 0;
442     int const psymax = that->cod_info->psymax;
444     assert(that->cod_info->max_nonzero_coeff >= 0);
447     that->mingain_s[0] = 0;
448     that->mingain_s[1] = 0;
449     that->mingain_s[2] = 0;
450     while (j <= max_nonzero_coeff) {
451         unsigned int const w = (unsigned int) width[sfb];
/* coefficients left in the nonzero region */
452         unsigned int const m = (unsigned int) (max_nonzero_coeff - j + 1);
458         max_xr34 = vec_max_c(&xr34_orig[j], l);
/* m1: smallest sf that keeps the largest coefficient inside IXMAX_VAL */
460         m1 = find_lowest_scalefac(max_xr34);
462         if (that->mingain_l < m1) {
463             that->mingain_l = m1;
465         if (that->mingain_s[i] < m1) {
466             that->mingain_s[i] = m1;
471         if (sfb < psymax && w > 2) { /* mpeg2.5 at 8 kHz doesn't use all scalefactors, unused have width 2 */
472             float sum_sq = vec_sum_sq_c(&xr[j], l);
473             if (below_noise_floor(sum_sq, l3_xmin[sfb]) == 0) {
474                 m2 = that->find(&xr[j], &xr34_orig[j], l3_xmin[sfb], l, m1);
477                 /** Robert Hegemann 2007-09-29:
478                  * It seems here is some more potential for speed improvements.
479                  * Current find method does 11-18 quantization calculations.
480                  * Using a "good guess" may help to reduce this amount.
482                     uint8_t guess = calc_scalefac(l3_xmin[sfb], l);
483                     DEBUGF(that->gfc, "sfb=%3d guess=%3d found=%3d diff=%3d\n", sfb, guess, m2,
/* zero-fill the remaining (unused) bands */
506     for (; sfb < SFBMAX; ++sfb) {
515 /***********************************************************************
517 * quantize xr34 based on scalefactors
521 * Mark Taylor 2000-??-??
522 * Robert Hegemann 2000-10-20 made functions of them
524 ***********************************************************************/
/* Quantize the whole granule: for each band derive the effective gain from
 * global_gain, scalefac, preflag/pretab and subblock gain, then quantize
 * four coefficients at a time into cod_info->l3_enc. */
527 quantize_x34(const algo_t * that)
530     const FLOAT *xr34_orig = that->xr34orig;
531     gr_info *const cod_info = that->cod_info;
/* scalefac_scale doubles the scalefactor step size (2 -> 4) */
532     int const ifqstep = (cod_info->scalefac_scale == 0) ? 2 : 4;
533     int    *l3 = cod_info->l3_enc;
534     unsigned int j = 0, sfb = 0;
535     unsigned int const max_nonzero_coeff = (unsigned int) cod_info->max_nonzero_coeff;
537     assert(cod_info->max_nonzero_coeff >= 0);
538     assert(cod_info->max_nonzero_coeff < 576);
540     while (j <= max_nonzero_coeff) {
/* total scalefactor contribution for this band */
542             (cod_info->scalefac[sfb] + (cod_info->preflag ? pretab[sfb] : 0)) * ifqstep
543             + cod_info->subblock_gain[cod_info->window[sfb]] * 8;
544         uint8_t const sfac = (uint8_t) (cod_info->global_gain - s);
545         FLOAT const sfpow34 = ipow20[sfac];
546         unsigned int const w = (unsigned int) cod_info->width[sfb];
547         unsigned int const m = (unsigned int) (max_nonzero_coeff - j + 1);
548         unsigned int i, remaining;
550         assert((cod_info->global_gain - s) >= 0);
551         assert(cod_info->width[sfb] >= 0);
/* process min(width, coefficients left) values */
555         i = (w <= m) ? w : m;
556         remaining = (i & 0x03u);
560             x[0] = sfpow34 * xr34_orig[0];
561             x[1] = sfpow34 * xr34_orig[1];
562             x[2] = sfpow34 * xr34_orig[2];
563             x[3] = sfpow34 * xr34_orig[3];
/* tail: quantize the last 1..3 values via a zero-padded group of four */
572             x[0] = x[1] = x[2] = x[3] = 0;
573             switch( remaining ) {
574             case 3: x[2] = sfpow34 * xr34_orig[2]; /* fall through */
575             case 2: x[1] = sfpow34 * xr34_orig[1]; /* fall through */
576             case 1: x[0] = sfpow34 * xr34_orig[0];
581             switch( remaining ) {
582             case 3: l3[2] = tmp_l3[2]; /* fall through */
583             case 2: l3[1] = tmp_l3[1]; /* fall through */
584             case 1: l3[0] = tmp_l3[0];
588             xr34_orig += remaining;
/* Upper bounds for the scalefactor values per band, dictated by the number
 * of bits available to store them in the side info (4-bit / 3-bit groups). */
595 static const uint8_t max_range_short[SBMAX_s * 3] = {
596     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
597     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
601 static const uint8_t max_range_long[SBMAX_l] = {
602     15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0
/* with preflag the pretab values eat into the representable range */
605 static const uint8_t max_range_long_lsf_pretab[SBMAX_l] = {
606     7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
612    sfb=0..5  scalefac < 16
615    ifqstep = ( cod_info->scalefac_scale == 0 ) ? 2 : 4;
616    ol_sf =  (cod_info->global_gain-210.0);
617    ol_sf -= 8*cod_info->subblock_gain[i];
618    ol_sf -= ifqstep*scalefac[gr][ch].s[sfb][i];
/* For short blocks: choose a subblock gain per window so that the
 * per-band scalefactors (sf[], stored negated) fit within the
 * representable ranges (15 / 7), then fold the common part into
 * global_gain. */
622 set_subblock_gain(gr_info * cod_info, const int mingain_s[3], int sf[])
624     const int maxrange1 = 15, maxrange2 = 7;
/* shift equivalent of ifqstep (2 -> <<1, 4 -> <<2) */
625     const int ifqstepShift = (cod_info->scalefac_scale == 0) ? 1 : 2;
626     int    *const sbg = cod_info->subblock_gain;
627     unsigned int const psymax = (unsigned int) cod_info->psymax;
628     unsigned int psydiv = 18;
629     int     sbg0, sbg1, sbg2;
633     if (psydiv > psymax) {
636     for (i = 0; i < 3; ++i) {
637         int     maxsf1 = 0, maxsf2 = 0, minsf = 1000;
638         /* see if we should use subblock gain */
639         for (sfb = i; sfb < psydiv; sfb += 3) { /* part 1 */
640             int const v = -sf[sfb];
648         for (; sfb < SFBMAX; sfb += 3) { /* part 2 */
649             int const v = -sf[sfb];
658         /* boost subblock gain as little as possible so we can
659          * reach maxsf1 with scalefactors
663             int const m1 = maxsf1 - (maxrange1 << ifqstepShift);
664             int const m2 = maxsf2 - (maxrange2 << ifqstepShift);
666             maxsf1 = Max(m1, m2);
675             int const m1 = sbg[i];
/* subblock gain steps are 8 (units of 8 * 0.25 = 2 dB-ish gain) */
676             int const m2 = (maxsf1 + 7) >> 3;
677             sbg[i] = Max(m1, m2);
/* never push the effective gain below the per-window minimum */
679         if (sbg[i] > 0 && mingain_s[i] > (cod_info->global_gain - sbg[i] * 8)) {
680             sbg[i] = (cod_info->global_gain - mingain_s[i]) >> 3;
685         if (min_sbg > sbg[i]) {
692     for (sfb = 0; sfb < SFBMAX; sfb += 3) {
/* move the common subblock gain into global_gain */
698     for (i = 0; i < 3; ++i) {
701     cod_info->global_gain -= min_sbg * 8;
708    ifqstep = ( cod_info->scalefac_scale == 0 ) ? 2 : 4;
709    ol_sf =  (cod_info->global_gain-210.0);
710    ol_sf -= ifqstep*scalefac[gr][ch].l[sfb];
711    if (cod_info->preflag && sfb>=11)
712    ol_sf -= ifqstep*pretab[sfb];
/* Convert the negated per-band gains sf[] into stored scalefactors,
 * clamping to max_range[] and to the band's minimum allowed gain. */
715 set_scalefacs(gr_info * cod_info, const int *vbrsfmin, int sf[], const uint8_t * max_range)
717     const int ifqstep = (cod_info->scalefac_scale == 0) ? 2 : 4;
718     const int ifqstepShift = (cod_info->scalefac_scale == 0) ? 1 : 2;
719     int    *const scalefac = cod_info->scalefac;
720     int const sfbmax = cod_info->sfbmax;
722     int const *const sbg = cod_info->subblock_gain;
723     int const *const window = cod_info->window;
724     int const preflag = cod_info->preflag;
/* preflag adds pretab to bands 11+, so pre-compensate sf[] */
727         for (sfb = 11; sfb < sfbmax; ++sfb) {
728             sf[sfb] += pretab[sfb] * ifqstep;
731     for (sfb = 0; sfb < sfbmax; ++sfb) {
732         int const gain = cod_info->global_gain - (sbg[window[sfb]] * 8)
733             - ((preflag ? pretab[sfb] : 0) * ifqstep);
/* headroom between effective gain and the band's minimum */
736             int const m = gain - vbrsfmin[sfb];
737             /* ifqstep*scalefac >= -sf[sfb], so round UP */
738             scalefac[sfb] = (ifqstep - 1 - sf[sfb]) >> ifqstepShift;
740             if (scalefac[sfb] > max_range[sfb]) {
741                 scalefac[sfb] = max_range[sfb];
743             if (scalefac[sfb] > 0 && (scalefac[sfb] << ifqstepShift) > m) {
744                 scalefac[sfb] = m >> ifqstepShift;
751     for (; sfb < SFBMAX; ++sfb) {
752         scalefac[sfb] = 0;  /* sfb21 */
/* Debug helper: verify that for every band the effective gain
 * (global_gain minus scalefactor/pretab/subblock contributions) never
 * drops below vbrsfmin.  Dumps diagnostics to stdout on failure. */
759 checkScalefactor(const gr_info * cod_info, const int vbrsfmin[SFBMAX])
761     int const ifqstep = cod_info->scalefac_scale == 0 ? 2 : 4;
763     for (sfb = 0; sfb < cod_info->psymax; ++sfb) {
765             ((cod_info->scalefac[sfb] +
766               (cod_info->preflag ? pretab[sfb] : 0)) * ifqstep) +
767             cod_info->subblock_gain[cod_info->window[sfb]] * 8;
769         if ((cod_info->global_gain - s) < vbrsfmin[sfb]) {
771             fprintf( stdout, "sf %d\n", sfb );
772             fprintf( stdout, "min %d\n", vbrsfmin[sfb] );
773             fprintf( stdout, "ggain %d\n", cod_info->global_gain );
774             fprintf( stdout, "scalefac %d\n", cod_info->scalefac[sfb] );
775             fprintf( stdout, "pretab %d\n", (cod_info->preflag ? pretab[sfb] : 0) );
776             fprintf( stdout, "scale %d\n", (cod_info->scalefac_scale + 1) );
777             fprintf( stdout, "subgain %d\n", cod_info->subblock_gain[cod_info->window[sfb]] * 8 );
789 /******************************************************************
791 * short block scalefacs
793 ******************************************************************/
/* Map the searched per-band scalefactors onto the short-block side-info
 * fields: pick scalefac_scale so the required range fits, clamp
 * global_gain, then distribute subblock gains and scalefactors. */
796 short_block_constrain(const algo_t * that, const int vbrsf[SFBMAX],
797                       const int vbrsfmin[SFBMAX], int vbrmax)
799     gr_info *const cod_info = that->cod_info;
800     lame_internal_flags const *const gfc = that->gfc;
801     SessionConfig_t const *const cfg = &gfc->cfg;
802     int const maxminsfb = that->mingain_l;
803     int     mover, maxover0 = 0, maxover1 = 0, delta = 0;
806     int const psymax = cod_info->psymax;
/* measure how far each band overshoots the representable range for
 * scalefac_scale = 0 (maxover0) and = 1 (maxover1) */
808     for (sfb = 0; sfb < psymax; ++sfb) {
809         assert(vbrsf[sfb] >= vbrsfmin[sfb]);
810         v = vbrmax - vbrsf[sfb];
/* 4*14: headroom from the maximum subblock gain (7) times 8 / ifqstep? --
 * NOTE(review): constant inherited from upstream, not derivable from this view */
814         v0 = v - (4 * 14 + 2 * max_range_short[sfb]);
815         v1 = v - (4 * 14 + 4 * max_range_short[sfb]);
823     if (cfg->noise_shaping == 2) {
824         /* allow scalefac_scale=1 */
825         mover = Min(maxover0, maxover1);
838         cod_info->scalefac_scale = 0;
840     else if (maxover1 == 0) {
841         cod_info->scalefac_scale = 1;
843     if (vbrmax < maxminsfb) {
846     cod_info->global_gain = vbrmax;
848     if (cod_info->global_gain < 0) {
849         cod_info->global_gain = 0;
851     else if (cod_info->global_gain > 255) {
852         cod_info->global_gain = 255;
/* store band gains relative to the chosen global gain (negated) */
856     for (sfb = 0; sfb < SFBMAX; ++sfb) {
857         sf_temp[sfb] = vbrsf[sfb] - vbrmax;
859     set_subblock_gain(cod_info, &that->mingain_s[0], sf_temp);
860     set_scalefacs(cod_info, vbrsfmin, sf_temp, max_range_short);
862     assert(checkScalefactor(cod_info, vbrsfmin));
867 /******************************************************************
869 * long block scalefacs
871 ******************************************************************/
/* Long-block counterpart of short_block_constrain: additionally decides
 * whether preflag (pretab) should be used.  Four candidate encodings are
 * scored (scalefac_scale 0/1, with and without pretab); the one with no
 * overshoot is selected. */
874 long_block_constrain(const algo_t * that, const int vbrsf[SFBMAX], const int vbrsfmin[SFBMAX],
877     gr_info *const cod_info = that->cod_info;
878     lame_internal_flags const *const gfc = that->gfc;
879     SessionConfig_t const *const cfg = &gfc->cfg;
880     uint8_t const *max_rangep;
881     int const maxminsfb = that->mingain_l;
883     int     maxover0, maxover1, maxover0p, maxover1p, mover, delta = 0;
884     int     v, v0, v1, v0p, v1p, vm0p = 1, vm1p = 1;
885     int const psymax = cod_info->psymax;
/* MPEG-1 (mode_gr == 2) uses the full table; LSF must respect pretab limits */
887     max_rangep = cfg->mode_gr == 2 ? max_range_long : max_range_long_lsf_pretab;
891     maxover0p = 0;      /* pretab */
892     maxover1p = 0;      /* pretab */
894     for (sfb = 0; sfb < psymax; ++sfb) {
895         assert(vbrsf[sfb] >= vbrsfmin[sfb]);
896         v = vbrmax - vbrsf[sfb];
900         v0 = v - 2 * max_range_long[sfb];
901         v1 = v - 4 * max_range_long[sfb];
902         v0p = v - 2 * (max_rangep[sfb] + pretab[sfb]);
903         v1p = v - 4 * (max_rangep[sfb] + pretab[sfb]);
910         if (maxover0p < v0p) {
913         if (maxover1p < v1p) {
/* validate the pretab variants against the per-band minimum gains */
918         int     gain = vbrmax - maxover0p;
919         if (gain < maxminsfb) {
922         for (sfb = 0; sfb < psymax; ++sfb) {
923             int const a = (gain - vbrsfmin[sfb]) - 2 * pretab[sfb];
932         int     gain = vbrmax - maxover1p;
933         if (gain < maxminsfb) {
936         for (sfb = 0; sfb < psymax; ++sfb) {
937             int const b = (gain - vbrsfmin[sfb]) - 4 * pretab[sfb];
/* pretab variant infeasible: fall back to the non-pretab scores */
945         maxover0p = maxover0;
948         maxover1p = maxover1;
950     if (cfg->noise_shaping != 2) {
952         maxover1p = maxover0p;
954     mover = Min(maxover0, maxover0p);
955     mover = Min(mover, maxover1);
956     mover = Min(mover, maxover1p);
962     if (vbrmax < maxminsfb) {
/* pick the cheapest encoding that has no overshoot left */
971         cod_info->scalefac_scale = 0;
972         cod_info->preflag = 0;
973         max_rangep = max_range_long;
975     else if (maxover0p == 0) {
976         cod_info->scalefac_scale = 0;
977         cod_info->preflag = 1;
979     else if (maxover1 == 0) {
980         cod_info->scalefac_scale = 1;
981         cod_info->preflag = 0;
982         max_rangep = max_range_long;
984     else if (maxover1p == 0) {
985         cod_info->scalefac_scale = 1;
986         cod_info->preflag = 1;
989         assert(0);      /* this should not happen */
991     cod_info->global_gain = vbrmax;
992     if (cod_info->global_gain < 0) {
993         cod_info->global_gain = 0;
995     else if (cod_info->global_gain > 255) {
996         cod_info->global_gain = 255;
1000     for (sfb = 0; sfb < SFBMAX; ++sfb) {
1001         sf_temp[sfb] = vbrsf[sfb] - vbrmax;
1003     set_scalefacs(cod_info, vbrsfmin, sf_temp, max_rangep);
1005     assert(checkScalefactor(cod_info, vbrsfmin));
/* Encode the scalefactors' side info via scale_bitcount(); by construction
 * of the *_constrain functions this must never fail. */
1011 bitcount(const algo_t * that)
1013     int     rc = scale_bitcount(that->gfc, that->cod_info);
1018     /* this should not happen due to the way the scalefactors are selected */
1019     ERRORF(that->gfc, "INTERNAL ERROR IN VBR NEW CODE (986), please send bug report\n");
/* Quantize the granule (elsewhere) and count the Huffman-coded bits needed,
 * storing the result in part2_3_length. */
1026 quantizeAndCountBits(const algo_t * that)
1029     that->cod_info->part2_3_length = noquant_count_bits(that->gfc, that->cod_info, 0);
1030     return that->cod_info->part2_3_length;
/* Trial encode with all scalefactors shifted by `delta` (clamped at each
 * band's minimum); restores xrpow_max afterwards so the trial leaves no
 * side effects on that field.  Returns the trial's bit count. */
1038 tryGlobalStepsize(const algo_t * that, const int sfwork[SFBMAX],
1039                   const int vbrsfmin[SFBMAX], int delta)
1041     FLOAT const xrpow_max = that->cod_info->xrpow_max;
1042     int     sftemp[SFBMAX], i, nbits;
1043     int     gain, vbrmax = 0;
1044     for (i = 0; i < SFBMAX; ++i) {
1045         gain = sfwork[i] + delta;
1046         if (gain < vbrsfmin[i]) {
1052         if (vbrmax < gain) {
1057     that->alloc(that, sftemp, vbrsfmin, vbrmax);
1059     nbits = quantizeAndCountBits(that);
1060     that->cod_info->xrpow_max = xrpow_max;
/* Legacy fallback: binary search over a coarser global step size in
 * [gain, 512] until the granule fits into `target` bits, then re-run the
 * winning trial so the encoder state matches it. */
1067 searchGlobalStepsizeMax(const algo_t * that, const int sfwork[SFBMAX],
1068                         const int vbrsfmin[SFBMAX], int target)
1070     gr_info const *const cod_info = that->cod_info;
1071     const int gain = cod_info->global_gain;
1074     int     nbits = LARGE_BITS;
1075     int     l = gain, r = 512;
1079         curr = (l + r) >> 1;
1080         nbits = tryGlobalStepsize(that, sfwork, vbrsfmin, curr - gain);
1081         if (nbits == 0 || (nbits + cod_info->part2_length) < target) {
/* 1024 presumably means "no fitting gain found yet" -- TODO confirm */
1087     if (gain_ok == 1024) {
/* re-apply the best candidate if the last trial wasn't it */
1092     if (gain_ok != curr) {
1094         nbits = tryGlobalStepsize(that, sfwork, vbrsfmin, curr - gain);
/* Return the depth of the scalefactor distribution: how far the bands
 * spread below the 255 ceiling (max of 255 - sfwork[i]). */
1101 sfDepth(const int sfwork[SFBMAX])
1105     for (j = SFBMAX, i = 0; j > 0; --j, ++i) {
1106         int const di = 255 - sfwork[i];
1110         assert(sfwork[i] >= 0);
1111         assert(sfwork[i] <= 255);
/* Clip every scalefactor to an upper bound `cut` (element-wise min). */
1120 cutDistribution(const int sfwork[SFBMAX], int sf_out[SFBMAX], int cut)
1123     for (j = SFBMAX, i = 0; j > 0; --j, ++i) {
1124         int const x = sfwork[i];
1125         sf_out[i] = x < cut ? x : cut;
/* Move every scalefactor a fraction k/dm of the way towards the pivot p,
 * flattening the distribution (k == dm makes all bands equal to p). */
1131 flattenDistribution(const int sfwork[SFBMAX], int sf_out[SFBMAX], int dm, int k, int p)
1136         for (j = SFBMAX, i = 0; j > 0; --j, ++i) {
1137             int const di = p - sfwork[i];
1138             x = sfwork[i] + (k * di) / dm;
/* Trial encode with an explicit scalefactor set: allocate, quantize, count
 * bits (including part2 side info), restore xrpow_max, return total bits. */
1167 tryThatOne(algo_t const* that, const int sftemp[SFBMAX], const int vbrsfmin[SFBMAX], int vbrmax)
1169     FLOAT const xrpow_max = that->cod_info->xrpow_max;
1170     int nbits = LARGE_BITS;
1171     that->alloc(that, sftemp, vbrsfmin, vbrmax);
1173     nbits = quantizeAndCountBits(that);
1174     nbits += that->cod_info->part2_length;
1175     that->cod_info->xrpow_max = xrpow_max;
/* When a channel used too many bits: progressively flatten the scalefactor
 * distribution (first towards global_gain, then towards a raised pivot)
 * via binary search until the granule fits into `target` bits; falls back
 * to the legacy global-stepsize search if flattening isn't enough. */
1181 outOfBitsStrategy(algo_t const* that, const int sfwork[SFBMAX], const int vbrsfmin[SFBMAX], int target)
1184     int const dm = sfDepth(sfwork);
1185     int const p = that->cod_info->global_gain;
/* phase 1: binary search the flattening amount bi in [0, dm] */
1195         int const sfmax = flattenDistribution(sfwork, wrk, dm, bi, p);
1196         nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
1197         if (nbits <= target) {
/* re-run the best flattening found so encoder state matches it */
1213         int const sfmax = flattenDistribution(sfwork, wrk, dm, bi_ok, p);
1214         nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
/* phase 2: fully flattened, now binary search the pivot itself */
1222     int     bi = (255 + p) / 2;
1227         int const sfmax = flattenDistribution(sfwork, wrk, dm, dm, bi);
1228         nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
1229         if (nbits <= target) {
1245         int const sfmax = flattenDistribution(sfwork, wrk, dm, dm, bi_ok);
1246         nbits = tryThatOne(that, wrk, vbrsfmin, sfmax);
1252     /* fall back to old code, likely to be never called */
1253     searchGlobalStepsizeMax(that, wrk, vbrsfmin, target);
/* Post-pass bit reduction for one granule/channel: try better scalefactor
 * storage and (optionally) better Huffman table partitioning, then return
 * the final bit usage (part2 + part2_3). */
1258 reduce_bit_usage(lame_internal_flags * gfc, int gr, int ch
1260                  , const FLOAT xr34orig[576], const FLOAT l3_xmin[SFBMAX], int maxbits
1264     SessionConfig_t const *const cfg = &gfc->cfg;
1265     gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
1266     /* try some better scalefac storage
1268     best_scalefac_store(gfc, gr, ch, &gfc->l3_side);
1270     /* best huffman_divide may save some bits too
1272     if (cfg->use_best_huffman == 1)
1273         best_huffman_divide(gfc, cod_info);
1274     return cod_info->part2_3_length + cod_info->part2_length;
/* Top-level VBR frame encoder: searches scalefactors per granule/channel,
 * quantizes, and then — only if the frame violates the bit budget — carves
 * the available bits between granules/channels (proportional to sqrt of
 * demand) and re-encodes with outOfBitsStrategy.  Returns the number of
 * bits used for the frame. */
1281 VBR_encode_frame(lame_internal_flags * gfc, const FLOAT xr34orig[2][2][576],
1282                  const FLOAT l3_xmin[2][2][SFBMAX], const int max_bits[2][2])
1284     SessionConfig_t const *const cfg = &gfc->cfg;
1285     int     sfwork_[2][2][SFBMAX];
1286     int     vbrsfmin_[2][2][SFBMAX];
1288     int const ngr = cfg->mode_gr;
1289     int const nch = cfg->channels_out;
1290     int     max_nbits_ch[2][2] = {{0, 0}, {0 ,0}};
1291     int     max_nbits_gr[2] = {0, 0};
1292     int     max_nbits_fr = 0;
1293     int     use_nbits_ch[2][2] = {{MAX_BITS_PER_CHANNEL+1, MAX_BITS_PER_CHANNEL+1}
1294                                  ,{MAX_BITS_PER_CHANNEL+1, MAX_BITS_PER_CHANNEL+1}};
1295     int     use_nbits_gr[2] = { MAX_BITS_PER_GRANULE+1, MAX_BITS_PER_GRANULE+1 };
1296     int     use_nbits_fr = MAX_BITS_PER_GRANULE+MAX_BITS_PER_GRANULE;
1300     /* set up some encoding parameters
1302     for (gr = 0; gr < ngr; ++gr) {
1303         max_nbits_gr[gr] = 0;
1304         for (ch = 0; ch < nch; ++ch) {
1305             max_nbits_ch[gr][ch] = max_bits[gr][ch];
1306             use_nbits_ch[gr][ch] = 0;
1307             max_nbits_gr[gr] += max_bits[gr][ch];
1308             max_nbits_fr += max_bits[gr][ch];
/* full_outer_loop < 0 selects the fast analytic guess over the exact search */
1309             that_[gr][ch].find = (cfg->full_outer_loop < 0) ? guess_scalefac_x34 : find_scalefac_x34;
1310             that_[gr][ch].gfc = gfc;
1311             that_[gr][ch].cod_info = &gfc->l3_side.tt[gr][ch];
1312             that_[gr][ch].xr34orig = xr34orig[gr][ch];
1313             if (that_[gr][ch].cod_info->block_type == SHORT_TYPE) {
1314                 that_[gr][ch].alloc = short_block_constrain;
1317                 that_[gr][ch].alloc = long_block_constrain;
1321     /* searches scalefactors
1323     for (gr = 0; gr < ngr; ++gr) {
1324         for (ch = 0; ch < nch; ++ch) {
1325             if (max_bits[gr][ch] > 0) {
1326                 algo_t *that = &that_[gr][ch];
1327                 int    *sfwork = sfwork_[gr][ch];
1328                 int    *vbrsfmin = vbrsfmin_[gr][ch];
1331                 vbrmax = block_sf(that, l3_xmin[gr][ch], sfwork, vbrsfmin);
1332                 that->alloc(that, sfwork, vbrsfmin, vbrmax);
1336                 /* xr contains no energy
1337                  * l3_enc, our encoding data, will be quantized to zero
1338                  * continue with next channel
/* quantization pass: quantize, count bits and tighten storage per channel */
1346     for (gr = 0; gr < ngr; ++gr) {
1347         use_nbits_gr[gr] = 0;
1348         for (ch = 0; ch < nch; ++ch) {
1349             algo_t const *that = &that_[gr][ch];
1350             if (max_bits[gr][ch] > 0) {
1351                 memset(&that->cod_info->l3_enc[0], 0, sizeof(that->cod_info->l3_enc));
1352                 (void) quantizeAndCountBits(that);
1355                 /* xr contains no energy
1356                  * l3_enc, our encoding data, will be quantized to zero
1357                  * continue with next channel
1360             use_nbits_ch[gr][ch] = reduce_bit_usage(gfc, gr, ch);
1361             use_nbits_gr[gr] += use_nbits_ch[gr][ch];
1363         use_nbits_fr += use_nbits_gr[gr];
1366     /* check bit constrains
1368     if (use_nbits_fr <= max_nbits_fr) {
1370         for (gr = 0; gr < ngr; ++gr) {
1371             if (use_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
1372                 /* violates the rule that every granule has to use no more
1373                  * bits than MAX_BITS_PER_GRANULE
1377             for (ch = 0; ch < nch; ++ch) {
1378                 if (use_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1379                     /* violates the rule that every gr_ch has to use no more
1380                      * bits than MAX_BITS_PER_CHANNEL
1382                      * This isn't explicitly stated in the ISO docs, but the
1383                      * part2_3_length field has only 12 bits, that makes it
1384                      * up to a maximum size of 4095 bits!!!
/* all constraints satisfied: frame fits, done */
1391         return use_nbits_fr;
1395     /* OK, we are in trouble and have to define how many bits are
1396      * to be used for each granule
1402     for (gr = 0; gr < ngr; ++gr) {
1403         max_nbits_gr[gr] = 0;
1404         for (ch = 0; ch < nch; ++ch) {
1405             if (use_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1406                 max_nbits_ch[gr][ch] = MAX_BITS_PER_CHANNEL;
1409                 max_nbits_ch[gr][ch] = use_nbits_ch[gr][ch];
1411             max_nbits_gr[gr] += max_nbits_ch[gr][ch];
1413         if (max_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
/* split the granule budget between channels proportional to 4th root
 * of their demand */
1414             float   f[2] = {0.0f, 0.0f}, s = 0.0f;
1415             for (ch = 0; ch < nch; ++ch) {
1416                 if (max_nbits_ch[gr][ch] > 0) {
1417                     f[ch] = sqrt(sqrt(max_nbits_ch[gr][ch]));
1424             for (ch = 0; ch < nch; ++ch) {
1426                     max_nbits_ch[gr][ch] = MAX_BITS_PER_GRANULE * f[ch] / s;
1429                     max_nbits_ch[gr][ch] = 0;
/* donate surplus (beyond used + 32 bits slack) to the other channel */
1433             if (max_nbits_ch[gr][0] > use_nbits_ch[gr][0] + 32) {
1434                 max_nbits_ch[gr][1] += max_nbits_ch[gr][0];
1435                 max_nbits_ch[gr][1] -= use_nbits_ch[gr][0] + 32;
1436                 max_nbits_ch[gr][0] = use_nbits_ch[gr][0] + 32;
1438             if (max_nbits_ch[gr][1] > use_nbits_ch[gr][1] + 32) {
1439                 max_nbits_ch[gr][0] += max_nbits_ch[gr][1];
1440                 max_nbits_ch[gr][0] -= use_nbits_ch[gr][1] + 32;
1441                 max_nbits_ch[gr][1] = use_nbits_ch[gr][1] + 32;
1443             if (max_nbits_ch[gr][0] > MAX_BITS_PER_CHANNEL) {
1444                 max_nbits_ch[gr][0] = MAX_BITS_PER_CHANNEL;
1446             if (max_nbits_ch[gr][1] > MAX_BITS_PER_CHANNEL) {
1447                 max_nbits_ch[gr][1] = MAX_BITS_PER_CHANNEL;
1450             max_nbits_gr[gr] = 0;
1451             for (ch = 0; ch < nch; ++ch) {
1452                 max_nbits_gr[gr] += max_nbits_ch[gr][ch];
1455         sum_fr += max_nbits_gr[gr];
1457     if (sum_fr > max_nbits_fr) {
/* split the frame budget between granules proportional to sqrt demand */
1459             float   f[2] = {0.0f, 0.0f}, s = 0.0f;
1460             for (gr = 0; gr < ngr; ++gr) {
1461                 if (max_nbits_gr[gr] > 0) {
1462                     f[gr] = sqrt(max_nbits_gr[gr]);
1469             for (gr = 0; gr < ngr; ++gr) {
1471                     max_nbits_gr[gr] = max_nbits_fr * f[gr] / s;
1474                     max_nbits_gr[gr] = 0;
/* donate surplus (beyond used + 125 bits slack) to the other granule */
1479             if (max_nbits_gr[0] > use_nbits_gr[0] + 125) {
1480                 max_nbits_gr[1] += max_nbits_gr[0];
1481                 max_nbits_gr[1] -= use_nbits_gr[0] + 125;
1482                 max_nbits_gr[0] = use_nbits_gr[0] + 125;
1484             if (max_nbits_gr[1] > use_nbits_gr[1] + 125) {
1485                 max_nbits_gr[0] += max_nbits_gr[1];
1486                 max_nbits_gr[0] -= use_nbits_gr[1] + 125;
1487                 max_nbits_gr[1] = use_nbits_gr[1] + 125;
1489             for (gr = 0; gr < ngr; ++gr) {
1490                 if (max_nbits_gr[gr] > MAX_BITS_PER_GRANULE) {
1491                     max_nbits_gr[gr] = MAX_BITS_PER_GRANULE;
/* redistribute channel budgets within the (possibly reduced) granule budget */
1495             for (gr = 0; gr < ngr; ++gr) {
1496                 float   f[2] = {0.0f, 0.0f}, s = 0.0f;
1497                 for (ch = 0; ch < nch; ++ch) {
1498                     if (max_nbits_ch[gr][ch] > 0) {
1499                         f[ch] = sqrt(max_nbits_ch[gr][ch]);
1506                 for (ch = 0; ch < nch; ++ch) {
1508                         max_nbits_ch[gr][ch] = max_nbits_gr[gr] * f[ch] / s;
1511                         max_nbits_ch[gr][ch] = 0;
1515                 if (max_nbits_ch[gr][0] > use_nbits_ch[gr][0] + 32) {
1516                     max_nbits_ch[gr][1] += max_nbits_ch[gr][0];
1517                     max_nbits_ch[gr][1] -= use_nbits_ch[gr][0] + 32;
1518                     max_nbits_ch[gr][0] = use_nbits_ch[gr][0] + 32;
1520                 if (max_nbits_ch[gr][1] > use_nbits_ch[gr][1] + 32) {
1521                     max_nbits_ch[gr][0] += max_nbits_ch[gr][1];
1522                     max_nbits_ch[gr][0] -= use_nbits_ch[gr][1] + 32;
1523                     max_nbits_ch[gr][1] = use_nbits_ch[gr][1] + 32;
1525                 for (ch = 0; ch < nch; ++ch) {
1526                     if (max_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1527                         max_nbits_ch[gr][ch] = MAX_BITS_PER_CHANNEL;
/* sanity check the computed distribution against the hard limits */
1535     for (gr = 0; gr < ngr; ++gr) {
1537         for (ch = 0; ch < nch; ++ch) {
1538             sum_gr += max_nbits_ch[gr][ch];
1539             if (max_nbits_ch[gr][ch] > MAX_BITS_PER_CHANNEL) {
1544         if (sum_gr > MAX_BITS_PER_GRANULE) {
1548     if (sum_fr > max_nbits_fr) {
1552         /* we must have done something wrong, fallback to 'on_pe' based constrain */
1553         for (gr = 0; gr < ngr; ++gr) {
1554             for (ch = 0; ch < nch; ++ch) {
1555                 max_nbits_ch[gr][ch] = max_bits[gr][ch];
1561     /* we already called the 'best_scalefac_store' function, so we need to reset some
1562      * variables before we can do it again.
1564     for (ch = 0; ch < nch; ++ch) {
1565         gfc->l3_side.scfsi[ch][0] = 0;
1566         gfc->l3_side.scfsi[ch][1] = 0;
1567         gfc->l3_side.scfsi[ch][2] = 0;
1568         gfc->l3_side.scfsi[ch][3] = 0;
1570     for (gr = 0; gr < ngr; ++gr) {
1571         for (ch = 0; ch < nch; ++ch) {
1572             gfc->l3_side.tt[gr][ch].scalefac_compress = 0;
1576     /* alter our encoded data, until it fits into the target bitrate
1579     for (gr = 0; gr < ngr; ++gr) {
1580         use_nbits_gr[gr] = 0;
1581         for (ch = 0; ch < nch; ++ch) {
1582             algo_t const *that = &that_[gr][ch];
1583             use_nbits_ch[gr][ch] = 0;
1584             if (max_bits[gr][ch] > 0) {
1585                 int    *sfwork = sfwork_[gr][ch];
1586                 int const *vbrsfmin = vbrsfmin_[gr][ch];
1587                 cutDistribution(sfwork, sfwork, that->cod_info->global_gain);
1588                 outOfBitsStrategy(that, sfwork, vbrsfmin, max_nbits_ch[gr][ch]);
1590             use_nbits_ch[gr][ch] = reduce_bit_usage(gfc, gr, ch);
1591             assert(use_nbits_ch[gr][ch] <= max_nbits_ch[gr][ch]);
1592             use_nbits_gr[gr] += use_nbits_ch[gr][ch];
1594         use_nbits_fr += use_nbits_gr[gr];
1597     /* check bit constrains, but it should always be ok, iff there are no bugs ;-)
1599     if (use_nbits_fr <= max_nbits_fr) {
1600         return use_nbits_fr;
1603     ERRORF(gfc, "INTERNAL ERROR IN VBR NEW CODE (1313), please send bug report\n"
1604            "maxbits=%d usedbits=%d\n", max_nbits_fr, use_nbits_fr);