4 * Copyright (c) 1999-2000 Mark Taylor
5 * Copyright (c) 2001-2002 Naoki Shibata
6 * Copyright (c) 2000-2003 Takehiro Tominaga
7 * Copyright (c) 2000-2011 Robert Hegemann
8 * Copyright (c) 2000-2005 Gabriel Bouvigne
9 * Copyright (c) 2000-2005 Alexander Leidinger
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Library General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Library General Public License for more details.
21 * You should have received a copy of the GNU Library General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
27 /* $Id: psymodel.c,v 1.209 2011/05/24 20:45:55 robert Exp $ */
34 This routine computes the psycho acoustics, delayed by one granule.
36 Input: buffer of PCM data (1024 samples).
38 This window should be centered over the 576 sample granule window.
39 The routine will compute the psycho acoustics for
40 this granule, but return the psycho acoustics computed
41 for the *previous* granule. This is because the block
42 type of the previous granule can only be determined
43 after we have computed the psycho acoustics for the following
46 Output: maskings and energies for each scalefactor band.
47 block type, PE, and some correlation measures.
48 The PE is used by CBR modes to determine if extra bits
49 from the bit reservoir should be used. The correlation
50 measures are used to determine mid/side or regular stereo.
55 barks: a non-linear frequency scale. Mapping from frequency to
56 barks is given by freq2bark()
58 scalefactor bands: The spectrum (frequencies) are broken into
59 SBMAX "scalefactor bands". Thes bands
60 are determined by the MPEG ISO spec. In
61 the noise shaping/quantization code, we allocate
62 bits among the partition bands to achieve the
65 partition bands: The spectrum is also broken into about
66 64 "partition bands". Each partition
67 band is about .34 barks wide. There are about 2-5
68 partition bands for each scalefactor band.
70 LAME computes all psycho acoustic information for each partition
71 band. Then at the end of the computations, this information
72 is mapped to scalefactor bands. The energy in each scalefactor
73 band is taken as the sum of the energy in all partition bands
74 which overlap the scalefactor band. The maskings can be computed
75 in the same way (and thus represent the average masking in that band)
76 or by taking the minmum value multiplied by the number of
77 partition bands used (which represents a minimum masking in that band).
80 The general outline is as follows:
82 1. compute the energy in each partition band
83 2. compute the tonality in each partition band
84 3. compute the strength of each partion band "masker"
85 4. compute the masking (via the spreading function applied to each masker)
86 5. Modifications for mid/side masking.
88 Each partition band is considiered a "masker". The strength
89 of the i'th masker in band j is given by:
91 s3(bark(i)-bark(j))*strength(i)
93 The strength of the masker is a function of the energy and tonality.
94 The more tonal, the less masking. LAME uses a simple linear formula
95 (controlled by NMT and TMN) which says the strength is given by the
96 energy divided by a linear function of the tonality.
99 s3() is the "spreading function". It is given by a formula
100 determined via listening tests.
102 The total masking in the j'th partition band is the sum over
103 all maskings i. It is thus given by the convolution of
104 the strength with s3(), the "spreading function."
106 masking(j) = sum_over_i s3(i-j)*strength(i) = s3 o strength
108 where "o" = convolution operator. s3 is given by a formula determined
109 via listening tests. It is normalized so that s3 o 1 = 1.
111 Note: instead of a simple convolution, LAME also has the
112 option of using "additive masking"
114 The most critical part is step 2, computing the tonality of each
115 partition band. LAME has two tonality estimators. The first
116 is based on the ISO spec, and measures how predictiable the
117 signal is over time. The more predictable, the more tonal.
118 The second measure is based on looking at the spectrum of
119 a single granule. The more peaky the spectrum, the more
120 tonal. By most indications, the latter approach is better.
122 Finally, in step 5, the maskings for the mid and side
123 channel are possibly increased. Under certain circumstances,
124 noise in the mid & side channels is assumed to also
125 be masked by strong maskers in the L or R channels.
128 Other data computed by the psy-model:
130 ms_ratio side-channel / mid-channel masking ratio (for previous granule)
131 ms_ratio_next side-channel / mid-channel masking ratio for this granule
133 percep_entropy[2] L and R values (prev granule) of PE - A measure of how
134 much pre-echo is in the previous granule
135 percep_entropy_MS[2] mid and side channel values (prev granule) of percep_entropy
136 energy[4] L,R,M,S energy in each channel, prev granule
137 blocktype_d[2] block type to use for previous granule
151 #include "psymodel.h"
152 #include "lame_global_flags.h"
154 #include "lame-analysis.h"
160 #define LN_TO_LOG10 (M_LN10/10)
162 #define LN_TO_LOG10 0.2302585093
167 L3psycho_anal. Compute psycho acoustics.
169 Data returned to the calling program must be delayed by one
172 This is done in two places.
173 If we do not need to know the blocktype, the copying
174 can be done here at the top of the program: we copy the data for
175 the last granule (computed during the last call) before it is
176 overwritten with the new data. It looks like this:
178 0. static psymodel_data
179 1. calling_program_data = psymodel_data
180 2. compute psymodel_data
182 For data which needs to know the blocktype, the copying must be
183 done at the end of this loop, and the old values must be saved:
185 0. static psymodel_data_old
186 1. compute psymodel_data
187 2. compute possible block type of this granule
188 3. compute final block type of previous granule based on #2.
189 4. calling_program_data = psymodel_data_old
190 5. psymodel_data_old = psymodel_data
197 /* psycho_loudness_approx
199 in: energy - BLKSIZE/2 elements of frequency magnitudes ^ 2
200 gfp - uses out_samplerate, ATHtype (also needed for ATHformula)
201 returns: loudness^2 approximation, a positive value roughly tuned for a value
202 of 1.0 for signals near clipping.
203 notes: When calibrated, feeding this function binary white noise at sample
204 values +32767 or -32768 should return values that approach 3.
205 ATHformula is used to approximate an equal loudness curve.
206 future: Data indicates that the shape of the equal loudness curve varies
207 with intensity. This function might be improved by using an equal
208 loudness curve shaped for typical playback levels (instead of the
209 ATH, that is shaped for the threshold). A flexible realization might
210 simply bend the existing ATH curve to achieve the desired shape.
211 However, the potential gain may not be enough to justify an effort.
214 psycho_loudness_approx(FLOAT const *energy, FLOAT const *eql_w)
217 FLOAT loudness_power;
219 loudness_power = 0.0;
220 /* apply weights to power in freq. bands */
221 for (i = 0; i < BLKSIZE / 2; ++i)
222 loudness_power += energy[i] * eql_w[i];
223 loudness_power *= VO_SCALE;
225 return loudness_power;
228 /* mask_add optimization */
229 /* init the limit values used to avoid computing log in mask_add when it is not necessary */
231 /* For example, with i = 10*log10(m2/m1)/10*16 (= log10(m2/m1)*16)
233 * abs(i)>8 is equivalent (as i is an integer) to
236 * equivalent to (as i is the biggest integer smaller than log10(m2/m1)*16
237 * or the smallest integer bigger than log10(m2/m1)*16 depending on the sign of log10(m2/m1)*16)
238 * log10(m2/m1)>=9/16 || log10(m2/m1)<=-9/16
239 * exp10 is strictly increasing thus this is equivalent to
240 * m2/m1 >= 10^(9/16) || m2/m1<=10^(-9/16) which are comparisons to constants
244 #define I1LIMIT 8 /* as in if(i>8) */
245 #define I2LIMIT 23 /* as in if(i>24) -> changed 23 */
246 #define MLIMIT 15 /* as in if(m<15) */
248 static FLOAT ma_max_i1;
249 static FLOAT ma_max_i2;
250 static FLOAT ma_max_m;
252 /*This is the masking table:
253 According to tonality, values are going from 0dB (TMN)
255 After additive masking computation, 8dB are added, so
256 final values are going from 8dB to 17.3dB
258 static const FLOAT tab[] = {
259 1.0 /*pow(10, -0) */ ,
260 0.79433 /*pow(10, -0.1) */ ,
261 0.63096 /*pow(10, -0.2) */ ,
262 0.63096 /*pow(10, -0.2) */ ,
263 0.63096 /*pow(10, -0.2) */ ,
264 0.63096 /*pow(10, -0.2) */ ,
265 0.63096 /*pow(10, -0.2) */ ,
266 0.25119 /*pow(10, -0.6) */ ,
267 0.11749 /*pow(10, -0.93) */
270 static const int tab_mask_add_delta[] = { 2, 2, 2, 1, 1, 1, 0, 0, -1 };
271 #define STATIC_ASSERT_EQUAL_DIMENSION(A,B) {extern char static_assert_##A[dimension_of(A) == dimension_of(B) ? 1 : -1];(void) static_assert_##A;}
274 mask_add_delta(int i)
276 STATIC_ASSERT_EQUAL_DIMENSION(tab_mask_add_delta,tab);
277 assert(i < (int)dimension_of(tab));
278 return tab_mask_add_delta[i];
283 init_mask_add_max_values(void)
285 ma_max_i1 = pow(10, (I1LIMIT + 1) / 16.0);
286 ma_max_i2 = pow(10, (I2LIMIT + 1) / 16.0);
287 ma_max_m = pow(10, (MLIMIT) / 10.0);
293 /* addition of simultaneous masking Naoki Shibata 2000/7 */
295 vbrpsy_mask_add(FLOAT m1, FLOAT m2, int b, int delta)
297 static const FLOAT table2[] = {
298 1.33352 * 1.33352, 1.35879 * 1.35879, 1.38454 * 1.38454, 1.39497 * 1.39497,
299 1.40548 * 1.40548, 1.3537 * 1.3537, 1.30382 * 1.30382, 1.22321 * 1.22321,
324 if (abs(b) <= delta) { /* approximately, 1 bark = 3 partitions */
325 /* originally 'if(i > 8)' */
326 if (ratio >= ma_max_i1) {
330 int i = (int) (FAST_LOG10_X(ratio, 16.0f));
331 return (m1 + m2) * table2[i];
334 if (ratio < ma_max_i2) {
344 /* short block threshold calculation (part 2)
346 partition band bo_s[sfb] is at the transition from scalefactor
347 band sfb to the next one sfb+1; enn and thmm have to be split
351 convert_partition2scalefac(PsyConst_CB2SB_t const *const gd, FLOAT const *eb, FLOAT const *thr,
352 FLOAT enn_out[], FLOAT thm_out[])
354 static FLOAT enn, thmm;
355 int sb, b, n = gd->n_sb;
357 for (sb = b = 0; sb < n; ++b, ++sb) {
358 int const bo_sb = gd->bo[sb];
359 int const npart = gd->npart;
360 int const b_lim = bo_sb < npart ? bo_sb : npart;
362 assert(eb[b] >= 0); /* iff failed, it may indicate some index error elsewhere */
374 assert(eb[b] >= 0); /* iff failed, it may indicate some index error elsewhere */
377 /* at transition sfb -> sfb+1 */
378 FLOAT const w_curr = gd->bo_weight[sb];
379 FLOAT const w_next = 1.0f - w_curr;
380 enn += w_curr * eb[b];
381 thmm += w_curr * thr[b];
384 enn = w_next * eb[b];
385 thmm = w_next * thr[b];
388 /* zero initialize the rest */
389 for (; sb < n; ++sb) {
396 convert_partition2scalefac_s(lame_internal_flags * gfc, FLOAT const *eb, FLOAT const *thr, int chn,
399 PsyStateVar_t *const psv = &gfc->sv_psy;
400 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
401 static FLOAT enn[SBMAX_s], thm[SBMAX_s];
403 convert_partition2scalefac(gds, eb, thr, enn, thm);
404 for (sb = 0; sb < SBMAX_s; ++sb) {
405 psv->en[chn].s[sb][sblock] = enn[sb];
406 psv->thm[chn].s[sb][sblock] = thm[sb];
410 /* longblock threshold calculation (part 2) */
412 convert_partition2scalefac_l(lame_internal_flags * gfc, FLOAT const *eb, FLOAT const *thr, int chn)
414 PsyStateVar_t *const psv = &gfc->sv_psy;
415 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
416 FLOAT *enn = &psv->en[chn].l[0];
417 FLOAT *thm = &psv->thm[chn].l[0];
418 convert_partition2scalefac(gdl, eb, thr, enn, thm);
422 convert_partition2scalefac_l_to_s(lame_internal_flags * gfc, FLOAT const *eb, FLOAT const *thr,
425 PsyStateVar_t *const psv = &gfc->sv_psy;
426 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->l_to_s;
427 static FLOAT enn[SBMAX_s], thm[SBMAX_s];
429 convert_partition2scalefac(gds, eb, thr, enn, thm);
430 for (sb = 0; sb < SBMAX_s; ++sb) {
431 FLOAT const scale = 1. / 64.f;
432 FLOAT const tmp_enn = enn[sb];
433 FLOAT const tmp_thm = thm[sb] * scale;
434 for (sblock = 0; sblock < 3; ++sblock) {
435 psv->en[chn].s[sb][sblock] = tmp_enn;
436 psv->thm[chn].s[sb][sblock] = tmp_thm;
444 NS_INTERP(FLOAT x, FLOAT y, FLOAT r)
446 /* was pow((x),(r))*pow((y),1-(r)) */
448 return x; /* 99.7% of the time */
452 return powf(x / y, r) * y; /* rest of the time */
453 return 0.0f; /* never happens */
459 pecalc_s(III_psy_ratio const *mr, FLOAT masking_lower)
462 static const FLOAT regcoef_s[] = {
463 11.8, /* these values are tuned only for 44.1kHz... */
477 unsigned int sb, sblock;
480 for (sb = 0; sb < SBMAX_s - 1; sb++) {
481 for (sblock = 0; sblock < 3; sblock++) {
482 FLOAT const thm = mr->thm.s[sb][sblock];
483 assert(sb < dimension_of(regcoef_s));
485 FLOAT const x = thm * masking_lower;
486 FLOAT const en = mr->en.s[sb][sblock];
488 if (en > x * 1e10f) {
489 pe_s += regcoef_s[sb] * (10.0f * LOG10);
493 pe_s += regcoef_s[sb] * FAST_LOG10(en / x);
504 pecalc_l(III_psy_ratio const *mr, FLOAT masking_lower)
507 static const FLOAT regcoef_l[] = {
508 6.8, /* these values are tuned only for 44.1kHz... */
534 for (sb = 0; sb < SBMAX_l - 1; sb++) {
535 FLOAT const thm = mr->thm.l[sb];
536 assert(sb < dimension_of(regcoef_l));
538 FLOAT const x = thm * masking_lower;
539 FLOAT const en = mr->en.l[sb];
541 if (en > x * 1e10f) {
542 pe_l += regcoef_l[sb] * (10.0f * LOG10);
546 pe_l += regcoef_l[sb] * FAST_LOG10(en / x);
557 calc_energy(PsyConst_CB2SB_t const *l, FLOAT const *fftenergy, FLOAT * eb, FLOAT * max, FLOAT * avg)
561 for (b = j = 0; b < l->npart; ++b) {
562 FLOAT ebb = 0, m = 0;
564 for (i = 0; i < l->numlines[b]; ++i, ++j) {
565 FLOAT const el = fftenergy[j];
573 avg[b] = ebb * l->rnumlines[b];
574 assert(l->rnumlines[b] >= 0);
584 calc_mask_index_l(lame_internal_flags const *gfc, FLOAT const *max,
585 FLOAT const *avg, unsigned char *mask_idx)
587 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
590 int const last_tab_entry = sizeof(tab) / sizeof(tab[0]) - 1;
592 a = avg[b] + avg[b + 1];
598 assert((gdl->numlines[b] + gdl->numlines[b + 1] - 1) > 0);
599 a = 20.0f * (m * 2.0f - a)
600 / (a * (gdl->numlines[b] + gdl->numlines[b + 1] - 1));
602 if (k > last_tab_entry)
610 for (b = 1; b < gdl->npart - 1; b++) {
611 a = avg[b - 1] + avg[b] + avg[b + 1];
619 assert((gdl->numlines[b - 1] + gdl->numlines[b] + gdl->numlines[b + 1] - 1) > 0);
620 a = 20.0f * (m * 3.0f - a)
621 / (a * (gdl->numlines[b - 1] + gdl->numlines[b] + gdl->numlines[b + 1] - 1));
623 if (k > last_tab_entry)
632 assert(b == gdl->npart - 1);
634 a = avg[b - 1] + avg[b];
640 assert((gdl->numlines[b - 1] + gdl->numlines[b] - 1) > 0);
641 a = 20.0f * (m * 2.0f - a)
642 / (a * (gdl->numlines[b - 1] + gdl->numlines[b] - 1));
644 if (k > last_tab_entry)
651 assert(b == (gdl->npart - 1));
656 vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2], int chn,
657 int gr_out, FLOAT fftenergy[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
659 SessionConfig_t const *const cfg = &gfc->cfg;
660 PsyStateVar_t *psv = &gfc->sv_psy;
661 plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
665 fft_long(gfc, *wsamp_l, chn, buffer);
668 FLOAT const sqrt2_half = SQRT2 * 0.5f;
669 /* FFT data for mid and side channel is derived from L & R */
670 for (j = BLKSIZE - 1; j >= 0; --j) {
671 FLOAT const l = wsamp_l[0][j];
672 FLOAT const r = wsamp_l[1][j];
673 wsamp_l[0][j] = (l + r) * sqrt2_half;
674 wsamp_l[1][j] = (l - r) * sqrt2_half;
678 /*********************************************************************
680 *********************************************************************/
681 fftenergy[0] = wsamp_l[0][0];
682 fftenergy[0] *= fftenergy[0];
684 for (j = BLKSIZE / 2 - 1; j >= 0; --j) {
685 FLOAT const re = (*wsamp_l)[BLKSIZE / 2 - j];
686 FLOAT const im = (*wsamp_l)[BLKSIZE / 2 + j];
687 fftenergy[BLKSIZE / 2 - j] = (re * re + im * im) * 0.5f;
691 FLOAT totalenergy = 0.0f;
692 for (j = 11; j < HBLKSIZE; j++)
693 totalenergy += fftenergy[j];
695 psv->tot_ener[chn] = totalenergy;
699 for (j = 0; j < HBLKSIZE; j++) {
700 plt->energy[gr_out][chn][j] = plt->energy_save[chn][j];
701 plt->energy_save[chn][j] = fftenergy[j];
708 vbrpsy_compute_fft_s(lame_internal_flags const *gfc, const sample_t * const buffer[2], int chn,
709 int sblock, FLOAT(*fftenergy_s)[HBLKSIZE_s], FLOAT(*wsamp_s)[3][BLKSIZE_s])
713 if (sblock == 0 && chn < 2) {
714 fft_short(gfc, *wsamp_s, chn, buffer);
717 FLOAT const sqrt2_half = SQRT2 * 0.5f;
718 /* FFT data for mid and side channel is derived from L & R */
719 for (j = BLKSIZE_s - 1; j >= 0; --j) {
720 FLOAT const l = wsamp_s[0][sblock][j];
721 FLOAT const r = wsamp_s[1][sblock][j];
722 wsamp_s[0][sblock][j] = (l + r) * sqrt2_half;
723 wsamp_s[1][sblock][j] = (l - r) * sqrt2_half;
727 /*********************************************************************
729 *********************************************************************/
730 fftenergy_s[sblock][0] = (*wsamp_s)[sblock][0];
731 fftenergy_s[sblock][0] *= fftenergy_s[sblock][0];
732 for (j = BLKSIZE_s / 2 - 1; j >= 0; --j) {
733 FLOAT const re = (*wsamp_s)[sblock][BLKSIZE_s / 2 - j];
734 FLOAT const im = (*wsamp_s)[sblock][BLKSIZE_s / 2 + j];
735 fftenergy_s[sblock][BLKSIZE_s / 2 - j] = (re * re + im * im) * 0.5f;
740 /*********************************************************************
741 * compute loudness approximation (used for ATH auto-level adjustment)
742 *********************************************************************/
744 vbrpsy_compute_loudness_approximation_l(lame_internal_flags * gfc, int gr_out, int chn,
745 const FLOAT fftenergy[HBLKSIZE])
747 PsyStateVar_t *psv = &gfc->sv_psy;
748 if (chn < 2) { /*no loudness for mid/side ch */
749 gfc->ov_psy.loudness_sq[gr_out][chn] = psv->loudness_sq_save[chn];
750 psv->loudness_sq_save[chn] = psycho_loudness_approx(fftenergy, gfc->ATH->eql_w);
755 /**********************************************************************
756 * Apply HPF of fs/4 to the input signal.
757 * This is used for attack detection / handling.
758 **********************************************************************/
760 vbrpsy_attack_detection(lame_internal_flags * gfc, const sample_t * const buffer[2], int gr_out,
761 III_psy_ratio masking_ratio[2][2], III_psy_ratio masking_MS_ratio[2][2],
762 FLOAT energy[4], FLOAT sub_short_factor[4][3], int ns_attacks[4][4],
765 static FLOAT ns_hpfsmpl[2][576];
766 SessionConfig_t const *const cfg = &gfc->cfg;
767 PsyStateVar_t *const psv = &gfc->sv_psy;
768 plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
769 int const n_chn_out = cfg->channels_out;
770 /* chn=2 and 3 = Mid and Side channels */
771 int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : n_chn_out;
774 memset(&ns_hpfsmpl[0][0], 0, sizeof(ns_hpfsmpl));
775 /* Don't copy the input buffer into a temporary buffer */
776 /* unroll the loop 2 times */
777 for (chn = 0; chn < n_chn_out; chn++) {
778 static const FLOAT fircoef[] = {
779 -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
780 -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
781 -5.52212e-17 * 2, -0.313819 * 2
783 /* apply high pass filter of fs/4 */
784 const sample_t *const firbuf = &buffer[chn][576 - 350 - NSFIRLEN + 192];
785 assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
786 for (i = 0; i < 576; i++) {
787 static FLOAT sum1, sum2;
788 sum1 = firbuf[i + 10];
790 for (j = 0; j < ((NSFIRLEN - 1) / 2) - 1; j += 2) {
791 sum1 += fircoef[j] * (firbuf[i + j] + firbuf[i + NSFIRLEN - j]);
792 sum2 += fircoef[j + 1] * (firbuf[i + j + 1] + firbuf[i + NSFIRLEN - j - 1]);
794 ns_hpfsmpl[chn][i] = sum1 + sum2;
796 masking_ratio[gr_out][chn].en = psv->en[chn];
797 masking_ratio[gr_out][chn].thm = psv->thm[chn];
800 /*percep_MS_entropy [chn-2] = gfc -> pe [chn]; */
801 masking_MS_ratio[gr_out][chn].en = psv->en[chn + 2];
802 masking_MS_ratio[gr_out][chn].thm = psv->thm[chn + 2];
805 for (chn = 0; chn < n_chn_psy; chn++) {
806 static FLOAT attack_intensity[12];
807 static FLOAT en_subshort[12];
808 FLOAT en_short[4] = { 0, 0, 0, 0 };
809 FLOAT const *pf = ns_hpfsmpl[chn & 1];
810 int ns_uselongblock = 1;
813 for (i = 0, j = 576; j > 0; ++i, --j) {
814 FLOAT const l = ns_hpfsmpl[0][i];
815 FLOAT const r = ns_hpfsmpl[1][i];
816 ns_hpfsmpl[0][i] = l + r;
817 ns_hpfsmpl[1][i] = l - r;
820 /***************************************************************
821 * determine the block type (window type)
822 ***************************************************************/
823 /* calculate energies of each sub-shortblocks */
824 for (i = 0; i < 3; i++) {
825 en_subshort[i] = psv->last_en_subshort[chn][i + 6];
826 assert(psv->last_en_subshort[chn][i + 4] > 0);
827 attack_intensity[i] = en_subshort[i] / psv->last_en_subshort[chn][i + 4];
828 en_short[0] += en_subshort[i];
831 for (i = 0; i < 9; i++) {
832 FLOAT const *const pfe = pf + 576 / 9;
834 for (; pf < pfe; pf++)
837 psv->last_en_subshort[chn][i] = en_subshort[i + 3] = p;
838 en_short[1 + i / 3] += p;
839 if (p > en_subshort[i + 3 - 2]) {
840 assert(en_subshort[i + 3 - 2] > 0);
841 p = p / en_subshort[i + 3 - 2];
843 else if (en_subshort[i + 3 - 2] > p * 10.0f) {
845 p = en_subshort[i + 3 - 2] / (p * 10.0f);
850 attack_intensity[i + 3] = p;
853 /* pulse like signal detection for fatboy.wav and so on */
854 for (i = 0; i < 3; ++i) {
856 en_subshort[i * 3 + 3] + en_subshort[i * 3 + 4] + en_subshort[i * 3 + 5];
858 if (en_subshort[i * 3 + 5] * 6 < enn) {
860 if (en_subshort[i * 3 + 4] * 6 < enn) {
864 sub_short_factor[chn][i] = factor;
868 FLOAT x = attack_intensity[0];
869 for (i = 1; i < 12; i++) {
870 if (x < attack_intensity[i]) {
871 x = attack_intensity[i];
874 plt->ers[gr_out][chn] = plt->ers_save[chn];
875 plt->ers_save[chn] = x;
878 /* compare energies between sub-shortblocks */
880 FLOAT x = gfc->cd_psy->attack_threshold[chn];
881 for (i = 0; i < 12; i++) {
882 if (ns_attacks[chn][i / 3] == 0) {
883 if (attack_intensity[i] > x) {
884 ns_attacks[chn][i / 3] = (i % 3) + 1;
889 /* should have energy change between short blocks, in order to avoid periodic signals */
890 /* Good samples to show the effect are Trumpet test songs */
891 /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
892 /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
893 for (i = 1; i < 4; i++) {
894 FLOAT const u = en_short[i - 1];
895 FLOAT const v = en_short[i];
896 FLOAT const m = Max(u, v);
897 if (m < 40000) { /* (2) */
898 if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
899 if (i == 1 && ns_attacks[chn][0] <= ns_attacks[chn][i]) {
900 ns_attacks[chn][0] = 0;
902 ns_attacks[chn][i] = 0;
907 if (ns_attacks[chn][0] <= psv->last_attacks[chn]) {
908 ns_attacks[chn][0] = 0;
911 if (psv->last_attacks[chn] == 3 ||
912 ns_attacks[chn][0] + ns_attacks[chn][1] + ns_attacks[chn][2] + ns_attacks[chn][3]) {
915 if (ns_attacks[chn][1] && ns_attacks[chn][0]) {
916 ns_attacks[chn][1] = 0;
918 if (ns_attacks[chn][2] && ns_attacks[chn][1]) {
919 ns_attacks[chn][2] = 0;
921 if (ns_attacks[chn][3] && ns_attacks[chn][2]) {
922 ns_attacks[chn][3] = 0;
927 uselongblock[chn] = ns_uselongblock;
930 if (ns_uselongblock == 0) {
931 uselongblock[0] = uselongblock[1] = 0;
935 /* there is a one granule delay. Copy maskings computed last call
936 * into masking_ratio to return to calling program.
938 energy[chn] = psv->tot_ener[chn];
944 vbrpsy_skip_masking_s(lame_internal_flags * gfc, int chn, int sblock)
947 FLOAT *nbs2 = &gfc->sv_psy.nb_s2[chn][0];
948 FLOAT *nbs1 = &gfc->sv_psy.nb_s1[chn][0];
949 int const n = gfc->cd_psy->s.npart;
951 for (b = 0; b < n; b++) {
959 vbrpsy_calc_mask_index_s(lame_internal_flags const *gfc, FLOAT const *max,
960 FLOAT const *avg, unsigned char *mask_idx)
962 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
965 int const last_tab_entry = dimension_of(tab) - 1;
967 a = avg[b] + avg[b + 1];
973 assert((gds->numlines[b] + gds->numlines[b + 1] - 1) > 0);
974 a = 20.0f * (m * 2.0f - a)
975 / (a * (gds->numlines[b] + gds->numlines[b + 1] - 1));
977 if (k > last_tab_entry)
985 for (b = 1; b < gds->npart - 1; b++) {
986 a = avg[b - 1] + avg[b] + avg[b + 1];
987 assert(b + 1 < gds->npart);
995 assert((gds->numlines[b - 1] + gds->numlines[b] + gds->numlines[b + 1] - 1) > 0);
996 a = 20.0f * (m * 3.0f - a)
997 / (a * (gds->numlines[b - 1] + gds->numlines[b] + gds->numlines[b + 1] - 1));
999 if (k > last_tab_entry)
1008 assert(b == gds->npart - 1);
1010 a = avg[b - 1] + avg[b];
1016 assert((gds->numlines[b - 1] + gds->numlines[b] - 1) > 0);
1017 a = 20.0f * (m * 2.0f - a)
1018 / (a * (gds->numlines[b - 1] + gds->numlines[b] - 1));
1020 if (k > last_tab_entry)
1027 assert(b == (gds->npart - 1));
1032 vbrpsy_compute_masking_s(lame_internal_flags * gfc, const FLOAT(*fftenergy_s)[HBLKSIZE_s],
1033 FLOAT * eb, FLOAT * thr, int chn, int sblock)
1035 PsyStateVar_t *const psv = &gfc->sv_psy;
1036 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
1037 static FLOAT max[CBANDS], avg[CBANDS];
1039 static unsigned char mask_idx_s[CBANDS];
1041 memset(max, 0, sizeof(max));
1042 memset(avg, 0, sizeof(avg));
1044 for (b = j = 0; b < gds->npart; ++b) {
1045 FLOAT ebb = 0, m = 0;
1046 int const n = gds->numlines[b];
1047 for (i = 0; i < n; ++i, ++j) {
1048 FLOAT const el = fftenergy_s[sblock][j];
1057 avg[b] = ebb * gds->rnumlines[b];
1058 assert(avg[b] >= 0);
1060 assert(b == gds->npart);
1062 vbrpsy_calc_mask_index_s(gfc, max, avg, mask_idx_s);
1063 for (j = b = 0; b < gds->npart; b++) {
1064 int kk = gds->s3ind[b][0];
1065 int const last = gds->s3ind[b][1];
1066 int const delta = mask_add_delta(mask_idx_s[b]);
1068 FLOAT x, ecb, avg_mask;
1069 FLOAT const masking_lower = gds->masking_lower[b] * gfc->sv_qnt.masking_lower;
1071 dd = mask_idx_s[kk];
1073 ecb = gds->s3[j] * eb[kk] * tab[mask_idx_s[kk]];
1075 while (kk <= last) {
1076 dd += mask_idx_s[kk];
1078 x = gds->s3[j] * eb[kk] * tab[mask_idx_s[kk]];
1079 ecb = vbrpsy_mask_add(ecb, x, kk - b, delta);
1082 dd = (1 + 2 * dd) / (2 * dd_n);
1083 avg_mask = tab[dd] * 0.5f;
1085 #if 0 /* we can do PRE ECHO control now here, or do it later */
1086 if (psv->blocktype_old[chn & 0x01] == SHORT_TYPE) {
1087 /* limit calculated threshold by even older granule */
1088 FLOAT const t1 = rpelev_s * psv->nb_s1[chn][b];
1089 FLOAT const t2 = rpelev2_s * psv->nb_s2[chn][b];
1090 FLOAT const tm = (t2 > 0) ? Min(ecb, t2) : ecb;
1091 thr[b] = (t1 > 0) ? NS_INTERP(Min(tm, t1), ecb, 0.6) : ecb;
1094 /* limit calculated threshold by older granule */
1095 FLOAT const t1 = rpelev_s * psv->nb_s1[chn][b];
1096 thr[b] = (t1 > 0) ? NS_INTERP(Min(ecb, t1), ecb, 0.6) : ecb;
1098 #else /* we do it later */
1101 psv->nb_s2[chn][b] = psv->nb_s1[chn][b];
1102 psv->nb_s1[chn][b] = ecb;
1104 /* if THR exceeds EB, the quantization routines will take the difference
1105 * from other bands. in case of strong tonal samples (tonaltest.wav)
1106 * this leads to heavy distortions. that's why we limit THR here.
1109 x *= gds->minval[b];
1115 if (masking_lower > 1) {
1116 thr[b] *= masking_lower;
1118 if (thr[b] > eb[b]) {
1121 if (masking_lower < 1) {
1122 thr[b] *= masking_lower;
1125 assert(thr[b] >= 0);
1127 for (; b < CBANDS; ++b) {
1135 vbrpsy_compute_masking_l(lame_internal_flags * gfc, const FLOAT fftenergy[HBLKSIZE],
1136 FLOAT eb_l[CBANDS], FLOAT thr[CBANDS], int chn)
1138 PsyStateVar_t *const psv = &gfc->sv_psy;
1139 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
1140 static FLOAT max[CBANDS], avg[CBANDS];
1141 static unsigned char mask_idx_l[CBANDS + 2];
1144 /*********************************************************************
1145 * Calculate the energy and the tonality of each partition.
1146 *********************************************************************/
1147 calc_energy(gdl, fftenergy, eb_l, max, avg);
1148 calc_mask_index_l(gfc, max, avg, mask_idx_l);
1150 /*********************************************************************
1151 * convolve the partitioned energy and unpredictability
1152 * with the spreading function, s3_l[b][k]
1153 ********************************************************************/
1155 for (b = 0; b < gdl->npart; b++) {
1156 FLOAT x, ecb, avg_mask, t;
1157 FLOAT const masking_lower = gdl->masking_lower[b] * gfc->sv_qnt.masking_lower;
1158 /* convolve the partitioned energy with the spreading function */
1159 int kk = gdl->s3ind[b][0];
1160 int const last = gdl->s3ind[b][1];
1161 int const delta = mask_add_delta(mask_idx_l[b]);
1162 int dd = 0, dd_n = 0;
1164 dd = mask_idx_l[kk];
1166 ecb = gdl->s3[k] * eb_l[kk] * tab[mask_idx_l[kk]];
1168 while (kk <= last) {
1169 dd += mask_idx_l[kk];
1171 x = gdl->s3[k] * eb_l[kk] * tab[mask_idx_l[kk]];
1172 t = vbrpsy_mask_add(ecb, x, kk - b, delta);
1183 dd = (1 + 2 * dd) / (2 * dd_n);
1184 avg_mask = tab[dd] * 0.5f;
1187 /**** long block pre-echo control ****/
1188 /* dont use long block pre-echo control if previous granule was
1189 * a short block. This is to avoid the situation:
1190 * frame0: quiet (very low masking)
1191 * frame1: surge (triggers short blocks)
1192 * frame2: regular frame. looks like pre-echo when compared to
1193 * frame0, but all pre-echo was in frame1.
1195 /* chn=0,1 L and R channels
1196 chn=2,3 S and M channels.
1198 if (psv->blocktype_old[chn & 0x01] == SHORT_TYPE) {
1199 FLOAT const ecb_limit = rpelev * psv->nb_l1[chn][b];
1200 if (ecb_limit > 0) {
1201 thr[b] = Min(ecb, ecb_limit);
1205 Because we don't calculate long block psy when we know a granule
1206 should be of short blocks, we don't have any clue how the granule
1207 before would have looked like as a long block. So we have to guess
1208 a little bit for this END_TYPE block.
1209 Most of the time we get away with this sloppyness. (fingers crossed :)
1210 The speed increase is worth it.
1212 thr[b] = Min(ecb, eb_l[b] * NS_PREECHO_ATT2);
1216 FLOAT ecb_limit_2 = rpelev2 * psv->nb_l2[chn][b];
1217 FLOAT ecb_limit_1 = rpelev * psv->nb_l1[chn][b];
1219 if (ecb_limit_2 <= 0) {
1222 if (ecb_limit_1 <= 0) {
1225 if (psv->blocktype_old[chn & 0x01] == NORM_TYPE) {
1226 ecb_limit = Min(ecb_limit_1, ecb_limit_2);
1229 ecb_limit = ecb_limit_1;
1231 thr[b] = Min(ecb, ecb_limit);
1233 psv->nb_l2[chn][b] = psv->nb_l1[chn][b];
1234 psv->nb_l1[chn][b] = ecb;
1236 /* if THR exceeds EB, the quantization routines will take the difference
1237 * from other bands. in case of strong tonal samples (tonaltest.wav)
1238 * this leads to heavy distortions. that's why we limit THR here.
1241 x *= gdl->minval[b];
1247 if (masking_lower > 1) {
1248 thr[b] *= masking_lower;
1250 if (thr[b] > eb_l[b]) {
1253 if (masking_lower < 1) {
1254 thr[b] *= masking_lower;
1256 assert(thr[b] >= 0);
1258 for (; b < CBANDS; ++b) {
1266 vbrpsy_compute_block_type(SessionConfig_t const *cfg, int *uselongblock)
1270 if (cfg->short_blocks == short_block_coupled
1271 /* force both channels to use the same block type */
1272 /* this is necessary if the frame is to be encoded in ms_stereo. */
1273 /* But even without ms_stereo, FhG does this */
1274 && !(uselongblock[0] && uselongblock[1]))
1275 uselongblock[0] = uselongblock[1] = 0;
1277 for (chn = 0; chn < cfg->channels_out; chn++) {
1278 /* disable short blocks */
1279 if (cfg->short_blocks == short_block_dispensed) {
1280 uselongblock[chn] = 1;
1282 if (cfg->short_blocks == short_block_forced) {
1283 uselongblock[chn] = 0;
1290 vbrpsy_apply_block_type(PsyStateVar_t * psv, int nch, int const *uselongblock, int *blocktype_d)
1294 /* update the blocktype of the previous granule, since it depends on what
1295 * happend in this granule */
1296 for (chn = 0; chn < nch; chn++) {
1297 int blocktype = NORM_TYPE;
1298 /* disable short blocks */
1300 if (uselongblock[chn]) {
1301 /* no attack : use long blocks */
1302 assert(psv->blocktype_old[chn] != START_TYPE);
1303 if (psv->blocktype_old[chn] == SHORT_TYPE)
1304 blocktype = STOP_TYPE;
1307 /* attack : use short blocks */
1308 blocktype = SHORT_TYPE;
1309 if (psv->blocktype_old[chn] == NORM_TYPE) {
1310 psv->blocktype_old[chn] = START_TYPE;
1312 if (psv->blocktype_old[chn] == STOP_TYPE)
1313 psv->blocktype_old[chn] = SHORT_TYPE;
1316 blocktype_d[chn] = psv->blocktype_old[chn]; /* value returned to calling program */
1317 psv->blocktype_old[chn] = blocktype; /* save for next call to l3psy_anal */
1322 /***************************************************************
1323 * compute M/S thresholds from Johnston & Ferreira 1992 ICASSP paper
1324 ***************************************************************/
1327 vbrpsy_compute_MS_thresholds(const FLOAT eb[4][CBANDS], FLOAT thr[4][CBANDS],
1328 const FLOAT cb_mld[CBANDS], const FLOAT ath_cb[CBANDS], FLOAT athlower,
1331 FLOAT const msfix2 = msfix * 2.f;
1334 for (b = 0; b < n; ++b) {
1335 FLOAT const ebM = eb[2][b];
1336 FLOAT const ebS = eb[3][b];
1337 FLOAT const thmL = thr[0][b];
1338 FLOAT const thmR = thr[1][b];
1339 FLOAT thmM = thr[2][b];
1340 FLOAT thmS = thr[3][b];
1342 /* use this fix if L & R masking differs by 2db or less */
1343 /* if db = 10*log10(x2/x1) < 2 */
1344 /* if (x2 < 1.58*x1) { */
1345 if (thmL <= 1.58f * thmR && thmR <= 1.58f * thmL) {
1346 FLOAT const mld_m = cb_mld[b] * ebS;
1347 FLOAT const mld_s = cb_mld[b] * ebM;
1348 FLOAT const tmp_m = Min(thmS, mld_m);
1349 FLOAT const tmp_s = Min(thmM, mld_s);
1350 rmid = Max(thmM, tmp_m);
1351 rside = Max(thmS, tmp_s);
1358 /***************************************************************/
1359 /* Adjust M/S maskings if user set "msfix" */
1360 /***************************************************************/
1361 /* Naoki Shibata 2000 */
1363 FLOAT const ath = ath_cb[b] * athlower;
1364 FLOAT const tmp_l = Max(thmL, ath);
1365 FLOAT const tmp_r = Max(thmR, ath);
1366 thmLR = Min(tmp_l, tmp_r);
1367 thmM = Max(rmid, ath);
1368 thmS = Max(rside, ath);
1369 thmMS = thmM + thmS;
1370 if (thmMS > 0.f && (thmLR * msfix2) < thmMS) {
1371 FLOAT const f = thmLR * msfix2 / thmMS;
1374 assert(thmMS > 0.f);
1376 rmid = Min(thmM, rmid);
1377 rside = Min(thmS, rside);
1392 * NOTE: the bitrate reduction from the inter-channel masking effect is low
1393 * compared to the chance of getting annyoing artefacts. L3psycho_anal_vbr does
1394 * not use this feature. (Robert 071216)
1398 L3psycho_anal_vbr(lame_internal_flags * gfc,
1399 const sample_t * const buffer[2], int gr_out,
1400 III_psy_ratio masking_ratio[2][2],
1401 III_psy_ratio masking_MS_ratio[2][2],
1402 FLOAT percep_entropy[2], FLOAT percep_MS_entropy[2],
1403 FLOAT energy[4], int blocktype_d[2])
1405 SessionConfig_t const *const cfg = &gfc->cfg;
1406 PsyStateVar_t *const psv = &gfc->sv_psy;
1407 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
1408 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
1409 plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
1411 /* Jonathan C: Keep these variable OFF the stack, Watcom C doesn't give us much */
1412 static III_psy_xmin last_thm[4];
1414 /* fft and energy calculation */
1415 static FLOAT(*wsamp_l)[BLKSIZE];
1416 static FLOAT(*wsamp_s)[3][BLKSIZE_s];
1417 static FLOAT fftenergy[HBLKSIZE];
1418 static FLOAT fftenergy_s[3][HBLKSIZE_s];
1419 static FLOAT wsamp_L[2][BLKSIZE];
1420 static FLOAT wsamp_S[2][3][BLKSIZE_s];
1421 static FLOAT eb[4][CBANDS], thr[4][CBANDS];
1423 static FLOAT sub_short_factor[4][3];
1425 FLOAT const pcfact = 0.6f;
1426 FLOAT const ath_factor =
1427 (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f;
1429 const FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb;
1430 const FLOAT(*const_fftenergy_s)[HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s;
1433 int ns_attacks[4][4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
1434 static int uselongblock[2];
1436 /* usual variables like loop indices, etc.. */
1437 int chn, sb, sblock;
1439 /* chn=2 and 3 = Mid and Side channels */
1440 int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : cfg->channels_out;
1442 memcpy(&last_thm[0], &psv->thm[0], sizeof(last_thm));
1444 vbrpsy_attack_detection(gfc, buffer, gr_out, masking_ratio, masking_MS_ratio, energy,
1445 sub_short_factor, ns_attacks, uselongblock);
1447 vbrpsy_compute_block_type(cfg, uselongblock);
1449 /* LONG BLOCK CASE */
1451 for (chn = 0; chn < n_chn_psy; chn++) {
1452 int const ch01 = chn & 0x01;
1454 wsamp_l = wsamp_L + ch01;
1455 vbrpsy_compute_fft_l(gfc, buffer, chn, gr_out, fftenergy, wsamp_l);
1456 vbrpsy_compute_loudness_approximation_l(gfc, gr_out, chn, fftenergy);
1457 vbrpsy_compute_masking_l(gfc, fftenergy, eb[chn], thr[chn], chn);
1459 if (cfg->mode == JOINT_STEREO) {
1460 if ((uselongblock[0] + uselongblock[1]) == 2) {
1461 vbrpsy_compute_MS_thresholds(const_eb, thr, gdl->mld_cb, gfc->ATH->cb_l,
1462 ath_factor, cfg->msfix, gdl->npart);
1465 /* TODO: apply adaptive ATH masking here ?? */
1466 for (chn = 0; chn < n_chn_psy; chn++) {
1467 convert_partition2scalefac_l(gfc, eb[chn], thr[chn], chn);
1468 convert_partition2scalefac_l_to_s(gfc, eb[chn], thr[chn], chn);
1471 /* SHORT BLOCKS CASE */
1473 int const force_short_block_calc = gfc->cd_psy->force_short_block_calc;
1474 for (sblock = 0; sblock < 3; sblock++) {
1475 for (chn = 0; chn < n_chn_psy; ++chn) {
1476 int const ch01 = chn & 0x01;
1477 if (uselongblock[ch01] && !force_short_block_calc) {
1478 vbrpsy_skip_masking_s(gfc, chn, sblock);
1481 /* compute masking thresholds for short blocks */
1482 wsamp_s = wsamp_S + ch01;
1483 vbrpsy_compute_fft_s(gfc, buffer, chn, sblock, fftenergy_s, wsamp_s);
1484 vbrpsy_compute_masking_s(gfc, const_fftenergy_s, eb[chn], thr[chn], chn,
1488 if (cfg->mode == JOINT_STEREO) {
1489 if ((uselongblock[0] + uselongblock[1]) == 0) {
1490 vbrpsy_compute_MS_thresholds(const_eb, thr, gds->mld_cb, gfc->ATH->cb_s,
1491 ath_factor, cfg->msfix, gds->npart);
1494 /* TODO: apply adaptive ATH masking here ?? */
1495 for (chn = 0; chn < n_chn_psy; ++chn) {
1496 int const ch01 = chn & 0x01;
1497 if (!uselongblock[ch01] || force_short_block_calc) {
1498 convert_partition2scalefac_s(gfc, eb[chn], thr[chn], chn, sblock);
1503 /**** short block pre-echo control ****/
1504 for (chn = 0; chn < n_chn_psy; chn++) {
1505 for (sb = 0; sb < SBMAX_s; sb++) {
1506 FLOAT new_thmm[3], prev_thm, t1, t2;
1507 for (sblock = 0; sblock < 3; sblock++) {
1508 thmm = psv->thm[chn].s[sb][sblock];
1509 thmm *= NS_PREECHO_ATT0;
1514 prev_thm = new_thmm[sblock - 1];
1517 prev_thm = last_thm[chn].s[sb][2];
1519 if (ns_attacks[chn][sblock] >= 2 || ns_attacks[chn][sblock + 1] == 1) {
1520 t1 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT1 * pcfact);
1522 thmm = Min(t1, thmm);
1523 if (ns_attacks[chn][sblock] == 1) {
1524 t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
1526 else if ((sblock == 0 && psv->last_attacks[chn] == 3)
1527 || (sblock > 0 && ns_attacks[chn][sblock - 1] == 3)) { /* 2nd preceeding block */
1530 prev_thm = last_thm[chn].s[sb][1];
1533 prev_thm = last_thm[chn].s[sb][2];
1536 prev_thm = new_thmm[0];
1539 t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
1542 thmm = Min(t1, thmm);
1543 thmm = Min(t2, thmm);
1545 /* pulse like signal detection for fatboy.wav and so on */
1546 thmm *= sub_short_factor[chn][sblock];
1548 new_thmm[sblock] = thmm;
1550 for (sblock = 0; sblock < 3; sblock++) {
1551 psv->thm[chn].s[sb][sblock] = new_thmm[sblock];
1556 for (chn = 0; chn < n_chn_psy; chn++) {
1557 psv->last_attacks[chn] = ns_attacks[chn][2];
1561 /***************************************************************
1562 * determine final block type
1563 ***************************************************************/
1564 vbrpsy_apply_block_type(psv, cfg->channels_out, uselongblock, blocktype_d);
1566 /*********************************************************************
1567 * compute the value of PE to return ... no delay and advance
1568 *********************************************************************/
1569 for (chn = 0; chn < n_chn_psy; chn++) {
1572 III_psy_ratio const *mr;
1575 ppe = percep_MS_entropy - 2;
1577 if (blocktype_d[0] == SHORT_TYPE || blocktype_d[1] == SHORT_TYPE)
1579 mr = &masking_MS_ratio[gr_out][chn - 2];
1582 ppe = percep_entropy;
1583 type = blocktype_d[chn];
1584 mr = &masking_ratio[gr_out][chn];
1586 if (type == SHORT_TYPE) {
1587 ppe[chn] = pecalc_s(mr, gfc->sv_qnt.masking_lower);
1590 ppe[chn] = pecalc_l(mr, gfc->sv_qnt.masking_lower);
1594 plt->pe[gr_out][chn] = ppe[chn];
1604 * The spreading function. Values returned in units of energy
1609 FLOAT tempx, x, tempy, temp;
1616 if (tempx >= 0.5 && tempx <= 2.5) {
1618 x = 8.0 * (temp * temp - 2.0 * temp);
1623 tempy = 15.811389 + 7.5 * tempx - 17.5 * sqrt(1.0 + tempx * tempx);
1628 tempx = exp((x + tempy) * LN_TO_LOG10);
1630 /* Normalization. The spreading function should be normalized so that:
1633 | s3 [ bark ] d(bark) = 1
1645 double lim_a = 0, lim_b = 0;
1647 for (x = 0; s3_func(x) > 1e-20; x -= 1);
1650 while (fabs(h - l) > 1e-12) {
1652 if (s3_func(x) > 0) {
1660 for (x = 0; s3_func(x) > 1e-20; x += 1);
1663 while (fabs(h - l) > 1e-12) {
1665 if (s3_func(x) > 0) {
1677 for (i = 0; i <= m; ++i) {
1678 double x = lim_a + i * (lim_b - lim_a) / m;
1679 double y = s3_func(x);
1683 double norm = (m + 1) / (sum * (lim_b - lim_a));
1684 /*printf( "norm = %lf\n",norm); */
1692 stereo_demask(double f)
1694 /* setup stereo demasking thresholds */
1695 /* formula reverse enginerred from plot in paper */
1696 double arg = freq2bark(f);
1697 arg = (Min(arg, 15.5) / 15.5);
1699 return pow(10.0, 1.25 * (1 - cos(PI * arg)) - 2.5);
1703 init_numline(PsyConst_CB2SB_t * gd, FLOAT sfreq, int fft_size,
1704 int mdct_size, int sbmax, int const *scalepos)
1706 static FLOAT b_frq[CBANDS + 1];
1707 FLOAT const mdct_freq_frac = sfreq / (2.0f * mdct_size);
1708 FLOAT const deltafreq = fft_size / (2.0f * mdct_size);
1709 static int partition[HBLKSIZE] = { 0 };
1715 /* compute numlines, the number of spectral lines in each partition band */
1716 /* each partition band should be about DELBARK wide. */
1717 for (i = 0; i < CBANDS; i++) {
1720 bark1 = freq2bark(sfreq * j);
1722 b_frq[i] = sfreq * j;
1724 for (j2 = j; freq2bark(sfreq * j2) - bark1 < DELBARK && j2 <= fft_size / 2; j2++);
1727 gd->numlines[i] = nl;
1728 gd->rnumlines[i] = (nl > 0) ? (1.0f / nl) : 0;
1733 assert(j < HBLKSIZE);
1736 if (j > fft_size / 2) {
1743 b_frq[i] = sfreq * j;
1750 for (i = 0; i < gd->npart; i++) {
1751 int const nl = gd->numlines[i];
1752 FLOAT const freq = sfreq * (j + nl / 2);
1753 gd->mld_cb[i] = stereo_demask(freq);
1756 for (; i < CBANDS; ++i) {
1760 for (sfb = 0; sfb < sbmax; sfb++) {
1762 int start = scalepos[sfb];
1763 int end = scalepos[sfb + 1];
1765 i1 = floor(.5 + deltafreq * (start - .5));
1768 i2 = floor(.5 + deltafreq * (end - .5));
1770 if (i2 > fft_size / 2)
1774 gd->bm[sfb] = (partition[i1] + partition[i2]) / 2;
1777 /* calculate how much of this band belongs to current scalefactor band */
1779 FLOAT const f_tmp = mdct_freq_frac * end;
1780 FLOAT bo_w = (f_tmp - b_frq[bo]) / (b_frq[bo + 1] - b_frq[bo]);
1789 gd->bo_weight[sfb] = bo_w;
1791 gd->mld[sfb] = stereo_demask(mdct_freq_frac * start);
1796 compute_bark_values(PsyConst_CB2SB_t const *gd, FLOAT sfreq, int fft_size,
1797 FLOAT * bval, FLOAT * bval_width)
1799 /* compute bark values of each critical band */
1800 int k, j = 0, ni = gd->npart;
1802 for (k = 0; k < ni; k++) {
1803 int const w = gd->numlines[k];
1806 bark1 = freq2bark(sfreq * (j));
1807 bark2 = freq2bark(sfreq * (j + w - 1));
1808 bval[k] = .5 * (bark1 + bark2);
1810 bark1 = freq2bark(sfreq * (j - .5));
1811 bark2 = freq2bark(sfreq * (j + w - .5));
1812 bval_width[k] = bark2 - bark1;
1818 init_s3_values(FLOAT ** p, int (*s3ind)[2], int npart,
1819 FLOAT const *bval, FLOAT const *bval_width, FLOAT const *norm)
1821 static FLOAT s3[CBANDS][CBANDS];
1822 /* The s3 array is not linear in the bark scale.
1823 * bval[x] should be used to get the bark value.
1826 int numberOfNoneZero = 0;
1828 for (i=0;i < CBANDS;i++) {
1829 for (j=0;j < CBANDS;j++) {
1834 /* s[i][j], the value of the spreading function,
1835 * centered at band j (masker), for band i (maskee)
1837 * i.e.: sum over j to spread into signal barkval=i
1838 * NOTE: i and j are used opposite as in the ISO docs
1840 for (i = 0; i < npart; i++) {
1841 for (j = 0; j < npart; j++) {
1842 FLOAT v = s3_func(bval[i] - bval[j]) * bval_width[j];
1843 s3[i][j] = v * norm[i];
1846 for (i = 0; i < npart; i++) {
1847 for (j = 0; j < npart; j++) {
1848 if (s3[i][j] > 0.0f)
1853 for (j = npart - 1; j > 0; j--) {
1854 if (s3[i][j] > 0.0f)
1858 numberOfNoneZero += (s3ind[i][1] - s3ind[i][0] + 1);
1860 if (numberOfNoneZero == 0)
1862 *p = malloc(sizeof(FLOAT) * numberOfNoneZero);
1867 for (i = 0; i < npart; i++)
1868 for (j = s3ind[i][0]; j <= s3ind[i][1]; j++)
1869 (*p)[k++] = s3[i][j];
1870 while (k < numberOfNoneZero)
1873 assert(npart <= numberOfNoneZero);
1874 assert(k <= numberOfNoneZero);
1879 psymodel_init(lame_global_flags const *gfp)
1881 lame_internal_flags *const gfc = gfp->internal_flags;
1882 SessionConfig_t *const cfg = &gfc->cfg;
1883 PsyStateVar_t *const psv = &gfc->sv_psy;
1886 FLOAT bvl_a = 13, bvl_b = 24;
1887 FLOAT snr_l_a = 0, snr_l_b = 0;
1888 FLOAT snr_s_a = -8.25, snr_s_b = -4.5;
1890 static FLOAT bval[CBANDS];
1891 static FLOAT bval_width[CBANDS];
1892 static FLOAT norm[CBANDS];
1893 FLOAT const sfreq = cfg->samplerate_out;
1895 FLOAT xav = 10, xbv = 12;
1896 FLOAT const minval_low = (0.f - cfg->minval);
1898 if (gfc->cd_psy != 0) {
1901 memset(norm, 0, sizeof(norm));
1903 gd = calloc(1, sizeof(PsyConst_t));
1906 gd->force_short_block_calc = gfp->experimentalZ;
1908 psv->blocktype_old[0] = psv->blocktype_old[1] = NORM_TYPE; /* the vbr header is long blocks */
1910 for (i = 0; i < 4; ++i) {
1911 for (j = 0; j < CBANDS; ++j) {
1912 psv->nb_l1[i][j] = 1e20;
1913 psv->nb_l2[i][j] = 1e20;
1914 psv->nb_s1[i][j] = psv->nb_s2[i][j] = 1.0;
1916 for (sb = 0; sb < SBMAX_l; sb++) {
1917 psv->en[i].l[sb] = 1e20;
1918 psv->thm[i].l[sb] = 1e20;
1920 for (j = 0; j < 3; ++j) {
1921 for (sb = 0; sb < SBMAX_s; sb++) {
1922 psv->en[i].s[sb][j] = 1e20;
1923 psv->thm[i].s[sb][j] = 1e20;
1925 psv->last_attacks[i] = 0;
1927 for (j = 0; j < 9; j++)
1928 psv->last_en_subshort[i][j] = 10.;
1932 /* init. for loudness approx. -jd 2001 mar 27 */
1933 psv->loudness_sq_save[0] = psv->loudness_sq_save[1] = 0.0;
1937 /*************************************************************************
1938 * now compute the psychoacoustic model specific constants
1939 ************************************************************************/
1940 /* compute numlines, bo, bm, bval, bval_width, mld */
1941 init_numline(&gd->l, sfreq, BLKSIZE, 576, SBMAX_l, gfc->scalefac_band.l);
1942 assert(gd->l.npart < CBANDS);
1943 compute_bark_values(&gd->l, sfreq, BLKSIZE, bval, bval_width);
1945 /* compute the spreading function */
1946 for (i = 0; i < gd->l.npart; i++) {
1947 double snr = snr_l_a;
1948 if (bval[i] >= bvl_a) {
1949 snr = snr_l_b * (bval[i] - bvl_a) / (bvl_b - bvl_a)
1950 + snr_l_a * (bvl_b - bval[i]) / (bvl_b - bvl_a);
1952 norm[i] = pow(10.0, snr / 10.0);
1954 i = init_s3_values(&gd->l.s3, gd->l.s3ind, gd->l.npart, bval, bval_width, norm);
1958 /* compute long block specific values, ATH and MINVAL */
1960 for (i = 0; i < gd->l.npart; i++) {
1965 for (k = 0; k < gd->l.numlines[i]; k++, j++) {
1966 FLOAT const freq = sfreq * j / (1000.0 * BLKSIZE);
1968 /* freq = Min(.1,freq); *//* ATH below 100 Hz constant, not further climbing */
1969 level = ATHformula(cfg, freq * 1000) - 20; /* scale to FFT units; returned value is in dB */
1970 level = pow(10., 0.1 * level); /* convert from dB -> energy */
1971 level *= gd->l.numlines[i];
1975 gfc->ATH->cb_l[i] = x;
1978 For low freq, the strength of the masking is limited by minval
1979 this is an ISO MPEG1 thing, dont know if it is really needed */
1980 /* FIXME: it does work to reduce low-freq problems in S53-Wind-Sax
1981 and lead-voice samples, but introduces some 3 kbps bit bloat too.
1982 TODO: Further refinement of the shape of this hack.
1984 x = 20.0 * (bval[i] / xav - 1.0);
1988 if (x < minval_low) {
1991 if (cfg->samplerate_out < 44000) {
1995 gd->l.minval[i] = pow(10.0, x / 10.) * gd->l.numlines[i];
1998 /************************************************************************
1999 * do the same things for short blocks
2000 ************************************************************************/
2001 init_numline(&gd->s, sfreq, BLKSIZE_s, 192, SBMAX_s, gfc->scalefac_band.s);
2002 assert(gd->s.npart < CBANDS);
2003 compute_bark_values(&gd->s, sfreq, BLKSIZE_s, bval, bval_width);
2005 /* SNR formula. short block is normalized by SNR. is it still right ? */
2007 for (i = 0; i < gd->s.npart; i++) {
2009 double snr = snr_s_a;
2010 if (bval[i] >= bvl_a) {
2011 snr = snr_s_b * (bval[i] - bvl_a) / (bvl_b - bvl_a)
2012 + snr_s_a * (bvl_b - bval[i]) / (bvl_b - bvl_a);
2014 norm[i] = pow(10.0, snr / 10.0);
2018 for (k = 0; k < gd->s.numlines[i]; k++, j++) {
2019 FLOAT const freq = sfreq * j / (1000.0 * BLKSIZE_s);
2021 /* freq = Min(.1,freq); *//* ATH below 100 Hz constant, not further climbing */
2022 level = ATHformula(cfg, freq * 1000) - 20; /* scale to FFT units; returned value is in dB */
2023 level = pow(10., 0.1 * level); /* convert from dB -> energy */
2024 level *= gd->s.numlines[i];
2028 gfc->ATH->cb_s[i] = x;
2031 For low freq, the strength of the masking is limited by minval
2032 this is an ISO MPEG1 thing, dont know if it is really needed */
2033 x = 7.0 * (bval[i] / xbv - 1.0);
2034 if (bval[i] > xbv) {
2035 x *= 1 + log(1 + x) * 3.1;
2037 if (bval[i] < xbv) {
2038 x *= 1 + log(1 - x) * 2.3;
2043 if (x < minval_low) {
2046 if (cfg->samplerate_out < 44000) {
2050 gd->s.minval[i] = pow(10.0, x / 10) * gd->s.numlines[i];
2053 i = init_s3_values(&gd->s.s3, gd->s.s3ind, gd->s.npart, bval, bval_width, norm);
2058 init_mask_add_max_values();
2061 /* setup temporal masking */
2062 gd->decay = exp(-1.0 * LOG10 / (temporalmask_sustain_sec * sfreq / 192.0));
2067 if (cfg->use_safe_joint_stereo)
2069 if (fabs(cfg->msfix) > 0.0)
2073 /* spread only from npart_l bands. Normally, we use the spreading
2074 * function to convolve from npart_l down to npart_l bands
2076 for (b = 0; b < gd->l.npart; b++)
2077 if (gd->l.s3ind[b][1] > gd->l.npart - 1)
2078 gd->l.s3ind[b][1] = gd->l.npart - 1;
2081 /* prepare for ATH auto adjustment:
2082 * we want to decrease the ATH by 12 dB per second
2084 #define frame_duration (576. * cfg->mode_gr / sfreq)
2085 gfc->ATH->decay = pow(10., -12. / 10. * frame_duration);
2086 gfc->ATH->adjust_factor = 0.01; /* minimum, for leading low loudness */
2087 gfc->ATH->adjust_limit = 1.0; /* on lead, allow adjust up to maximum */
2088 #undef frame_duration
2090 assert(gd->l.bo[SBMAX_l - 1] <= gd->l.npart);
2091 assert(gd->s.bo[SBMAX_s - 1] <= gd->s.npart);
2093 if (cfg->ATHtype != -1) {
2094 /* compute equal loudness weights (eql_w) */
2096 FLOAT const freq_inc = (FLOAT) cfg->samplerate_out / (FLOAT) (BLKSIZE);
2097 FLOAT eql_balance = 0.0;
2099 for (i = 0; i < BLKSIZE / 2; ++i) {
2100 /* convert ATH dB to relative power (not dB) */
2101 /* to determine eql_w */
2103 gfc->ATH->eql_w[i] = 1. / pow(10, ATHformula(cfg, freq) / 10);
2104 eql_balance += gfc->ATH->eql_w[i];
2106 eql_balance = 1.0 / eql_balance;
2107 for (i = BLKSIZE / 2; --i >= 0;) { /* scale weights */
2108 gfc->ATH->eql_w[i] *= eql_balance;
2112 for (b = j = 0; b < gd->s.npart; ++b) {
2113 for (i = 0; i < gd->s.numlines[b]; ++i) {
2118 for (b = j = 0; b < gd->l.npart; ++b) {
2119 for (i = 0; i < gd->l.numlines[b]; ++i) {
2125 /* short block attack threshold */
2127 float x = gfp->attackthre;
2128 float y = gfp->attackthre_s;
2135 gd->attack_threshold[0] = gd->attack_threshold[1] = gd->attack_threshold[2] = x;
2136 gd->attack_threshold[3] = y;
2139 float sk_s = -10.f, sk_l = -4.7f;
2140 static float const sk[] =
2141 { -7.4, -7.4, -7.4, -9.5, -7.4, -6.1, -5.5, -4.7, -4.7, -4.7, -4.7 };
2142 if (gfp->VBR_q < 4) {
2143 sk_l = sk_s = sk[0];
2146 sk_l = sk_s = sk[gfp->VBR_q] + gfp->VBR_q_frac * (sk[gfp->VBR_q] - sk[gfp->VBR_q + 1]);
2149 for (; b < gd->s.npart; b++) {
2150 float m = (float) (gd->s.npart - b) / gd->s.npart;
2151 gd->s.masking_lower[b] = powf(10.f, sk_s * m * 0.1f);
2153 for (; b < CBANDS; ++b) {
2154 gd->s.masking_lower[b] = 1.f;
2157 for (; b < gd->l.npart; b++) {
2158 float m = (float) (gd->l.npart - b) / gd->l.npart;
2159 gd->l.masking_lower[b] = powf(10.f, sk_l * m * 0.1f);
2161 for (; b < CBANDS; ++b) {
2162 gd->l.masking_lower[b] = 1.f;
2165 memcpy(&gd->l_to_s, &gd->l, sizeof(gd->l_to_s));
2166 init_numline(&gd->l_to_s, sfreq, BLKSIZE, 192, SBMAX_s, gfc->scalefac_band.s);