2 * LAME MP3 encoding engine
4 * Copyright (c) 1999 Mark Taylor
5 * Copyright (c) 2000-2002 Takehiro Tominaga
6 * Copyright (c) 2000-2011 Robert Hegemann
7 * Copyright (c) 2001 Gabriel Bouvigne
8 * Copyright (c) 2001 John Dahlstrom
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
20 * You should have received a copy of the GNU Library General Public
21 * License along with this library; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
26 /* $Id: encoder.c,v 1.111 2011/05/07 16:05:17 rbrito Exp $ */
37 #include "lame_global_flags.h"
40 #include "lame-analysis.h"
41 #include "bitstream.h"
43 #include "quantize_pvt.h"
48 * auto-adjust of ATH, useful for low volume
49 * Gabriel Bouvigne 3 feb 2001
51 * modifies some values in
52 * gfc->ATH (adjust_factor and adjust_limit)
/* adjust_ATH: updates gfc->ATH->adjust_factor and gfc->ATH->adjust_limit
 * from the loudness values stored in gfc->ov_psy.loudness_sq.
 * NOTE(review): this listing omits some lines of the function (return
 * type, braces, short statements); the comments added here describe only
 * the statements that are visible. */
56 adjust_ATH(lame_internal_flags const *const gfc)
58 SessionConfig_t const *const cfg = &gfc->cfg;
59 FLOAT gr2_max, max_pow;
/* adjustment switched off: the factor is pinned to 1.0 */
61 if (gfc->ATH->use_adjust == 0) {
62 gfc->ATH->adjust_factor = 1.0; /* no adjustment */
66 /* jd - 2001 mar 12, 27, jun 30 */
67 /* loudness based on equal loudness curve; */
68 /* use granule with maximum combined loudness */
/* loudness_sq is read as [0][x] into max_pow and [1][x] into gr2_max;
 * presumably the first index is the granule and the second the channel
 * (confirm against the declaration of ov_psy). */
69 max_pow = gfc->ov_psy.loudness_sq[0][0];
70 gr2_max = gfc->ov_psy.loudness_sq[1][0];
/* two output channels: add the second channel of each granule.
 * NOTE(review): the alternative branch is not visible in this view. */
71 if (cfg->channels_out == 2) {
72 max_pow += gfc->ov_psy.loudness_sq[0][1];
73 gr2_max += gfc->ov_psy.loudness_sq[1][1];
/* with two granules per frame keep the larger of the two sums */
79 if (cfg->mode_gr == 2) {
80 max_pow = Max(max_pow, gr2_max);
82 max_pow *= 0.5; /* max_pow approaches 1.0 for full band noise */
84 /* jd - 2001 mar 31, jun 30 */
85 /* user tuning of ATH adjustment region */
86 max_pow *= gfc->ATH->aa_sensitivity_p;
88 /* adjust ATH depending on range of maximum value
91 /* jd - 2001 feb27, mar12,20, jun30, jul22 */
92 /* continuous curves based on approximation */
93 /* to GB's original values. */
94 /* For an increase in approximate loudness, */
95 /* set ATH adjust to adjust_limit immediately */
96 /* after a delay of one frame. */
97 /* For a loudness decrease, reduce ATH adjust */
98 /* towards adjust_limit gradually. */
99 /* max_pow is a loudness squared or a power. */
/* Above the 0.03125 threshold the new limit is 1.0; at or below it the
 * new limit follows the curve 31.98 * max_pow + 0.000625, which evaluates
 * to 1.0 exactly at max_pow == 0.03125. */
100 if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
101 if (gfc->ATH->adjust_factor >= 1.0) {
102 gfc->ATH->adjust_factor = 1.0;
105 /* preceding frame has lower ATH adjust; */
106 /* ascend only to the preceding adjust_limit */
107 /* in case there is leading low volume */
108 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
109 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
/* remember the limit that applies to this frame for the next call */
112 gfc->ATH->adjust_limit = 1.0;
114 else { /* adjustment curve */
115 /* about 32 dB maximum adjust (0.000625) */
116 FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
117 if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
/* the multiplier adj_lim_new * 0.075 + 0.925 is at most 1.0 here because
 * adj_lim_new <= 1.0 in this branch (assuming max_pow is not negative);
 * the factor is then clamped so it never drops below adj_lim_new */
118 gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
119 if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
120 gfc->ATH->adjust_factor = adj_lim_new;
/* factor is below the new limit: jump to it only if the previous limit
 * already allowed that much, otherwise rise to the previous limit */
124 if (gfc->ATH->adjust_limit >= adj_lim_new) {
125 gfc->ATH->adjust_factor = adj_lim_new;
127 else { /* preceding frame has lower ATH adjust; */
128 /* ascend only to the preceding adjust_limit */
129 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
130 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
/* remember the limit that applies to this frame for the next call */
134 gfc->ATH->adjust_limit = adj_lim_new;
138 /***********************************************************************
140 * some simple statistics
142 * bitrate index 0: free bitrate -> not allowed in VBR mode
143 * : bitrates, kbps depending on MPEG version
144 * bitrate index 15: forbidden
152 ***********************************************************************/
/* updateStats: increments the histogram counters in gfc->ov_enc for the
 * frame described by ov_enc.bitrate_index, ov_enc.mode_ext and the block
 * types in gfc->l3_side.tt. Every counter is bumped twice: once in the
 * row of the current bitrate index and once in row 15, which therefore
 * accumulates over all bitrate indices.
 * NOTE(review): the declarations of gr and ch and some braces are not
 * visible in this view. */
155 updateStats(lame_internal_flags * const gfc)
157 SessionConfig_t const *const cfg = &gfc->cfg;
158 EncResult_t *eov = &gfc->ov_enc;
/* both values are used as array indices below */
160 assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
161 assert(0 <= eov->mode_ext && eov->mode_ext < 4);
163 /* count bitrate indices */
/* column 4 is incremented for every frame, whatever the channel mode */
164 eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
165 eov->bitrate_channelmode_hist[15][4]++;
167 /* count 'em for every mode extension in case of 2 channel encoding */
168 if (cfg->channels_out == 2) {
169 eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
170 eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
/* block type histogram: one count per granule and channel */
172 for (gr = 0; gr < cfg->mode_gr; ++gr) {
173 for (ch = 0; ch < cfg->channels_out; ++ch) {
174 int bt = gfc->l3_side.tt[gr][ch].block_type;
/* NOTE(review): the statement controlled by this test is not visible in
 * this view; presumably it remaps bt for mixed blocks - confirm against
 * the complete file. */
175 if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
/* column bt counts this block type, column 5 counts every granule/channel */
177 eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
178 eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
179 eov->bitrate_blocktype_hist[15][bt]++;
180 eov->bitrate_blocktype_hist[15][5]++;
/* lame_encode_frame_init: setup guarded by gfc->lame_encode_frame_init.
 * When that flag is 0 it is set to 1, two local priming buffers are built
 * from inbuf, every granule/channel is marked SHORT_TYPE and mdct_sub48()
 * is run once on the priming buffers.
 *   gfc   - encoder state; the flag, l3_side.tt[][].block_type and
 *           whatever mdct_sub48() updates are modified
 *   inbuf - per-channel input samples; inbuf[1] is read only when
 *           cfg->channels_out == 2
 * NOTE(review): the declarations of i, j, gr, ch, several braces and the
 * condition that selects between the two halves of the priming loop are
 * not visible in this view. */
189 lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
191 SessionConfig_t const *const cfg = &gfc->cfg;
195 if (gfc->lame_encode_frame_init == 0) {
/* 286 + 1152 + 576 == 2014 elements; the loop below runs to
 * 286 + 576 * (1 + mode_gr), which equals 2014 when mode_gr == 2 */
196 sample_t primebuff0[286 + 1152 + 576];
197 sample_t primebuff1[286 + 1152 + 576];
198 int const framesize = 576 * cfg->mode_gr;
199 /* prime the MDCT/polyphase filterbank with a short block */
201 gfc->lame_encode_frame_init = 1;
202 memset(primebuff0, 0, sizeof(primebuff0));
203 memset(primebuff1, 0, sizeof(primebuff1));
/* i walks the priming buffers, j walks the input samples */
204 for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
207 if (cfg->channels_out == 2)
/* copy input into the priming buffers; the second buffer is filled only
 * for two-channel output */
211 primebuff0[i] = inbuf[0][j];
212 if (cfg->channels_out == 2)
213 primebuff1[i] = inbuf[1][j];
217 /* polyphase filtering / mdct */
218 for (gr = 0; gr < cfg->mode_gr; gr++) {
219 for (ch = 0; ch < cfg->channels_out; ch++) {
220 gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
223 mdct_sub48(gfc, primebuff0, primebuff1);
225 /* check FFT will not use a negative starting offset */
227 # error FFTOFFSET greater than 576: FFT uses a negative offset
229 /* check if we have enough data for FFT */
230 assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
231 /* check if we have enough data for polyphase filterbank */
232 assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
243 /************************************************************************
245 * encodeframe() Layer 3
247 * encode a single frame
249 ************************************************************************
254 inbuf: |--------------|--------------|--------------|
257 Polyphase (18 windows, each shifted 32)
259 window1 <----512---->
260 window18 <----512---->
263 window1 <----512---->
264 window18 <----512---->
268 MDCT output: |--------------|--------------|--------------|
270 FFT's <---------1024---------->
271 <---------1024-------->
275 inbuf = buffer of PCM data size=MP3 framesize
276 encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
277 so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
279 psy-model FFT has a 1 granule delay, so we feed it data for the
281 FFT is centered over granule: 224+576+224
282 So FFT starts at: 576-224-MDCTDELAY
284 MPEG2: FFT ends at: BLKSIZE+576-224-MDCTDELAY (1328)
285 MPEG1: FFT ends at: BLKSIZE+2*576-224-MDCTDELAY (1904)
287 MPEG2: polyphase first window: [0..511]
288 18th window: [544..1055] (1056)
289 MPEG1: 36th window: [1120..1631] (1632)
290 data needed: 512+framesize-32
292 A close look at newmdct.c shows that the polyphase filterbank
293 only uses data from [0..510] for each window. Perhaps because the window
294 used by the filterbank is zero for the last point, so Takehiro's
295 code doesn't bother to compute with it.
297 FFT starts at 576-224-MDCTDELAY (304) = 576-FFTOFFSET
301 typedef FLOAT chgrdata[2][2];
305 lame_encode_mp3_frame( /* Output */
306 lame_internal_flags * gfc, /* Context */
307 sample_t const *inbuf_l, /* Input */
308 sample_t const *inbuf_r, /* Input */
309 unsigned char *mp3buf, /* Output */
312 SessionConfig_t const *const cfg = &gfc->cfg;
/* lame_encode_mp3_frame: encodes one frame. Visible stages, in order:
 * one-time setup (lame_encode_frame_init), padding decision,
 * psychoacoustic analysis per granule (L3psycho_anal_vbr), polyphase
 * filtering / MDCT (mdct_sub48), MS/LR decision, quantization
 * (gfc->iteration_loop), bitstream formatting (format_bitstream) and
 * copying the result into mp3buf (copy_buffer, whose return value is
 * stored in mp3count). Finally ov_enc.frame_number is incremented.
 * NOTE(review): this view omits many lines of the function (the last
 * parameter, several declarations, conditions, loop bodies, braces and
 * the return statement); the added comments describe only what is
 * visible. */
/* NOTE(review): masking_LR, masking_MS and tot_ener are declared static,
 * so one copy is shared by every call of this function regardless of the
 * gfc passed in; confirm that callers never encode with two contexts
 * concurrently. */
314 static III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
315 static III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
316 const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
317 const sample_t *inbuf[2];
319 static FLOAT tot_ener[2][4];
320 FLOAT ms_ener_ratio[2] = { .5, .5 };
321 FLOAT pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
/* first call only: lame_encode_frame_init() sets the flag to 1 itself */
331 if (gfc->lame_encode_frame_init == 0) {
333 lame_encode_frame_init(gfc, inbuf);
/* Padding: slot_lag is reduced by frac_SpF for each frame; when it drops
 * below zero, samplerate_out is added back and the frame is marked as
 * padded. */
338 /********************** padding *****************************/
339 /* padding method as described in
340 * "MPEG-Layer3 / Bitstream Syntax and Decoding"
341 * by Martin Sieler, Ralph Sperschneider
343 * note: there is no padding for the very first frame
345 * Robert Hegemann 2000-06-22
347 gfc->ov_enc.padding = FALSE;
348 if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
349 gfc->sv_enc.slot_lag += cfg->samplerate_out;
350 gfc->ov_enc.padding = TRUE;
355 /****************************************
356 * Stage 1: psychoacoustic model *
357 ****************************************/
360 /* psychoacoustic model
361 * psy model has a 1 granule (576) delay that we must compensate for
365 const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
/* one analysis call per granule */
368 for (gr = 0; gr < cfg->mode_gr; gr++) {
/* each channel pointer starts 576 + gr * 576 - FFTOFFSET samples into inbuf */
370 for (ch = 0; ch < cfg->channels_out; ch++) {
371 bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
/* fills the LR and MS masking tables, both perceptual entropy arrays,
 * tot_ener[gr] and blocktype for this granule */
374 ret = L3psycho_anal_vbr(gfc, bufp, gr,
375 masking_LR, masking_MS,
376 pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
/* joint stereo: ratio tot_ener[gr][3] / (tot_ener[gr][2] + tot_ener[gr][3]);
 * when the sum is not positive the (non-positive) sum itself is kept */
380 if (cfg->mode == JOINT_STEREO) {
381 ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
382 if (ms_ener_ratio[gr] > 0)
383 ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
386 /* block type flags */
387 for (ch = 0; ch < cfg->channels_out; ch++) {
388 gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
389 cod_info->block_type = blocktype[ch];
390 cod_info->mixed_block_flag = 0;
396 /* auto-adjust of ATH, useful for low volume */
400 /****************************************
402 ****************************************/
404 /* polyphase filtering / mdct */
405 mdct_sub48(gfc, inbuf[0], inbuf[1]);
408 /****************************************
409 * Stage 3: MS/LR decision *
410 ****************************************/
412 /* Here will be selected MS or LR coding of the 2 stereo channels */
/* default is L/R; the code below may switch to M/S */
413 gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
/* NOTE(review): the condition guarding the next assignment is not visible
 * in this view. */
416 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
418 else if (cfg->mode == JOINT_STEREO) {
419 /* ms_ratio is scaled, for historical reasons, to look like
420 a ratio of side_channel / total.
421 0 = signal is 100% mono
422 .5 = L & R uncorrelated
425 /* [0] and [1] are the results for the two granules in MPEG-1,
426 * in MPEG-2 it's only a faked averaging of the same value
427 * _prev is the value of the last granule of the previous frame
428 * _next is the value of the first granule of the next frame
433 for (gr = 0; gr < cfg->mode_gr; gr++) {
434 for (ch = 0; ch < cfg->channels_out; ch++) {
435 sum_pe_MS += pe_MS[gr][ch];
436 sum_pe_LR += pe[gr][ch];
440 /* based on PE: M/S coding would not use much more bits than L/R */
441 if (sum_pe_MS <= 1.00 * sum_pe_LR) {
/* gi0 / gi1 point at the channel-0 gr_info of the first and of the last
 * granule; index [1] addresses the element that follows it, presumably
 * channel 1 of the same granule (confirm against the layout of tt) */
443 gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
444 gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
/* M/S is chosen only when both elements have equal block types in the
 * first and in the last granule */
446 if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
448 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
453 /* bit and noise allocation */
454 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
455 masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
459 masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
464 /* copy data for MP3 frame analyzer */
/* only when analysis is enabled and a pinfo buffer has been provided */
465 if (cfg->analysis && gfc->pinfo != NULL) {
466 for (gr = 0; gr < cfg->mode_gr; gr++) {
467 for (ch = 0; ch < cfg->channels_out; ch++) {
468 gfc->pinfo->ms_ratio[gr] = 0;
469 gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
470 gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
471 gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
/* 576 FLOAT coefficients per granule and channel */
472 memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
473 /* in psymodel, LR and MS data was stored in pinfo.
474 switch to MS data: */
/* the MS entries are read from index ch + 2 */
475 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
476 gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
477 memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
478 sizeof(gfc->pinfo->energy[gr][ch]));
485 /****************************************
486 * Stage 4: quantization loop *
487 ****************************************/
/* vbr_off / vbr_abr only: a 19-entry history in sv_enc.pefirbuf is
 * filtered and the result is turned into the scale factor f.
 * NOTE(review): the end of the coefficient table, the declaration of f
 * and the loop bodies that accumulate and apply f are only partly
 * visible in this view. */
489 if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
490 static FLOAT const fircoef[9] = {
491 -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
492 7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
/* drop the oldest history entry by shifting everything down one slot */
499 for (i = 0; i < 18; i++)
500 gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
503 for (gr = 0; gr < cfg->mode_gr; gr++)
504 for (ch = 0; ch < cfg->channels_out; ch++)
/* newest value goes into the last slot */
506 gfc->sv_enc.pefirbuf[18] = f;
/* symmetric filter: centre tap pefirbuf[9] plus the pairs (i, 18 - i)
 * weighted by fircoef[i] */
508 f = gfc->sv_enc.pefirbuf[9];
509 for (i = 0; i < 9; i++)
510 f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
/* NOTE(review): no visible guard against f being zero before the division */
512 f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
513 for (gr = 0; gr < cfg->mode_gr; gr++) {
514 for (ch = 0; ch < cfg->channels_out; ch++) {
/* the quantization routine is selected through a function pointer in gfc */
519 gfc->iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
522 /****************************************
523 * Stage 5: bitstream formatting *
524 ****************************************/
527 /* write the frame to the bitstream */
528 (void) format_bitstream(gfc);
530 /* copy mp3 bit buffer into array */
531 mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
534 if (cfg->write_lame_tag) {
/* frame analyzer: keep a sliding copy of the PCM input in pinfo->pcmdata */
538 if (cfg->analysis && gfc->pinfo != NULL) {
539 int framesize = 576 * cfg->mode_gr;
540 for (ch = 0; ch < cfg->channels_out; ch++) {
/* move the samples one frame back to the front of the buffer, then refill
 * indices FFTOFFSET .. 1599 from the current input */
542 for (j = 0; j < FFTOFFSET; j++)
543 gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
544 for (j = FFTOFFSET; j < 1600; j++) {
545 gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
/* masking_lower is set to 1.0 before the analyzer data is collected */
548 gfc->sv_qnt.masking_lower = 1.0;
550 set_frame_pinfo(gfc, masking);
553 ++gfc->ov_enc.frame_number;