src/lib/dl/ext/lame/encoder.c

   1 /*
   2  *      LAME MP3 encoding engine
   3  *
   4  *      Copyright (c) 1999 Mark Taylor
   5  *      Copyright (c) 2000-2002 Takehiro Tominaga
   6  *      Copyright (c) 2000-2011 Robert Hegemann
   7  *      Copyright (c) 2001 Gabriel Bouvigne
   8  *      Copyright (c) 2001 John Dahlstrom
   9  *
  10  * This library is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Library General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2 of the License, or (at your option) any later version.
  14  *
  15  * This library is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Library General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Library General Public
  21  * License along with this library; if not, write to the
  22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23  * Boston, MA 02111-1307, USA.
  24  */
  25
  26 /* $Id: encoder.c,v 1.111 2011/05/07 16:05:17 rbrito Exp $ */
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include <config.h>
  30 #endif
  31
  32
  33 #include "lame.h"
  34 #include "machine.h"
  35 #include "encoder.h"
  36 #include "util.h"
  37 #include "lame_global_flags.h"
  38 #include "newmdct.h"
  39 #include "psymodel.h"
  40 #include "lame-analysis.h"
  41 #include "bitstream.h"
  42 #include "vbrtag.h"
  43 #include "quantize_pvt.h"
  44
  45
  46
  47 /*
  48  * auto-adjust of ATH, useful for low volume
  49  * Gabriel Bouvigne 3 feb 2001
  50  *
  51  * modifies some values in
  52  *   gfp->internal_flags->ATH
  53  *   (gfc->ATH)
  54  */
  55 static void
  56 adjust_ATH(lame_internal_flags const *const gfc)
  57 {
  58     SessionConfig_t const *const cfg = &gfc->cfg;
  59     FLOAT   gr2_max, max_pow;
  60
  61     if (gfc->ATH->use_adjust == 0) {
  62         gfc->ATH->adjust_factor = 1.0; /* no adjustment */
  63         return;
  64     }
  65
  66     /* jd - 2001 mar 12, 27, jun 30 */
  67     /* loudness based on equal loudness curve; */
  68     /* use granule with maximum combined loudness */
  69     max_pow = gfc->ov_psy.loudness_sq[0][0];
  70     gr2_max = gfc->ov_psy.loudness_sq[1][0];
  71     if (cfg->channels_out == 2) {
  72         max_pow += gfc->ov_psy.loudness_sq[0][1];
  73         gr2_max += gfc->ov_psy.loudness_sq[1][1];
  74     }
  75     else {
  76         max_pow += max_pow;
  77         gr2_max += gr2_max;
  78     }
  79     if (cfg->mode_gr == 2) {
  80         max_pow = Max(max_pow, gr2_max);
  81     }
  82     max_pow *= 0.5;     /* max_pow approaches 1.0 for full band noise */
  83
  84     /* jd - 2001 mar 31, jun 30 */
  85     /* user tuning of ATH adjustment region */
  86     max_pow *= gfc->ATH->aa_sensitivity_p;
  87
  88     /*  adjust ATH depending on range of maximum value
  89      */
  90
  91     /* jd - 2001 feb27, mar12,20, jun30, jul22 */
  92     /* continuous curves based on approximation */
  93     /* to GB's original values. */
  94     /* For an increase in approximate loudness, */
  95     /* set ATH adjust to adjust_limit immediately */
  96     /* after a delay of one frame. */
  97     /* For a loudness decrease, reduce ATH adjust */
  98     /* towards adjust_limit gradually. */
  99     /* max_pow is a loudness squared or a power. */
 100     if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
 101         if (gfc->ATH->adjust_factor >= 1.0) {
 102             gfc->ATH->adjust_factor = 1.0;
 103         }
 104         else {
 105             /* preceding frame has lower ATH adjust; */
 106             /* ascend only to the preceding adjust_limit */
 107             /* in case there is leading low volume */
 108             if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
 109                 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
 110             }
 111         }
 112         gfc->ATH->adjust_limit = 1.0;
 113     }
 114     else {              /* adjustment curve */
 115         /* about 32 dB maximum adjust (0.000625) */
 116         FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
 117         if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
 118             gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
 119             if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
 120                 gfc->ATH->adjust_factor = adj_lim_new;
 121             }
 122         }
 123         else {          /* ascend */
 124             if (gfc->ATH->adjust_limit >= adj_lim_new) {
 125                 gfc->ATH->adjust_factor = adj_lim_new;
 126             }
 127             else {      /* preceding frame has lower ATH adjust; */
 128                 /* ascend only to the preceding adjust_limit */
 129                 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
 130                     gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
 131                 }
 132             }
 133         }
 134         gfc->ATH->adjust_limit = adj_lim_new;
 135     }
 136 }
 137
 138 /***********************************************************************
 139  *
 140  *  some simple statistics
 141  *
 142  *  bitrate index 0: free bitrate -> not allowed in VBR mode
 143  *  : bitrates, kbps depending on MPEG version
 144  *  bitrate index 15: forbidden
 145  *
 146  *  mode_ext:
 147  *  0:  LR
 148  *  1:  LR-i
 149  *  2:  MS
 150  *  3:  MS-i
 151  *
 152  ***********************************************************************/
 153
 154 static void
 155 updateStats(lame_internal_flags * const gfc)
 156 {
 157     SessionConfig_t const *const cfg = &gfc->cfg;
 158     EncResult_t *eov = &gfc->ov_enc;
 159     int     gr, ch;
 160     assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
 161     assert(0 <= eov->mode_ext && eov->mode_ext < 4);
 162
 163     /* count bitrate indices */
 164     eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
 165     eov->bitrate_channelmode_hist[15][4]++;
 166
 167     /* count 'em for every mode extension in case of 2 channel encoding */
 168     if (cfg->channels_out == 2) {
 169         eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
 170         eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
 171     }
 172     for (gr = 0; gr < cfg->mode_gr; ++gr) {
 173         for (ch = 0; ch < cfg->channels_out; ++ch) {
 174             int     bt = gfc->l3_side.tt[gr][ch].block_type;
 175             if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
 176                 bt = 4;
 177             eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
 178             eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
 179             eov->bitrate_blocktype_hist[15][bt]++;
 180             eov->bitrate_blocktype_hist[15][5]++;
 181         }
 182     }
 183 }
 184
 185
 186
 187
 188 static void
 189 lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
 190 {
 191     SessionConfig_t const *const cfg = &gfc->cfg;
 192
 193     int     ch, gr;
 194
 195     if (gfc->lame_encode_frame_init == 0) {
 196         sample_t primebuff0[286 + 1152 + 576];
 197         sample_t primebuff1[286 + 1152 + 576];
 198         int const framesize = 576 * cfg->mode_gr;
 199         /* prime the MDCT/polyphase filterbank with a short block */
 200         int     i, j;
 201         gfc->lame_encode_frame_init = 1;
 202         memset(primebuff0, 0, sizeof(primebuff0));
 203         memset(primebuff1, 0, sizeof(primebuff1));
 204         for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
 205             if (i < framesize) {
 206                 primebuff0[i] = 0;
 207                 if (cfg->channels_out == 2)
 208                     primebuff1[i] = 0;
 209             }
 210             else {
 211                 primebuff0[i] = inbuf[0][j];
 212                 if (cfg->channels_out == 2)
 213                     primebuff1[i] = inbuf[1][j];
 214                 ++j;
 215             }
 216         }
 217         /* polyphase filtering / mdct */
 218         for (gr = 0; gr < cfg->mode_gr; gr++) {
 219             for (ch = 0; ch < cfg->channels_out; ch++) {
 220                 gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
 221             }
 222         }
 223         mdct_sub48(gfc, primebuff0, primebuff1);
 224
 225         /* check FFT will not use a negative starting offset */
 226 #if 576 < FFTOFFSET
 227 # error FFTOFFSET greater than 576: FFT uses a negative offset
 228 #endif
 229         /* check if we have enough data for FFT */
 230         assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
 231         /* check if we have enough data for polyphase filterbank */
 232         assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
 233     }
 234
 235 }
 236
 237
 238
 239
 240
 241
 242
 243 /************************************************************************
 244 *
 245 * encodeframe()           Layer 3
 246 *
 247 * encode a single frame
 248 *
 249 ************************************************************************
 250 lame_encode_frame()
 251
 252
 253                        gr 0            gr 1
 254 inbuf:           |--------------|--------------|--------------|
 255
 256
 257 Polyphase (18 windows, each shifted 32)
 258 gr 0:
 259 window1          <----512---->
 260 window18                 <----512---->
 261
 262 gr 1:
 263 window1                         <----512---->
 264 window18                                <----512---->
 265
 266
 267
 268 MDCT output:  |--------------|--------------|--------------|
 269
 270 FFT's                    <---------1024---------->
 271                                          <---------1024-------->
 272
 273
 274
 275     inbuf = buffer of PCM data size=MP3 framesize
 276     encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
 277     so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
 278
 279     psy-model FFT has a 1 granule delay, so we feed it data for the
 280     next granule.
 281     FFT is centered over granule:  224+576+224
 282     So FFT starts at:   576-224-MDCTDELAY
 283
 284     MPEG2:  FFT ends at:  BLKSIZE+576-224-MDCTDELAY      (1328)
 285     MPEG1:  FFT ends at:  BLKSIZE+2*576-224-MDCTDELAY    (1904)
 286
 287     MPEG2:  polyphase first window:  [0..511]
 288                       18th window:   [544..1055]          (1056)
 289     MPEG1:            36th window:   [1120..1631]         (1632)
 290             data needed:  512+framesize-32
 291
 292     A close look at newmdct.c shows that the polyphase filterbank
 293     only uses data from [0..510] for each window.  Perhaps because the window
 294     used by the filterbank is zero for the last point, so Takehiro's
 295     code doesn't bother to compute with it.
 296
 297     FFT starts at 576-224-MDCTDELAY (304)  = 576-FFTOFFSET
 298
 299 */
 300
 301 typedef FLOAT chgrdata[2][2]; /* 2x2 table of FLOAT; not referenced by any code visible in this file */
 302
 303
 304 int
 305 lame_encode_mp3_frame(       /* Output */
 306                          lame_internal_flags * gfc, /* Context */
 307                          sample_t const *inbuf_l, /* Input */
 308                          sample_t const *inbuf_r, /* Input */
 309                          unsigned char *mp3buf, /* Output */
 310                          int mp3buf_size)
 311 {                       /* Output */
 312     SessionConfig_t const *const cfg = &gfc->cfg;
 313     int     mp3count;
 314     static III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
 315     static III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
 316     const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
 317     const sample_t *inbuf[2];
 318
 319     static FLOAT   tot_ener[2][4];
 320     FLOAT   ms_ener_ratio[2] = { .5, .5 };
 321     FLOAT   pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
 322     0., 0.}, {
 323     0., 0.}};
 324     FLOAT (*pe_use)[2];
 325
 326     int     ch, gr;
 327
 328     inbuf[0] = inbuf_l;
 329     inbuf[1] = inbuf_r;
 330
 331     if (gfc->lame_encode_frame_init == 0) {
 332         /*first run? */
 333         lame_encode_frame_init(gfc, inbuf);
 334
 335     }
 336
 337
 338     /********************** padding *****************************/
 339     /* padding method as described in
 340      * "MPEG-Layer3 / Bitstream Syntax and Decoding"
 341      * by Martin Sieler, Ralph Sperschneider
 342      *
 343      * note: there is no padding for the very first frame
 344      *
 345      * Robert Hegemann 2000-06-22
 346      */
 347     gfc->ov_enc.padding = FALSE;
 348     if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
 349         gfc->sv_enc.slot_lag += cfg->samplerate_out;
 350         gfc->ov_enc.padding = TRUE;
 351     }
 352
 353
 354
 355     /****************************************
 356     *   Stage 1: psychoacoustic model       *
 357     ****************************************/
 358
 359     {
 360         /* psychoacoustic model
 361          * psy model has a 1 granule (576) delay that we must compensate for
 362          * (mt 6/99).
 363          */
 364         int     ret;
 365         const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
 366         int     blocktype[2];
 367
 368         for (gr = 0; gr < cfg->mode_gr; gr++) {
 369
 370             for (ch = 0; ch < cfg->channels_out; ch++) {
 371                 bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
 372             }
 373
 374             ret = L3psycho_anal_vbr(gfc, bufp, gr,
 375                                     masking_LR, masking_MS,
 376                                     pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
 377             if (ret != 0)
 378                 return -4;
 379
 380             if (cfg->mode == JOINT_STEREO) {
 381                 ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
 382                 if (ms_ener_ratio[gr] > 0)
 383                     ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
 384             }
 385
 386             /* block type flags */
 387             for (ch = 0; ch < cfg->channels_out; ch++) {
 388                 gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
 389                 cod_info->block_type = blocktype[ch];
 390                 cod_info->mixed_block_flag = 0;
 391             }
 392         }
 393     }
 394
 395
 396     /* auto-adjust of ATH, useful for low volume */
 397     adjust_ATH(gfc);
 398
 399
 400     /****************************************
 401     *   Stage 2: MDCT                       *
 402     ****************************************/
 403
 404     /* polyphase filtering / mdct */
 405     mdct_sub48(gfc, inbuf[0], inbuf[1]);
 406
 407
 408     /****************************************
 409     *   Stage 3: MS/LR decision             *
 410     ****************************************/
 411
 412     /* Here will be selected MS or LR coding of the 2 stereo channels */
 413     gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
 414
 415     if (cfg->force_ms) {
 416         gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
 417     }
 418     else if (cfg->mode == JOINT_STEREO) {
 419         /* ms_ratio = is scaled, for historical reasons, to look like
 420            a ratio of side_channel / total.
 421            0 = signal is 100% mono
 422            .5 = L & R uncorrelated
 423          */
 424
 425         /* [0] and [1] are the results for the two granules in MPEG-1,
 426          * in MPEG-2 it's only a faked averaging of the same value
 427          * _prev is the value of the last granule of the previous frame
 428          * _next is the value of the first granule of the next frame
 429          */
 430
 431         FLOAT   sum_pe_MS = 0;
 432         FLOAT   sum_pe_LR = 0;
 433         for (gr = 0; gr < cfg->mode_gr; gr++) {
 434             for (ch = 0; ch < cfg->channels_out; ch++) {
 435                 sum_pe_MS += pe_MS[gr][ch];
 436                 sum_pe_LR += pe[gr][ch];
 437             }
 438         }
 439
 440         /* based on PE: M/S coding would not use much more bits than L/R */
 441         if (sum_pe_MS <= 1.00 * sum_pe_LR) {
 442
 443             gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
 444             gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
 445
 446             if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
 447
 448                 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
 449             }
 450         }
 451     }
 452
 453     /* bit and noise allocation */
 454     if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
 455         masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
 456         pe_use = pe_MS;
 457     }
 458     else {
 459         masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
 460         pe_use = pe;
 461     }
 462
 463
 464     /* copy data for MP3 frame analyzer */
 465     if (cfg->analysis && gfc->pinfo != NULL) {
 466         for (gr = 0; gr < cfg->mode_gr; gr++) {
 467             for (ch = 0; ch < cfg->channels_out; ch++) {
 468                 gfc->pinfo->ms_ratio[gr] = 0;
 469                 gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
 470                 gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
 471                 gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
 472                 memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
 473                 /* in psymodel, LR and MS data was stored in pinfo.
 474                    switch to MS data: */
 475                 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
 476                     gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
 477                     memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
 478                            sizeof(gfc->pinfo->energy[gr][ch]));
 479                 }
 480             }
 481         }
 482     }
 483
 484
 485     /****************************************
 486     *   Stage 4: quantization loop          *
 487     ****************************************/
 488
 489     if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
 490         static FLOAT const fircoef[9] = {
 491             -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
 492             7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
 493             0.187098 * 5
 494         };
 495
 496         int     i;
 497         FLOAT   f;
 498
 499         for (i = 0; i < 18; i++)
 500             gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
 501
 502         f = 0.0;
 503         for (gr = 0; gr < cfg->mode_gr; gr++)
 504             for (ch = 0; ch < cfg->channels_out; ch++)
 505                 f += pe_use[gr][ch];
 506         gfc->sv_enc.pefirbuf[18] = f;
 507
 508         f = gfc->sv_enc.pefirbuf[9];
 509         for (i = 0; i < 9; i++)
 510             f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
 511
 512         f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
 513         for (gr = 0; gr < cfg->mode_gr; gr++) {
 514             for (ch = 0; ch < cfg->channels_out; ch++) {
 515                 pe_use[gr][ch] *= f;
 516             }
 517         }
 518     }
 519     gfc->iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
 520
 521
 522     /****************************************
 523     *   Stage 5: bitstream formatting       *
 524     ****************************************/
 525
 526
 527     /*  write the frame to the bitstream  */
 528     (void) format_bitstream(gfc);
 529
 530     /* copy mp3 bit buffer into array */
 531     mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
 532
 533
 534     if (cfg->write_lame_tag) {
 535         AddVbrFrame(gfc);
 536     }
 537
 538     if (cfg->analysis && gfc->pinfo != NULL) {
 539         int     framesize = 576 * cfg->mode_gr;
 540         for (ch = 0; ch < cfg->channels_out; ch++) {
 541             int     j;
 542             for (j = 0; j < FFTOFFSET; j++)
 543                 gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
 544             for (j = FFTOFFSET; j < 1600; j++) {
 545                 gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
 546             }
 547         }
 548         gfc->sv_qnt.masking_lower = 1.0;
 549
 550         set_frame_pinfo(gfc, masking);
 551     }
 552
 553     ++gfc->ov_enc.frame_number;
 554
 555     updateStats(gfc);
 556
 557     return mp3count;
 558 }