"Fossies" - the Fresh Open Source Software Archive

Member "ffmpeg-4.0.1/libavcodec/aac.h" (20 Apr 2018, 13189 Bytes) of package /linux/misc/ffmpeg-4.0.1.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "aac.h", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 3.4.2_vs_4.0.

    1 /*
    2  * AAC definitions and structures
    3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
    4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
    5  *
    6  * This file is part of FFmpeg.
    7  *
    8  * FFmpeg is free software; you can redistribute it and/or
    9  * modify it under the terms of the GNU Lesser General Public
   10  * License as published by the Free Software Foundation; either
   11  * version 2.1 of the License, or (at your option) any later version.
   12  *
   13  * FFmpeg is distributed in the hope that it will be useful,
   14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   16  * Lesser General Public License for more details.
   17  *
   18  * You should have received a copy of the GNU Lesser General Public
   19  * License along with FFmpeg; if not, write to the Free Software
   20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   21  */
   22 
   23 /**
   24  * @file
   25  * AAC definitions and structures
   26  * @author Oded Shimon  ( ods15 ods15 dyndns org )
   27  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
   28  */
   29 
   30 #ifndef AVCODEC_AAC_H
   31 #define AVCODEC_AAC_H
   32 
   33 
   34 #include "aac_defines.h"
   35 #include "libavutil/float_dsp.h"
   36 #include "libavutil/fixed_dsp.h"
   37 #include "avcodec.h"
   38 #if !USE_FIXED
   39 #include "mdct15.h"
   40 #endif
   41 #include "fft.h"
   42 #include "mpeg4audio.h"
   43 #include "sbr.h"
   44 
   45 #include <stdint.h>
   46 
   47 #define MAX_CHANNELS 64             ///< array bound of DynamicRangeControl.exclude_mask and AACContext.output_element
   48 #define MAX_ELEM_ID 16              ///< second array bound of AACContext.che / tag_che_map; OutputConfiguration.layout_map has MAX_ELEM_ID*4 rows
   49 
   50 #define TNS_MAX_ORDER 20            ///< innermost array bound of TemporalNoiseShaping.coef_idx and .coef
   51 #define MAX_LTP_LONG_SFB 40         ///< array bound of LongTermPrediction.used
   52 
   53 #define CLIP_AVOIDANCE_FACTOR 0.95f ///< NOTE(review): not referenced in this header; presumably related to IndividualChannelStream.clip_avoidance_factor - confirm at the use sites
   54 
   /**
    * Element type identifiers. No explicit values are given, so the
    * enumerators are numbered consecutively from TYPE_SCE = 0 to TYPE_END = 7.
    *
    * ChannelElement is documented in this header as the generic struct for
    * the first four types (SCE/CPE/CCE/LFE); AACContext.che and
    * AACContext.tag_che_map have a first dimension of 4, presumably indexed
    * by those four values - confirm against the decoder.
    * Expansions of the abbreviations that are not spelled out in this header
    * should be checked against the AAC specification.
    */
   55 enum RawDataBlockType {
   56     TYPE_SCE,   ///< 0: single channel element (see SingleChannelElement)
   57     TYPE_CPE,   ///< 1: CPE (ChannelElement has CPE-specific members such as common_window and ms_mask)
   58     TYPE_CCE,   ///< 2: CCE (ChannelElement has the CCE-specific member coup)
   59     TYPE_LFE,   ///< 3: LFE (uses SingleChannelElement, like SCE)
   60     TYPE_DSE,   ///< 4: presumably a data stream element - confirm
   61     TYPE_PCE,   ///< 5: PCE (an inband PCE can specify an output configuration, see OC_TRIAL_PCE)
   62     TYPE_FIL,   ///< 6: presumably a fill element - confirm
   63     TYPE_END,   ///< 7: presumably the end-of-block marker - confirm
   64 };
   65 
   /**
    * Extension payload identifiers.
    *
    * The first three enumerators take the implicit values 0, 1 and 2; the
    * remaining ones are pinned to explicit values, so the numbering is not
    * contiguous (3..0xa and 0xc are unnamed).
    * NOTE(review): presumably these match the extension type codes read from
    * the bitstream - confirm against the parser.
    */
   66 enum ExtensionPayloadID {
   67     EXT_FILL,                   ///< 0
   68     EXT_FILL_DATA,              ///< 1
   69     EXT_DATA_ELEMENT,           ///< 2
   70     EXT_DYNAMIC_RANGE = 0xb,    ///< 11; presumably the payload feeding DynamicRangeControl - confirm
   71     EXT_SBR_DATA      = 0xd,    ///< 13
   72     EXT_SBR_DATA_CRC  = 0xe,    ///< 14
   73 };
   74 
   /**
    * Window sequence of a channel, stored in
    * IndividualChannelStream.window_sequence.
    * Enumerators are numbered consecutively from 0 to 3.
    */
   75 enum WindowSequence {
   76     ONLY_LONG_SEQUENCE,     ///< 0
   77     LONG_START_SEQUENCE,    ///< 1
   78     EIGHT_SHORT_SEQUENCE,   ///< 2
   79     LONG_STOP_SEQUENCE,     ///< 3
   80 };
   81 
   /**
    * Band types, stored per band in SingleChannelElement.band_type and
    * SingleChannelElement.band_alt.
    *
    * Only selected values are named: 1..4 and 6..10 have no enumerator of
    * their own, so code comparing against these constants relies on the
    * numeric ordering (e.g. "this and later band types" for FIRST_PAIR_BT).
    */
   82 enum BandType {
   83     ZERO_BT        = 0,     ///< Scalefactors and spectral data are all zero.
   84     FIRST_PAIR_BT  = 5,     ///< This and later band types encode two values (rather than four) with one code word.
   85     ESC_BT         = 11,    ///< Spectral data are coded with an escape sequence.
   86     RESERVED_BT    = 12,    ///< Band types following are encoded differently from others.
   87     NOISE_BT       = 13,    ///< Spectral data are scaled white noise not coded in the bitstream.
   88     INTENSITY_BT2  = 14,    ///< Scalefactor data are intensity stereo positions (out of phase).
   89     INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions (in phase).
   90 };
   91 
   92 #define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10) ///< nonzero iff bit 1 or bit 3 of (x) - 1 is set, i.e. for x = 3, 4, 7, 8, 9, 10, 11 within 1..11; the result is the masked value (2, 8 or 10), not normalized to 0/1
   93 
   /**
    * Channel position categories, explicitly numbered 0..5.
    * NOTE(review): not referenced anywhere else in this header; presumably
    * stored in OutputConfiguration.layout_map entries - confirm at the use
    * sites.
    */
   94 enum ChannelPosition {
   95     AAC_CHANNEL_OFF   = 0,
   96     AAC_CHANNEL_FRONT = 1,
   97     AAC_CHANNEL_SIDE  = 2,
   98     AAC_CHANNEL_BACK  = 3,
   99     AAC_CHANNEL_LFE   = 4,
  100     AAC_CHANNEL_CC    = 5,
  101 };
  102 
  103 /**
  104  * The point during decoding at which channel coupling is applied.
       *
       * Stored in ChannelCoupling.coupling_point. Only the values 0, 1 and 3
       * are named; 2 has no enumerator because AFTER_IMDCT is pinned to 3.
  105  */
  106 enum CouplingPoint {
  107     BEFORE_TNS,             ///< 0
  108     BETWEEN_TNS_AND_IMDCT,  ///< 1
  109     AFTER_IMDCT = 3,        ///< 3 (explicit; value 2 is unnamed)
  110 };
  111 
  112 /**
  113  * Output configuration status
       *
       * Stored in OutputConfiguration.status. Enumerators are numbered
       * consecutively from OC_NONE = 0 to OC_LOCKED = 4.
  114  */
  115 enum OCStatus {
  116     OC_NONE,        ///< Output unconfigured
  117     OC_TRIAL_PCE,   ///< Output configuration under trial specified by an inband PCE
  118     OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header
  119     OC_GLOBAL_HDR,  ///< Output configuration set in a global header but not yet locked
  120     OC_LOCKED,      ///< Output configuration locked in place
  121 };
  122 
  /**
   * An output configuration together with its status.
   * AACContext keeps two of these (AACContext.oc[2]).
   */
  123 typedef struct OutputConfiguration {
  124     MPEG4AudioConfig m4ac;                  ///< type is not declared in this file (presumably from mpeg4audio.h)
  125     uint8_t layout_map[MAX_ELEM_ID*4][3];   ///< MAX_ELEM_ID*4 (= 64) rows of 3 bytes; NOTE(review): the meaning of the 3 columns is not visible here
  126     int layout_map_tags;                    ///< presumably the number of layout_map rows in use - confirm
  127     int channels;
  128     uint64_t channel_layout;
  129     enum OCStatus status;                   ///< see OCStatus
  130 } OutputConfiguration;
  131 
  132 /**
  133  * Predictor State
       *
       * Every SingleChannelElement holds MAX_PREDICTORS of these
       * (SingleChannelElement.predictor_state). All members have type
       * AAC_FLOAT, which is defined outside this file.
       * NOTE(review): the member names suggest correlations (cor), variances
       * (var) and an estimate (x_est); confirm the exact meaning against the
       * prediction code.
  134  */
  135 typedef struct PredictorState {
  136     AAC_FLOAT cor0;
  137     AAC_FLOAT cor1;
  138     AAC_FLOAT var0;
  139     AAC_FLOAT var1;
  140     AAC_FLOAT r0;
  141     AAC_FLOAT r1;
  142     AAC_FLOAT k1;
  143     AAC_FLOAT x_est;
  144 } PredictorState;
  145 
  146 #define MAX_PREDICTORS 672     ///< number of PredictorState entries in SingleChannelElement.predictor_state
  147 
  148 #define SCALE_DIV_512    36    ///< scalefactor difference that corresponds to a 512-fold difference in scale
  149 #define SCALE_ONE_POS   140    ///< scalefactor index that corresponds to scale=1.0
  150 #define SCALE_MAX_POS   255    ///< scalefactor index maximum value
  151 #define SCALE_MAX_DIFF   60    ///< maximum scalefactor difference allowed by standard
  152 #define SCALE_DIFF_ZERO  60    ///< codebook index corresponding to zero scalefactor indices difference
  153 
  154 #define POW_SF2_ZERO    200    ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0);
  155 
  156 #define NOISE_PRE       256    ///< preamble for NOISE_BT, put in bitstream with the first noise band
  157 #define NOISE_PRE_BITS    9    ///< length of preamble in bits (256 needs 9 bits)
  158 #define NOISE_OFFSET     90    ///< subtracted from global gain, used as offset for the preamble
  159 
  160 /**
  161  * Long Term Prediction
       *
       * Embedded in IndividualChannelStream as the member ltp.
  162  */
  163 typedef struct LongTermPrediction {
  164     int8_t present;                 ///< presumably nonzero when LTP data is present - confirm
  165     int16_t lag;
  166     int coef_idx;                   ///< presumably the index that coef is derived from - confirm
  167     INTFLOAT coef;                  ///< INTFLOAT is defined outside this file
  168     int8_t used[MAX_LTP_LONG_SFB];  ///< MAX_LTP_LONG_SFB (40) entries; presumably one flag per scalefactor band - confirm
  169 } LongTermPrediction;
  170 
  171 /**
  172  * Individual Channel Stream
       *
       * Embedded in SingleChannelElement as the member ics.
  173  */
  174 typedef struct IndividualChannelStream {
  175     uint8_t max_sfb;            ///< number of scalefactor bands per group
  176     enum WindowSequence window_sequence[2]; ///< two entries; NOTE(review): presumably current and previous frame - confirm
  177     uint8_t use_kb_window[2];   ///< If set, use Kaiser-Bessel window, otherwise use a sine window.
  178     int num_window_groups;
  179     uint8_t group_len[8];
  180     LongTermPrediction ltp;
  181     const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
  182     const uint8_t *swb_sizes;   ///< table of scalefactor band sizes for a particular window
  183     int num_swb;                ///< number of scalefactor window bands
  184     int num_windows;
  185     int tns_max_bands;
  186     int predictor_present;
  187     int predictor_initialized;
  188     int predictor_reset_group;
  189     int predictor_reset_count[31];  ///< used by encoder to count prediction resets
  190     uint8_t prediction_used[41];
  191     uint8_t window_clipping[8]; ///< set if a certain window is near clipping
  192     float clip_avoidance_factor; ///< set, if any window is near clipping, to the attenuation factor necessary to avoid it
  193 } IndividualChannelStream;
  194 
  195 /**
  196  * Temporal Noise Shaping
       *
       * Embedded in SingleChannelElement as the member tns.
       * All per-filter arrays are dimensioned [8][4]; the coefficient arrays
       * add a third dimension of TNS_MAX_ORDER.
       * NOTE(review): presumably the first index is the window and the second
       * the filter within that window (n_filt[w] filters) - confirm.
  197  */
  198 typedef struct TemporalNoiseShaping {
  199     int present;
  200     int n_filt[8];
  201     int length[8][4];
  202     int direction[8][4];
  203     int order[8][4];
  204     int coef_idx[8][4][TNS_MAX_ORDER];
  205     INTFLOAT coef[8][4][TNS_MAX_ORDER];
  206 } TemporalNoiseShaping;
  207 
  208 /**
  209  * Dynamic Range Control - decoded from the bitstream but not processed further.
       *
       * AACContext holds one instance (AACContext.che_drc). The per-band
       * arrays have 17 entries each; exclude_mask has MAX_CHANNELS entries.
  210  */
  211 typedef struct DynamicRangeControl {
  212     int pce_instance_tag;                           ///< Indicates with which program the DRC info is associated.
  213     int dyn_rng_sgn[17];                            ///< DRC sign information; 0 - positive, 1 - negative
  214     int dyn_rng_ctl[17];                            ///< DRC magnitude information
  215     int exclude_mask[MAX_CHANNELS];                 ///< Channels to be excluded from DRC processing.
  216     int band_incr;                                  ///< Number of DRC bands greater than 1 having DRC info.
  217     int interpolation_scheme;                       ///< Indicates the interpolation scheme used in the SBR QMF domain.
  218     int band_top[17];                               ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
  219     int prog_ref_level;                             /**< A reference level for the long-term program audio level for all
  220                                                      *   channels combined.
  221                                                      */
  222 } DynamicRangeControl;
  223 
  /**
   * Pulse data, embedded in SingleChannelElement as the member pulse.
   * Provides storage for at most 4 pulses (pos[4] / amp[4]).
   */
  224 typedef struct Pulse {
  225     int num_pulse;  ///< presumably the number of valid entries in pos[] and amp[] - confirm
  226     int start;
  227     int pos[4];
  228     int amp[4];
  229 } Pulse;
  230 
  231 /**
  232  * coupling parameters
       *
       * Embedded in ChannelElement as the member coup. The per-target arrays
       * (type, id_select, ch_select) have room for 8 coupled target elements.
  233  */
  234 typedef struct ChannelCoupling {
  235     enum CouplingPoint coupling_point;  ///< The point during decoding at which coupling is applied.
  236     int num_coupled;       ///< number of target elements
  237     enum RawDataBlockType type[8];   ///< Type of channel element to be coupled - SCE or CPE.
  238     int id_select[8];      ///< element id
  239     int ch_select[8];      /**< [0] shared list of gains; [1] list of gains for right channel;
  240                             *   [2] list of gains for left channel; [3] lists of gains for both channels
  241                             */
  242     INTFLOAT gain[16][120]; ///< NOTE(review): presumably 16 gain lists of up to 120 bands each (cf. the 120-entry sf[] in SingleChannelElement) - confirm
  243 } ChannelCoupling;
  244 
  245 /**
  246  * Single Channel Element - used for both SCE and LFE elements.
       *
       * ChannelElement embeds two of these (ChannelElement.ch[2]), and
       * AACContext.output_element holds pointers to them.
       * Note that the per-band arrays are not all the same length: some have
       * 128 entries, others (band_type_run_end, sf) have 120.
       * The large buffers are declared through DECLARE_ALIGNED, a macro defined
       * outside this file, with 32 as its first argument everywhere except
       * ltp_state, which uses 16.
  247  */
  248 typedef struct SingleChannelElement {
  249     IndividualChannelStream ics;
  250     TemporalNoiseShaping tns;
  251     Pulse pulse;
  252     enum BandType band_type[128];                   ///< band types
  253     enum BandType band_alt[128];                    ///< alternative band type (used by encoder)
  254     int band_type_run_end[120];                     ///< band type run end points
  255     INTFLOAT sf[120];                               ///< scalefactors
  256     int sf_idx[128];                                ///< scalefactor indices (used by encoder)
  257     uint8_t zeroes[128];                            ///< band is not coded (used by encoder)
  258     uint8_t can_pns[128];                           ///< band is allowed to PNS (informative)
  259     float  is_ener[128];                            ///< Intensity stereo pos (used by encoder)
  260     float pns_ener[128];                            ///< Noise energy values (used by encoder)
  261     DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024];   ///< coefficients for IMDCT, pristine
  262     DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024];    ///< coefficients for IMDCT, maybe processed
  263     DECLARE_ALIGNED(32, INTFLOAT, saved)[1536];     ///< overlap
  264     DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048];   ///< PCM output buffer
  265     DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP
  266     DECLARE_ALIGNED(32, AAC_FLOAT, lcoeffs)[1024];  ///< MDCT of LTP coefficients (used by encoder)
  267     DECLARE_ALIGNED(32, AAC_FLOAT, prcoeffs)[1024]; ///< Main prediction coefs (used by encoder)
  268     PredictorState predictor_state[MAX_PREDICTORS];
  269     INTFLOAT *ret;                                  ///< PCM output (NOTE(review): presumably points into ret_buf or a frame buffer - confirm)
  270 } SingleChannelElement;
  271 
  272 /**
  273  * channel element - generic struct for SCE/CPE/CCE/LFE
       *
       * AACContext stores pointers to these in che[4][MAX_ELEM_ID] and
       * tag_che_map[4][MAX_ELEM_ID]. Because the struct is generic, members
       * marked as specific to one element type are present in every instance
       * regardless of the element's actual type.
  274  */
  275 typedef struct ChannelElement {
  276     int present;
  277     // CPE specific
  278     int common_window;        ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
  279     int     ms_mode;          ///< Signals mid/side stereo flags coding mode (used by encoder)
  280     uint8_t is_mode;          ///< Set if any bands have been encoded using intensity stereo (used by encoder)
  281     uint8_t ms_mask[128];     ///< Set if mid/side stereo is used for each scalefactor window band
  282     uint8_t is_mask[128];     ///< Set if intensity stereo is used (used by encoder)
  283     // shared
  284     SingleChannelElement ch[2];
  285     // CCE specific
  286     ChannelCoupling coup;
  287     SpectralBandReplication sbr; ///< type is not declared in this file (presumably from sbr.h); NOTE(review): it follows the "CCE specific" marker but is probably not CCE-only - confirm
  288 } ChannelElement;
  289 
  290 /**
  291  * main AAC context
       *
       * The struct is declared with a tag only; the unqualified type name
       * AACContext used by the function pointer members below must therefore
       * come from a typedef in one of the included headers (presumably
       * aac_defines.h - confirm).
       * The set of members depends on USE_FIXED: the fixed-point build has an
       * AVFixedDSPContext, the other build has an AVFloatDSPContext plus three
       * MDCT15Context pointers.
  292  */
  293 struct AACContext {
  294     AVClass        *class;
  295     AVCodecContext *avctx;
  296     AVFrame *frame;
  297 
  298     int is_saved;                 ///< Set if elements have stored overlap from previous frame.
  299     DynamicRangeControl che_drc;
  300 
  301     /**
  302      * @name Channel element related data
  303      * @{
  304      */
  305     ChannelElement          *che[4][MAX_ELEM_ID];
  306     ChannelElement  *tag_che_map[4][MAX_ELEM_ID];
  307     int tags_mapped;
  308     int warned_remapping_once;
  309     /** @} */
  310 
  311     /**
  312      * @name Temporary aligned buffers
  313      * (We do not want to have these on the stack.)
  314      * @{
  315      */
  316     DECLARE_ALIGNED(32, INTFLOAT, buf_mdct)[1024];
  317     /** @} */
  318 
  319     /**
  320      * @name Computed / set up during initialization
  321      * @{
  322      */
  323     FFTContext mdct;
  324     FFTContext mdct_small;
  325     FFTContext mdct_ld;
  326     FFTContext mdct_ltp;
  327 #if USE_FIXED
  328     AVFixedDSPContext *fdsp;
  329 #else
  330     MDCT15Context *mdct120;
  331     MDCT15Context *mdct480;
  332     MDCT15Context *mdct960;
  333     AVFloatDSPContext *fdsp;
  334 #endif /* USE_FIXED */
  335     int random_state;
  336     /** @} */
  337 
  338     /**
  339      * @name Members used for output
  340      * @{
  341      */
  342     SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement
  343     /** @} */
  344 
  345 
  346     /**
  347      * @name Japanese DTV specific extension
  348      * @{
  349      */
  350     int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel
  351     int dmono_mode;      ///< 0->not dmono, 1->use first channel, 2->use second channel
  352     /** @} */
  353 
  354     DECLARE_ALIGNED(32, INTFLOAT, temp)[128];
  355 
  356     OutputConfiguration oc[2];
  357     int warned_num_aac_frames;
  358     int warned_960_sbr;
  359 
  360     int warned_gain_control;
  361 
  362     /* aacdec functions pointers */
          /* NOTE(review): these are per-context hooks; where they are assigned is
           * not visible in this header. ff_aacdec_init_mips(), declared below,
           * presumably overrides some of them - confirm. */
  363     void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce);
  364     void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce);
          /* operates on a 1024-coefficient array; the meaning of the decode flag is
           * not documented here */
  365     void (*apply_tns)(INTFLOAT coef[1024], TemporalNoiseShaping *tns,
  366                       IndividualChannelStream *ics, int decode);
  367     void (*windowing_and_mdct_ltp)(AACContext *ac, INTFLOAT *out,
  368                                    INTFLOAT *in, IndividualChannelStream *ics);
  369     void (*update_ltp)(AACContext *ac, SingleChannelElement *sce);
          /* the two hooks below take plain int buffers rather than INTFLOAT */
  370     void (*vector_pow43)(int *coefs, int len);
  371     void (*subband_scale)(int *dst, int *src, int scale, int offset, int len);
  372 
  373 };
  374 
  /**
   * Declared here; the definition is not part of this file.
   * NOTE(review): judging by the name, this presumably installs
   * MIPS-specific implementations into the function pointer members of the
   * given AACContext - confirm against the definition.
   *
   * @param c AAC context to operate on
   */
  375 void ff_aacdec_init_mips(AACContext *c);
  376 
  377 #endif /* AVCODEC_AAC_H */