diff --git a/Changelog b/Changelog index 4f937216eb..c1c886193e 100644 --- a/Changelog +++ b/Changelog @@ -10,6 +10,7 @@ version : - Raw AMR-NB and AMR-WB demuxers - TiVo ty/ty+ demuxer - Intel QSV-accelerated MJPEG encoding +- PCE support for extended channel layouts in the AAC encoder version 3.4: diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index 3efcbda403..51e467f754 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -50,6 +50,40 @@ static AVOnce aac_table_init = AV_ONCE_INIT; +static void put_pce(PutBitContext *pb, AVCodecContext *avctx) +{ + int i, j; + AACEncContext *s = avctx->priv_data; + AACPCEInfo *pce = &s->pce; + + put_bits(pb, 4, 0); + + put_bits(pb, 2, avctx->profile); + put_bits(pb, 4, s->samplerate_index); + + put_bits(pb, 4, pce->num_ele[0]); /* Front */ + put_bits(pb, 4, pce->num_ele[1]); /* Side */ + put_bits(pb, 4, pce->num_ele[2]); /* Back */ + put_bits(pb, 2, pce->num_ele[3]); /* LFE */ + put_bits(pb, 3, 0); /* Assoc data */ + put_bits(pb, 4, 0); /* CCs */ + + put_bits(pb, 1, 0); /* Stereo mixdown */ + put_bits(pb, 1, 0); /* Mono mixdown */ + put_bits(pb, 1, 0); /* Something else */ + + for (i = 0; i < 4; i++) { + for (j = 0; j < pce->num_ele[i]; j++) { + if (i < 3) + put_bits(pb, 1, pce->pairing[i][j]); + put_bits(pb, 4, pce->index[i][j]); + } + } + + avpriv_align_put_bits(pb); + put_bits(pb, 8, 0); +} + /** * Make AAC audio config object. * @see 1.6.2.1 "Syntax - AudioSpecificConfig" @@ -58,7 +92,7 @@ static void put_audio_specific_config(AVCodecContext *avctx) { PutBitContext pb; AACEncContext *s = avctx->priv_data; - int channels = s->channels - (s->channels == 8 ? 1 : 0); + int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0)); init_put_bits(&pb, avctx->extradata, avctx->extradata_size); put_bits(&pb, 5, s->profile+1); //profile @@ -68,6 +102,8 @@ static void put_audio_specific_config(AVCodecContext *avctx) put_bits(&pb, 1, 0); //frame length - 1024 samples put_bits(&pb, 1, 0); //does not depend on core coder put_bits(&pb, 1, 0); //is not extension + if (s->needs_pce) + put_pce(&pb, avctx); //Explicitly Mark SBR absent put_bits(&pb, 11, 0x2b7); //sync extension @@ -488,7 +524,7 @@ static void copy_input_samples(AACEncContext *s, const AVFrame *frame) { int ch; int end = 2048 + (frame ? frame->nb_samples : 0); - const uint8_t *channel_map = aac_chan_maps[s->channels - 1]; + const uint8_t *channel_map = s->reorder_map; /* copy and remap input samples */ for (ch = 0; ch < s->channels; ch++) { @@ -920,16 +956,36 @@ static av_cold int aac_encode_init(AVCodecContext *avctx) /* Constants */ s->last_frame_pb_count = 0; - avctx->extradata_size = 5; + avctx->extradata_size = 20; avctx->frame_size = 1024; avctx->initial_padding = 1024; s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120; /* Channel map and unspecified bitrate guessing */ s->channels = avctx->channels; - ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7, - "Unsupported number of channels: %d\n", s->channels); - s->chan_map = aac_chan_configs[s->channels-1]; + + s->needs_pce = 1; + for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) { + if (avctx->channel_layout == aac_normal_chan_layouts[i]) { + s->needs_pce = s->options.pce; + break; + } + } + + if (s->needs_pce) { + for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++) + if (avctx->channel_layout == aac_pce_configs[i].layout) + break; + ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout\n"); + av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout\n"); + s->pce = aac_pce_configs[i]; + s->reorder_map = s->pce.reorder_map; + s->chan_map = s->pce.config_map; + } else { + s->reorder_map = aac_chan_maps[s->channels - 1]; + s->chan_map = aac_chan_configs[s->channels - 1]; + } + if (!avctx->bit_rate) { for (i = 1; i <= s->chan_map[0]; i++) { avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */ @@ -1062,6 +1118,7 @@ static const AVOption aacenc_options[] = { {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, + {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, {NULL} }; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index ea2d3b9628..5a015ca92e 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -45,6 +45,7 @@ typedef struct AACEncOptions { int pns; int tns; int ltp; + int pce; int pred; int mid_side; int intensity_stereo; @@ -89,6 +90,286 @@ typedef struct AACQuantizeBandCostCacheEntry { uint16_t generation; } AACQuantizeBandCostCacheEntry; +typedef struct AACPCEInfo { + int64_t layout; + int num_ele[4]; ///< front, side, back, lfe + int pairing[3][8]; ///< front, side, back + int index[4][8]; ///< front, side, back, lfe + uint8_t config_map[16]; ///< configs the encoder's channel specific settings + uint8_t reorder_map[16]; ///< maps channels from lavc to aac order +} AACPCEInfo; + +/** + * List of PCE (Program Configuration Element) for the channel layouts listed + * in channel_layout.h + * + * For those wishing in the future to add other layouts: + * + * - num_ele: number of elements in each group of front, side, back, lfe channels + * (an element is of type SCE (single channel), CPE (channel pair) for + * the first 3 groups; and is LFE for LFE group). + * + * - pairing: 0 for an SCE element or 1 for a CPE; does not apply to LFE group + * + * - index: there are three independent indices for SCE, CPE and LFE; + * they are incremented irrespective of the group to which the element belongs; + * they are not reset when going from one group to another + * + * Example: for 7.0 channel layout, + * .pairing = { { 1, 0 }, { 1 }, { 1 }, }, (3 CPE and 1 SCE in front group) + * .index = { { 0, 0 }, { 1 }, { 2 }, }, + * (index is 0 for the single SCE but goes from 0 to 2 for the CPEs) + * + * The index order impacts the channel ordering. But is otherwise arbitrary + * (the sequence could have been 2, 0, 1 instead of 0, 1, 2). + * + * Spec allows for discontinuous indices, e.g. if one has a total of two SCE, + * SCE.0 SCE.15 is OK per spec; BUT it won't be decoded by our AAC decoder + * which at this time requires that indices fully cover some range starting + * from 0 (SCE.1 SCE.0 is OK but not SCE.0 SCE.15). + * + * - config_map: total number of elements and their types. Beware, the way the + * types are ordered impacts the final channel ordering. + * + * - reorder_map: reorders the channels. + * + */ +static const AACPCEInfo aac_pce_configs[] = { + { + .layout = AV_CH_LAYOUT_MONO, + .num_ele = { 1, 0, 0, 0 }, + .pairing = { { 0 }, }, + .index = { { 0 }, }, + .config_map = { 1, TYPE_SCE, }, + .reorder_map = { 0 }, + }, + { + .layout = AV_CH_LAYOUT_STEREO, + .num_ele = { 1, 0, 0, 0 }, + .pairing = { { 1 }, }, + .index = { { 0 }, }, + .config_map = { 1, TYPE_CPE, }, + .reorder_map = { 0, 1 }, + }, + { + .layout = AV_CH_LAYOUT_2POINT1, + .num_ele = { 1, 0, 0, 1 }, + .pairing = { { 1 }, }, + .index = { { 0 },{ 0 },{ 0 },{ 0 } }, + .config_map = { 2, TYPE_CPE, TYPE_LFE }, + .reorder_map = { 0, 1, 2 }, + }, + { + .layout = AV_CH_LAYOUT_2_1, + .num_ele = { 1, 0, 1, 0 }, + .pairing = { { 1 },{ 0 },{ 0 } }, + .index = { { 0 },{ 0 },{ 0 }, }, + .config_map = { 2, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2 }, + }, + { + .layout = AV_CH_LAYOUT_SURROUND, + .num_ele = { 2, 0, 0, 0 }, + .pairing = { { 1, 0 }, }, + .index = { { 0, 0 }, }, + .config_map = { 2, TYPE_CPE, TYPE_SCE, }, + .reorder_map = { 0, 1, 2 }, + }, + { + .layout = AV_CH_LAYOUT_3POINT1, + .num_ele = { 2, 0, 0, 1 }, + .pairing = { { 1, 0 }, }, + .index = { { 0, 0 }, { 0 }, { 0 }, { 0 }, }, + .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_LFE }, + .reorder_map = { 0, 1, 2, 3 }, + }, + { + .layout = AV_CH_LAYOUT_4POINT0, + .num_ele = { 2, 0, 1, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 0 }, }, + .index = { { 0, 0 }, { 0 }, { 1 } }, + .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3 }, + }, + { + .layout = AV_CH_LAYOUT_4POINT1, + .num_ele = { 2, 1, 1, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 0 }, }, + .index = { { 0, 0 }, { 1 }, { 2 }, { 0 } }, + .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4 }, + }, + { + .layout = AV_CH_LAYOUT_2_2, + .num_ele = { 1, 1, 0, 0 }, + .pairing = { { 1 }, { 1 }, }, + .index = { { 0 }, { 1 }, }, + .config_map = { 2, TYPE_CPE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3 }, + }, + { + .layout = AV_CH_LAYOUT_QUAD, + .num_ele = { 1, 0, 1, 0 }, + .pairing = { { 1 }, { 0 }, { 1 }, }, + .index = { { 0 }, { 0 }, { 1 } }, + .config_map = { 2, TYPE_CPE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3 }, + }, + { + .layout = AV_CH_LAYOUT_5POINT0, + .num_ele = { 2, 1, 0, 0 }, + .pairing = { { 1, 0 }, { 1 }, }, + .index = { { 0, 0 }, { 1 } }, + .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4 }, + }, + { + .layout = AV_CH_LAYOUT_5POINT1, + .num_ele = { 2, 1, 1, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 1 }, }, + .index = { { 0, 0 }, { 1 }, { 1 } }, + .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4, 5 }, + }, + { + .layout = AV_CH_LAYOUT_5POINT0_BACK, + .num_ele = { 2, 0, 1, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 1 } }, + .index = { { 0, 0 }, { 0 }, { 1 } }, + .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4 }, + }, + { + .layout = AV_CH_LAYOUT_5POINT1_BACK, + .num_ele = { 2, 1, 1, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 1 }, }, + .index = { { 0, 0 }, { 1 }, { 1 } }, + .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4, 5 }, + }, + { + .layout = AV_CH_LAYOUT_6POINT0, + .num_ele = { 2, 1, 1, 0 }, + .pairing = { { 1, 0 }, { 1 }, { 0 }, }, + .index = { { 0, 0 }, { 1 }, { 1 } }, + .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5 }, + }, + { + .layout = AV_CH_LAYOUT_6POINT0_FRONT, + .num_ele = { 2, 1, 0, 0 }, + .pairing = { { 1, 1 }, { 1 } }, + .index = { { 1, 0 }, { 2 }, }, + .config_map = { 3, TYPE_CPE, TYPE_CPE, TYPE_CPE, }, + .reorder_map = { 0, 1, 2, 3, 4, 5 }, + }, + { + .layout = AV_CH_LAYOUT_HEXAGONAL, + .num_ele = { 2, 0, 2, 0 }, + .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, }, + .index = { { 0, 0 },{ 0 },{ 1, 1 } }, + .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, }, + .reorder_map = { 0, 1, 2, 3, 4, 5 }, + }, + { + .layout = AV_CH_LAYOUT_6POINT1, + .num_ele = { 2, 1, 2, 0 }, + .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, }, + .index = { { 0, 0 },{ 1 },{ 1, 2 } }, + .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6 }, + }, + { + .layout = AV_CH_LAYOUT_6POINT1_BACK, + .num_ele = { 2, 1, 2, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, }, + .index = { { 0, 0 }, { 1 }, { 1, 2 } }, + .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6 }, + }, + { + .layout = AV_CH_LAYOUT_6POINT1_FRONT, + .num_ele = { 2, 1, 2, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, }, + .index = { { 0, 0 }, { 1 }, { 1, 2 } }, + .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6 }, + }, + { + .layout = AV_CH_LAYOUT_7POINT0, + .num_ele = { 2, 1, 1, 0 }, + .pairing = { { 1, 0 }, { 1 }, { 1 }, }, + .index = { { 0, 0 }, { 1 }, { 2 }, }, + .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6 }, + }, + { + .layout = AV_CH_LAYOUT_7POINT0_FRONT, + .num_ele = { 2, 1, 1, 0 }, + .pairing = { { 1, 0 }, { 1 }, { 1 }, }, + .index = { { 0, 0 }, { 1 }, { 2 }, }, + .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6 }, + }, + { + .layout = AV_CH_LAYOUT_7POINT1, + .num_ele = { 2, 1, 2, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, }, + .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } }, + .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 }, + }, + { + .layout = AV_CH_LAYOUT_7POINT1_WIDE, + .num_ele = { 2, 1, 2, 0 }, + .pairing = { { 1, 0 }, { 0 },{ 1, 1 }, }, + .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } }, + .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 }, + }, + { + .layout = AV_CH_LAYOUT_7POINT1_WIDE_BACK, + .num_ele = { 2, 1, 2, 0 }, + .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, }, + .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } }, + .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 }, + }, + { + .layout = AV_CH_LAYOUT_OCTAGONAL, + .num_ele = { 2, 1, 2, 0 }, + .pairing = { { 1, 0 }, { 1 }, { 1, 0 }, }, + .index = { { 0, 0 }, { 1 }, { 2, 1 } }, + .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 }, + }, + { /* Meant for order 2/mixed ambisonics */ + .layout = AV_CH_LAYOUT_OCTAGONAL | AV_CH_TOP_CENTER, + .num_ele = { 2, 2, 2, 0 }, + .pairing = { { 1, 0 }, { 1, 0 }, { 1, 0 }, }, + .index = { { 0, 0 }, { 1, 1 }, { 2, 2 } }, + .config_map = { 6, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }, + }, + { /* Meant for order 2/mixed ambisonics */ + .layout = AV_CH_LAYOUT_6POINT0_FRONT | AV_CH_BACK_CENTER | + AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT | AV_CH_TOP_CENTER, + .num_ele = { 2, 2, 2, 0 }, + .pairing = { { 1, 1 }, { 1, 0 }, { 1, 0 }, }, + .index = { { 0, 1 }, { 2, 0 }, { 3, 1 } }, + .config_map = { 6, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + }, + { + .layout = AV_CH_LAYOUT_HEXADECAGONAL, + .num_ele = { 4, 2, 4, 0 }, + .pairing = { { 1, 0, 1, 0 }, { 1, 1 }, { 1, 0, 1, 0 }, }, + .index = { { 0, 0, 1, 1 }, { 2, 3 }, { 4, 2, 5, 3 } }, + .config_map = { 10, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE }, + .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + }, +}; + /** * AAC encoder context */ @@ -99,12 +380,15 @@ typedef struct AACEncContext { FFTContext mdct1024; ///< long (1024 samples) frame transform context FFTContext mdct128; ///< short (128 samples) frame transform context AVFloatDSPContext *fdsp; - float *planar_samples[8]; ///< saved preprocessed input + AACPCEInfo pce; ///< PCE data, if needed + float *planar_samples[16]; ///< saved preprocessed input int profile; ///< copied from avctx + int needs_pce; ///< flag for non-standard layout LPCContext lpc; ///< used by TNS int samplerate_index; ///< MPEG-4 samplerate index int channels; ///< channel count + const uint8_t *reorder_map; ///< lavc to aac reorder map const uint8_t *chan_map; ///< channel configuration map ChannelElement *cpe; ///< channel elements diff --git a/libavcodec/aacenctab.h b/libavcodec/aacenctab.h index 5fc9411232..c852a29f53 100644 --- a/libavcodec/aacenctab.h +++ b/libavcodec/aacenctab.h @@ -36,13 +36,24 @@ /** Total number of codebooks, including special ones **/ #define CB_TOT_ALL 15 -#define AAC_MAX_CHANNELS 8 +#define AAC_MAX_CHANNELS 16 extern const uint8_t *ff_aac_swb_size_1024[]; extern const int ff_aac_swb_size_1024_len; extern const uint8_t *ff_aac_swb_size_128[]; extern const int ff_aac_swb_size_128_len; +/* Supported layouts without using a PCE */ +static const int64_t aac_normal_chan_layouts[7] = { + AV_CH_LAYOUT_MONO, + AV_CH_LAYOUT_STEREO, + AV_CH_LAYOUT_SURROUND, + AV_CH_LAYOUT_4POINT0, + AV_CH_LAYOUT_5POINT0, + AV_CH_LAYOUT_5POINT1, + AV_CH_LAYOUT_7POINT1, +}; + /** default channel configurations */ static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6] = { {1, TYPE_SCE}, // 1 channel - single channel element