diff --git a/Changelog b/Changelog index 59b14ae35c..de863d1725 100644 --- a/Changelog +++ b/Changelog @@ -28,6 +28,7 @@ version - ffprobe -read_intervals option - Lossless and alpha support for WebP decoder - Error Resilient AAC syntax (ER AAC LC) decoding +- Low Delay AAC (ER AAC LD) decoding version 2.0: diff --git a/libavcodec/aac.h b/libavcodec/aac.h index 209c715e36..cda14777c4 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -290,6 +290,7 @@ struct AACContext { */ FFTContext mdct; FFTContext mdct_small; + FFTContext mdct_ld; FFTContext mdct_ltp; FmtConvertContext fmt_conv; AVFloatDSPContext fdsp; diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 5816054458..34205057b7 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -856,6 +856,13 @@ static int decode_audio_specific_config(AACContext *ac, m4ac->sampling_index); return AVERROR_INVALIDDATA; } + if (m4ac->object_type == AOT_ER_AAC_LD && + (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) { + av_log(avctx, AV_LOG_ERROR, + "invalid low delay sampling rate index %d\n", + m4ac->sampling_index); + return AVERROR_INVALIDDATA; + } skip_bits_long(&gb, i); @@ -864,6 +871,7 @@ static int decode_audio_specific_config(AACContext *ac, case AOT_AAC_LC: case AOT_AAC_LTP: case AOT_ER_AAC_LC: + case AOT_ER_AAC_LD: if ((ret = decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config)) < 0) return ret; @@ -1033,12 +1041,15 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) 352); ff_mdct_init(&ac->mdct, 11, 1, 1.0 / (32768.0 * 1024.0)); + ff_mdct_init(&ac->mdct_ld, 10, 1, 1.0 / (32768.0 * 512.0)); ff_mdct_init(&ac->mdct_small, 8, 1, 1.0 / (32768.0 * 128.0)); ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0 * 32768.0); // window initialization ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); + ff_kbd_window_init(ff_aac_kbd_long_512, 4.0, 512); ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); ff_init_ff_sine_windows(10); + ff_init_ff_sine_windows( 9); ff_init_ff_sine_windows( 7); cbrt_tableinit(); @@ -1111,6 +1122,14 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, } ics->window_sequence[1] = ics->window_sequence[0]; ics->window_sequence[0] = get_bits(gb, 2); + if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD && + ics->window_sequence[0] != ONLY_LONG_SEQUENCE) { + av_log(ac->avctx, AV_LOG_ERROR, + "AAC LD is only defined for ONLY_LONG_SEQUENCE but " + "window sequence %d found.\n", ics->window_sequence[0]); + ics->window_sequence[0] = ONLY_LONG_SEQUENCE; + return AVERROR_INVALIDDATA; + } ics->use_kb_window[1] = ics->use_kb_window[0]; ics->use_kb_window[0] = get_bits1(gb); ics->num_window_groups = 1; @@ -1134,8 +1153,15 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, } else { ics->max_sfb = get_bits(gb, 6); ics->num_windows = 1; - ics->swb_offset = ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index]; - ics->num_swb = ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index]; + if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD) { + ics->swb_offset = ff_swb_offset_512[ac->oc[1].m4ac.sampling_index]; + ics->num_swb = ff_aac_num_swb_512[ac->oc[1].m4ac.sampling_index]; + if (!ics->num_swb || !ics->swb_offset) + return AVERROR_BUG; + } else { + ics->swb_offset = ff_swb_offset_1024[ac->oc[1].m4ac.sampling_index]; + ics->num_swb = ff_aac_num_swb_1024[ac->oc[1].m4ac.sampling_index]; + } ics->tns_max_bands = ff_tns_max_bands_1024[ac->oc[1].m4ac.sampling_index]; ics->predictor_present = get_bits1(gb); ics->predictor_reset_group = 0; @@ -1150,6 +1176,11 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, "Prediction is not allowed in AAC-LC.\n"); goto fail; } else { + if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD) { + av_log(ac->avctx, AV_LOG_ERROR, + "LTP in ER AAC LD not yet implemented.\n"); + return AVERROR_PATCHWELCOME; + } if ((ics->ltp.present = get_bits(gb, 1))) decode_ltp(&ics->ltp, gb, ics->max_sfb); } @@ -2393,6 +2424,25 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) } } +static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce) +{ + IndividualChannelStream *ics = &sce->ics; + float *in = sce->coeffs; + float *out = sce->ret; + float *saved = sce->saved; + const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_512 : ff_sine_512; + float *buf = ac->buf_mdct; + + // imdct + ac->mdct.imdct_half(&ac->mdct_ld, buf, in); + + // window overlapping + ac->fdsp.vector_fmul_window(out, saved, buf, lwindow_prev, 256); + + // buffer update + memcpy(saved, buf + 256, 256 * sizeof(float)); +} + /** * Apply dependent channel coupling (applied before IMDCT). * @@ -2489,6 +2539,11 @@ static void apply_channel_coupling(AACContext *ac, ChannelElement *cc, static void spectral_to_sample(AACContext *ac) { int i, type; + void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce); + if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD) + imdct_and_window = imdct_and_windowing_ld; + else + imdct_and_window = ac->imdct_and_windowing; for (type = 3; type >= 0; type--) { for (i = 0; i < MAX_ELEM_ID; i++) { ChannelElement *che = ac->che[type][i]; @@ -2510,11 +2565,11 @@ static void spectral_to_sample(AACContext *ac) if (type <= TYPE_CPE) apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling); if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { - ac->imdct_and_windowing(ac, &che->ch[0]); + imdct_and_window(ac, &che->ch[0]); if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) ac->update_ltp(ac, &che->ch[0]); if (type == TYPE_CPE) { - ac->imdct_and_windowing(ac, &che->ch[1]); + imdct_and_window(ac, &che->ch[1]); if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) ac->update_ltp(ac, &che->ch[1]); } @@ -2599,6 +2654,9 @@ static int aac_decode_er_frame(AVCodecContext *avctx, void *data, int samples = 1024; int chan_config = ac->oc[1].m4ac.chan_config; + if (ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD) + samples >>= 1; + ac->frame = data; if ((err = frame_configure_elements(avctx)) < 0) @@ -2887,6 +2945,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) ff_mdct_end(&ac->mdct); ff_mdct_end(&ac->mdct_small); + ff_mdct_end(&ac->mdct_ld); ff_mdct_end(&ac->mdct_ltp); return 0; } diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c index 6cbb8c4c37..08c44e5d35 100644 --- a/libavcodec/aactab.c +++ b/libavcodec/aactab.c @@ -34,12 +34,17 @@ #include DECLARE_ALIGNED(32, float, ff_aac_kbd_long_1024)[1024]; +DECLARE_ALIGNED(32, float, ff_aac_kbd_long_512 )[512]; DECLARE_ALIGNED(32, float, ff_aac_kbd_short_128)[128]; const uint8_t ff_aac_num_swb_1024[] = { 41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40 }; +const uint8_t ff_aac_num_swb_512[] = { + 0, 0, 0, 36, 36, 37, 31, 31, 0, 0, 0, 0, 0 +}; + const uint8_t ff_aac_num_swb_128[] = { 12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15 }; @@ -1114,6 +1119,14 @@ static const uint16_t swb_offset_1024_48[] = { 928, 1024 }; +static const uint16_t swb_offset_512_48[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 68, 76, 84, 92, 100, 112, 124, 136, + 148, 164, 184, 208, 236, 268, 300, 332, + 364, 396, 428, 460, 512 +}; + static const uint16_t swb_offset_128_48[] = { 0, 4, 8, 12, 16, 20, 28, 36, 44, 56, 68, 80, 96, 112, 128 @@ -1129,6 +1142,14 @@ static const uint16_t swb_offset_1024_32[] = { 928, 960, 992, 1024 }; +static const uint16_t swb_offset_512_32[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 64, + 72, 80, 88, 96, 108, 120, 132, 144, + 160, 176, 192, 212, 236, 260, 288, 320, + 352, 384, 416, 448, 480, 512 +}; + static const uint16_t swb_offset_1024_24[] = { 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 52, 60, 68, 76, @@ -1138,6 +1159,13 @@ static const uint16_t swb_offset_1024_24[] = { 600, 652, 704, 768, 832, 896, 960, 1024 }; +static const uint16_t swb_offset_512_24[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 52, 60, 68, 80, + 92, 104, 120, 140, 164, 192, 224, 256, + 288, 320, 352, 384, 416, 448, 480, 512, +}; + static const uint16_t swb_offset_128_24[] = { 0, 4, 8, 12, 16, 20, 24, 28, 36, 44, 52, 64, 76, 92, 108, 128 @@ -1179,6 +1207,14 @@ const uint16_t * const ff_swb_offset_1024[] = { swb_offset_1024_8 }; +const uint16_t * const ff_swb_offset_512[] = { + NULL, NULL, NULL, + swb_offset_512_48, swb_offset_512_48, swb_offset_512_32, + swb_offset_512_24, swb_offset_512_24, NULL, + NULL, NULL, NULL, + NULL +}; + const uint16_t * const ff_swb_offset_128[] = { /* The last entry on the following row is swb_offset_128_64 but is a duplicate of swb_offset_128_96. */ diff --git a/libavcodec/aactab.h b/libavcodec/aactab.h index 6ed3b4a6be..e06f6824ae 100644 --- a/libavcodec/aactab.h +++ b/libavcodec/aactab.h @@ -45,6 +45,7 @@ * @{ */ DECLARE_ALIGNED(32, extern float, ff_aac_kbd_long_1024)[1024]; +DECLARE_ALIGNED(32, extern float, ff_aac_kbd_long_512 )[512]; DECLARE_ALIGNED(32, extern float, ff_aac_kbd_short_128)[128]; // @} @@ -52,6 +53,7 @@ DECLARE_ALIGNED(32, extern float, ff_aac_kbd_short_128)[128]; * @{ */ extern const uint8_t ff_aac_num_swb_1024[]; +extern const uint8_t ff_aac_num_swb_512 []; extern const uint8_t ff_aac_num_swb_128 []; // @} @@ -69,6 +71,7 @@ extern const float *ff_aac_codebook_vector_vals[]; extern const uint16_t *ff_aac_codebook_vector_idx[]; extern const uint16_t * const ff_swb_offset_1024[13]; +extern const uint16_t * const ff_swb_offset_512 [13]; extern const uint16_t * const ff_swb_offset_128 [13]; extern const uint8_t ff_tns_max_bands_1024[13]; diff --git a/libavcodec/version.h b/libavcodec/version.h index 55065c3ca7..48ca4bb54c 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #include "libavutil/avutil.h" #define LIBAVCODEC_VERSION_MAJOR 55 -#define LIBAVCODEC_VERSION_MINOR 32 +#define LIBAVCODEC_VERSION_MINOR 33 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \