From ca6e7708b42e7d33ba3053bcd447d52a077bca25 Mon Sep 17 00:00:00 2001 From: Carl Eugen Hoyos Date: Tue, 30 Mar 2010 22:09:14 +0000 Subject: [PATCH] Add spectral extension to the E-AC-3 decoder. Original patch by Justin, updated and resubmitted by Christophe Gisquet, christophe D gisquet A gmail Originally committed as revision 22734 to svn://svn.ffmpeg.org/ffmpeg/trunk --- Changelog | 1 + libavcodec/ac3dec.c | 119 ++++++++++++++++++++++++++++++++++---- libavcodec/ac3dec.h | 47 +++++++++++++++ libavcodec/ac3dec_data.c | 6 ++ libavcodec/ac3dec_data.h | 1 + libavcodec/avcodec.h | 2 +- libavcodec/eac3dec.c | 105 +++++++++++++++++++++++++++++---- libavcodec/eac3dec_data.c | 39 +++++++++++++ libavcodec/eac3dec_data.h | 1 + 9 files changed, 300 insertions(+), 21 deletions(-) diff --git a/Changelog b/Changelog index 8b8b2e713d..ce991c708e 100644 --- a/Changelog +++ b/Changelog @@ -68,6 +68,7 @@ version : - HTTP Digest authentication - RTMP/RTMPT/RTMPS/RTMPE/RTMPTE protocol support via librtmp - Psygnosis YOP demuxer and video decoder +- spectral extension support in the E-AC-3 decoder diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index 9f45af3280..1656a4c540 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -815,14 +815,105 @@ static int decode_audio_block(AC3DecodeContext *s, int blk) /* spectral extension strategy */ if (s->eac3 && (!blk || get_bits1(gbc))) { - if (get_bits1(gbc)) { - av_log_missing_feature(s->avctx, "Spectral extension", 1); - return -1; + s->spx_in_use = get_bits1(gbc); + if (s->spx_in_use) { + int dst_start_freq, dst_end_freq, src_start_freq, + start_subband, end_subband; + + /* determine which channels use spx */ + if (s->channel_mode == AC3_CHMODE_MONO) { + s->channel_uses_spx[1] = 1; + } else { + for (ch = 1; ch <= fbw_channels; ch++) + s->channel_uses_spx[ch] = get_bits1(gbc); + } + + /* get the frequency bins of the spx copy region and the spx start + and end subbands */ + dst_start_freq = get_bits(gbc, 2); + start_subband = 
get_bits(gbc, 3) + 2; + if (start_subband > 7) + start_subband += start_subband - 7; + end_subband = get_bits(gbc, 3) + 5; + if (end_subband > 7) + end_subband += end_subband - 7; + dst_start_freq = dst_start_freq * 12 + 25; + src_start_freq = start_subband * 12 + 25; + dst_end_freq = end_subband * 12 + 25; + + /* check validity of spx ranges */ + if (start_subband >= end_subband) { + av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension " + "range (%d >= %d)\n", start_subband, end_subband); + return -1; + } + if (dst_start_freq >= src_start_freq) { + av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension " + "copy start bin (%d >= %d)\n", dst_start_freq, src_start_freq); + return -1; + } + + s->spx_dst_start_freq = dst_start_freq; + s->spx_src_start_freq = src_start_freq; + s->spx_dst_end_freq = dst_end_freq; + + decode_band_structure(gbc, blk, s->eac3, 0, + start_subband, end_subband, + ff_eac3_default_spx_band_struct, + &s->num_spx_bands, + s->spx_band_sizes); + } else { + for (ch = 1; ch <= fbw_channels; ch++) { + s->channel_uses_spx[ch] = 0; + s->first_spx_coords[ch] = 1; + } } - /* TODO: parse spectral extension strategy info */ } - /* TODO: spectral extension coordinates */ + /* spectral extension coordinates */ + if (s->spx_in_use) { + for (ch = 1; ch <= fbw_channels; ch++) { + if (s->channel_uses_spx[ch]) { + if (s->first_spx_coords[ch] || get_bits1(gbc)) { + float spx_blend; + int bin, master_spx_coord; + + s->first_spx_coords[ch] = 0; + spx_blend = get_bits(gbc, 5) * (1.0f/32); + master_spx_coord = get_bits(gbc, 2) * 3; + + bin = s->spx_src_start_freq; + for (bnd = 0; bnd < s->num_spx_bands; bnd++) { + int bandsize; + int spx_coord_exp, spx_coord_mant; + float nratio, sblend, nblend, spx_coord; + + /* calculate blending factors */ + bandsize = s->spx_band_sizes[bnd]; + nratio = ((float)((bin + (bandsize >> 1))) / s->spx_dst_end_freq) - spx_blend; + nratio = av_clipf(nratio, 0.0f, 1.0f); + nblend = sqrtf(3.0f * nratio); // noise is scaled by 
sqrt(3) to give unity variance + sblend = sqrtf(1.0f - nratio); + bin += bandsize; + + /* decode spx coordinates */ + spx_coord_exp = get_bits(gbc, 4); + spx_coord_mant = get_bits(gbc, 2); + if (spx_coord_exp == 15) spx_coord_mant <<= 1; + else spx_coord_mant += 4; + spx_coord_mant <<= (25 - spx_coord_exp - master_spx_coord); + spx_coord = spx_coord_mant * (1.0f/(1<<23)); + + /* multiply noise and signal blending factors by spx coordinate */ + s->spx_noise_blend [ch][bnd] = nblend * spx_coord; + s->spx_signal_blend[ch][bnd] = sblend * spx_coord; + } + } + } else { + s->first_spx_coords[ch] = 1; + } + } + } /* coupling strategy */ if (s->eac3 ? s->cpl_strategy_exists[blk] : get_bits1(gbc)) { @@ -859,9 +950,9 @@ static int decode_audio_block(AC3DecodeContext *s, int blk) s->phase_flags_in_use = get_bits1(gbc); /* coupling frequency range */ - /* TODO: modify coupling end freq if spectral extension is used */ cpl_start_subband = get_bits(gbc, 4); - cpl_end_subband = get_bits(gbc, 4) + 3; + cpl_end_subband = s->spx_in_use ? 
(s->spx_src_start_freq - 37) / 12 : + get_bits(gbc, 4) + 3; if (cpl_start_subband >= cpl_end_subband) { av_log(s->avctx, AV_LOG_ERROR, "invalid coupling range (%d >= %d)\n", cpl_start_subband, cpl_end_subband); @@ -934,8 +1025,11 @@ static int decode_audio_block(AC3DecodeContext *s, int blk) if (channel_mode == AC3_CHMODE_STEREO) { if ((s->eac3 && !blk) || get_bits1(gbc)) { s->num_rematrixing_bands = 4; - if(cpl_in_use && s->start_freq[CPL_CH] <= 61) + if (cpl_in_use && s->start_freq[CPL_CH] <= 61) { s->num_rematrixing_bands -= 1 + (s->start_freq[CPL_CH] == 37); + } else if (s->spx_in_use && s->spx_src_start_freq <= 61) { + s->num_rematrixing_bands--; + } for(bnd=0; bndnum_rematrixing_bands; bnd++) s->rematrixing_flags[bnd] = get_bits1(gbc); } else if (!blk) { @@ -960,6 +1054,8 @@ static int decode_audio_block(AC3DecodeContext *s, int blk) int prev = s->end_freq[ch]; if (s->channel_in_cpl[ch]) s->end_freq[ch] = s->start_freq[CPL_CH]; + else if (s->channel_uses_spx[ch]) + s->end_freq[ch] = s->spx_src_start_freq; else { int bandwidth_code = get_bits(gbc, 6); if (bandwidth_code > 60) { @@ -1156,8 +1252,6 @@ static int decode_audio_block(AC3DecodeContext *s, int blk) /* TODO: generate enhanced coupling coordinates and uncouple */ - /* TODO: apply spectral extension */ - /* recover coefficients if rematrixing is in use */ if(s->channel_mode == AC3_CHMODE_STEREO) do_rematrixing(s); @@ -1173,6 +1267,11 @@ static int decode_audio_block(AC3DecodeContext *s, int blk) s->dsp.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256); } + /* apply spectral extension to high frequency bins */ + if (s->spx_in_use) { + ff_eac3_apply_spectral_extension(s); + } + /* downmix and MDCT. order depends on whether block switching is used for any channel in this block. this is because coefficients for the long and short transforms cannot be mixed. 
*/ diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h index 78869b5e26..bd555bdde1 100644 --- a/libavcodec/ac3dec.h +++ b/libavcodec/ac3dec.h @@ -22,6 +22,29 @@ /** * @file libavcodec/ac3.h * Common code between the AC-3 and E-AC-3 decoders. + * + * Summary of MDCT Coefficient Grouping: + * The individual MDCT coefficient indices are often referred to in the + * (E-)AC-3 specification as frequency bins. These bins are grouped together + * into subbands of 12 coefficients each. The subbands are grouped together + * into bands as defined in the bitstream by the band structures, which + * determine the number of bands and the size of each band. The full spectrum + * of 256 frequency bins is divided into 1 DC bin + 21 subbands = 253 bins. + * This system of grouping coefficients is used for channel bandwidth, stereo + * rematrixing, channel coupling, enhanced coupling, and spectral extension. + * + * +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+-+ + * |1| |12| | [12|12|12|12] | | | | | | | | | | | | |3| + * +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+-+ + * ~~~ ~~~~ ~~~~~~~~~~~~~ ~~~ + * | | | | + * | | | 3 unused frequency bins--+ + * | | | + * | | +--1 band containing 4 subbands + * | | + * | +--1 subband of 12 frequency bins + * | + * +--DC frequency bin */ #ifndef AVCODEC_AC3DEC_H @@ -43,6 +66,7 @@ #define AC3_MAX_COEFS 256 #define AC3_BLOCK_SIZE 256 #define MAX_BLOCKS 6 +#define SPX_MAX_BANDS 17 typedef struct { AVCodecContext *avctx; ///< parent context @@ -89,6 +113,22 @@ typedef struct { int cpl_coords[AC3_MAX_CHANNELS][18]; ///< coupling coordinates (cplco) ///@} +///@defgroup spx spectral extension +///@{ + int spx_in_use; ///< spectral extension in use (spxinu) + uint8_t channel_uses_spx[AC3_MAX_CHANNELS]; ///< channel uses spectral extension (chinspx) + int8_t spx_atten_code[AC3_MAX_CHANNELS]; ///< spx attenuation code (spxattencod) + int spx_src_start_freq; ///< spx start frequency bin + int spx_dst_end_freq; 
///< spx end frequency bin + int spx_dst_start_freq; ///< spx starting frequency bin for copying (copystartmant) + ///< the copy region ends at the start of the spx region. + int num_spx_bands; ///< number of spx bands (nspxbnds) + uint8_t spx_band_sizes[SPX_MAX_BANDS]; ///< number of bins in each spx band + uint8_t first_spx_coords[AC3_MAX_CHANNELS]; ///< first spx coordinates states (firstspxcos) + float spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor (nblendfact) + float spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact) +///@} + ///@defgroup aht adaptive hybrid transform int channel_uses_aht[AC3_MAX_CHANNELS]; ///< channel AHT in use (chahtinu) int pre_mantissa[AC3_MAX_CHANNELS][AC3_MAX_COEFS][MAX_BLOCKS]; ///< pre-IDCT mantissas @@ -182,4 +222,11 @@ void ff_eac3_decode_transform_coeffs_aht_ch(AC3DecodeContext *s, int ch); void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); +/** + * Apply spectral extension to each channel by copying lower frequency + * coefficients to higher frequency bins and applying side information to + * approximate the original high frequency signal. 
+ */ +void ff_eac3_apply_spectral_extension(AC3DecodeContext *s); + #endif /* AVCODEC_AC3DEC_H */ diff --git a/libavcodec/ac3dec_data.c b/libavcodec/ac3dec_data.c index 907a3aed80..abe359b889 100644 --- a/libavcodec/ac3dec_data.c +++ b/libavcodec/ac3dec_data.c @@ -64,3 +64,9 @@ const uint8_t ff_eac3_hebap_tab[64] = { */ const uint8_t ff_eac3_default_cpl_band_struct[18] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 }; + +/** + * Table E2.15 Default Spectral Extension Banding Structure + */ +const uint8_t ff_eac3_default_spx_band_struct[17] = +{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; diff --git a/libavcodec/ac3dec_data.h b/libavcodec/ac3dec_data.h index 8d9db0518d..9ed7c73188 100644 --- a/libavcodec/ac3dec_data.h +++ b/libavcodec/ac3dec_data.h @@ -29,5 +29,6 @@ extern const uint8_t ff_ac3_rematrix_band_tab[5]; extern const uint8_t ff_eac3_hebap_tab[64]; extern const uint8_t ff_eac3_default_cpl_band_struct[18]; +extern const uint8_t ff_eac3_default_spx_band_struct[17]; #endif /* AVCODEC_AC3DEC_DATA_H */ diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index a1efa0f0ab..5a461c623c 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -30,7 +30,7 @@ #include "libavutil/avutil.h" #define LIBAVCODEC_VERSION_MAJOR 52 -#define LIBAVCODEC_VERSION_MINOR 62 +#define LIBAVCODEC_VERSION_MINOR 63 #define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ diff --git a/libavcodec/eac3dec.c b/libavcodec/eac3dec.c index 3784ccfb1d..52d15c83f8 100644 --- a/libavcodec/eac3dec.c +++ b/libavcodec/eac3dec.c @@ -23,10 +23,6 @@ /* * There are several features of E-AC-3 that this decoder does not yet support. * - * Spectral Extension - * There is a patch to get this working for the two samples we have that - * use it, but it needs some minor changes in order to be accepted. - * * Enhanced Coupling * No known samples exist. If any ever surface, this feature should not be * too difficult to implement. 
@@ -67,6 +63,95 @@ typedef enum { #define EAC3_SR_CODE_REDUCED 3 +void ff_eac3_apply_spectral_extension(AC3DecodeContext *s) +{ + int bin, bnd, ch, i; + uint8_t wrapflag[SPX_MAX_BANDS]={1,0,}, num_copy_sections, copy_sizes[SPX_MAX_BANDS]; + float rms_energy[SPX_MAX_BANDS]; + + /* Set copy index mapping table. Set wrap flags to apply a notch filter at + wrap points later on. */ + bin = s->spx_dst_start_freq; + num_copy_sections = 0; + for (bnd = 0; bnd < s->num_spx_bands; bnd++) { + int copysize; + int bandsize = s->spx_band_sizes[bnd]; + if (bin + bandsize > s->spx_src_start_freq) { + copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq; + bin = s->spx_dst_start_freq; + wrapflag[bnd] = 1; + } + for (i = 0; i < bandsize; i += copysize) { + if (bin == s->spx_src_start_freq) { + copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq; + bin = s->spx_dst_start_freq; + } + copysize = FFMIN(bandsize - i, s->spx_src_start_freq - bin); + bin += copysize; + } + } + copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq; + + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (!s->channel_uses_spx[ch]) + continue; + + /* Copy coeffs from normal bands to extension bands */ + bin = s->spx_src_start_freq; + for (i = 0; i < num_copy_sections; i++) { + memcpy(&s->transform_coeffs[ch][bin], + &s->transform_coeffs[ch][s->spx_dst_start_freq], + copy_sizes[i]*sizeof(float)); + bin += copy_sizes[i]; + } + + /* Calculate RMS energy for each SPX band. */ + bin = s->spx_src_start_freq; + for (bnd = 0; bnd < s->num_spx_bands; bnd++) { + int bandsize = s->spx_band_sizes[bnd]; + float accum = 0.0f; + for (i = 0; i < bandsize; i++) { + float coeff = s->transform_coeffs[ch][bin++]; + accum += coeff * coeff; + } + rms_energy[bnd] = sqrtf(accum / bandsize); + } + + /* Apply a notch filter at transitions between normal and extension + bands and at all wrap points. 
*/ + if (s->spx_atten_code[ch] >= 0) { + const float *atten_tab = ff_eac3_spx_atten_tab[s->spx_atten_code[ch]]; + bin = s->spx_src_start_freq - 2; + for (bnd = 0; bnd < s->num_spx_bands; bnd++) { + if (wrapflag[bnd]) { + float *coeffs = &s->transform_coeffs[ch][bin]; + coeffs[0] *= atten_tab[0]; + coeffs[1] *= atten_tab[1]; + coeffs[2] *= atten_tab[2]; + coeffs[3] *= atten_tab[1]; + coeffs[4] *= atten_tab[0]; + } + bin += s->spx_band_sizes[bnd]; + } + } + + /* Apply noise-blended coefficient scaling based on previously + calculated RMS energy, blending factors, and SPX coordinates for + each band. */ + bin = s->spx_src_start_freq; + for (bnd = 0; bnd < s->num_spx_bands; bnd++) { + float nscale = s->spx_noise_blend[ch][bnd] * rms_energy[bnd] * (1.0f/(1<<31)); + float sscale = s->spx_signal_blend[ch][bnd]; + for (i = 0; i < s->spx_band_sizes[bnd]; i++) { + float noise = nscale * (int32_t)av_lfg_get(&s->dith_state); + s->transform_coeffs[ch][bin] *= sscale; + s->transform_coeffs[ch][bin++] += noise; + } + } + } +} + + /** lrint(M_SQRT2*cos(2*M_PI/12)*(1<<23)) */ #define COEFF_0 10273905LL @@ -492,12 +577,11 @@ int ff_eac3_parse_header(AC3DecodeContext *s) } /* spectral extension attenuation data */ - if (parse_spx_atten_data) { - av_log_missing_feature(s->avctx, "Spectral extension attenuation", 1); - for (ch = 1; ch <= s->fbw_channels; ch++) { - if (get_bits1(gbc)) { // channel has spx attenuation - skip_bits(gbc, 5); // skip spx attenuation code - } + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (parse_spx_atten_data && get_bits1(gbc)) { + s->spx_atten_code[ch] = get_bits(gbc, 5); + } else { + s->spx_atten_code[ch] = -1; } } @@ -514,6 +598,7 @@ int ff_eac3_parse_header(AC3DecodeContext *s) /* syntax state initialization */ for (ch = 1; ch <= s->fbw_channels; ch++) { + s->first_spx_coords[ch] = 1; s->first_cpl_coords[ch] = 1; } s->first_cpl_leak = 1; diff --git a/libavcodec/eac3dec_data.c b/libavcodec/eac3dec_data.c index 6c6a551804..9e9f8a254b 100644 --- 
/**
 * Table E.25: Spectral Extension Attenuation Table
 *
 * Indexed as [attenuation code][bin]; each entry equals
 * pow(2.0, (bin + 1) * (code + 1) / -15.0).
 */
const float ff_eac3_spx_atten_tab[32][3] = {
    /* code  0 */ { 0.954841603910416503f, 0.911722488558216804f, 0.870550563296124125f },
    /* code  1 */ { 0.911722488558216804f, 0.831237896142787758f, 0.757858283255198995f },
    /* code  2 */ { 0.870550563296124125f, 0.757858283255198995f, 0.659753955386447100f },
    /* code  3 */ { 0.831237896142787758f, 0.690956439983888004f, 0.574349177498517438f },
    /* code  4 */ { 0.793700525984099792f, 0.629960524947436595f, 0.500000000000000000f },
    /* code  5 */ { 0.757858283255198995f, 0.574349177498517438f, 0.435275281648062062f },
    /* code  6 */ { 0.723634618720189082f, 0.523647061410313364f, 0.378929141627599553f },
    /* code  7 */ { 0.690956439983888004f, 0.477420801955208307f, 0.329876977693223550f },
    /* code  8 */ { 0.659753955386447100f, 0.435275281648062062f, 0.287174588749258719f },
    /* code  9 */ { 0.629960524947436595f, 0.396850262992049896f, 0.250000000000000000f },
    /* code 10 */ { 0.601512518041058319f, 0.361817309360094541f, 0.217637640824031003f },
    /* code 11 */ { 0.574349177498517438f, 0.329876977693223550f, 0.189464570813799776f },
    /* code 12 */ { 0.548412489847312945f, 0.300756259020529160f, 0.164938488846611775f },
    /* code 13 */ { 0.523647061410313364f, 0.274206244923656473f, 0.143587294374629387f },
    /* code 14 */ { 0.500000000000000000f, 0.250000000000000000f, 0.125000000000000000f },
    /* code 15 */ { 0.477420801955208307f, 0.227930622139554201f, 0.108818820412015502f },
    /* code 16 */ { 0.455861244279108402f, 0.207809474035696939f, 0.094732285406899888f },
    /* code 17 */ { 0.435275281648062062f, 0.189464570813799776f, 0.082469244423305887f },
    /* code 18 */ { 0.415618948071393879f, 0.172739109995972029f, 0.071793647187314694f },
    /* code 19 */ { 0.396850262992049896f, 0.157490131236859149f, 0.062500000000000000f },
    /* code 20 */ { 0.378929141627599553f, 0.143587294374629387f, 0.054409410206007751f },
    /* code 21 */ { 0.361817309360094541f, 0.130911765352578369f, 0.047366142703449930f },
    /* code 22 */ { 0.345478219991944002f, 0.119355200488802049f, 0.041234622211652958f },
    /* code 23 */ { 0.329876977693223550f, 0.108818820412015502f, 0.035896823593657347f },
    /* code 24 */ { 0.314980262473718298f, 0.099212565748012460f, 0.031250000000000000f },
    /* code 25 */ { 0.300756259020529160f, 0.090454327340023621f, 0.027204705103003875f },
    /* code 26 */ { 0.287174588749258719f, 0.082469244423305887f, 0.023683071351724965f },
    /* code 27 */ { 0.274206244923656473f, 0.075189064755132290f, 0.020617311105826479f },
    /* code 28 */ { 0.261823530705156682f, 0.068551561230914118f, 0.017948411796828673f },
    /* code 29 */ { 0.250000000000000000f, 0.062500000000000000f, 0.015625000000000000f },
    /* code 30 */ { 0.238710400977604098f, 0.056982655534888536f, 0.013602352551501938f },
    /* code 31 */ { 0.227930622139554201f, 0.051952368508924235f, 0.011841535675862483f },
};