From 025ccf1f8bd669f45b628acf51e7febcb1fddd3b Mon Sep 17 00:00:00 2001 From: Nathan Caldwell Date: Sat, 17 Dec 2011 18:45:55 -0700 Subject: [PATCH] aacenc: Request normalized float samples instead of converting s16 samples to float. Signed-off-by: Alex Converse --- libavcodec/aacenc.c | 12 ++++++------ libavcodec/aacenc.h | 2 +- libavcodec/aacpsy.c | 10 +++++----- libavcodec/psymodel.c | 7 +++---- libavcodec/psymodel.h | 7 +++---- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index 33396b2e83..3bd04bffe1 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -167,7 +167,7 @@ static void put_audio_specific_config(AVCodecContext *avctx) } static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s, - SingleChannelElement *sce, short *audio) + SingleChannelElement *sce, float *audio) { int i, k; const int chans = avctx->channels; @@ -434,7 +434,7 @@ static int aac_encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) { AACEncContext *s = avctx->priv_data; - int16_t *samples = s->samples, *samples2, *la; + float *samples = s->samples, *samples2, *la; ChannelElement *cpe; int i, ch, w, g, chans, tag, start_ch; int chan_el_counter[4]; @@ -452,7 +452,7 @@ static int aac_encode_frame(AVCodecContext *avctx, for (i = 0; i < s->chan_map[0]; i++) { tag = s->chan_map[i+1]; chans = tag == TYPE_CPE ? 
2 : 1; - ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch, + ff_psy_preprocess(s->psypp, (float*)data + start_ch, samples2 + start_ch, start_ch, chans); start_ch += chans; } @@ -621,9 +621,9 @@ static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s) ff_init_ff_sine_windows(10); ff_init_ff_sine_windows(7); - if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 1.0)) + if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) return ret; - if (ret = ff_mdct_init(&s->mdct128, 8, 0, 1.0)) + if (ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) return ret; return 0; @@ -722,7 +722,7 @@ AVCodec ff_aac_encoder = { .encode = aac_encode_frame, .close = aac_encode_end, .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, - .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, + .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"), .priv_class = &aacenc_class, }; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index acd185a809..1f5e6503ca 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -58,7 +58,7 @@ typedef struct AACEncContext { FFTContext mdct1024; ///< long (1024 samples) frame transform context FFTContext mdct128; ///< short (128 samples) frame transform context DSPContext dsp; - int16_t *samples; ///< saved preprocessed input + float *samples; ///< saved preprocessed input int samplerate_index; ///< MPEG-4 samplerate index const uint8_t *chan_map; ///< channel configuration map diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c index 577d8fba80..5e9e3913e8 100644 --- a/libavcodec/aacpsy.c +++ b/libavcodec/aacpsy.c @@ -776,9 +776,8 @@ static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int u ctx->next_window_seq = blocktype; } -static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, - const int16_t *audio, const int16_t *la, - int channel, int prev_type) +static 
FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, + const float *la, int channel, int prev_type) { AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data; AacPsyChannel *pch = &pctx->ch[channel]; @@ -796,7 +795,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS]; float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 }; int chans = ctx->avctx->channels; - const int16_t *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans; + const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans; int j, att_sum = 0; /* LAME comment: apply high pass filter of fs/4 */ @@ -808,7 +807,8 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]); sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]); } - hpfsmpl[i] = sum1 + sum2; + /* NOTE: The LAME psymodel expects its input in the range -32768 to 32768. Tuning this for normalized floats would be difficult. 
*/ + hpfsmpl[i] = (sum1 + sum2) * 32768.0f; } /* Calculate the energies of each sub-shortblock */ diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c index 740d859456..49df1189e4 100644 --- a/libavcodec/psymodel.c +++ b/libavcodec/psymodel.c @@ -112,14 +112,13 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av return ctx; } -void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, - const int16_t *audio, int16_t *dest, - int tag, int channels) +void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, const float *audio, + float *dest, int tag, int channels) { int ch, i; if (ctx->fstate) { for (ch = 0; ch < channels; ch++) - ff_iir_filter(ctx->fcoeffs, ctx->fstate[tag+ch], ctx->avctx->frame_size, + ff_iir_filter_flt(ctx->fcoeffs, ctx->fstate[tag+ch], ctx->avctx->frame_size, audio + ch, ctx->avctx->channels, dest + ch, ctx->avctx->channels); } else { diff --git a/libavcodec/psymodel.h b/libavcodec/psymodel.h index 3e866e40cc..03d078ed58 100644 --- a/libavcodec/psymodel.h +++ b/libavcodec/psymodel.h @@ -109,7 +109,7 @@ typedef struct FFPsyModel { * * @return suggested window information in a structure */ - FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); + FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type); /** * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. 
@@ -179,9 +179,8 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av * @param tag channel number * @param channels number of channel to preprocess (some additional work may be done on stereo pair) */ -void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, - const int16_t *audio, int16_t *dest, - int tag, int channels); +void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, const float *audio, + float *dest, int tag, int channels); /** * Cleanup audio preprocessing module.