1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

psymodel: Add channels and channel groups to the psymodel.

This commit is contained in:
Nathan Caldwell 2011-06-15 02:50:25 -06:00 committed by Alex Converse
parent a3e1f80e8b
commit 0bc01cc9fe
5 changed files with 91 additions and 25 deletions

View File

@ -345,7 +345,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
float cost_stay_here, cost_get_here; float cost_stay_here, cost_get_here;
float rd = 0.0f; float rd = 0.0f;
for (w = 0; w < group_len; w++) { for (w = 0; w < group_len; w++) {
FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb]; FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
rd += quantize_band_cost(s, sce->coeffs + start + w*128, rd += quantize_band_cost(s, sce->coeffs + start + w*128,
s->scoefs + start + w*128, size, s->scoefs + start + w*128, size,
sce->sf_idx[(win+w)*16+swb], cb, sce->sf_idx[(win+w)*16+swb], cb,
@ -625,7 +625,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
qmin = INT_MAX; qmin = INT_MAX;
qmax = 0.0f; qmax = 0.0f;
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
if (band->energy <= band->threshold || band->threshold == 0.0f) { if (band->energy <= band->threshold || band->threshold == 0.0f) {
sce->zeroes[(w+w2)*16+g] = 1; sce->zeroes[(w+w2)*16+g] = 1;
continue; continue;
@ -654,7 +654,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
float dist = 0; float dist = 0;
int cb = find_min_book(maxval, sce->sf_idx[w*16+g]); int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g], dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
q + q0, cb, lambda / band->threshold, INFINITY, NULL); q + q0, cb, lambda / band->threshold, INFINITY, NULL);
} }
@ -727,7 +727,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
int nz = 0; int nz = 0;
float uplim = 0.0f; float uplim = 0.0f;
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
uplim += band->threshold; uplim += band->threshold;
if (band->energy <= band->threshold || band->threshold == 0.0f) { if (band->energy <= band->threshold || band->threshold == 0.0f) {
sce->zeroes[(w+w2)*16+g] = 1; sce->zeroes[(w+w2)*16+g] = 1;
@ -1027,7 +1027,7 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
for (g = 0; g < sce->ics.num_swb; g++) { for (g = 0; g < sce->ics.num_swb; g++) {
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
if (band->energy <= band->threshold) { if (band->energy <= band->threshold) {
sce->sf_idx[(w+w2)*16+g] = 218; sce->sf_idx[(w+w2)*16+g] = 218;
sce->zeroes[(w+w2)*16+g] = 1; sce->zeroes[(w+w2)*16+g] = 1;
@ -1065,8 +1065,8 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
float dist1 = 0.0f, dist2 = 0.0f; float dist1 = 0.0f, dist2 = 0.0f;
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g]; FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g]; FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
float minthr = FFMIN(band0->threshold, band1->threshold); float minthr = FFMIN(band0->threshold, band1->threshold);
float maxthr = FFMAX(band0->threshold, band1->threshold); float maxthr = FFMAX(band0->threshold, band1->threshold);
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {

View File

@ -210,7 +210,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
sizes[1] = swb_size_128[i]; sizes[1] = swb_size_128[i];
lengths[0] = ff_aac_num_swb_1024[i]; lengths[0] = ff_aac_num_swb_1024[i];
lengths[1] = ff_aac_num_swb_128[i]; lengths[1] = ff_aac_num_swb_128[i];
ff_psy_init(&s->psy, avctx, 2, sizes, lengths); ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], &s->chan_map[1]);
s->psypp = ff_psy_preprocess_init(avctx); s->psypp = ff_psy_preprocess_init(avctx);
s->coder = &ff_aac_coders[2]; s->coder = &ff_aac_coders[2];
@ -570,8 +570,8 @@ static int aac_encode_frame(AVCodecContext *avctx,
put_bits(&s->pb, 3, tag); put_bits(&s->pb, 3, tag);
put_bits(&s->pb, 4, chan_el_counter[tag]++); put_bits(&s->pb, 4, chan_el_counter[tag]++);
for (ch = 0; ch < chans; ch++) { for (ch = 0; ch < chans; ch++) {
s->cur_channel = start_ch + ch; s->cur_channel = start_ch * 2 + ch;
s->psy.model->analyze(&s->psy, s->cur_channel, cpe->ch[ch].coeffs, &wi[ch]); s->psy.model->analyze(&s->psy, start_ch + ch, cpe->ch[ch].coeffs, &wi[ch]);
s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda); s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
} }
cpe->common_window = 0; cpe->common_window = 0;
@ -587,7 +587,7 @@ static int aac_encode_frame(AVCodecContext *avctx,
} }
} }
} }
s->cur_channel = start_ch; s->cur_channel = start_ch * 2;
if (s->options.stereo_mode && cpe->common_window) { if (s->options.stereo_mode && cpe->common_window) {
if (s->options.stereo_mode > 0) { if (s->options.stereo_mode > 0) {
IndividualChannelStream *ics = &cpe->ch[0].ics; IndividualChannelStream *ics = &cpe->ch[0].ics;

View File

@ -627,7 +627,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
} }
/* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */ /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
ctx->pe[channel] = pe; ctx->ch[channel].entropy = pe;
desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8); desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
/* NOTE: PE correction is kept simple. During initial testing it had very /* NOTE: PE correction is kept simple. During initial testing it had very
@ -731,7 +731,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
for (w = 0; w < wi->num_windows*16; w += 16) { for (w = 0; w < wi->num_windows*16; w += 16) {
for (g = 0; g < num_bands; g++) { for (g = 0; g < num_bands; g++) {
AacPsyBand *band = &pch->band[w+g]; AacPsyBand *band = &pch->band[w+g];
FFPsyBand *psy_band = &ctx->psy_bands[channel*PSY_MAX_BANDS+w+g]; FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g];
psy_band->threshold = band->thr; psy_band->threshold = band->thr;
psy_band->energy = band->energy; psy_band->energy = band->energy;
@ -921,5 +921,6 @@ const FFPsyModel ff_aac_psy_model =
.init = psy_3gpp_init, .init = psy_3gpp_init,
.window = psy_lame_window, .window = psy_lame_window,
.analyze = psy_3gpp_analyze, .analyze = psy_3gpp_analyze,
.analyze_group = NULL,
.end = psy_3gpp_end, .end = psy_3gpp_end,
}; };

View File

@ -25,16 +25,31 @@
extern const FFPsyModel ff_aac_psy_model; extern const FFPsyModel ff_aac_psy_model;
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
int num_lens, const uint8_t **bands, const int* num_bands,
const uint8_t **bands, const int* num_bands) int num_groups, const uint8_t *group_map)
{ {
int i, j, k = 0;
ctx->avctx = avctx; ctx->avctx = avctx;
ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels); ctx->ch = av_mallocz(sizeof(ctx->ch[0]) * avctx->channels * 2);
ctx->group = av_mallocz(sizeof(ctx->group[0]) * num_groups);
ctx->bands = av_malloc (sizeof(ctx->bands[0]) * num_lens); ctx->bands = av_malloc (sizeof(ctx->bands[0]) * num_lens);
ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens); ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
memcpy(ctx->bands, bands, sizeof(ctx->bands[0]) * num_lens); memcpy(ctx->bands, bands, sizeof(ctx->bands[0]) * num_lens);
memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens); memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens);
/* assign channels to groups (with virtual channels for coupling) */
for (i = 0; i < num_groups; i++) {
/* NOTE: Add 1 to handle the AAC chan_config without modification.
* This has the side effect of allowing an array of 0s to map
* to one channel per group.
*/
ctx->group[i].num_ch = group_map[i] + 1;
for (j = 0; j < ctx->group[i].num_ch * 2; j++)
ctx->group[i].ch[j] = &ctx->ch[k++];
}
switch (ctx->avctx->codec_id) { switch (ctx->avctx->codec_id) {
case CODEC_ID_AAC: case CODEC_ID_AAC:
ctx->model = &ff_aac_psy_model; ctx->model = &ff_aac_psy_model;
@ -45,13 +60,24 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
return 0; return 0;
} }
/*
 * Walk the channel groups in order, summing their channel counts, and stop
 * at the first group whose cumulative count exceeds the requested channel.
 * No bounds check is made against the number of groups, so the caller must
 * pass a channel covered by the groups set up in ctx->group.
 */
FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel)
{
    int next_group = 0;  /* index of the next group to examine */
    int covered    = 0;  /* channels contained in groups [0, next_group) */

    for (; covered <= channel; next_group++)
        covered += ctx->group[next_group].num_ch;

    /* the loop steps one past the group that contains the channel */
    return &ctx->group[next_group - 1];
}
av_cold void ff_psy_end(FFPsyContext *ctx) av_cold void ff_psy_end(FFPsyContext *ctx)
{ {
if (ctx->model->end) if (ctx->model->end)
ctx->model->end(ctx); ctx->model->end(ctx);
av_freep(&ctx->bands); av_freep(&ctx->bands);
av_freep(&ctx->num_bands); av_freep(&ctx->num_bands);
av_freep(&ctx->psy_bands); av_freep(&ctx->group);
av_freep(&ctx->ch);
} }
typedef struct FFPsyPreprocessContext{ typedef struct FFPsyPreprocessContext{

View File

@ -40,6 +40,23 @@ typedef struct FFPsyBand {
float perceptual_weight; float perceptual_weight;
} FFPsyBand; } FFPsyBand;
/**
* Psychoacoustic information for a single channel.
*
* ff_psy_init() allocates a zeroed array of these in FFPsyContext.ch,
* with avctx->channels * 2 entries.
*/
typedef struct FFPsyChannel {
FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information; the visible users index it as window*16 + band
float entropy; ///< total PE for this channel (psy_3gpp_analyze stores its computed pe here)
} FFPsyChannel;
/**
* Psychoacoustic information for an arbitrary group of channels.
*
* ff_psy_init() sets num_ch to group_map[i] + 1 and points the first
* num_ch * 2 entries of ch at consecutive elements of FFPsyContext.ch
* (its own comment describes the extra entries as virtual channels for
* coupling).
* NOTE(review): nothing visible checks num_ch * 2 against PSY_MAX_CHANS --
* confirm the bound holds for every group_map a caller can pass.
*/
typedef struct FFPsyChannelGroup {
FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group
uint8_t num_ch; ///< number of channels in this group
uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group
} FFPsyChannelGroup;
/** /**
* windowing related information * windowing related information
*/ */
@ -58,14 +75,14 @@ typedef struct FFPsyContext {
AVCodecContext *avctx; ///< encoder context AVCodecContext *avctx; ///< encoder context
const struct FFPsyModel *model; ///< encoder-specific model functions const struct FFPsyModel *model; ///< encoder-specific model functions
FFPsyBand *psy_bands; ///< frame bands information FFPsyChannel *ch; ///< single channel information
FFPsyChannelGroup *group; ///< channel group information
int num_groups; ///< number of channel groups
uint8_t **bands; ///< scalefactor band sizes for possible frame sizes uint8_t **bands; ///< scalefactor band sizes for possible frame sizes
int *num_bands; ///< number of scalefactor bands for possible frame sizes int *num_bands; ///< number of scalefactor bands for possible frame sizes
int num_lens; ///< number of scalefactor band sets int num_lens; ///< number of scalefactor band sets
float pe[PSY_MAX_CHANS]; ///< total PE for each channel in the frame
struct { struct {
int size; ///< size of the bitresevoir in bits int size; ///< size of the bitresevoir in bits
int bits; ///< number of bits used in the bitresevoir int bits; ///< number of bits used in the bitresevoir
@ -95,7 +112,7 @@ typedef struct FFPsyModel {
FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type);
/** /**
* Perform psychoacoustic analysis and set band info (threshold, energy). * Perform psychoacoustic analysis and set band info (threshold, energy) for a single channel.
* *
* @param ctx model context * @param ctx model context
* @param channel audio channel number * @param channel audio channel number
@ -104,6 +121,16 @@ typedef struct FFPsyModel {
*/ */
void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, const FFPsyWindowInfo *wi); void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, const FFPsyWindowInfo *wi);
/**
* Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
*
* @param ctx model context
* @param channel channel number of the first channel in the group to perform analysis on
* @param coeffs array of pointers to the transformed coefficients
* @param wi window information for the channels in the group
*/
void (*analyze_group)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi);
void (*end) (FFPsyContext *apc); void (*end) (FFPsyContext *apc);
} FFPsyModel; } FFPsyModel;
@ -115,12 +142,24 @@ typedef struct FFPsyModel {
* @param num_lens number of possible frame lengths * @param num_lens number of possible frame lengths
* @param bands scalefactor band lengths for all frame lengths * @param bands scalefactor band lengths for all frame lengths
* @param num_bands number of scalefactor bands for all frame lengths * @param num_bands number of scalefactor bands for all frame lengths
* @param num_groups number of channel groups
* @param group_map array with # of channels in group - 1, for each group
* *
* @return zero if successful, a negative value if not * @return zero if successful, a negative value if not
*/ */
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
int num_lens, const uint8_t **bands, const int* num_bands,
const uint8_t **bands, const int* num_bands); int num_groups, const uint8_t *group_map);
/**
* Determine what group a channel belongs to.
*
* The groups are scanned in order, accumulating each group's num_ch, until
* the running total exceeds the requested channel.
*
* @param ctx psymodel context
* @param channel channel to locate the group for; the search does no bounds
* checking, so it must be non-negative and smaller than the
* total number of channels over all groups
*
* @return pointer to the FFPsyChannelGroup this channel belongs to
*/
FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel);
/** /**
* Cleanup model context at the end. * Cleanup model context at the end.