You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	psymodel: Add channels and channel groups to the psymodel.
This commit is contained in:
		
				
					committed by
					
						 Alex Converse
						Alex Converse
					
				
			
			
				
	
			
			
			
						parent
						
							a3e1f80e8b
						
					
				
				
					commit
					0bc01cc9fe
				
			| @@ -345,7 +345,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce | ||||
|                 float cost_stay_here, cost_get_here; | ||||
|                 float rd = 0.0f; | ||||
|                 for (w = 0; w < group_len; w++) { | ||||
|                     FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb]; | ||||
|                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb]; | ||||
|                     rd += quantize_band_cost(s, sce->coeffs + start + w*128, | ||||
|                                              s->scoefs + start + w*128, size, | ||||
|                                              sce->sf_idx[(win+w)*16+swb], cb, | ||||
| @@ -625,7 +625,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, | ||||
|             qmin = INT_MAX; | ||||
|             qmax = 0.0f; | ||||
|             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { | ||||
|                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; | ||||
|                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; | ||||
|                 if (band->energy <= band->threshold || band->threshold == 0.0f) { | ||||
|                     sce->zeroes[(w+w2)*16+g] = 1; | ||||
|                     continue; | ||||
| @@ -654,7 +654,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, | ||||
|                     float dist = 0; | ||||
|                     int cb = find_min_book(maxval, sce->sf_idx[w*16+g]); | ||||
|                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { | ||||
|                         FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; | ||||
|                         FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; | ||||
|                         dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g], | ||||
|                                                    q + q0, cb, lambda / band->threshold, INFINITY, NULL); | ||||
|                     } | ||||
| @@ -727,7 +727,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, | ||||
|             int nz = 0; | ||||
|             float uplim = 0.0f; | ||||
|             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { | ||||
|                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; | ||||
|                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; | ||||
|                 uplim += band->threshold; | ||||
|                 if (band->energy <= band->threshold || band->threshold == 0.0f) { | ||||
|                     sce->zeroes[(w+w2)*16+g] = 1; | ||||
| @@ -1027,7 +1027,7 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, | ||||
|     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { | ||||
|         for (g = 0; g < sce->ics.num_swb; g++) { | ||||
|             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { | ||||
|                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; | ||||
|                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; | ||||
|                 if (band->energy <= band->threshold) { | ||||
|                     sce->sf_idx[(w+w2)*16+g] = 218; | ||||
|                     sce->zeroes[(w+w2)*16+g] = 1; | ||||
| @@ -1065,8 +1065,8 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, | ||||
|             if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { | ||||
|                 float dist1 = 0.0f, dist2 = 0.0f; | ||||
|                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { | ||||
|                     FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g]; | ||||
|                     FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g]; | ||||
|                     FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; | ||||
|                     FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; | ||||
|                     float minthr = FFMIN(band0->threshold, band1->threshold); | ||||
|                     float maxthr = FFMAX(band0->threshold, band1->threshold); | ||||
|                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { | ||||
|   | ||||
| @@ -210,7 +210,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx) | ||||
|     sizes[1]   = swb_size_128[i]; | ||||
|     lengths[0] = ff_aac_num_swb_1024[i]; | ||||
|     lengths[1] = ff_aac_num_swb_128[i]; | ||||
|     ff_psy_init(&s->psy, avctx, 2, sizes, lengths); | ||||
|     ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], &s->chan_map[1]); | ||||
|     s->psypp = ff_psy_preprocess_init(avctx); | ||||
|     s->coder = &ff_aac_coders[2]; | ||||
|  | ||||
| @@ -570,8 +570,8 @@ static int aac_encode_frame(AVCodecContext *avctx, | ||||
|             put_bits(&s->pb, 3, tag); | ||||
|             put_bits(&s->pb, 4, chan_el_counter[tag]++); | ||||
|             for (ch = 0; ch < chans; ch++) { | ||||
|                 s->cur_channel = start_ch + ch; | ||||
|                 s->psy.model->analyze(&s->psy, s->cur_channel, cpe->ch[ch].coeffs, &wi[ch]); | ||||
|                 s->cur_channel = start_ch * 2 + ch; | ||||
|                 s->psy.model->analyze(&s->psy, start_ch + ch, cpe->ch[ch].coeffs, &wi[ch]); | ||||
|                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda); | ||||
|             } | ||||
|             cpe->common_window = 0; | ||||
| @@ -587,7 +587,7 @@ static int aac_encode_frame(AVCodecContext *avctx, | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             s->cur_channel = start_ch; | ||||
|             s->cur_channel = start_ch * 2; | ||||
|             if (s->options.stereo_mode && cpe->common_window) { | ||||
|                 if (s->options.stereo_mode > 0) { | ||||
|                     IndividualChannelStream *ics = &cpe->ch[0].ics; | ||||
|   | ||||
| @@ -627,7 +627,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, | ||||
|     } | ||||
|  | ||||
|     /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */ | ||||
|     ctx->pe[channel] = pe; | ||||
|     ctx->ch[channel].entropy = pe; | ||||
|     desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8); | ||||
|     desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); | ||||
|     /* NOTE: PE correction is kept simple. During initial testing it had very | ||||
| @@ -731,7 +731,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, | ||||
|     for (w = 0; w < wi->num_windows*16; w += 16) { | ||||
|         for (g = 0; g < num_bands; g++) { | ||||
|             AacPsyBand *band     = &pch->band[w+g]; | ||||
|             FFPsyBand  *psy_band = &ctx->psy_bands[channel*PSY_MAX_BANDS+w+g]; | ||||
|             FFPsyBand  *psy_band = &ctx->ch[channel].psy_bands[w+g]; | ||||
|  | ||||
|             psy_band->threshold = band->thr; | ||||
|             psy_band->energy    = band->energy; | ||||
| @@ -921,5 +921,6 @@ const FFPsyModel ff_aac_psy_model = | ||||
|     .init    = psy_3gpp_init, | ||||
|     .window  = psy_lame_window, | ||||
|     .analyze = psy_3gpp_analyze, | ||||
|     .analyze_group = NULL, | ||||
|     .end     = psy_3gpp_end, | ||||
| }; | ||||
|   | ||||
| @@ -25,16 +25,31 @@ | ||||
|  | ||||
| extern const FFPsyModel ff_aac_psy_model; | ||||
|  | ||||
| av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, | ||||
|                         int num_lens, | ||||
|                         const uint8_t **bands, const int* num_bands) | ||||
| av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, | ||||
|                         const uint8_t **bands, const int* num_bands, | ||||
|                         int num_groups, const uint8_t *group_map) | ||||
| { | ||||
|     int i, j, k = 0; | ||||
|  | ||||
|     ctx->avctx = avctx; | ||||
|     ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels); | ||||
|     ctx->ch        = av_mallocz(sizeof(ctx->ch[0]) * avctx->channels * 2); | ||||
|     ctx->group     = av_mallocz(sizeof(ctx->group[0]) * num_groups); | ||||
|     ctx->bands     = av_malloc (sizeof(ctx->bands[0])     * num_lens); | ||||
|     ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens); | ||||
|     memcpy(ctx->bands,     bands,     sizeof(ctx->bands[0])     *  num_lens); | ||||
|     memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) *  num_lens); | ||||
|  | ||||
|     /* assign channels to groups (with virtual channels for coupling) */ | ||||
|     for (i = 0; i < num_groups; i++) { | ||||
|         /* NOTE: Add 1 to handle the AAC chan_config without modification. | ||||
|          *       This has the side effect of allowing an array of 0s to map | ||||
|          *       to one channel per group. | ||||
|          */ | ||||
|         ctx->group[i].num_ch = group_map[i] + 1; | ||||
|         for (j = 0; j < ctx->group[i].num_ch * 2; j++) | ||||
|             ctx->group[i].ch[j]  = &ctx->ch[k++]; | ||||
|     } | ||||
|  | ||||
|     switch (ctx->avctx->codec_id) { | ||||
|     case CODEC_ID_AAC: | ||||
|         ctx->model = &ff_aac_psy_model; | ||||
| @@ -45,13 +60,24 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, | ||||
|     return 0; | ||||
| } | ||||
| /** | ||||
|  * Locate the channel group that contains a given channel. | ||||
|  * | ||||
|  * Walks ctx->group in order, accumulating each group's num_ch, and stops at | ||||
|  * the first group whose cumulative channel count exceeds channel. | ||||
|  * | ||||
|  * NOTE(review): the walk is not bounded by the number of groups, so channel | ||||
|  * is assumed to be smaller than the sum of num_ch over all groups. A negative | ||||
|  * channel skips the loop entirely and yields &ctx->group[-1]. Confirm callers. | ||||
|  * | ||||
|  * @param ctx     psymodel context whose group array is searched | ||||
|  * @param channel channel index to look up | ||||
|  * @return pointer to the group covering channel | ||||
|  */ | ||||
| FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel) | ||||
| { | ||||
|     int i = 0, ch = 0; | ||||
|     /* i: next group to visit; ch: channels covered by the groups visited so far */ | ||||
|     while (ch <= channel) | ||||
|         ch += ctx->group[i++].num_ch; | ||||
|     /* i was post-incremented past the matching group, hence i-1 */ | ||||
|     return &ctx->group[i-1]; | ||||
| } | ||||
| /** | ||||
|  * Tear down a psymodel context. | ||||
|  * | ||||
|  * Invokes the model's end callback when one is set, then releases the | ||||
|  * arrays held by the context through av_freep. | ||||
|  * | ||||
|  * NOTE(review): ctx->model is dereferenced unconditionally; presumably a | ||||
|  * model has always been selected by the time this runs. Confirm. | ||||
|  * | ||||
|  * @param ctx psymodel context to clean up | ||||
|  */ | ||||
| av_cold void ff_psy_end(FFPsyContext *ctx) | ||||
| { | ||||
|     if (ctx->model->end) | ||||
|         ctx->model->end(ctx); | ||||
|     av_freep(&ctx->bands); | ||||
|     av_freep(&ctx->num_bands); | ||||
|     av_freep(&ctx->psy_bands); /* NOTE(review): psy_bands looks like the pre-change member replaced by group/ch below; confirm which lines are current */ | ||||
|     av_freep(&ctx->group); | ||||
|     av_freep(&ctx->ch); | ||||
| } | ||||
|  | ||||
| typedef struct FFPsyPreprocessContext{ | ||||
|   | ||||
| @@ -40,6 +40,23 @@ typedef struct FFPsyBand { | ||||
|     float perceptual_weight; | ||||
| } FFPsyBand; | ||||
|  | ||||
| /** | ||||
|  * Psychoacoustic information for a single channel: per-band results plus the channel's total perceptual entropy. | ||||
|  */ | ||||
| typedef struct FFPsyChannel { | ||||
|     FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< per-band information; the visible users index it as window*16 + band | ||||
|     float     entropy;                  ///< total perceptual entropy (PE) for this channel, stored by the model's analyze step | ||||
| } FFPsyChannel; | ||||
|  | ||||
| /** | ||||
|  * Psychoacoustic information for an arbitrary group of channels; the groups are populated by ff_psy_init from its group_map argument. | ||||
|  */ | ||||
| typedef struct FFPsyChannelGroup { | ||||
|     FFPsyChannel *ch[PSY_MAX_CHANS];  ///< pointers to the individual channels in the group; ff_psy_init fills num_ch * 2 entries (virtual channels for coupling) | ||||
|     uint8_t num_ch;                   ///< number of channels in this group (group_map entry + 1) | ||||
|     uint8_t coupling[PSY_MAX_BANDS];  ///< per-band flag: allow coupling for this band in the group | ||||
| } FFPsyChannelGroup; | ||||
|  | ||||
| /** | ||||
|  * windowing related information | ||||
|  */ | ||||
| @@ -58,14 +75,14 @@ typedef struct FFPsyContext { | ||||
|     AVCodecContext *avctx;            ///< encoder context | ||||
|     const struct FFPsyModel *model;   ///< encoder-specific model functions | ||||
|  | ||||
|     FFPsyBand *psy_bands;             ///< frame bands information | ||||
|     FFPsyChannel      *ch;            ///< single channel information | ||||
|     FFPsyChannelGroup *group;         ///< channel group information | ||||
|     int num_groups;                   ///< number of channel groups | ||||
|  | ||||
|     uint8_t **bands;                  ///< scalefactor band sizes for possible frame sizes | ||||
|     int     *num_bands;               ///< number of scalefactor bands for possible frame sizes | ||||
|     int num_lens;                     ///< number of scalefactor band sets | ||||
|  | ||||
|     float pe[PSY_MAX_CHANS];          ///< total PE for each channel in the frame | ||||
|  | ||||
|     struct { | ||||
|         int size;                     ///< size of the bit reservoir in bits | ||||
|         int bits;                     ///< number of bits used in the bit reservoir | ||||
| @@ -95,7 +112,7 @@ typedef struct FFPsyModel { | ||||
|     FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); | ||||
|  | ||||
|     /** | ||||
|      * Perform psychoacoustic analysis and set band info (threshold, energy). | ||||
|      * Perform psychoacoustic analysis and set band info (threshold, energy) for a single channel. | ||||
|      * | ||||
|      * @param ctx     model context | ||||
|      * @param channel audio channel number | ||||
| @@ -104,6 +121,16 @@ typedef struct FFPsyModel { | ||||
|      */ | ||||
|     void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, const FFPsyWindowInfo *wi); | ||||
|  | ||||
|     /** | ||||
|      * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. | ||||
|      * | ||||
|      * @param ctx      model context | ||||
|      * @param channel  channel number of the first channel in the group to perform analysis on | ||||
|      * @param coeffs   array of pointers to the transformed coefficients | ||||
|      * @param wi       window information for the channels in the group | ||||
|      */ | ||||
|     void (*analyze_group)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); | ||||
|  | ||||
|     void (*end)    (FFPsyContext *apc); | ||||
| } FFPsyModel; | ||||
|  | ||||
| @@ -115,12 +142,24 @@ typedef struct FFPsyModel { | ||||
|  * @param num_lens   number of possible frame lengths | ||||
|  * @param bands      scalefactor band lengths for all frame lengths | ||||
|  * @param num_bands  number of scalefactor bands for all frame lengths | ||||
|  * @param num_groups number of channel groups | ||||
|  * @param group_map  array with # of channels in group - 1, for each group | ||||
|  * | ||||
|  * @return zero if successful, a negative value if not | ||||
|  */ | ||||
| av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, | ||||
|                         int num_lens, | ||||
|                         const uint8_t **bands, const int* num_bands); | ||||
| av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, | ||||
|                         const uint8_t **bands, const int* num_bands, | ||||
|                         int num_groups, const uint8_t *group_map); | ||||
|  | ||||
| /** | ||||
|  * Determine what group a channel belongs to. | ||||
|  * | ||||
|  * @param ctx     psymodel context | ||||
|  * @param channel channel to locate the group for | ||||
|  * | ||||
|  * @return pointer to the FFPsyChannelGroup this channel belongs to | ||||
|  */ | ||||
| FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); | ||||
|  | ||||
| /** | ||||
|  * Cleanup model context at the end. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user