FFmpeg/libavcodec/aaccoder_twoloop.h

/*
 * AAC encoder twoloop coder
 * Copyright (C) 2008-2009 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC encoder twoloop coder
 * @author Konstantin Shishkov
 */

/**
 * This file contains a template for the twoloop coder function.
 * The following functions from aacenc_quantization/util.h must already be
 * declared by the file that includes this one. They are not included
 * explicitly here so that alternative implementations can be provided:
 *  - quantize_band_cost
 *  - abs_pow34_v
 *  - find_max_val
 *  - find_min_book
 */

#ifndef AVCODEC_AACCODER_TWOLOOP_H
#define AVCODEC_AACCODER_TWOLOOP_H

#include <float.h>
#include "libavutil/mathematics.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
#include "aacenc.h"
#include "aactab.h"
#include "aacenctab.h"
#include "aac_tablegen_decl.h"


/**
 * Two-loop quantizer search taken from ISO 13818-7 Appendix C.
 *
 * Decides, for every scalefactor band of one channel, whether the band is
 * zeroed (sce->zeroes[]), which scalefactor index it uses (sce->sf_idx[]) and
 * which codebook it uses (sce->band_type[]). Per-band arrays are indexed as
 * w*16+g (window w, band g); the coefficients of window w start at w*128.
 *
 * The inner loop moves all scalefactors up when the estimated bit count is
 * over budget and down otherwise, halving the step on every pass. The outer
 * loop then lowers the scalefactor of each band whose distortion measure
 * exceeds its limit and repeats, for at most 10 passes or until no
 * scalefactor changes.
 *
 * @param avctx  codec context; bit_rate, sample_rate and channels are read to
 *               derive the bit budget
 * @param s      encoder context; supplies the psychoacoustic band data of
 *               s->cur_channel and the s->scoefs scratch buffer
 * @param sce    channel element to process; ics and coeffs are read; zeroes
 *               and sf_idx are written, and band_type is written unless every
 *               band ends up zeroed (the function returns early in that case)
 * @param lambda scales the bit budget by lambda / 120
 */
static void search_for_quantizers_twoloop(AVCodecContext *avctx,
                                          AACEncContext *s,
                                          SingleChannelElement *sce,
                                          const float lambda)
{
    int start = 0, i, w, w2, g;
    // Keep the raw budget in floating point: converting a double that does
    // not fit into an int is undefined behaviour, so clamp before converting.
    const double rawbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
    int destbits;
    float dists[128] = { 0 }, uplims[128] = { 0 };
    float maxvals[128];
    int fflag, minscaler;
    int its    = 0;
    int any_nz = 0; // set as soon as one band is not zeroed
    float minthr = INFINITY;

    // for values above this the decoder might end up in an endless loop
    // due to always having more bits than what can be encoded.
    // (NaN also falls back to the cap because the comparison is false.)
    destbits = rawbits < 5800.0 ? (int)rawbits : 5800;
    //XXX: some heuristic to determine initial quantizers will reduce search time
    //determine zero bands and upper limits
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            int nz = 0;
            float uplim = 0.0f;
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                uplim += band->threshold;
                // a window whose energy does not exceed its threshold (or
                // whose threshold is zero) is marked as zeroed
                if (band->energy <= band->threshold || band->threshold == 0.0f) {
                    sce->zeroes[(w+w2)*16+g] = 1;
                    continue;
                }
                nz = 1;
            }
            uplims[w*16+g] = uplim * 512;
            // the group's first window carries the flag for the whole group
            sce->zeroes[w*16+g] = !nz;
            if (nz)
                minthr = FFMIN(minthr, uplim);
            any_nz |= nz;
        }
    }

    //initial scalefactors, derived from each band's limit relative to the smallest one
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.num_swb; g++) {
            const int idx = w*16+g;

            if (sce->zeroes[idx]) {
                sce->sf_idx[idx] = SCALE_ONE_POS;
                continue;
            }
            sce->sf_idx[idx] = SCALE_ONE_POS + FFMIN(log2f(uplims[idx]/minthr)*4,59);
        }
    }

    // nothing to quantize when every band is zeroed
    if (!any_nz)
        return;
    abs_pow34_v(s->scoefs, sce->coeffs, 1024);

    //per-band maximum of the scaled coefficients, used for codebook selection
    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = w*128;
        for (g = 0; g < sce->ics.num_swb; g++) {
            const float *scaled = s->scoefs + start;
            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
            start += sce->ics.swb_sizes[g];
        }
    }

    //perform two-loop search
    //outer loop - improve quality
    do {
        int tbits, qstep;
        minscaler = sce->sf_idx[0];
        //inner loop - quantize spectrum to fit into given number of bits
        //only the first outer pass starts with a coarse step
        qstep = its ? 1 : 32;
        do {
            int prev = -1; // scalefactor of the previously coded band, -1 if none yet
            tbits = 0;
            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                start = w*128;
                for (g = 0; g < sce->ics.num_swb; g++) {
                    const int idx       = w*16+g;
                    const float *coefs  = &sce->coeffs[start];
                    const float *scaled = &s->scoefs[start];
                    int bits = 0;
                    int cb;
                    float dist = 0.0f;

                    // zeroed bands and bands at the scalefactor ceiling cost nothing here
                    if (sce->zeroes[idx] || sce->sf_idx[idx] >= 218) {
                        start += sce->ics.swb_sizes[g];
                        continue;
                    }
                    minscaler = FFMIN(minscaler, sce->sf_idx[idx]);
                    cb = find_min_book(maxvals[idx], sce->sf_idx[idx]);
                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                        int b;
                        dist += quantize_band_cost(s, coefs + w2*128,
                                                   scaled + w2*128,
                                                   sce->ics.swb_sizes[g],
                                                   sce->sf_idx[idx],
                                                   cb,
                                                   1.0f,
                                                   INFINITY,
                                                   &b,
                                                   0);
                        bits += b;
                    }
                    dists[idx] = dist - bits;
                    // scalefactors are costed as a difference to the previous coded band
                    if (prev != -1) {
                        bits += ff_aac_scalefactor_bits[sce->sf_idx[idx] - prev + SCALE_DIFF_ZERO];
                    }
                    tbits += bits;
                    start += sce->ics.swb_sizes[g];
                    prev = sce->sf_idx[idx];
                }
            }
            // over budget: move every scalefactor up; otherwise move them down
            if (tbits > destbits) {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] < 218 - qstep)
                        sce->sf_idx[i] += qstep;
            } else {
                for (i = 0; i < 128; i++)
                    if (sce->sf_idx[i] > 60 - qstep)
                        sce->sf_idx[i] -= qstep;
            }
            qstep >>= 1;
            // keep stepping by one while still more than 2% over budget
            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
                qstep = 1;
        } while (qstep);

        fflag = 0;
        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);

        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
            for (g = 0; g < sce->ics.num_swb; g++) {
                const int idx    = w*16+g;
                const int prevsc = sce->sf_idx[idx];

                if (dists[idx] > uplims[idx] && sce->sf_idx[idx] > 60) {
                    if (find_min_book(maxvals[idx], sce->sf_idx[idx]-1))
                        sce->sf_idx[idx]--;
                    else //Try to make sure there is some energy in every band
                        sce->sf_idx[idx]-=2;
                }
                // keep all scalefactors within SCALE_MAX_DIFF of the smallest one
                sce->sf_idx[idx] = av_clip(sce->sf_idx[idx], minscaler, minscaler + SCALE_MAX_DIFF);
                sce->sf_idx[idx] = FFMIN(sce->sf_idx[idx], 219);
                if (sce->sf_idx[idx] != prevsc)
                    fflag = 1;
                sce->band_type[idx] = find_min_book(maxvals[idx], sce->sf_idx[idx]);
            }
        }
        its++;
    } while (fflag && its < 10);
}

#endif /* AVCODEC_AACCODER_TWOLOOP_H */
AAC encoder: refactor to resynchronize MIPS port This patch refactors the AAC coders to reuse code between the MIPS port and the regular, portable C code. There were two main functions that had to use hand-optimized versions of quantization code: - search_for_quantizers_twoloop - codebook_trellis_rate Those two were split into their own template header files so they can be inlined inside both the MIPS port and the generic code. In each context, they'll link to their specialized implementations, and thus be optimized by the compiler. This approach I believe is better than maintaining several copies of each function. As past experience has proven, having to keep those in sync was error prone. In this way, they will remain in sync by default. Also, an implementation of the dequantized output argument for the optimized quantize_and_encode functions is included in the patch. While the current implementation of search_for_pred still isn't using it, future iterations of main prediction probably will. It should not imply any measurable performance hit while not being used. 2015-09-15 08:59:45 +02:00			`/*`
			`* AAC encoder twoloop coder`
			`* Copyright (C) 2008-2009 Konstantin Shishkov`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`/**`
			`* @file`
			`* AAC encoder twoloop coder`
			`* @author Konstantin Shishkov`
			`*/`

			`/**`
			`* This file contains a template for the twoloop coder function.`
			`* It needs to be provided, externally, as an already included declaration,`
			`* the following functions from aacenc_quantization/util.h. They're not included`
			`* explicitly here to make it possible to provide alternative implementations:`
			`* - quantize_band_cost`
			`* - abs_pow34_v`
			`* - find_max_val`
			`* - find_min_book`
			`*/`

			`#ifndef AVCODEC_AACCODER_TWOLOOP_H`
			`#define AVCODEC_AACCODER_TWOLOOP_H`

			`#include <float.h>`
			`#include "libavutil/mathematics.h"`
			`#include "avcodec.h"`
			`#include "put_bits.h"`
			`#include "aac.h"`
			`#include "aacenc.h"`
			`#include "aactab.h"`
			`#include "aacenctab.h"`
			`#include "aac_tablegen_decl.h"`


			`/**`
			`* two-loop quantizers search taken from ISO 13818-7 Appendix C`
			`*/`
			`static void search_for_quantizers_twoloop(AVCodecContext *avctx,`
			`AACEncContext *s,`
			`SingleChannelElement *sce,`
			`const float lambda)`
			`{`
			`int start = 0, i, w, w2, g;`
			`int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);`
			`float dists[128] = { 0 }, uplims[128] = { 0 };`
			`float maxvals[128];`
			`int fflag, minscaler;`
			`int its = 0;`
			`int allz = 0;`
			`float minthr = INFINITY;`

			`// for values above this the decoder might end up in an endless loop`
			`// due to always having more bits than what can be encoded.`
			`destbits = FFMIN(destbits, 5800);`
			`//XXX: some heuristic to determine initial quantizers will reduce search time`
			`//determine zero bands and upper limits`
			`for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {`
			`for (g = 0; g < sce->ics.num_swb; g++) {`
			`int nz = 0;`
			`float uplim = 0.0f, energy = 0.0f;`
			`for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {`
			`FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];`
			`uplim += band->threshold;`
			`energy += band->energy;`
			`if (band->energy <= band->threshold \|\| band->threshold == 0.0f) {`
			`sce->zeroes[(w+w2)*16+g] = 1;`
			`continue;`
			`}`
			`nz = 1;`
			`}`
			`uplims[w*16+g] = uplim *512;`
			`sce->zeroes[w*16+g] = !nz;`
			`if (nz)`
			`minthr = FFMIN(minthr, uplim);`
			`allz \|= nz;`
			`}`
			`}`
			`for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {`
			`for (g = 0; g < sce->ics.num_swb; g++) {`
			`if (sce->zeroes[w*16+g]) {`
			`sce->sf_idx[w*16+g] = SCALE_ONE_POS;`
			`continue;`
			`}`
			`sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);`
			`}`
			`}`

			`if (!allz)`
			`return;`
			`abs_pow34_v(s->scoefs, sce->coeffs, 1024);`

			`for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {`
			`start = w*128;`
			`for (g = 0; g < sce->ics.num_swb; g++) {`
			`const float *scaled = s->scoefs + start;`
			`maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);`
			`start += sce->ics.swb_sizes[g];`
			`}`
			`}`

			`//perform two-loop search`
			`//outer loop - improve quality`
			`do {`
			`int tbits, qstep;`
			`minscaler = sce->sf_idx[0];`
			`//inner loop - quantize spectrum to fit into given number of bits`
			`qstep = its ? 1 : 32;`
			`do {`
			`int prev = -1;`
			`tbits = 0;`
			`for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {`
			`start = w*128;`
			`for (g = 0; g < sce->ics.num_swb; g++) {`
			`const float *coefs = &sce->coeffs[start];`
			`const float *scaled = &s->scoefs[start];`
			`int bits = 0;`
			`int cb;`
			`float dist = 0.0f;`

			`if (sce->zeroes[w*16+g] \|\| sce->sf_idx[w*16+g] >= 218) {`
			`start += sce->ics.swb_sizes[g];`
			`continue;`
			`}`
			`minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);`
			`cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);`
			`for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {`
			`int b;`
			`dist += quantize_band_cost(s, coefs + w2*128,`
			`scaled + w2*128,`
			`sce->ics.swb_sizes[g],`
			`sce->sf_idx[w*16+g],`
			`cb,`
			`1.0f,`
			`INFINITY,`
			`&b,`
			`0);`
			`bits += b;`
			`}`
			`dists[w*16+g] = dist - bits;`
			`if (prev != -1) {`
			`bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];`
			`}`
			`tbits += bits;`
			`start += sce->ics.swb_sizes[g];`
			`prev = sce->sf_idx[w*16+g];`
			`}`
			`}`
			`if (tbits > destbits) {`
			`for (i = 0; i < 128; i++)`
			`if (sce->sf_idx[i] < 218 - qstep)`
			`sce->sf_idx[i] += qstep;`
			`} else {`
			`for (i = 0; i < 128; i++)`
			`if (sce->sf_idx[i] > 60 - qstep)`
			`sce->sf_idx[i] -= qstep;`
			`}`
			`qstep >>= 1;`
			`if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)`
			`qstep = 1;`
			`} while (qstep);`

			`fflag = 0;`
			`minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);`

			`for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {`
			`for (g = 0; g < sce->ics.num_swb; g++) {`
			`int prevsc = sce->sf_idx[w*16+g];`
			`if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {`
			`if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))`
			`sce->sf_idx[w*16+g]--;`
			`else //Try to make sure there is some energy in every band`
			`sce->sf_idx[w*16+g]-=2;`
			`}`
			`sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);`
			`sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);`
			`if (sce->sf_idx[w*16+g] != prevsc)`
			`fflag = 1;`
			`sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);`
			`}`
			`}`
			`its++;`
			`} while (fflag && its < 10);`
			`}`

			`#endif /* AVCODEC_AACCODER_TWOLOOP_H */`