1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-21 10:55:51 +02:00

aaccoder: rewrite PNS implementation

This commit rewrites the PNS implementation and significantly
improves sonic quality.

The previous implementation marked an incredibly big amount
of SFBs to predict when there was no need for this and this
resulted in quite a large amount of artifacts. Also the
quantization was incorrect (av_clip(4+log2f(...))) which
led to 3x the intensity for PNS values leading to even more
artifacts.

This commit rewrites the PNS search function and introduces
a major change: the PNS values are synthesized and are compared
to the current coefficients in addition to passing through
the revised checks to see whether PNS can be used.

This decreases distortions and makes the current PNS implementation
mainly focused on replacing any low-power non-zero bands as well
as adding any zeroed bands back.

The current encoder's performance is enough (especially with
IS) so PNS isn't really required except to fill in the occasional
few bands as well as extend any zeroed high frequency, so this
combination which is already enabled by default works
to get as much quality as it can within the bits allowed.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
This commit is contained in:
Rostislav Pehlivanov 2015-09-06 15:13:18 +01:00
parent 8848c8440e
commit 033e58941e

View File

@ -49,16 +49,15 @@
#include "aacenc_pred.h"
/** Frequency in Hz for lower limit of noise substitution **/
#define NOISE_LOW_LIMIT 4500
#define NOISE_LOW_LIMIT 4000
/* Energy spread threshold value below which no PNS is used, this corresponds to
* typically around 17Khz, after which PNS usage decays ending at 19Khz */
#define NOISE_SPREAD_THRESHOLD 0.5f
/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
* beyond which no PNS is used (since the SFBs contain tone rather than noise) */
#define NOISE_SPREAD_THRESHOLD 0.9673f
/* This constant gets divided by lambda to return ~1.65 which when multiplied
* by the band->threshold and compared to band->energy is the boundary between
* excessive PNS and little PNS usage. */
#define NOISE_LAMBDA_NUMERATOR 252.1f
/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
* replace low energy non zero bands */
#define NOISE_LAMBDA_REPLACE 1.948f
/**
* structure used in optimal codebook search
@ -863,30 +862,82 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{
int start = 0, w, w2, g;
FFPsyBand *band;
int w, g, w2, i, start, count = 0;
float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
float *NOR34 = &s->scoefs[3*128];
const float lambda = s->lambda;
const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/120.f);
const float thr_mult = NOISE_LAMBDA_NUMERATOR/lambda;
const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/100.f);
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
start = 0;
for (g = 0; g < sce->ics.num_swb; g++) {
if (start*freq_mult > NOISE_LOW_LIMIT*(lambda/170.0f)) {
float energy = 0.0f, threshold = 0.0f, spread = 0.0f;
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
energy += band->energy;
threshold += band->threshold;
spread += band->spread;
}
if (spread > spread_threshold*sce->ics.group_len[w] &&
((sce->zeroes[w*16+g] && energy >= threshold) ||
energy < threshold*thr_mult*sce->ics.group_len[w])) {
sce->band_type[w*16+g] = NOISE_BT;
sce->pns_ener[w*16+g] = energy / sce->ics.group_len[w];
sce->zeroes[w*16+g] = 0;
int noise_sfi, try_pns = 0;
float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
float energy = 0.0f, threshold = 0.0f, spread = 0.0f;
if (start*freq_mult < NOISE_LOW_LIMIT) {
start += sce->ics.swb_sizes[g];
continue;
}
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
energy += band->energy;
spread += band->spread;
threshold += band->threshold;
}
sce->pns_ener[w*16+g] = energy;
if (sce->zeroes[w*16+g]) {
try_pns = 1;
} else if (energy < threshold) {
try_pns = 1;
} else if (spread > spread_threshold) {
try_pns = 0;
} else if (energy < threshold*thr_mult) {
try_pns = 1;
}
if (!try_pns || !energy) {
start += sce->ics.swb_sizes[g];
continue;
}
noise_sfi = av_clip(roundf(log2f(energy)*2), -100, 155); /* Quantize */
noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
float band_energy, scale;
band = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
for (i = 0; i < sce->ics.swb_sizes[g]; i++)
PNS[i] = s->random_state = lcg_random(s->random_state);
band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
scale = noise_amp/sqrtf(band_energy);
s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
abs_pow34_v(NOR34, &sce->coeffs[start+(w+w2)*128], sce->ics.swb_sizes[g]);
abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]);
dist1 += quantize_band_cost(s, &sce->coeffs[start + (w+w2)*128],
NOR34,
sce->ics.swb_sizes[g],
sce->sf_idx[(w+w2)*16+g],
sce->band_alt[(w+w2)*16+g],
lambda/band->threshold, INFINITY, NULL, 0);
dist2 += quantize_band_cost(s, PNS,
PNS34,
sce->ics.swb_sizes[g],
noise_sfi,
NOISE_BT,
lambda/band->threshold, INFINITY, NULL, 0);
}
if (dist2 < dist1) {
sce->band_type[w*16+g] = NOISE_BT;
sce->zeroes[w*16+g] = 0;
if (sce->band_type[w*16+g-1] != NOISE_BT && /* Prevent holes */
sce->band_type[w*16+g-2] == NOISE_BT) {
sce->band_type[w*16+g-1] = NOISE_BT;
sce->zeroes[w*16+g-1] = 0;
}
count++;
}
start += sce->ics.swb_sizes[g];
}