mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
opus: add a native Opus encoder
This marks the first time anyone has written an Opus encoder without using any libopus code. The aim of the encoder is to prove how far the format can go by writing the craziest encoder for it. Right now the encoder is basic: it only supports CBR encoding. Internally, however, every single feature the CELT layer has is implemented (except the pitch pre-filter, which needs to work well with the rest of whatever gets implemented). Psychoacoustic and rate control systems are under development. The encoder takes in frames of 120 samples and, depending on the value of opus_delay, the plan is to use the extra buffered frames as lookahead. Right now the encoder will pick the nearest largest legal frame size and won't use the lookahead, but that'll change once there's a psychoacoustic system. Even though it's a pretty basic encoder, it's already outperforming any other native encoder FFmpeg has by a huge amount. The PVQ search algorithm is faster and more accurate than libopus's algorithm, so the encoder's performance is close to that of libopus at zero complexity (libopus has more SIMD). The algorithm might be ported to libopus or other codecs using PVQ in the future. The encoder still has a few minor bugs, like desyncs at ultra low bitrates (below 9kbps with 20ms frames). Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
This commit is contained in:
parent
07b78340dd
commit
5f47c85e5c
1
configure
vendored
1
configure
vendored
@ -2492,6 +2492,7 @@ nuv_decoder_select="idctdsp lzo"
|
|||||||
on2avc_decoder_select="mdct"
|
on2avc_decoder_select="mdct"
|
||||||
opus_decoder_deps="swresample"
|
opus_decoder_deps="swresample"
|
||||||
opus_decoder_select="mdct15"
|
opus_decoder_select="mdct15"
|
||||||
|
opus_encoder_select="audio_frame_queue audiodsp mdct15"
|
||||||
png_decoder_select="zlib"
|
png_decoder_select="zlib"
|
||||||
png_encoder_select="llvidencdsp zlib"
|
png_encoder_select="llvidencdsp zlib"
|
||||||
prores_decoder_select="blockdsp idctdsp"
|
prores_decoder_select="blockdsp idctdsp"
|
||||||
|
@ -444,6 +444,7 @@ OBJS-$(CONFIG_NUV_DECODER) += nuv.o rtjpeg.o
|
|||||||
OBJS-$(CONFIG_ON2AVC_DECODER) += on2avc.o on2avcdata.o
|
OBJS-$(CONFIG_ON2AVC_DECODER) += on2avc.o on2avcdata.o
|
||||||
OBJS-$(CONFIG_OPUS_DECODER) += opusdec.o opus.o opus_celt.o opus_rc.o \
|
OBJS-$(CONFIG_OPUS_DECODER) += opusdec.o opus.o opus_celt.o opus_rc.o \
|
||||||
opus_pvq.o opus_silk.o opustab.o vorbis_data.o
|
opus_pvq.o opus_silk.o opustab.o vorbis_data.o
|
||||||
|
OBJS-$(CONFIG_OPUS_ENCODER) += opusenc.o opus_rc.o opustab.o opus_pvq.o
|
||||||
OBJS-$(CONFIG_PAF_AUDIO_DECODER) += pafaudio.o
|
OBJS-$(CONFIG_PAF_AUDIO_DECODER) += pafaudio.o
|
||||||
OBJS-$(CONFIG_PAF_VIDEO_DECODER) += pafvideo.o
|
OBJS-$(CONFIG_PAF_VIDEO_DECODER) += pafvideo.o
|
||||||
OBJS-$(CONFIG_PAM_DECODER) += pnmdec.o pnm.o
|
OBJS-$(CONFIG_PAM_DECODER) += pnmdec.o pnm.o
|
||||||
|
@ -449,7 +449,7 @@ void avcodec_register_all(void)
|
|||||||
REGISTER_DECODER(MPC8, mpc8);
|
REGISTER_DECODER(MPC8, mpc8);
|
||||||
REGISTER_ENCDEC (NELLYMOSER, nellymoser);
|
REGISTER_ENCDEC (NELLYMOSER, nellymoser);
|
||||||
REGISTER_DECODER(ON2AVC, on2avc);
|
REGISTER_DECODER(ON2AVC, on2avc);
|
||||||
REGISTER_DECODER(OPUS, opus);
|
REGISTER_ENCDEC (OPUS, opus);
|
||||||
REGISTER_DECODER(PAF_AUDIO, paf_audio);
|
REGISTER_DECODER(PAF_AUDIO, paf_audio);
|
||||||
REGISTER_DECODER(QCELP, qcelp);
|
REGISTER_DECODER(QCELP, qcelp);
|
||||||
REGISTER_DECODER(QDM2, qdm2);
|
REGISTER_DECODER(QDM2, qdm2);
|
||||||
|
@ -61,14 +61,23 @@ enum CeltBlockSize {
|
|||||||
|
|
||||||
typedef struct CeltBlock {
|
typedef struct CeltBlock {
|
||||||
float energy[CELT_MAX_BANDS];
|
float energy[CELT_MAX_BANDS];
|
||||||
|
float lin_energy[CELT_MAX_BANDS];
|
||||||
|
float error_energy[CELT_MAX_BANDS];
|
||||||
float prev_energy[2][CELT_MAX_BANDS];
|
float prev_energy[2][CELT_MAX_BANDS];
|
||||||
|
|
||||||
uint8_t collapse_masks[CELT_MAX_BANDS];
|
uint8_t collapse_masks[CELT_MAX_BANDS];
|
||||||
|
|
||||||
|
int band_bins[CELT_MAX_BANDS]; /* MDCT bins per band */
|
||||||
|
float *band_coeffs[CELT_MAX_BANDS];
|
||||||
|
|
||||||
/* buffer for mdct output + postfilter */
|
/* buffer for mdct output + postfilter */
|
||||||
DECLARE_ALIGNED(32, float, buf)[2048];
|
DECLARE_ALIGNED(32, float, buf)[2048];
|
||||||
DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE];
|
DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE];
|
||||||
|
|
||||||
|
/* Used by the encoder */
|
||||||
|
DECLARE_ALIGNED(32, float, overlap)[120];
|
||||||
|
DECLARE_ALIGNED(32, float, samples)[CELT_MAX_FRAME_SIZE];
|
||||||
|
|
||||||
/* postfilter parameters */
|
/* postfilter parameters */
|
||||||
int pf_period_new;
|
int pf_period_new;
|
||||||
float pf_gains_new[3];
|
float pf_gains_new[3];
|
||||||
@ -94,6 +103,12 @@ struct CeltFrame {
|
|||||||
int end_band;
|
int end_band;
|
||||||
int coded_bands;
|
int coded_bands;
|
||||||
int transient;
|
int transient;
|
||||||
|
int intra;
|
||||||
|
int pfilter;
|
||||||
|
int skip_band_floor;
|
||||||
|
int tf_select;
|
||||||
|
int alloc_trim;
|
||||||
|
int alloc_boost[CELT_MAX_BANDS];
|
||||||
int blocks; /* number of iMDCT blocks in the frame, depends on transient */
|
int blocks; /* number of iMDCT blocks in the frame, depends on transient */
|
||||||
int blocksize; /* size of each block */
|
int blocksize; /* size of each block */
|
||||||
int silence; /* Frame is filled with silence */
|
int silence; /* Frame is filled with silence */
|
||||||
@ -109,6 +124,7 @@ struct CeltFrame {
|
|||||||
int framebits;
|
int framebits;
|
||||||
int remaining;
|
int remaining;
|
||||||
int remaining2;
|
int remaining2;
|
||||||
|
int caps [CELT_MAX_BANDS];
|
||||||
int fine_bits [CELT_MAX_BANDS];
|
int fine_bits [CELT_MAX_BANDS];
|
||||||
int fine_priority[CELT_MAX_BANDS];
|
int fine_priority[CELT_MAX_BANDS];
|
||||||
int pulses [CELT_MAX_BANDS];
|
int pulses [CELT_MAX_BANDS];
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2012 Andrew D'Addesio
|
* Copyright (c) 2012 Andrew D'Addesio
|
||||||
* Copyright (c) 2013-2014 Mozilla Corporation
|
* Copyright (c) 2013-2014 Mozilla Corporation
|
||||||
* Copyright (c) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
|
* Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
@ -78,7 +78,7 @@ static inline void celt_normalize_residual(const int * av_restrict iy, float * a
|
|||||||
X[i] = g * iy[i];
|
X[i] = g * iy[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
static void celt_exp_rotation1(float *X, uint32_t len, uint32_t stride,
|
static void celt_exp_rotation_impl(float *X, uint32_t len, uint32_t stride,
|
||||||
float c, float s)
|
float c, float s)
|
||||||
{
|
{
|
||||||
float *Xptr;
|
float *Xptr;
|
||||||
@ -105,7 +105,7 @@ static void celt_exp_rotation1(float *X, uint32_t len, uint32_t stride,
|
|||||||
|
|
||||||
static inline void celt_exp_rotation(float *X, uint32_t len,
|
static inline void celt_exp_rotation(float *X, uint32_t len,
|
||||||
uint32_t stride, uint32_t K,
|
uint32_t stride, uint32_t K,
|
||||||
enum CeltSpread spread)
|
enum CeltSpread spread, const int encode)
|
||||||
{
|
{
|
||||||
uint32_t stride2 = 0;
|
uint32_t stride2 = 0;
|
||||||
float c, s;
|
float c, s;
|
||||||
@ -133,9 +133,15 @@ static inline void celt_exp_rotation(float *X, uint32_t len,
|
|||||||
extract_collapse_mask().*/
|
extract_collapse_mask().*/
|
||||||
len /= stride;
|
len /= stride;
|
||||||
for (i = 0; i < stride; i++) {
|
for (i = 0; i < stride; i++) {
|
||||||
|
if (encode) {
|
||||||
|
celt_exp_rotation_impl(X + i * len, len, 1, c, -s);
|
||||||
if (stride2)
|
if (stride2)
|
||||||
celt_exp_rotation1(X + i * len, len, stride2, s, c);
|
celt_exp_rotation_impl(X + i * len, len, stride2, s, -c);
|
||||||
celt_exp_rotation1(X + i * len, len, 1, c, s);
|
} else {
|
||||||
|
if (stride2)
|
||||||
|
celt_exp_rotation_impl(X + i * len, len, stride2, s, c);
|
||||||
|
celt_exp_rotation_impl(X + i * len, len, 1, c, s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -270,6 +276,18 @@ static inline int celt_compute_qn(int N, int b, int offset, int pulse_cap,
|
|||||||
return qn;
|
return qn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Convert the quantized vector to an index.
 *
 * Walks y[] from the last element to the first, accumulating the running
 * sum of absolute pulse values and adding the matching CELT_PVQ_U() terms.
 *
 * The index is accumulated in an unsigned 32-bit variable: the function
 * returns uint32_t, and accumulating in a signed int would go through an
 * implementation-defined conversion whenever the index exceeds INT_MAX.
 *
 * @param N number of elements in y
 * @param y pulse vector (signed pulse count per position)
 * @return  the index of y, as passed to the range coder by the caller
 */
static inline uint32_t celt_icwrsi(uint32_t N, const int *y)
{
    int i, sum = 0;
    uint32_t idx = 0;

    for (i = N - 1; i >= 0; i--) {
        /* offset applied only when the pulse at this position is negative */
        const uint32_t i_s = CELT_PVQ_U(N - i, sum + FFABS(y[i]) + 1);
        idx += CELT_PVQ_U(N - i, sum) + (y[i] < 0)*i_s;
        sum += FFABS(y[i]);
    }
    return idx;
}
|
||||||
|
|
||||||
// this code was adapted from libopus
|
// this code was adapted from libopus
|
||||||
static inline uint64_t celt_cwrsi(uint32_t N, uint32_t K, uint32_t i, int *y)
|
static inline uint64_t celt_cwrsi(uint32_t N, uint32_t K, uint32_t i, int *y)
|
||||||
{
|
{
|
||||||
@ -356,12 +374,74 @@ static inline uint64_t celt_cwrsi(uint32_t N, uint32_t K, uint32_t i, int *y)
|
|||||||
return norm;
|
return norm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Writes the pulse vector y (N elements, K pulses) to the range coder as a
 * single uniformly distributed symbol out of CELT_PVQ_V(N, K) values. */
static inline void celt_encode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, uint32_t K)
{
    const uint32_t index = celt_icwrsi(N, y);

    ff_opus_rc_enc_uint(rc, index, CELT_PVQ_V(N, K));
}
|
||||||
|
|
||||||
static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, uint32_t K)
|
static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, uint32_t K)
|
||||||
{
|
{
|
||||||
const uint32_t idx = ff_opus_rc_dec_uint(rc, CELT_PVQ_V(N, K));
|
const uint32_t idx = ff_opus_rc_dec_uint(rc, CELT_PVQ_V(N, K));
|
||||||
return celt_cwrsi(N, K, idx, y);
|
return celt_cwrsi(N, K, idx, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Faster than libopus's search, operates entirely in the signed domain.
|
||||||
|
* Slightly worse/better depending on N, K and the input vector.
|
||||||
|
*/
|
||||||
|
static void celt_pvq_search(float *X, int *y, int K, int N)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
float res = 0.0f, y_norm = 0.0f, xy_norm = 0.0f;
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
res += FFABS(X[i]);
|
||||||
|
|
||||||
|
res = K/res;
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++) {
|
||||||
|
y[i] = lrintf(res*X[i]);
|
||||||
|
y_norm += y[i]*y[i];
|
||||||
|
xy_norm += y[i]*X[i];
|
||||||
|
K -= FFABS(y[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (K) {
|
||||||
|
int max_idx = 0, phase = FFSIGN(K);
|
||||||
|
float max_den = 1.0f, max_num = 0.0f;
|
||||||
|
y_norm += 1.0f;
|
||||||
|
|
||||||
|
for (i = 0; i < N; i++) {
|
||||||
|
float xy_new = xy_norm + 1*phase*FFABS(X[i]);
|
||||||
|
float y_new = y_norm + 2*phase*FFABS(y[i]);
|
||||||
|
xy_new = xy_new * xy_new;
|
||||||
|
if ((max_den*xy_new) > (y_new*max_num)) {
|
||||||
|
max_den = y_new;
|
||||||
|
max_num = xy_new;
|
||||||
|
max_idx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
K -= phase;
|
||||||
|
|
||||||
|
phase *= FFSIGN(X[max_idx]);
|
||||||
|
xy_norm += 1*phase*X[max_idx];
|
||||||
|
y_norm += 2*phase*y[max_idx];
|
||||||
|
y[max_idx] += phase;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Quantizes one band: rotates X in encode mode, searches for the pulse vector,
 * writes it to the range coder and returns the collapse mask of the pulses.
 * The gain argument is accepted but not used by this function. */
static uint32_t celt_alg_quant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K,
                               enum CeltSpread spread, uint32_t blocks, float gain)
{
    int pulses[176];

    /* last argument 1 selects the encoder direction of the rotation */
    celt_exp_rotation(X, N, blocks, K, spread, 1);

    celt_pvq_search(X, pulses, K, N);
    celt_encode_pulses(rc, pulses, N, K);

    return celt_extract_collapse_mask(pulses, N, blocks);
}
|
||||||
|
|
||||||
/** Decode pulse vector and combine the result with the pitch vector to produce
|
/** Decode pulse vector and combine the result with the pitch vector to produce
|
||||||
the final normalised signal in the current band. */
|
the final normalised signal in the current band. */
|
||||||
static uint32_t celt_alg_unquant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K,
|
static uint32_t celt_alg_unquant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K,
|
||||||
@ -371,7 +451,7 @@ static uint32_t celt_alg_unquant(OpusRangeCoder *rc, float *X, uint32_t N, uint3
|
|||||||
|
|
||||||
gain /= sqrtf(celt_decode_pulses(rc, y, N, K));
|
gain /= sqrtf(celt_decode_pulses(rc, y, N, K));
|
||||||
celt_normalize_residual(y, X, N, gain);
|
celt_normalize_residual(y, X, N, gain);
|
||||||
celt_exp_rotation(X, N, blocks, K, spread);
|
celt_exp_rotation(X, N, blocks, K, spread, 0);
|
||||||
return celt_extract_collapse_mask(y, N, blocks);
|
return celt_extract_collapse_mask(y, N, blocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -725,5 +805,353 @@ uint32_t ff_celt_decode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
|
|||||||
}
|
}
|
||||||
cm = av_mod_uintp2(cm, blocks);
|
cm = av_mod_uintp2(cm, blocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return cm;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This has to be, AND MUST BE done by the psychoacoustic system, this has a very
|
||||||
|
* big impact on the entire quantization and especially huge on transients */
|
||||||
|
static int celt_calc_theta(const float *X, const float *Y, int coupling, int N)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
float e[2] = { 0.0f, 0.0f };
|
||||||
|
for (j = 0; j < N; j++) {
|
||||||
|
if (coupling) { /* Coupling case */
|
||||||
|
e[0] += (X[j] + Y[j])*(X[j] + Y[j]);
|
||||||
|
e[1] += (X[j] - Y[j])*(X[j] - Y[j]);
|
||||||
|
} else {
|
||||||
|
e[0] += X[j]*X[j];
|
||||||
|
e[1] += Y[j]*Y[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lrintf(32768.0f*atan2f(sqrtf(e[1]), sqrtf(e[0]))/M_PI);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void celt_stereo_is_decouple(float *X, float *Y, float e_l, float e_r, int N)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
const float energy_n = 1.0f/(sqrtf(e_l*e_l + e_r*e_r) + FLT_EPSILON);
|
||||||
|
e_l *= energy_n;
|
||||||
|
e_r *= energy_n;
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
X[i] = e_l*X[i] + e_r*Y[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In-place conversion of a channel pair: X becomes (X + Y)/sqrt(2) and
 * Y becomes (Y - X)/sqrt(2), element by element. */
static void celt_stereo_ms_decouple(float *X, float *Y, int N)
{
    const float scale = 1.0f/sqrtf(2.0f);
    int n = 0;

    while (n < N) {
        /* keep the old X: it is overwritten before Y is computed */
        const float left = X[n];
        X[n] = (left + Y[n])*scale;
        Y[n] = (Y[n] - left)*scale;
        n++;
    }
}
|
||||||
|
|
||||||
|
/*
 * Encodes a band using PVQ.
 *
 * Depending on the arguments the band is handled in one of these ways:
 *  - N == 1: only a raw sign bit per channel is written (if the budget in
 *    f->remaining2 allows it) and 1 is returned;
 *  - split: the band is either a stereo pair (Y != NULL) or a mono band that
 *    needs more bits than the pulse cache can represent and is therefore cut
 *    in two halves; the angle between both parts is coded and the function
 *    recurses on each part;
 *  - otherwise: b is converted to a pulse count and the band is quantized
 *    with celt_alg_quant().
 *
 * b and f->remaining2 are decremented in steps of 1 << 3 for a single raw
 * bit, so they are presumably expressed in 1/8 bit units - TODO confirm
 * against the caller.
 *
 * @param X     first channel, or the whole band for mono; modified
 * @param Y     second channel, NULL for mono; modified
 * @param N     number of coefficients in the band
 * @param b     bit budget for the band
 * @param level recursion depth of mono splits, 0 for the top-level call
 * @return      the mask returned by celt_alg_quant(), combined over all
 *              parts of the band
 */
uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
                             float *X, float *Y, int N, int b, uint32_t blocks,
                             float *lowband, int duration, float *lowband_out, int level,
                             float gain, float *lowband_scratch, int fill)
{
    const uint8_t *cache;
    int dualstereo, split;
    int imid = 0, iside = 0;
    int N_B;
    int B0 = blocks;
    int time_divide = 0;
    int recombine = 0;
    int inv = 0;
    float mid = 0, side = 0;
    int longblocks = (B0 == 1);
    uint32_t cm = 0;

    /* Coefficients per block. This has to be set before the time/frequency
     * resolution switching below, which tests and shifts it; reading it
     * uninitialized is undefined behaviour. */
    N_B = N / blocks;
    split = dualstereo = (Y != NULL);

    if (N == 1) {
        /* special case for one sample - the decoder's output will be +- 1.0f!!! */
        int i;
        float *x = X;
        for (i = 0; i <= dualstereo; i++) {
            if (f->remaining2 >= 1<<3) {
                ff_opus_rc_put_raw(rc, x[0] < 0, 1);
                f->remaining2 -= 1 << 3;
                b             -= 1 << 3;
            }
            x = Y;
        }
        if (lowband_out)
            lowband_out[0] = X[0];
        return 1;
    }

    if (!dualstereo && level == 0) {
        int tf_change = f->tf_change[band];
        int k;
        if (tf_change > 0)
            recombine = tf_change;
        /* Band recombining to increase frequency resolution */

        /* Work on a copy of lowband if any of the transforms below apply */
        if (lowband &&
            (recombine || ((N_B & 1) == 0 && tf_change < 0) || B0 > 1)) {
            int j;
            for (j = 0; j < N; j++)
                lowband_scratch[j] = lowband[j];
            lowband = lowband_scratch;
        }

        for (k = 0; k < recombine; k++) {
            celt_haar1(X, N >> k, 1 << k);
            fill = ff_celt_bit_interleave[fill & 0xF] | ff_celt_bit_interleave[fill >> 4] << 2;
        }
        blocks >>= recombine;
        N_B <<= recombine;

        /* Increasing the time resolution */
        while ((N_B & 1) == 0 && tf_change < 0) {
            celt_haar1(X, N_B, blocks);
            fill |= fill << blocks;
            blocks <<= 1;
            N_B >>= 1;
            time_divide++;
            tf_change++;
        }
        B0 = blocks;

        /* Reorganize the samples in time order instead of frequency order */
        if (B0 > 1)
            celt_deinterleave_hadamard(f->scratch, X, N_B >> recombine,
                                       B0 << recombine, longblocks);
    }

    /* If we need 1.5 more bits than we can produce, split the band in two. */
    cache = ff_celt_cache_bits +
            ff_celt_cache_index[(duration + 1) * CELT_MAX_BANDS + band];
    if (!dualstereo && duration >= 0 && b > cache[cache[0]] + 12 && N > 2) {
        N >>= 1;
        Y = X + N;
        split = 1;
        duration -= 1;
        if (blocks == 1)
            fill = (fill & 1) | (fill << 1);
        blocks = (blocks + 1) >> 1;
    }

    if (split) {
        int qn;
        int itheta = celt_calc_theta(X, Y, dualstereo, N);
        int mbits, sbits, delta;
        int qalloc;
        int pulse_cap;
        int offset;
        int orig_fill;
        int tell;

        /* Decide on the resolution to give to the split parameter theta */
        pulse_cap = ff_celt_log_freq_range[band] + duration * 8;
        offset = (pulse_cap >> 1) - (dualstereo && N == 2 ? CELT_QTHETA_OFFSET_TWOPHASE :
                                                          CELT_QTHETA_OFFSET);
        qn = (dualstereo && band >= f->intensity_stereo) ? 1 :
             celt_compute_qn(N, b, offset, pulse_cap, dualstereo);
        tell = opus_rc_tell_frac(rc);

        if (qn != 1) {

            /* Quantize the angle to qn steps */
            itheta = (itheta*qn + 8192) >> 14;

            /* Entropy coding of the angle. We use a uniform pdf for the
             * time split, a step for stereo, and a triangular one for the rest. */
            if (dualstereo && N > 2)
                ff_opus_rc_enc_uint_step(rc, itheta, qn / 2);
            else if (dualstereo || B0 > 1)
                ff_opus_rc_enc_uint(rc, itheta, qn + 1);
            else
                ff_opus_rc_enc_uint_tri(rc, itheta, qn);
            itheta = itheta * 16384 / qn;

            if (dualstereo) {
                if (itheta == 0)
                    celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band], f->block[1].lin_energy[band], N);
                else
                    celt_stereo_ms_decouple(X, Y, N);
            }
        } else if (dualstereo) {
            inv = itheta > 8192;
            if (inv) {
                int j;
                for (j = 0; j < N; j++)
                    Y[j] = -Y[j];
            }
            celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band], f->block[1].lin_energy[band], N);

            if (b > 2 << 3 && f->remaining2 > 2 << 3) {
                ff_opus_rc_enc_log(rc, inv, 2);
            } else {
                inv = 0;
            }

            itheta = 0;
        }
        /* Charge the bits spent on the angle to this band */
        qalloc = opus_rc_tell_frac(rc) - tell;
        b -= qalloc;

        orig_fill = fill;
        if (itheta == 0) {
            imid = 32767;
            iside = 0;
            fill = av_mod_uintp2(fill, blocks);
            delta = -16384;
        } else if (itheta == 16384) {
            imid = 0;
            iside = 32767;
            fill &= ((1 << blocks) - 1) << blocks;
            delta = 16384;
        } else {
            imid = celt_cos(itheta);
            iside = celt_cos(16384-itheta);
            /* This is the mid vs side allocation that minimizes squared error
               in that band. */
            delta = ROUND_MUL16((N - 1) << 7, celt_log2tan(iside, imid));
        }

        mid  = imid  / 32768.0f;
        side = iside / 32768.0f;

        /* This is a special case for N=2 that only works for stereo and takes
           advantage of the fact that mid and side are orthogonal to encode
           the side with just one bit. */
        if (N == 2 && dualstereo) {
            int c;
            int sign = 0;
            float tmp;
            float *x2, *y2;
            mbits = b;
            /* Only need one bit for the side */
            sbits = (itheta != 0 && itheta != 16384) ? 1 << 3 : 0;
            mbits -= sbits;
            c = (itheta > 8192);
            f->remaining2 -= qalloc+sbits;

            x2 = c ? Y : X;
            y2 = c ? X : Y;
            if (sbits) {
                sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
                ff_opus_rc_put_raw(rc, sign, 1);
            }
            sign = 1 - 2 * sign;
            /* We use orig_fill here because we want to fold the side, but if
               itheta==16384, we'll have cleared the low bits of fill. */
            cm = ff_celt_encode_band(f, rc, band, x2, NULL, N, mbits, blocks,
                                     lowband, duration, lowband_out, level, gain,
                                     lowband_scratch, orig_fill);
            /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
               and there's no need to worry about mixing with the other channel. */
            y2[0] = -sign * x2[1];
            y2[1] =  sign * x2[0];
            X[0] *= mid;
            X[1] *= mid;
            Y[0] *= side;
            Y[1] *= side;
            tmp = X[0];
            X[0] = tmp - Y[0];
            Y[0] = tmp + Y[0];
            tmp = X[1];
            X[1] = tmp - Y[1];
            Y[1] = tmp + Y[1];
        } else {
            /* "Normal" split code */
            float *next_lowband2     = NULL;
            float *next_lowband_out1 = NULL;
            int next_level = 0;
            int rebalance;

            /* Give more bits to low-energy MDCTs than they would
             * otherwise deserve */
            if (B0 > 1 && !dualstereo && (itheta & 0x3fff)) {
                if (itheta > 8192)
                    /* Rough approximation for pre-echo masking */
                    delta -= delta >> (4 - duration);
                else
                    /* Corresponds to a forward-masking slope of
                     * 1.5 dB per 10 ms */
                    delta = FFMIN(0, delta + (N << 3 >> (5 - duration)));
            }
            mbits = av_clip((b - delta) / 2, 0, b);
            sbits = b - mbits;
            f->remaining2 -= qalloc;

            if (lowband && !dualstereo)
                next_lowband2 = lowband + N; /* >32-bit split case */

            /* Only stereo needs to pass on lowband_out.
             * Otherwise, it's handled at the end */
            if (dualstereo)
                next_lowband_out1 = lowband_out;
            else
                next_level = level + 1;

            /* The part with the larger budget is coded first; whatever it
             * leaves unused (beyond 3 << 3) is handed to the other part. */
            rebalance = f->remaining2;
            if (mbits >= sbits) {
                /* In stereo mode, we do not apply a scaling to the mid
                 * because we need the normalized mid for folding later */
                cm = ff_celt_encode_band(f, rc, band, X, NULL, N, mbits, blocks,
                                         lowband, duration, next_lowband_out1,
                                         next_level, dualstereo ? 1.0f : (gain * mid),
                                         lowband_scratch, fill);

                rebalance = mbits - (rebalance - f->remaining2);
                if (rebalance > 3 << 3 && itheta != 0)
                    sbits += rebalance - (3 << 3);

                /* For a stereo split, the high bits of fill are always zero,
                 * so no folding will be done to the side. */
                cm |= ff_celt_encode_band(f, rc, band, Y, NULL, N, sbits, blocks,
                                          next_lowband2, duration, NULL,
                                          next_level, gain * side, NULL,
                                          fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));
            } else {
                /* For a stereo split, the high bits of fill are always zero,
                 * so no folding will be done to the side. */
                cm = ff_celt_encode_band(f, rc, band, Y, NULL, N, sbits, blocks,
                                         next_lowband2, duration, NULL,
                                         next_level, gain * side, NULL,
                                         fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));

                rebalance = sbits - (rebalance - f->remaining2);
                if (rebalance > 3 << 3 && itheta != 16384)
                    mbits += rebalance - (3 << 3);

                /* In stereo mode, we do not apply a scaling to the mid because
                 * we need the normalized mid for folding later */
                cm |= ff_celt_encode_band(f, rc, band, X, NULL, N, mbits, blocks,
                                          lowband, duration, next_lowband_out1,
                                          next_level, dualstereo ? 1.0f : (gain * mid),
                                          lowband_scratch, fill);
            }
        }
    } else {
        /* This is the basic no-split case */
        uint32_t q         = celt_bits2pulses(cache, b);
        uint32_t curr_bits = celt_pulses2bits(cache, q);
        f->remaining2 -= curr_bits;

        /* Ensures we can never bust the budget */
        while (f->remaining2 < 0 && q > 0) {
            f->remaining2 += curr_bits;
            curr_bits      = celt_pulses2bits(cache, --q);
            f->remaining2 -= curr_bits;
        }

        if (q != 0) {
            /* Finally do the actual quantization */
            cm = celt_alg_quant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1),
                                f->spread, blocks, gain);
        }
    }

    return cm;
}
|
||||||
|
@ -32,4 +32,10 @@ uint32_t ff_celt_decode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
|
|||||||
float *lowband, int duration, float *lowband_out, int level,
|
float *lowband, int duration, float *lowband_out, int level,
|
||||||
float gain, float *lowband_scratch, int fill);
|
float gain, float *lowband_scratch, int fill);
|
||||||
|
|
||||||
|
/* Encodes a band using PVQ */
|
||||||
|
uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
|
||||||
|
float *X, float *Y, int N, int b, uint32_t blocks,
|
||||||
|
float *lowband, int duration, float *lowband_out, int level,
|
||||||
|
float gain, float *lowband_scratch, int fill);
|
||||||
|
|
||||||
#endif /* AVCODEC_OPUS_PVQ_H */
|
#endif /* AVCODEC_OPUS_PVQ_H */
|
||||||
|
1130
libavcodec/opusenc.c
Normal file
1130
libavcodec/opusenc.c
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user