1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00
FFmpeg/libavcodec/atrac9dec.c
Rostislav Pehlivanov 755e618399 lavc: implement an ATRAC9 decoder
This commit implements a full ATRAC9 decoder, a simple low-delay codec
developed by Sony and used in most PSVita games, some PS3 games and some
PS4 games. Its similar to AAC in that it uses Huffman coded scalefactors
but instead of vector quantization it just Huffman codes the spectral
coefficients (in a way similar to how Opus splits band energy coding
into coarse and fine precision). It opts to write rather large Huffman
codes by packing several small coefficients into one Huffman coded
symbol, though I don't believe this increases efficiency at all.
Band extension implements SBC in a simple way, first it mirrors the
lower spectrum onto the higher frequencies and then it uses one of 5
filters to shape it. Noise substitution is implemented via 2 of them.
Unlike previous ATRAC codecs, there's no QMF, this is a standard MDCT
codec.

Based off of the reverse engineering work of Alex Barney.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
2018-07-03 20:15:01 +01:00

930 lines
30 KiB
C

/*
* ATRAC9 decoder
* Copyright (c) 2018 Rostislav Pehlivanov <atomnuker@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "internal.h"
#include "get_bits.h"
#include "fft.h"
#include "atrac9tab.h"
#include "libavutil/lfg.h"
#include "libavutil/float_dsp.h"
typedef struct ATRAC9ChannelData {
int band_ext;
int q_unit_cnt;
int band_ext_data[4];
int32_t scalefactors[31];
int32_t scalefactors_prev[31];
int precision_coarse[30];
int precision_fine[30];
int precision_mask[30];
int codebookset[30];
int32_t q_coeffs_coarse[256];
int32_t q_coeffs_fine[256];
DECLARE_ALIGNED(32, float, coeffs )[256];
DECLARE_ALIGNED(32, float, prev_win)[128];
} ATRAC9ChannelData;
typedef struct ATRAC9BlockData {
ATRAC9ChannelData channel[2];
/* Base */
int band_count;
int q_unit_cnt;
int q_unit_cnt_prev;
/* Stereo block only */
int stereo_q_unit;
/* Band extension only */
int has_band_ext;
int has_band_ext_data;
int band_ext_q_unit;
int band_ext_mode;
/* Gradient */
int grad_mode;
int grad_boundary;
int gradient[31];
/* Stereo */
int cpe_base_channel;
int is_signs[30];
} ATRAC9BlockData;
typedef struct ATRAC9Context {
AVCodecContext *avctx;
AVFloatDSPContext *fdsp;
FFTContext imdct;
ATRAC9BlockData block[5];
AVLFG lfg;
/* Set on init */
int frame_log2;
int avg_frame_size;
int frame_count;
int samplerate_idx;
const ATRAC9BlockConfig *block_config;
/* Generated on init */
VLC sf_vlc[2][8]; /* Signed/unsigned, length */
VLC coeff_vlc[2][8][4]; /* Cookbook, precision, cookbook index */
uint8_t alloc_curve[48][48];
DECLARE_ALIGNED(32, float, imdct_win)[256];
DECLARE_ALIGNED(32, float, temp)[256];
} ATRAC9Context;
static inline int parse_gradient(ATRAC9Context *s, ATRAC9BlockData *b,
GetBitContext *gb)
{
int grad_range[2];
int grad_value[2];
int values, sign, base;
uint8_t *curve;
float scale;
b->grad_mode = get_bits(gb, 2);
if (b->grad_mode) {
grad_range[0] = get_bits(gb, 5);
grad_range[1] = 31;
grad_value[0] = get_bits(gb, 5);
grad_value[1] = 31;
} else {
grad_range[0] = get_bits(gb, 6);
grad_range[1] = get_bits(gb, 6) + 1;
grad_value[0] = get_bits(gb, 5);
grad_value[1] = get_bits(gb, 5);
}
b->grad_boundary = get_bits(gb, 4);
if (grad_range[0] >= grad_range[1] || grad_range[1] > 47)
return AVERROR_INVALIDDATA;
if (grad_value[0] >= grad_value[1] || grad_value[1] >= 32)
return AVERROR_INVALIDDATA;
if (b->grad_boundary > b->q_unit_cnt)
return AVERROR_INVALIDDATA;
values = grad_value[1] - grad_value[0];
sign = 1 - 2*(values < 0);
base = grad_value[0] + sign;
scale = (FFABS(values) - 1) / 31.0f;
curve = s->alloc_curve[grad_range[1] - grad_range[0] - 1];
for (int i = 0; i <= b->q_unit_cnt; i++)
b->gradient[i] = grad_value[i >= grad_range[0]];
for (int i = grad_range[0]; i < grad_range[1]; i++)
b->gradient[i] = base + sign*((int)(scale*curve[i - grad_range[0]]));
return 0;
}
static inline void calc_precision(ATRAC9Context *s, ATRAC9BlockData *b,
ATRAC9ChannelData *c)
{
memset(c->precision_mask, 0, sizeof(c->precision_mask));
for (int i = 1; i < b->q_unit_cnt; i++) {
const int delta = FFABS(c->scalefactors[i] - c->scalefactors[i - 1]) - 1;
if (delta > 0) {
const int neg = c->scalefactors[i - 1] > c->scalefactors[i];
c->precision_mask[i - neg] += FFMIN(delta, 5);
}
}
if (b->grad_mode) {
for (int i = 0; i < b->q_unit_cnt; i++) {
c->precision_coarse[i] = c->scalefactors[i];
c->precision_coarse[i] += c->precision_mask[i] - b->gradient[i];
if (c->precision_coarse[i] < 0)
continue;
switch (b->grad_mode) {
case 1:
c->precision_coarse[i] >>= 1;
break;
case 2:
c->precision_coarse[i] = (3 * c->precision_coarse[i]) >> 3;
break;
case 3:
c->precision_coarse[i] >>= 2;
break;
}
}
} else {
for (int i = 0; i < b->q_unit_cnt; i++)
c->precision_coarse[i] = c->scalefactors[i] - b->gradient[i];
}
for (int i = 0; i < b->q_unit_cnt; i++)
c->precision_coarse[i] = FFMAX(c->precision_coarse[i], 1);
for (int i = 0; i < b->grad_boundary; i++)
c->precision_coarse[i]++;
for (int i = 0; i < b->q_unit_cnt; i++) {
c->precision_fine[i] = 0;
if (c->precision_coarse[i] > 15) {
c->precision_fine[i] = c->precision_coarse[i] - 15;
c->precision_coarse[i] = 15;
}
}
}
static inline int parse_band_ext(ATRAC9Context *s, ATRAC9BlockData *b,
GetBitContext *gb, int stereo)
{
int ext_band = 0;
if (b->has_band_ext) {
ext_band = at9_tab_band_ext_group[b->q_unit_cnt - 13][2];
if (stereo) {
b->channel[1].band_ext = get_bits(gb, 2);
b->channel[1].band_ext = ext_band > 2 ? b->channel[1].band_ext : 4;
} else {
skip_bits1(gb);
}
}
b->has_band_ext_data = get_bits1(gb);
if (!b->has_band_ext_data)
return 0;
if (!b->has_band_ext) {
b->band_ext_mode = get_bits(gb, 2);
skip_bits_long(gb, get_bits(gb, 5));
return 0;
}
b->channel[0].band_ext = get_bits(gb, 2);
b->channel[0].band_ext = ext_band > 2 ? b->channel[0].band_ext : 4;
if (!get_bits(gb, 5))
return 0;
for (int i = 0; i <= stereo; i++) {
ATRAC9ChannelData *c = &b->channel[i];
const int count = at9_tab_band_ext_cnt[c->band_ext][ext_band];
for (int j = 0; j < count; j++) {
int len = at9_tab_band_ext_lengths[c->band_ext][ext_band][j];
c->band_ext_data[j] = get_bits(gb, len);
}
}
return 0;
}
static inline int read_scalefactors(ATRAC9Context *s, ATRAC9BlockData *b,
ATRAC9ChannelData *c, GetBitContext *gb,
int channel_idx, int first_in_pkt)
{
static const int mode_map[2][4] = { { 0, 1, 2, 3 }, { 0, 2, 3, 4 } };
const int mode = mode_map[channel_idx][get_bits(gb, 2)];
memset(c->scalefactors, 0, sizeof(c->scalefactors));
if (first_in_pkt && (mode == 4 || ((mode == 3) && !channel_idx))) {
av_log(s->avctx, AV_LOG_ERROR, "Invalid scalefactor coding mode!\n");
return AVERROR_INVALIDDATA;
}
switch (mode) {
case 0: { /* VLC delta offset */
const uint8_t *sf_weights = at9_tab_sf_weights[get_bits(gb, 3)];
const int base = get_bits(gb, 5);
const int len = get_bits(gb, 2) + 3;
const VLC *tab = &s->sf_vlc[0][len];
c->scalefactors[0] = get_bits(gb, len);
for (int i = 1; i < b->band_ext_q_unit; i++) {
int val = c->scalefactors[i - 1] + get_vlc2(gb, tab->table, 9, 2);
c->scalefactors[i] = val & ((1 << len) - 1);
}
for (int i = 0; i < b->band_ext_q_unit; i++)
c->scalefactors[i] += base - sf_weights[i];
break;
}
case 1: { /* CLC offset */
const int len = get_bits(gb, 2) + 2;
const int base = len < 5 ? get_bits(gb, 5) : 0;
for (int i = 0; i < b->band_ext_q_unit; i++)
c->scalefactors[i] = base + get_bits(gb, len);
break;
}
case 2:
case 4: { /* VLC dist to baseline */
const int *baseline = mode == 4 ? c->scalefactors_prev :
channel_idx ? b->channel[0].scalefactors :
c->scalefactors_prev;
const int baseline_len = mode == 4 ? b->q_unit_cnt_prev :
channel_idx ? b->band_ext_q_unit :
b->q_unit_cnt_prev;
const int len = get_bits(gb, 2) + 2;
const int unit_cnt = FFMIN(b->band_ext_q_unit, baseline_len);
const VLC *tab = &s->sf_vlc[1][len];
for (int i = 0; i < unit_cnt; i++) {
int dist = get_vlc2(gb, tab->table, 9, 2);
c->scalefactors[i] = baseline[i] + dist;
}
for (int i = unit_cnt; i < b->band_ext_q_unit; i++)
c->scalefactors[i] = get_bits(gb, 5);
break;
}
case 3: { /* VLC offset with baseline */
const int *baseline = channel_idx ? b->channel[0].scalefactors :
c->scalefactors_prev;
const int baseline_len = channel_idx ? b->band_ext_q_unit :
b->q_unit_cnt_prev;
const int base = get_bits(gb, 5) - (1 << (5 - 1));
const int len = get_bits(gb, 2) + 1;
const int unit_cnt = FFMIN(b->band_ext_q_unit, baseline_len);
const VLC *tab = &s->sf_vlc[0][len];
c->scalefactors[0] = get_bits(gb, len);
for (int i = 1; i < unit_cnt; i++) {
int val = c->scalefactors[i - 1] + get_vlc2(gb, tab->table, 9, 2);
c->scalefactors[i] = val & ((1 << len) - 1);
}
for (int i = 0; i < unit_cnt; i++)
c->scalefactors[i] += base + baseline[i];
for (int i = unit_cnt; i < b->band_ext_q_unit; i++)
c->scalefactors[i] = get_bits(gb, 5);
break;
}
}
for (int i = 0; i < b->band_ext_q_unit; i++)
if (c->scalefactors[i] < 0 || c->scalefactors[i] > 31)
return AVERROR_INVALIDDATA;
memcpy(c->scalefactors_prev, c->scalefactors, sizeof(c->scalefactors));
return 0;
}
static inline void calc_codebook_idx(ATRAC9Context *s, ATRAC9BlockData *b,
ATRAC9ChannelData *c)
{
int avg = 0;
const int last_sf = c->scalefactors[c->q_unit_cnt];
memset(c->codebookset, 0, sizeof(c->codebookset));
if (c->q_unit_cnt <= 1)
return;
if (s->samplerate_idx > 7)
return;
c->scalefactors[c->q_unit_cnt] = c->scalefactors[c->q_unit_cnt - 1];
if (c->q_unit_cnt > 12) {
for (int i = 0; i < 12; i++)
avg += c->scalefactors[i];
avg = (avg + 6) / 12;
}
for (int i = 8; i < c->q_unit_cnt; i++) {
const int prev = c->scalefactors[i - 1];
const int cur = c->scalefactors[i ];
const int next = c->scalefactors[i + 1];
const int min = FFMIN(prev, next);
if ((cur - min >= 3 || 2*cur - prev - next >= 3))
c->codebookset[i] = 1;
}
for (int i = 12; i < c->q_unit_cnt; i++) {
const int cur = c->scalefactors[i];
const int cnd = at9_q_unit_to_coeff_cnt[i] == 16;
const int min = FFMIN(c->scalefactors[i + 1], c->scalefactors[i - 1]);
if (c->codebookset[i])
continue;
c->codebookset[i] = (((cur - min) >= 2) && (cur >= (avg - cnd)));
}
c->scalefactors[c->q_unit_cnt] = last_sf;
}
static inline void read_coeffs_coarse(ATRAC9Context *s, ATRAC9BlockData *b,
ATRAC9ChannelData *c, GetBitContext *gb)
{
const int max_prec = s->samplerate_idx > 7 ? 1 : 7;
memset(c->q_coeffs_coarse, 0, sizeof(c->q_coeffs_coarse));
for (int i = 0; i < c->q_unit_cnt; i++) {
int *coeffs = &c->q_coeffs_coarse[at9_q_unit_to_coeff_idx[i]];
const int bands = at9_q_unit_to_coeff_cnt[i];
const int prec = c->precision_coarse[i] + 1;
if (prec <= max_prec) {
const int cb = c->codebookset[i];
const int cbi = at9_q_unit_to_codebookidx[i];
const VLC *tab = &s->coeff_vlc[cb][prec][cbi];
const HuffmanCodebook *huff = &at9_huffman_coeffs[cb][prec][cbi];
const int groups = bands >> huff->value_cnt_pow;
for (int j = 0; j < groups; j++) {
uint16_t val = get_vlc2(gb, tab->table, 9, huff->max_bit_size);
for (int k = 0; k < huff->value_cnt; k++) {
coeffs[k] = sign_extend(val, huff->value_bits);
val >>= huff->value_bits;
}
coeffs += huff->value_cnt;
}
} else {
for (int j = 0; j < bands; j++)
coeffs[j] = sign_extend(get_bits(gb, prec), prec);
}
}
}
static inline void read_coeffs_fine(ATRAC9Context *s, ATRAC9BlockData *b,
ATRAC9ChannelData *c, GetBitContext *gb)
{
memset(c->q_coeffs_fine, 0, sizeof(c->q_coeffs_fine));
for (int i = 0; i < c->q_unit_cnt; i++) {
const int start = at9_q_unit_to_coeff_idx[i + 0];
const int end = at9_q_unit_to_coeff_idx[i + 1];
const int len = c->precision_fine[i] + 1;
if (c->precision_fine[i] <= 0)
continue;
for (int j = start; j < end; j++)
c->q_coeffs_fine[j] = sign_extend(get_bits(gb, len), len);
}
}
static inline void dequantize(ATRAC9Context *s, ATRAC9BlockData *b,
ATRAC9ChannelData *c)
{
memset(c->coeffs, 0, sizeof(c->coeffs));
for (int i = 0; i < c->q_unit_cnt; i++) {
const int start = at9_q_unit_to_coeff_idx[i + 0];
const int end = at9_q_unit_to_coeff_idx[i + 1];
const float coarse_c = at9_quant_step_coarse[c->precision_coarse[i]];
const float fine_c = at9_quant_step_fine[c->precision_fine[i]];
for (int j = start; j < end; j++) {
const float vc = c->q_coeffs_coarse[j] * coarse_c;
const float vf = c->q_coeffs_fine[j] * fine_c;
c->coeffs[j] = vc + vf;
}
}
}
static inline void apply_intensity_stereo(ATRAC9Context *s, ATRAC9BlockData *b,
const int stereo)
{
float *src = b->channel[ b->cpe_base_channel].coeffs;
float *dst = b->channel[!b->cpe_base_channel].coeffs;
if (!stereo)
return;
if (b->q_unit_cnt <= b->stereo_q_unit)
return;
for (int i = b->stereo_q_unit; i < b->q_unit_cnt; i++) {
const int sign = b->is_signs[i];
const int start = at9_q_unit_to_coeff_idx[i + 0];
const int end = at9_q_unit_to_coeff_idx[i + 1];
for (int j = start; j < end; j++)
dst[j] = sign*src[j];
}
}
static inline void apply_scalefactors(ATRAC9Context *s, ATRAC9BlockData *b,
const int stereo)
{
for (int i = 0; i <= stereo; i++) {
float *coeffs = b->channel[i].coeffs;
for (int j = 0; j < b->q_unit_cnt; j++) {
const int start = at9_q_unit_to_coeff_idx[j + 0];
const int end = at9_q_unit_to_coeff_idx[j + 1];
const int scalefactor = b->channel[i].scalefactors[j];
const float scale = at9_scalefactor_c[scalefactor];
for (int k = start; k < end; k++)
coeffs[k] *= scale;
}
}
}
static inline void fill_with_noise(ATRAC9Context *s, ATRAC9ChannelData *c,
int start, int count)
{
float maxval = 0.0f;
for (int i = 0; i < count; i += 2) {
double tmp[2];
av_bmg_get(&s->lfg, tmp);
c->coeffs[start + i + 0] = tmp[0];
c->coeffs[start + i + 1] = tmp[1];
maxval = FFMAX(FFABS(tmp[0]), FFMAX(FFABS(tmp[1]), maxval));
}
/* Normalize */
for (int i = 0; i < count; i++)
c->coeffs[start + i] /= maxval;
}
static inline void scale_band_ext_coeffs(ATRAC9ChannelData *c, float sf[6],
const int s_unit, const int e_unit)
{
for (int i = s_unit; i < e_unit; i++) {
const int start = at9_q_unit_to_coeff_idx[i + 0];
const int end = at9_q_unit_to_coeff_idx[i + 1];
for (int j = start; j < end; j++)
c->coeffs[j] *= sf[i - s_unit];
}
}
static inline void apply_band_extension(ATRAC9Context *s, ATRAC9BlockData *b,
const int stereo)
{
const int bc = at9_tab_band_ext_group[b->q_unit_cnt - 13][2];
const int g_units[4] = { /* A, B, C, total units */
b->q_unit_cnt,
at9_tab_band_ext_group[b->q_unit_cnt - 13][0],
at9_tab_band_ext_group[b->q_unit_cnt - 13][1],
FFMAX(g_units[2], 22),
};
const int g_bins[4] = { /* A, B, C, total bins */
at9_q_unit_to_coeff_idx[g_units[0]],
at9_q_unit_to_coeff_idx[g_units[1]],
at9_q_unit_to_coeff_idx[g_units[2]],
at9_q_unit_to_coeff_idx[g_units[3]],
};
if (!b->has_band_ext || !b->has_band_ext_data)
return;
for (int ch = 0; ch <= stereo; ch++) {
ATRAC9ChannelData *c = &b->channel[ch];
/* Mirror the spectrum */
for (int i = 0; i < 3; i++)
for (int j = 0; j < (g_bins[i + 1] - g_bins[i + 0]); j++)
c->coeffs[g_bins[i] + j] = c->coeffs[g_bins[i] - j - 1];
switch (c->band_ext) {
case 0: {
int l;
float sf[6] = { 0.0f };
const int n_start = at9_q_unit_to_coeff_idx[g_units[3] - 1];
const int n_cnt = at9_q_unit_to_coeff_cnt[g_units[3] - 1];
switch (bc) {
case 3:
sf[0] = at9_band_ext_scales_m0[0][0][c->band_ext_data[0]];
sf[1] = at9_band_ext_scales_m0[0][1][c->band_ext_data[0]];
sf[2] = at9_band_ext_scales_m0[0][2][c->band_ext_data[1]];
sf[3] = at9_band_ext_scales_m0[0][3][c->band_ext_data[2]];
sf[4] = at9_band_ext_scales_m0[0][4][c->band_ext_data[3]];
break;
case 4:
sf[0] = at9_band_ext_scales_m0[1][0][c->band_ext_data[0]];
sf[1] = at9_band_ext_scales_m0[1][1][c->band_ext_data[0]];
sf[2] = at9_band_ext_scales_m0[1][2][c->band_ext_data[1]];
sf[3] = at9_band_ext_scales_m0[1][3][c->band_ext_data[2]];
sf[4] = at9_band_ext_scales_m0[1][4][c->band_ext_data[3]];
break;
case 5:
sf[0] = at9_band_ext_scales_m0[2][0][c->band_ext_data[0]];
sf[1] = at9_band_ext_scales_m0[2][1][c->band_ext_data[1]];
sf[2] = at9_band_ext_scales_m0[2][2][c->band_ext_data[1]];
break;
}
l = g_units[3] - g_units[0] - 1;
sf[l] = at9_scalefactor_c[c->scalefactors[g_units[0]]];
fill_with_noise(s, c, n_start, n_cnt);
scale_band_ext_coeffs(c, sf, g_units[0], g_units[3]);
break;
}
case 1: {
float sf[6];
for (int i = g_units[0]; i < g_units[3]; i++)
sf[i - g_units[0]] = at9_scalefactor_c[c->scalefactors[i]];
fill_with_noise(s, c, g_bins[0], g_bins[3] - g_bins[0]);
scale_band_ext_coeffs(c, sf, g_units[0], g_units[3]);
break;
}
case 2: {
const float g_sf[2] = {
at9_band_ext_scales_m2[c->band_ext_data[0]],
at9_band_ext_scales_m2[c->band_ext_data[1]],
};
for (int i = 0; i < 2; i++)
for (int j = g_bins[i + 0]; j < g_bins[i + 1]; j++)
c->coeffs[j] *= g_sf[i];
break;
}
case 3: {
float scale = at9_band_ext_scales_m3[c->band_ext_data[0]][0];
float rate = at9_band_ext_scales_m3[c->band_ext_data[1]][1];
rate = pow(2, rate);
for (int i = g_bins[0]; i < g_bins[3]; i++) {
scale *= rate;
c->coeffs[i] *= scale;
}
break;
}
case 4: {
const float m = at9_band_ext_scales_m4[c->band_ext_data[0]];
const float g_sf[3] = { 0.7079468f*m, 0.5011902f*m, 0.3548279f*m };
for (int i = 0; i < 3; i++)
for (int j = g_bins[i + 0]; j < g_bins[i + 1]; j++)
c->coeffs[j] *= g_sf[i];
break;
}
}
}
}
static int atrac9_decode_block(ATRAC9Context *s, GetBitContext *gb,
ATRAC9BlockData *b, AVFrame *frame,
int frame_idx, int block_idx)
{
const int first_in_pkt = !get_bits1(gb);
const int reuse_params = get_bits1(gb);
const int stereo = s->block_config->type[block_idx] == ATRAC9_BLOCK_TYPE_CPE;
if (first_in_pkt && reuse_params) {
av_log(s->avctx, AV_LOG_ERROR, "Invalid block flags!\n");
return AVERROR_INVALIDDATA;
}
/* Band parameters */
if (!reuse_params) {
int stereo_band, ext_band;
const int min_band_count = s->samplerate_idx > 7 ? 1 : 3;
b->band_count = get_bits(gb, 4) + min_band_count;
b->q_unit_cnt = at9_tab_band_q_unit_map[b->band_count];
b->band_ext_q_unit = b->stereo_q_unit = b->q_unit_cnt;
if (b->band_count > at9_tab_sri_max_bands[s->samplerate_idx]) {
av_log(s->avctx, AV_LOG_ERROR, "Invalid band count %i!\n",
b->band_count);
return AVERROR_INVALIDDATA;
}
if (stereo) {
stereo_band = get_bits(gb, 4) + min_band_count;
if (stereo_band > b->band_count) {
av_log(s->avctx, AV_LOG_ERROR, "Invalid stereo band %i!\n",
stereo_band);
return AVERROR_INVALIDDATA;
}
b->stereo_q_unit = at9_tab_band_q_unit_map[stereo_band];
}
b->has_band_ext = get_bits1(gb);
if (b->has_band_ext) {
ext_band = get_bits(gb, 4) + min_band_count;
if (ext_band < b->band_count) {
av_log(s->avctx, AV_LOG_ERROR, "Invalid extension band %i!\n",
ext_band);
return AVERROR_INVALIDDATA;
}
b->band_ext_q_unit = at9_tab_band_q_unit_map[ext_band];
}
}
/* Calculate bit alloc gradient */
if (parse_gradient(s, b, gb))
return AVERROR_INVALIDDATA;
/* IS data */
b->cpe_base_channel = 0;
if (stereo) {
b->cpe_base_channel = get_bits1(gb);
if (get_bits1(gb)) {
for (int i = b->stereo_q_unit; i < b->q_unit_cnt; i++)
b->is_signs[i] = 1 - 2*get_bits1(gb);
} else {
for (int i = 0; i < FF_ARRAY_ELEMS(b->is_signs); i++)
b->is_signs[i] = 1;
}
}
/* Band extension */
if (parse_band_ext(s, b, gb, stereo))
return AVERROR_INVALIDDATA;
/* Scalefactors */
for (int i = 0; i <= stereo; i++) {
ATRAC9ChannelData *c = &b->channel[i];
c->q_unit_cnt = i == b->cpe_base_channel ? b->q_unit_cnt :
b->stereo_q_unit;
if (read_scalefactors(s, b, c, gb, i, first_in_pkt))
return AVERROR_INVALIDDATA;
calc_precision (s, b, c);
calc_codebook_idx (s, b, c);
read_coeffs_coarse(s, b, c, gb);
read_coeffs_fine (s, b, c, gb);
dequantize (s, b, c);
}
b->q_unit_cnt_prev = b->has_band_ext ? b->band_ext_q_unit : b->q_unit_cnt;
apply_intensity_stereo(s, b, stereo);
apply_scalefactors (s, b, stereo);
apply_band_extension (s, b, stereo);
/* iMDCT */
for (int i = 0; i <= stereo; i++) {
ATRAC9ChannelData *c = &b->channel[i];
const int dst_idx = s->block_config->plane_map[block_idx][i];
const int wsize = 1 << s->frame_log2;
const ptrdiff_t offset = wsize*frame_idx*sizeof(float);
float *dst = (float *)(frame->extended_data[dst_idx] + offset);
s->imdct.imdct_half(&s->imdct, s->temp, c->coeffs);
s->fdsp->vector_fmul_window(dst, c->prev_win, s->temp,
s->imdct_win, wsize >> 1);
memcpy(c->prev_win, s->temp + (wsize >> 1), sizeof(float)*wsize >> 1);
}
return 0;
}
static int atrac9_decode_frame(AVCodecContext *avctx, void *data,
int *got_frame_ptr, AVPacket *avpkt)
{
int ret;
GetBitContext gb;
AVFrame *frame = data;
ATRAC9Context *s = avctx->priv_data;
const int frames = FFMIN(avpkt->size / s->avg_frame_size, s->frame_count);
frame->nb_samples = (1 << s->frame_log2) * frames;
ret = ff_get_buffer(avctx, frame, 0);
if (ret < 0)
return ret;
init_get_bits8(&gb, avpkt->data, avpkt->size);
for (int i = 0; i < frames; i++) {
for (int j = 0; j < s->block_config->count; j++) {
ret = atrac9_decode_block(s, &gb, &s->block[j], frame, i, j);
if (ret)
return ret;
align_get_bits(&gb);
}
}
*got_frame_ptr = 1;
return avctx->block_align;
}
static void atrac9_decode_flush(AVCodecContext *avctx)
{
ATRAC9Context *s = avctx->priv_data;
for (int j = 0; j < s->block_config->count; j++) {
ATRAC9BlockData *b = &s->block[j];
const int stereo = s->block_config->type[j] == ATRAC9_BLOCK_TYPE_CPE;
for (int i = 0; i <= stereo; i++) {
ATRAC9ChannelData *c = &b->channel[i];
memset(c->prev_win, 0, sizeof(c->prev_win));
}
}
}
static av_cold int atrac9_decode_close(AVCodecContext *avctx)
{
ATRAC9Context *s = avctx->priv_data;
for (int i = 1; i < 7; i++)
ff_free_vlc(&s->sf_vlc[0][i]);
for (int i = 2; i < 6; i++)
ff_free_vlc(&s->sf_vlc[1][i]);
for (int i = 0; i < 2; i++)
for (int j = 0; j < 8; j++)
for (int k = 0; k < 4; k++)
ff_free_vlc(&s->coeff_vlc[i][j][k]);
ff_mdct_end(&s->imdct);
av_free(s->fdsp);
return 0;
}
static av_cold int atrac9_decode_init(AVCodecContext *avctx)
{
GetBitContext gb;
ATRAC9Context *s = avctx->priv_data;
int version, block_config_idx, superframe_idx, alloc_c_len;
s->avctx = avctx;
av_lfg_init(&s->lfg, 0xFBADF00D);
if (avctx->extradata_size != 12) {
av_log(avctx, AV_LOG_ERROR, "Invalid extradata length!\n");
return AVERROR_INVALIDDATA;
}
version = AV_RL32(avctx->extradata);
if (version > 2) {
av_log(avctx, AV_LOG_ERROR, "Unsupported version (%i)!\n", version);
return AVERROR_INVALIDDATA;
}
init_get_bits8(&gb, avctx->extradata + 4, avctx->extradata_size);
if (get_bits(&gb, 8) != 0xFE) {
av_log(avctx, AV_LOG_ERROR, "Incorrect magic byte!\n");
return AVERROR_INVALIDDATA;
}
s->samplerate_idx = get_bits(&gb, 4);
avctx->sample_rate = at9_tab_samplerates[s->samplerate_idx];
block_config_idx = get_bits(&gb, 3);
if (block_config_idx > 5) {
av_log(avctx, AV_LOG_ERROR, "Incorrect block config!\n");
return AVERROR_INVALIDDATA;
}
s->block_config = &at9_block_layout[block_config_idx];
avctx->channel_layout = s->block_config->channel_layout;
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
if (get_bits1(&gb)) {
av_log(avctx, AV_LOG_ERROR, "Incorrect verification bit!\n");
return AVERROR_INVALIDDATA;
}
/* Average frame size in bytes */
s->avg_frame_size = get_bits(&gb, 11) + 1;
superframe_idx = get_bits(&gb, 2);
if (superframe_idx & 1) {
av_log(avctx, AV_LOG_ERROR, "Invalid superframe index!\n");
return AVERROR_INVALIDDATA;
}
s->frame_count = 1 << superframe_idx;
s->frame_log2 = at9_tab_sri_frame_log2[s->samplerate_idx];
if (ff_mdct_init(&s->imdct, s->frame_log2 + 1, 1, 1.0f / 32768.0f))
return AVERROR(ENOMEM);
s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
if (!s->fdsp)
return AVERROR(ENOMEM);
/* iMDCT window */
for (int i = 0; i < (1 << s->frame_log2); i++) {
const int len = 1 << s->frame_log2;
const float sidx = ( i + 0.5f) / len;
const float eidx = (len - i - 0.5f) / len;
const float s_c = sinf(sidx*M_PI - M_PI_2)*0.5f + 0.5f;
const float e_c = sinf(eidx*M_PI - M_PI_2)*0.5f + 0.5f;
s->imdct_win[i] = s_c / ((s_c * s_c) + (e_c * e_c));
}
/* Allocation curve */
alloc_c_len = FF_ARRAY_ELEMS(at9_tab_b_dist);
for (int i = 1; i <= alloc_c_len; i++)
for (int j = 0; j < i; j++)
s->alloc_curve[i - 1][j] = at9_tab_b_dist[(j * alloc_c_len) / i];
/* Unsigned scalefactor VLCs */
for (int i = 1; i < 7; i++) {
const HuffmanCodebook *hf = &at9_huffman_sf_unsigned[i];
init_vlc(&s->sf_vlc[0][i], 9, hf->size, hf->bits, 1, 1, hf->codes,
2, 2, 0);
}
/* Signed scalefactor VLCs */
for (int i = 2; i < 6; i++) {
const HuffmanCodebook *hf = &at9_huffman_sf_signed[i];
int nums = hf->size;
int16_t sym[32];
for (int j = 0; j < nums; j++)
sym[j] = sign_extend(j, hf->value_bits);
ff_init_vlc_sparse(&s->sf_vlc[1][i], 9, hf->size, hf->bits, 1, 1,
hf->codes, 2, 2, sym, sizeof(*sym), sizeof(*sym), 0);
}
/* Coefficient VLCs */
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 8; j++) {
for (int k = 0; k < 4; k++) {
const HuffmanCodebook *hf = &at9_huffman_coeffs[i][j][k];
init_vlc(&s->coeff_vlc[i][j][k], 9, hf->size, hf->bits, 1, 1,
hf->codes, 2, 2, 0);
}
}
}
return 0;
}
AVCodec ff_atrac9_decoder = {
.name = "atrac9",
.long_name = NULL_IF_CONFIG_SMALL("ATRAC9 (Adaptive TRansform Acoustic Coding 9)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_ATRAC9,
.priv_data_size = sizeof(ATRAC9Context),
.init = atrac9_decode_init,
.close = atrac9_decode_close,
.decode = atrac9_decode_frame,
.flush = atrac9_decode_flush,
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
};