mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
20f9727018
Up until now, codec.h contains both public and private parts of AVCodec. This exposes the internals of AVCodec to users and leads them into the temptation of actually using them and forces us to forward-declare structures and types that users can't use at all. This commit changes this by adding a new structure FFCodec to codec_internal.h that extends AVCodec, i.e. contains the public AVCodec as first member; the private fields of AVCodec are moved to this structure, leaving codec.h clean. Reviewed-by: Anton Khirnov <anton@khirnov.net> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
390 lines
13 KiB
C
390 lines
13 KiB
C
/*
 * Voxware MetaSound decoder
 * Copyright (c) 2013 Konstantin Shishkov
 * based on TwinVQ decoder
 * Copyright (c) 2009 Vitor Sessak
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
#include <inttypes.h>
|
|
#include <math.h>
|
|
#include <stdint.h>
|
|
|
|
#include "libavutil/channel_layout.h"
|
|
#include "libavutil/float_dsp.h"
|
|
|
|
#define BITSTREAM_READER_LE
|
|
#include "avcodec.h"
|
|
#include "codec_internal.h"
|
|
#include "fft.h"
|
|
#include "get_bits.h"
|
|
#include "lsp.h"
|
|
#include "sinewin.h"
|
|
|
|
#include "twinvq.h"
|
|
#include "metasound_data.h"
|
|
|
|
/**
 * Accumulate a train of scaled peaks into the speech buffer.
 *
 * Consecutive runs of entries from shape are multiplied by ppc_gain and
 * added to speech around centers located at multiples of period.
 *
 * @param period   spacing between successive peak centers
 * @param width    number of shape entries consumed by one full peak
 * @param shape    peak shape, len entries
 * @param ppc_gain factor applied to every shape entry
 * @param speech   buffer the peaks are added to
 * @param len      number of entries available in shape
 */
static void add_peak(float period, int width, const float *shape,
                     float ppc_gain, float *speech, int len)
{
    const int first_off = -width / 2;      // offset of the first sample of a peak
    const int end_off   = (width + 1) / 2; // one past the last offset of a peak
    int pos = 0;                           // next unread entry of shape
    int peak, off, center, full_peaks;

    // The first peak is centered on zero, so only its right half is written.
    for (off = 0; off < width / 2; off++)
        speech[off] += ppc_gain * shape[pos++];

    // Full-width peaks centered on successive multiples of the period.
    full_peaks = ROUNDED_DIV(len, width);
    for (peak = 1; peak < full_peaks; peak++) {
        center = (int)(peak * period + 0.5);
        for (off = first_off; off < end_off; off++)
            speech[off + center] += ppc_gain * shape[pos++];
    }

    // The last peak stops early once every shape entry has been consumed.
    center = (int)(peak * period + 0.5);
    for (off = first_off; off < end_off && pos < len; off++)
        speech[off + center] += ppc_gain * shape[pos++];
}
|
|
|
|
/**
 * Turn the coded period and gain indices of one channel into a peak train
 * and add it to the speech buffer.
 *
 * The period index is mapped onto a range derived from the mode's frame size
 * and the sampling rate: through log2()/powf() when the stream has a single
 * channel, linearly on a 1/400 grid otherwise. Peak width and gain are then
 * derived from the mode table before add_peak() does the accumulation.
 *
 * @param tctx        decoder context providing the mode table and the
 *                    stream parameters
 * @param period_coef coded period index
 * @param g_coef      coded gain index
 * @param shape       peak shape forwarded to add_peak()
 * @param speech      buffer the peaks are added to
 */
static void decode_ppc(TwinVQContext *tctx, int period_coef, int g_coef,
                       const float *shape, float *speech)
{
    const TwinVQModeTab *mtab = tctx->mtab;
    const int channels     = tctx->avctx->ch_layout.nb_channels;
    const int isampf       = tctx->avctx->sample_rate / 1000;
    const int ibps         = tctx->avctx->bit_rate / (1000 * channels);
    const int is_mono      = channels == 1;
    const int period_steps = (1 << mtab->ppc_period_bit) - 1;
    const int gain_steps   = (1 << mtab->pgain_bit) - 1;
    const float ratio      = (float)mtab->size / isampf;
    float lo, hi, span, period;
    float mult;
    float gain_max, gain_step, ppc_gain;
    int width;

    // Bounds of the period range; logarithmic domain for mono streams.
    if (is_mono) {
        lo = log2(ratio * 0.2);
        hi = lo + log2(6);
    } else {
        lo = (int)(ratio * 0.2 * 400     + 0.5) / 400.0;
        hi = (int)(ratio * 0.2 * 400 * 6 + 0.5) / 400.0;
    }

    span   = hi - lo;
    period = lo + period_coef * span / period_steps;
    if (is_mono)
        period = powf(2.0, period);
    else
        period = (int)(period * 400 + 0.5) / 400.0;

    // Width multiplier selected by the sampling rate in kHz.
    if (isampf == 8)
        mult = 2.0;
    else if (isampf == 11 || isampf == 16)
        mult = 3.0;
    else if (isampf == 22)
        mult = ibps == 32 ? 2.0 : 4.0;
    else if (isampf == 44)
        mult = 8.0;
    else
        mult = 4.0;

    width = (int)(mult / (mtab->size / period) * mtab->ppc_shape_len);
    if (isampf == 22 && ibps == 32)
        width = (int)((2.0 / period + 1) * width + 0.5);

    if (channels == 2)
        gain_max = 25000.0;
    else
        gain_max = 20000.0;
    gain_step = gain_max / gain_steps;
    ppc_gain  = 1.0 / 8192 *
                twinvq_mulawinv(gain_step * g_coef + gain_step / 2,
                                gain_max, TWINVQ_PGAIN_MU);

    add_peak(period, width, shape, ppc_gain, speech, mtab->ppc_shape_len);
}
|
|
|
|
/**
 * Expand coded bark-envelope indices into a per-coefficient envelope.
 *
 * Each codebook value is scaled by 1/2048, optionally combined with the
 * value stored in the history buffer, offset by 1.0 and floored at 0.1.
 * The result times gain is then written over as many output entries as the
 * mode's bark table lists for that band. The history buffer is always
 * overwritten with the freshly decoded (scaled) codebook value.
 *
 * @param tctx     decoder context
 * @param in       codebook indices, one per bark coefficient
 * @param use_hist nonzero to mix in the stored history value
 * @param ch       channel whose history buffer is used
 * @param out      destination envelope
 * @param gain     factor applied to every envelope value
 * @param ftype    frame type selecting the mode parameters and history
 */
static void dec_bark_env(TwinVQContext *tctx, const uint8_t *in, int use_hist,
                         int ch, float *out, float gain,
                         enum TwinVQFrameType ftype)
{
    static const float hist_weight[] = { 0.4, 0.35, 0.28 };

    const TwinVQModeTab *mtab = tctx->mtab;
    const int is_mono   = tctx->avctx->ch_layout.nb_channels == 1;
    const int n_coef    = mtab->fmode[ftype].bark_n_coef;
    const int cb_len    = mtab->fmode[ftype].bark_env_size / n_coef;
    // Single-channel streams use a fixed history weight.
    const float val     = is_mono ? 0.5 : hist_weight[ftype];
    float *hist         = tctx->bark_hist[ftype][ch];
    int band = 0;
    int pos, coef;

    for (pos = 0; pos < cb_len; pos++) {
        for (coef = 0; coef < n_coef; coef++, band++) {
            const int run = mtab->fmode[ftype].bark_tab[band];
            float tmp2 = mtab->fmode[ftype].bark_cb[cb_len * in[coef] + pos] *
                         (1.0 / 2048);
            float st;

            if (!use_hist)
                st = tmp2 + 1.0;
            else if (is_mono)
                st = tmp2 + val * hist[band] + 1.0;
            else
                st = (1.0 - val) * tmp2 + val * hist[band] + 1.0;

            hist[band] = tmp2;
            if (st < 0.1)
                st = 0.1;

            twinvq_memset_float(out, st * gain, run);
            out += run;
        }
    }
}
|
|
|
|
/**
 * Read the pairs of codebook indices for every division of a frame type.
 *
 * Two values are read per division and stored consecutively in dst.
 * Divisions at or beyond bits_main_spec_change[ftype] use the second set of
 * bit widths.
 *
 * @param tctx  decoder context holding the per-frame-type bit allocation
 * @param gb    bit reader positioned at the first index
 * @param dst   receives 2 * n_div[ftype] values
 * @param ftype frame type selecting the allocation tables
 */
static void read_cb_data(TwinVQContext *tctx, GetBitContext *gb,
                         uint8_t *dst, enum TwinVQFrameType ftype)
{
    const int n_div  = tctx->n_div[ftype];
    const int change = tctx->bits_main_spec_change[ftype];
    int div;

    for (div = 0; div < n_div; div++) {
        const int part = div >= change;

        dst[2 * div]     = get_bits(gb, tctx->bits_main_spec[0][ftype][part]);
        dst[2 * div + 1] = get_bits(gb, tctx->bits_main_spec[1][ftype][part]);
    }
}
|
|
|
|
/**
 * Parse every frame of a packet into tctx->bits[].
 *
 * tctx->cur_frame is used as the loop counter, so it equals
 * tctx->frames_per_packet on success and the index of the offending frame
 * when parsing stops on an invalid window type.
 *
 * @param avctx    codec context, used for logging
 * @param tctx     decoder context receiving the parsed fields
 * @param buf      packet payload
 * @param buf_size size of buf in bytes
 * @return number of bytes consumed (bit position rounded up), or a negative
 *         error code
 */
static int metasound_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,
                                    const uint8_t *buf, int buf_size)
{
    const TwinVQModeTab *mtab = tctx->mtab;
    const int channels = tctx->avctx->ch_layout.nb_channels;
    GetBitContext gb;
    int ch, s, c, ret;

    ret = init_get_bits8(&gb, buf, buf_size);
    if (ret < 0)
        return ret;

    for (tctx->cur_frame = 0; tctx->cur_frame < tctx->frames_per_packet;
         tctx->cur_frame++) {
        TwinVQFrameData *bits = &tctx->bits[tctx->cur_frame];
        int sub, is_long, n_coef, n_bit, misalign;

        bits->window_type = get_bits(&gb, TWINVQ_WINDOW_TYPE_BITS);
        if (bits->window_type > 8) {
            av_log(avctx, AV_LOG_ERROR, "Invalid window type, broken sample?\n");
            return AVERROR_INVALIDDATA;
        }

        bits->ftype = ff_twinvq_wtype_to_ftype_table[bits->window_type];
        is_long     = bits->ftype == TWINVQ_FT_LONG;
        sub         = mtab->fmode[bits->ftype].sub;
        n_coef      = mtab->fmode[bits->ftype].bark_n_coef;
        n_bit       = mtab->fmode[bits->ftype].bark_n_bit;

        // Two bits whose value is discarded, absent for short frames and
        // when the 6 kbps flag is set.
        if (!(bits->ftype == TWINVQ_FT_SHORT || tctx->is_6kbps))
            get_bits(&gb, 2);

        read_cb_data(tctx, &gb, bits->main_coeffs, bits->ftype);

        for (ch = 0; ch < channels; ch++)
            for (s = 0; s < sub; s++)
                for (c = 0; c < n_coef; c++)
                    bits->bark1[ch][s][c] = get_bits(&gb, n_bit);

        for (ch = 0; ch < channels; ch++)
            for (s = 0; s < sub; s++)
                bits->bark_use_hist[ch][s] = get_bits1(&gb);

        // Channel gain, followed by per-subframe gains for non-long frames.
        for (ch = 0; ch < channels; ch++) {
            bits->gain_bits[ch] = get_bits(&gb, TWINVQ_GAIN_BITS);
            if (is_long)
                continue;
            for (s = 0; s < sub; s++)
                bits->sub_gain_bits[ch * sub + s] =
                    get_bits(&gb, TWINVQ_SUB_GAIN_BITS);
        }

        for (ch = 0; ch < channels; ch++) {
            bits->lpc_hist_idx[ch] = get_bits(&gb, mtab->lsp_bit0);
            bits->lpc_idx1[ch]     = get_bits(&gb, mtab->lsp_bit1);

            for (s = 0; s < mtab->lsp_split; s++)
                bits->lpc_idx2[ch][s] = get_bits(&gb, mtab->lsp_bit2);
        }

        if (is_long) {
            // NOTE(review): frame type 3 is presumably the PPC entry of
            // enum TwinVQFrameType -- confirm against twinvq.h.
            read_cb_data(tctx, &gb, bits->ppc_coeffs, 3);
            for (ch = 0; ch < channels; ch++) {
                bits->p_coef[ch] = get_bits(&gb, mtab->ppc_period_bit);
                bits->g_coef[ch] = get_bits(&gb, mtab->pgain_bit);
            }
        }

        // subframes are aligned to nibbles
        misalign = get_bits_count(&gb) & 3;
        if (misalign)
            skip_bits(&gb, 4 - misalign);
    }

    return (get_bits_count(&gb) + 7) / 8;
}
|
|
|
|
/**
 * Stream parameters associated with one format tag.
 *
 * Field order matters: tables of this type may use positional initializers.
 */
struct MetasoundProps {
    uint32_t tag;     ///< format tag compared against the value read from extradata
    int bit_rate;     ///< total bit rate; multiplied by 1000 before it is stored in the codec context
    int channels;     ///< number of audio channels
    int sample_rate;  ///< sampling rate in Hz
};

typedef struct MetasoundProps MetasoundProps;
|
|
|
|
static const MetasoundProps codec_props[] = {
|
|
{ MKTAG('V','X','0','3'), 6, 1, 8000 },
|
|
{ MKTAG('V','X','0','4'), 12, 2, 8000 },
|
|
|
|
{ MKTAG('V','O','X','i'), 8, 1, 8000 },
|
|
{ MKTAG('V','O','X','j'), 10, 1, 11025 },
|
|
{ MKTAG('V','O','X','k'), 16, 1, 16000 },
|
|
{ MKTAG('V','O','X','L'), 24, 1, 22050 },
|
|
{ MKTAG('V','O','X','q'), 32, 1, 44100 },
|
|
{ MKTAG('V','O','X','r'), 40, 1, 44100 },
|
|
{ MKTAG('V','O','X','s'), 48, 1, 44100 },
|
|
{ MKTAG('V','O','X','t'), 16, 2, 8000 },
|
|
{ MKTAG('V','O','X','u'), 20, 2, 11025 },
|
|
{ MKTAG('V','O','X','v'), 32, 2, 16000 },
|
|
{ MKTAG('V','O','X','w'), 48, 2, 22050 },
|
|
{ MKTAG('V','O','X','x'), 64, 2, 44100 },
|
|
{ MKTAG('V','O','X','y'), 80, 2, 44100 },
|
|
{ MKTAG('V','O','X','z'), 96, 2, 44100 },
|
|
|
|
{ 0, 0, 0, 0 }
|
|
};
|
|
|
|
/* Packs channel count, sampling rate in kHz and per-channel bit rate into
 * the single integer the mode switch below dispatches on. */
#define MODE_KEY(ch, khz, kbps) (((ch) << 16) + ((khz) << 8) + (kbps))

/**
 * Initialize the decoder from the format tag stored in the extradata.
 *
 * The 32-bit little-endian tag at byte offset 12 of the extradata is looked
 * up in codec_props to obtain sample rate, channel count and bit rate. These
 * select the mode table, after which the decoder callbacks are installed
 * and ff_twinvq_decode_init() is called.
 *
 * @param avctx codec context; sample_rate, bit_rate and ch_layout are
 *              overwritten from the matching table entry
 * @return the result of ff_twinvq_decode_init(), AVERROR_INVALIDDATA for
 *         missing/short extradata, an unknown tag or an unsupported channel
 *         count, or AVERROR(ENOSYS) for an unsupported mode
 */
static av_cold int metasound_decode_init(AVCodecContext *avctx)
{
    TwinVQContext *tctx = avctx->priv_data;
    const MetasoundProps *entry;
    uint32_t tag;
    int channels, isampf, ibps;

    if (!avctx->extradata || avctx->extradata_size < 16) {
        av_log(avctx, AV_LOG_ERROR, "Missing or incomplete extradata\n");
        return AVERROR_INVALIDDATA;
    }

    tag = AV_RL32(avctx->extradata + 12);

    // Walk the table until the tag matches or the zero terminator is hit.
    for (entry = codec_props; entry->tag && entry->tag != tag; entry++)
        ;
    if (!entry->tag) {
        av_log(avctx, AV_LOG_ERROR, "Could not find tag %08"PRIX32"\n", tag);
        return AVERROR_INVALIDDATA;
    }

    avctx->sample_rate = entry->sample_rate;
    avctx->bit_rate    = entry->bit_rate * 1000;
    channels           = entry->channels;
    isampf             = avctx->sample_rate / 1000;

    if (channels <= 0 || channels > TWINVQ_CHANNELS_MAX) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %i\n",
               channels);
        return AVERROR_INVALIDDATA;
    }
    av_channel_layout_uninit(&avctx->ch_layout);
    av_channel_layout_default(&avctx->ch_layout, channels);

    ibps = avctx->bit_rate / (1000 * channels);

    switch (MODE_KEY(channels, isampf, ibps)) {
    case MODE_KEY(1,  8,  6): tctx->mtab = &ff_metasound_mode0806;  break;
    case MODE_KEY(2,  8,  6): tctx->mtab = &ff_metasound_mode0806s; break;
    case MODE_KEY(1,  8,  8): tctx->mtab = &ff_metasound_mode0808;  break;
    case MODE_KEY(2,  8,  8): tctx->mtab = &ff_metasound_mode0808s; break;
    case MODE_KEY(1, 11, 10): tctx->mtab = &ff_metasound_mode1110;  break;
    case MODE_KEY(2, 11, 10): tctx->mtab = &ff_metasound_mode1110s; break;
    case MODE_KEY(1, 16, 16): tctx->mtab = &ff_metasound_mode1616;  break;
    case MODE_KEY(2, 16, 16): tctx->mtab = &ff_metasound_mode1616s; break;
    case MODE_KEY(1, 22, 24): tctx->mtab = &ff_metasound_mode2224;  break;
    case MODE_KEY(2, 22, 24): tctx->mtab = &ff_metasound_mode2224s; break;
    // The 44 kHz modes share one table between both channel counts.
    case MODE_KEY(1, 44, 32):
    case MODE_KEY(2, 44, 32): tctx->mtab = &ff_metasound_mode4432;  break;
    case MODE_KEY(1, 44, 40):
    case MODE_KEY(2, 44, 40): tctx->mtab = &ff_metasound_mode4440;  break;
    case MODE_KEY(1, 44, 48):
    case MODE_KEY(2, 44, 48): tctx->mtab = &ff_metasound_mode4448;  break;
    default:
        av_log(avctx, AV_LOG_ERROR,
               "This version does not support %d kHz - %d kbit/s/ch mode.\n",
               isampf, ibps);
        return AVERROR(ENOSYS);
    }

    tctx->codec          = TWINVQ_CODEC_METASOUND;
    tctx->is_6kbps       = ibps == 6;
    tctx->frame_size     = avctx->bit_rate * tctx->mtab->size
                                           / avctx->sample_rate;
    tctx->read_bitstream = metasound_read_bitstream;
    tctx->dec_bark_env   = dec_bark_env;
    tctx->decode_ppc     = decode_ppc;

    return ff_twinvq_decode_init(avctx);
}

#undef MODE_KEY
|
|
|
|
const FFCodec ff_metasound_decoder = {
|
|
.p.name = "metasound",
|
|
.p.long_name = NULL_IF_CONFIG_SMALL("Voxware MetaSound"),
|
|
.p.type = AVMEDIA_TYPE_AUDIO,
|
|
.p.id = AV_CODEC_ID_METASOUND,
|
|
.priv_data_size = sizeof(TwinVQContext),
|
|
.init = metasound_decode_init,
|
|
.close = ff_twinvq_decode_close,
|
|
.decode = ff_twinvq_decode_frame,
|
|
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
|
|
.p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
|
|
AV_SAMPLE_FMT_NONE },
|
|
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
|
|
};
|