1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00
FFmpeg/libavformat/iamf_parse.c
Marvin Scholz 47844f5869 avformat/iamf_parse: Fix return of uninitialized value
The ret value here is not yet initialized so the return would return
uninitialized data. What was probably meant to be checked here was the
return value of ffio_read_size, which can return an error.

Introduced in 38bcb3ba7b3424abd772c72f8bdf445d75285e88

Fixes: CID1618758
Signed-off-by: James Almer <jamrial@gmail.com>
(cherry picked from commit b6a0eab528695c39a0c52889db0c1ce5dd6d99f3)
2024-08-31 21:23:27 -03:00

1126 lines
38 KiB
C

/*
* Immersive Audio Model and Formats parsing
* Copyright (c) 2023 James Almer <jamrial@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/iamf.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavcodec/get_bits.h"
#include "libavcodec/flac.h"
#include "libavcodec/leb.h"
#include "libavcodec/mpeg4audio.h"
#include "libavcodec/put_bits.h"
#include "avio_internal.h"
#include "iamf_parse.h"
#include "isom.h"
/**
 * Parse the Opus decoder_config() of a Codec Config OBU.
 *
 * All payload bytes remaining behind pb are copied into a newly allocated
 * codec_config->extradata, placed after an 8-byte "OpusHead" tag that is
 * written here. The sample rate is set to 48000.
 *
 * @param codec_config codec config being filled; audio_roll_distance must
 *                     already be set and must be negative
 * @param pb           reader positioned at the start of decoder_config()
 * @param len          total size in bytes of the buffer behind pb
 * @return 0 on success, a negative AVERROR code on failure. On failure
 *         codec_config->extradata may be allocated; codec_config_obu()
 *         releases it.
 */
static int opus_decoder_config(IAMFCodecConfig *codec_config,
                               AVIOContext *pb, int len)
{
    int ret, left = len - avio_tell(pb);

    if (left < 11 || codec_config->audio_roll_distance >= 0)
        return AVERROR_INVALIDDATA;

    codec_config->extradata = av_malloc(left + 8);
    if (!codec_config->extradata)
        return AVERROR(ENOMEM);

    AV_WB32(codec_config->extradata,     MKBETAG('O','p','u','s'));
    AV_WB32(codec_config->extradata + 4, MKBETAG('H','e','a','d'));

    // ffio_read_size() fails on short reads and hands back real I/O errors
    // unchanged, so extradata_size is only ever set to a valid length.
    ret = ffio_read_size(pb, codec_config->extradata + 8, left);
    if (ret < 0)
        return ret;

    codec_config->extradata_size = left + 8;
    codec_config->sample_rate = 48000;

    return 0;
}
/**
 * Parse the AAC decoder_config() of a Codec Config OBU.
 *
 * Walks an MP4 DecoderConfigDescriptor followed by a DecoderSpecificInfo
 * descriptor, stores the latter (zero padded) as codec_config->extradata and
 * takes the sample rate from the parsed MPEG-4 audio config.
 *
 * @param codec_config codec config being filled; audio_roll_distance must be
 *                     negative and codec_id must already be set
 * @param pb           reader positioned at the start of decoder_config()
 * @param len          total size in bytes of the buffer behind pb
 * @param logctx       logging context forwarded to the descriptor/config parsers
 * @return 0 on success, a negative AVERROR code on failure
 */
static int aac_decoder_config(IAMFCodecConfig *codec_config,
                              AVIOContext *pb, int len, void *logctx)
{
    MPEG4AudioConfig m4ac = { 0 };
    int descr_tag, object_type, stream_type, mapped_id;
    int asc_size, err;

    if (codec_config->audio_roll_distance >= 0)
        return AVERROR_INVALIDDATA;

    /* DecoderConfigDescriptor */
    ff_mp4_read_descr(logctx, pb, &descr_tag);
    if (descr_tag != MP4DecConfigDescrTag)
        return AVERROR_INVALIDDATA;

    object_type = avio_r8(pb);
    if (object_type != 0x40)
        return AVERROR_INVALIDDATA;

    /* upper six bits must equal 5, the following bit must be clear */
    stream_type = avio_r8(pb);
    if ((stream_type >> 2) != 5 || (stream_type & 0x02))
        return AVERROR_INVALIDDATA;

    avio_skip(pb, 3); // buffer size db
    avio_skip(pb, 4); // rc_max_rate
    avio_skip(pb, 4); // avg bitrate

    mapped_id = ff_codec_get_id(ff_mp4_obj_type, object_type);
    if (mapped_id && mapped_id != codec_config->codec_id)
        return AVERROR_INVALIDDATA;

    /* DecoderSpecificInfo: must be non-empty and fit in what is left */
    asc_size = ff_mp4_read_descr(logctx, pb, &descr_tag);
    if (descr_tag != MP4DecSpecificDescrTag)
        return AVERROR_INVALIDDATA;
    if (!asc_size || asc_size > (len - avio_tell(pb)))
        return AVERROR_INVALIDDATA;

    // We pad extradata here because avpriv_mpeg4audio_get_config2() needs it.
    codec_config->extradata = av_malloc((size_t)asc_size + AV_INPUT_BUFFER_PADDING_SIZE);
    if (!codec_config->extradata)
        return AVERROR(ENOMEM);

    err = ffio_read_size(pb, codec_config->extradata, asc_size);
    if (err < 0)
        return err;

    codec_config->extradata_size = asc_size;
    memset(codec_config->extradata + asc_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);

    err = avpriv_mpeg4audio_get_config2(&m4ac, codec_config->extradata,
                                        asc_size, 1, logctx);
    if (err < 0)
        return err;

    codec_config->sample_rate = m4ac.sample_rate;

    return 0;
}
/**
 * Parse the FLAC decoder_config() of a Codec Config OBU.
 *
 * Skips the 4-byte METADATA_BLOCK_HEADER, stores every remaining byte as
 * codec_config->extradata and reads the sample rate from the upper 20 bits
 * of the 24-bit big-endian value at extradata offset 10.
 *
 * @param codec_config codec config being filled; audio_roll_distance must be 0
 * @param pb           reader positioned at the start of decoder_config()
 * @param len          total size in bytes of the buffer behind pb
 * @return 0 on success, a negative AVERROR code on failure. On failure
 *         codec_config->extradata may be allocated; codec_config_obu()
 *         releases it.
 */
static int flac_decoder_config(IAMFCodecConfig *codec_config,
                               AVIOContext *pb, int len)
{
    int ret, left;

    if (codec_config->audio_roll_distance)
        return AVERROR_INVALIDDATA;

    avio_skip(pb, 4); // METADATA_BLOCK_HEADER

    left = len - avio_tell(pb);
    if (left < FLAC_STREAMINFO_SIZE)
        return AVERROR_INVALIDDATA;

    codec_config->extradata = av_malloc(left);
    if (!codec_config->extradata)
        return AVERROR(ENOMEM);

    // ffio_read_size() fails on short reads and hands back real I/O errors
    // unchanged, so extradata_size is only ever set to a valid length.
    ret = ffio_read_size(pb, codec_config->extradata, left);
    if (ret < 0)
        return ret;

    codec_config->extradata_size = left;
    codec_config->sample_rate = AV_RB24(codec_config->extradata + 10) >> 4;

    return 0;
}
/**
 * Parse the LPCM decoder_config() of a Codec Config OBU.
 *
 * Reads the endianness flag, the sample size and the sample rate, and picks
 * the matching PCM codec id. The config must end exactly where the buffer
 * behind pb ends.
 *
 * @param codec_config codec config being filled; audio_roll_distance must be 0
 * @param pb           reader positioned at the start of decoder_config()
 * @param len          total size in bytes of the buffer behind pb
 * @return 0 on success, AVERROR_INVALIDDATA on invalid or trailing data
 */
static int ipcm_decoder_config(IAMFCodecConfig *codec_config,
                               AVIOContext *pb, int len)
{
    /* rows: 0 = big endian, 1 = little endian; columns: 16, 24, 32 bits */
    static const enum AVCodecID pcm_ids[2][3] = {
        { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
        { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
    };
    const int endianness = avio_r8(pb);
    const int size_index = avio_r8(pb) / 8 - 2;

    if (codec_config->audio_roll_distance)
        return AVERROR_INVALIDDATA;
    if (endianness > 1 || size_index < 0 || size_index > 2)
        return AVERROR_INVALIDDATA;

    codec_config->codec_id    = pcm_ids[endianness][size_index];
    codec_config->sample_rate = avio_rb32(pb);

    /* anything left over means the config is malformed */
    return len == avio_tell(pb) ? 0 : AVERROR_INVALIDDATA;
}
/**
 * Parse one Codec Config OBU and append it to c->codec_configs.
 *
 * The whole OBU payload is first read into a temporary buffer and parsed
 * through an in-memory AVIOContext. Duplicate codec_config_id values are
 * rejected. Unrecognised codec tags are stored with AV_CODEC_ID_NONE and no
 * decoder config.
 *
 * @param s   logging context
 * @param c   IAMF context receiving the new codec config
 * @param pb  reader positioned at the start of the OBU payload
 * @param len payload size in bytes
 * @return 0 on success, a negative AVERROR code on failure
 */
static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
{
    IAMFCodecConfig **configs, *cfg = NULL;
    FFIOContext mem_ctx;
    AVIOContext *in;
    uint8_t *payload;
    unsigned id, fourcc, frame_samples;
    int16_t roll_distance;
    int ret;

    payload = av_malloc(len);
    if (!payload)
        return AVERROR(ENOMEM);

    ret = avio_read(pb, payload, len);
    if (ret != len) {
        if (ret >= 0)
            ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    ffio_init_context(&mem_ctx, payload, len, 0, NULL, NULL, NULL, NULL);
    in = &mem_ctx.pub;

    id            = ffio_read_leb(in);
    fourcc        = avio_rb32(in);
    frame_samples = ffio_read_leb(in);
    roll_distance = avio_rb16(in);

    /* each codec_config_id may only be defined once */
    for (int i = 0; i < c->nb_codec_configs; i++) {
        if (c->codec_configs[i]->codec_config_id == id) {
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }
    }

    configs = av_realloc_array(c->codec_configs, c->nb_codec_configs + 1,
                               sizeof(*c->codec_configs));
    if (!configs) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    c->codec_configs = configs;

    cfg = av_mallocz(sizeof(*cfg));
    if (!cfg) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    cfg->codec_config_id     = id;
    cfg->nb_samples          = frame_samples;
    cfg->audio_roll_distance = roll_distance;

    /* The codec id is assigned before the per-codec parser runs; the LPCM
     * parser replaces AV_CODEC_ID_NONE with the actual PCM variant. */
    if (fourcc == MKBETAG('O','p','u','s')) {
        cfg->codec_id = AV_CODEC_ID_OPUS;
        ret = opus_decoder_config(cfg, in, len);
    } else if (fourcc == MKBETAG('m','p','4','a')) {
        cfg->codec_id = AV_CODEC_ID_AAC;
        ret = aac_decoder_config(cfg, in, len, s);
    } else if (fourcc == MKBETAG('f','L','a','C')) {
        cfg->codec_id = AV_CODEC_ID_FLAC;
        ret = flac_decoder_config(cfg, in, len);
    } else if (fourcc == MKBETAG('i','p','c','m')) {
        cfg->codec_id = AV_CODEC_ID_NONE;
        ret = ipcm_decoder_config(cfg, in, len);
    } else {
        /* unknown tag: ret keeps the non-negative avio_read() result */
        cfg->codec_id = AV_CODEC_ID_NONE;
    }
    if (ret < 0)
        goto fail;

    /* the frame size must be positive and its product with the roll
     * distance must stay within int range */
    if ((cfg->nb_samples > INT_MAX) || cfg->nb_samples <= 0 ||
        (-cfg->audio_roll_distance > INT_MAX / cfg->nb_samples)) {
        ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    c->codec_configs[c->nb_codec_configs++] = cfg;

    len -= avio_tell(in);
    if (len)
        av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len);

    ret = 0;
fail:
    av_free(payload);
    if (ret < 0 && cfg) {
        av_free(cfg->extradata);
        av_free(cfg);
    }
    return ret;
}
/**
 * Patch the channel count inside a substream's codec extradata in place.
 *
 * The value written comes from codecpar->ch_layout.nb_channels:
 *  - Opus: written to byte 9; the 16/32/16-bit fields at offsets 10, 12 and
 *    16 are byte swapped from big to little endian.
 *  - AAC:  the 4-bit channel configuration following the object type and
 *    sampling frequency fields is replaced.
 *  - FLAC: the 3-bit (channels - 1) field after the 20-bit sample rate is
 *    replaced.
 * Other codec ids are left untouched.
 *
 * @param codecpar parameters whose extradata is rewritten; extradata must be
 *                 followed by readable padding, as allocated by
 *                 audio_element_obu()
 * @return 0 on success, a negative AVERROR code on failure
 */
static int update_extradata(AVCodecParameters *codecpar)
{
    GetBitContext gb;
    PutBitContext pb;
    int ret;

    switch(codecpar->codec_id) {
    case AV_CODEC_ID_OPUS:
        // opus_decoder_config() guarantees at least 19 bytes of extradata.
        AV_WB8(codecpar->extradata   + 9,  codecpar->ch_layout.nb_channels);
        AV_WL16(codecpar->extradata + 10, AV_RB16(codecpar->extradata + 10)); // Byte swap pre-skip
        AV_WL32(codecpar->extradata + 12, AV_RB32(codecpar->extradata + 12)); // Byte swap sample rate
        AV_WL16(codecpar->extradata + 16, AV_RB16(codecpar->extradata + 16)); // Byte swap Output Gain
        break;
    case AV_CODEC_ID_AAC: {
        // Worst case up to and including the channel configuration is
        // 5 + 6 (escape AOT) + 4 + 24 (explicit rate) + 4 = 43 bits, which
        // does not fit in 5 bytes; 6 bytes always do.
        uint8_t buf[6];
        int size = FFMIN(codecpar->extradata_size, (int)sizeof(buf));

        init_put_bits(&pb, buf, sizeof(buf));
        ret = init_get_bits8(&gb, codecpar->extradata, size);
        if (ret < 0)
            return ret;

        ret = get_bits(&gb, 5);
        put_bits(&pb, 5, ret);
        if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it
            put_bits(&pb, 6, get_bits(&gb, 6));
        ret = get_bits(&gb, 4);
        put_bits(&pb, 4, ret);
        if (ret == 0x0f)
            put_bits(&pb, 24, get_bits(&gb, 24));

        skip_bits(&gb, 4);
        put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config

        // Copy whatever is left of the (at most 6 byte) window unchanged.
        // The count is bounded by both the reader and the writer so it can
        // never be negative, and is split because put_bits() cannot take a
        // full 32 bits.
        ret = get_bits_left(&gb);
        if (ret < 0)
            return AVERROR_INVALIDDATA;
        ret = FFMIN(ret, put_bits_left(&pb));
        while (ret >= 32) {
            put_bits32(&pb, get_bits_long(&gb, 32));
            ret -= 32;
        }
        put_bits(&pb, ret, get_bits_long(&gb, ret));
        flush_put_bits(&pb);

        memcpy(codecpar->extradata, buf, put_bytes_output(&pb));
        break;
    }
    case AV_CODEC_ID_FLAC: {
        uint8_t buf[13];

        init_put_bits(&pb, buf, sizeof(buf));
        ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
        if (ret < 0)
            return ret;

        put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
        put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
        put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
        skip_bits(&gb, 3);
        put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1);
        ret = put_bits_left(&pb); // 104 - 103 = 1 remaining bit
        put_bits(&pb, ret, get_bits(&gb, ret));
        flush_put_bits(&pb);

        memcpy(codecpar->extradata, buf, sizeof(buf));
        break;
    }
    default:
        break;
    }

    return 0;
}
/**
 * Parse the scalable channel layout config of a channel-based Audio Element.
 *
 * Reads between 1 and 6 layers. Every layer claims the next substream_count
 * entries of audio_element->substreams; the first coupled_substream_count of
 * them become stereo, the rest mono, and each gets its extradata patched
 * through update_extradata().
 *
 * @param s             logging context (not used here)
 * @param pb            reader positioned at the config
 * @param audio_element element being filled; substreams must already exist
 * @param codec_config  codec config of the element (not used here)
 * @return 0 on success, a negative AVERROR code on failure
 */
static int scalable_channel_layout_config(void *s, AVIOContext *pb,
                                          IAMFAudioElement *audio_element,
                                          const IAMFCodecConfig *codec_config)
{
    const int layer_count = avio_r8(pb) >> 5; // upper 3 bits, lower 5 are reserved
    int next_substream = 0;

    if (layer_count == 0 || layer_count > 6)
        return AVERROR_INVALIDDATA;

    audio_element->layers = av_calloc(layer_count, sizeof(*audio_element->layers));
    if (!audio_element->layers)
        return AVERROR(ENOMEM);
    audio_element->nb_layers = layer_count;

    for (int i = 0; i < layer_count; i++) {
        /* layer header: 4 bits layout, 1 bit output gain, 1 bit recon gain */
        const int header          = avio_r8(pb);
        const int layout_index    = header >> 4;
        const int has_output_gain = header & 0x08;
        int n_substreams, n_coupled, err;
        AVIAMFLayer *layer = av_iamf_audio_element_add_layer(audio_element->element);

        if (!layer)
            return AVERROR(ENOMEM);

        if (header & 0x04)
            layer->flags |= AV_IAMF_LAYER_FLAG_RECON_GAIN;

        n_substreams = avio_r8(pb);
        n_coupled    = avio_r8(pb);

        /* a layer must not claim more substreams than the element has left */
        if (n_substreams + next_substream > audio_element->nb_substreams)
            return AVERROR_INVALIDDATA;

        audio_element->layers[i].substream_count         = n_substreams;
        audio_element->layers[i].coupled_substream_count = n_coupled;

        if (has_output_gain) {
            layer->output_gain_flags = avio_r8(pb) >> 2; // upper 6 bits
            layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
        }

        if (layout_index >= 10)
            layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
                                                  .nb_channels = n_substreams +
                                                                 n_coupled };
        else
            av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[layout_index]);

        for (int j = 0; j < n_substreams; j++) {
            IAMFSubStream *substream = &audio_element->substreams[next_substream];

            next_substream++;
            if (j < n_coupled)
                substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
            else
                substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;

            err = update_extradata(substream->codecpar);
            if (err < 0)
                return err;
        }
    }

    return 0;
}
/**
 * Parse the ambisonics config of a scene-based Audio Element.
 *
 * Mode 0 reads one channel mapping byte per output channel and marks every
 * substream as mono. Mode 1 reads a demixing matrix of
 * (substream_count + coupled_substream_count) * output_channel_count
 * 16-bit entries; the first coupled_substream_count substreams become
 * stereo, the rest mono. In both modes a single layer is created and each
 * substream's extradata is patched through update_extradata().
 *
 * @param s             logging context (not used here)
 * @param pb            reader positioned at the config
 * @param audio_element element being filled; substreams must already exist
 * @param codec_config  codec config of the element (not used here)
 * @return 0 on success, a negative AVERROR code on failure
 */
static int ambisonics_config(void *s, AVIOContext *pb,
                             IAMFAudioElement *audio_element,
                             const IAMFCodecConfig *codec_config)
{
    AVIAMFLayer *layer;
    unsigned ambisonics_mode;
    int output_channel_count, substream_count, order;
    int ret;

    ambisonics_mode = ffio_read_leb(pb);
    // NOTE(review): modes above 1 are accepted here without creating any
    // layer; confirm that users of the audio element cope with nb_layers == 0.
    if (ambisonics_mode > 1)
        return 0;

    output_channel_count = avio_r8(pb);  // C
    substream_count = avio_r8(pb);  // N
    if (audio_element->nb_substreams != substream_count)
        return AVERROR_INVALIDDATA;

    // Zero channels would feed sqrt() a negative value, and converting the
    // resulting NaN to int is undefined behaviour.
    if (output_channel_count == 0)
        return AVERROR_INVALIDDATA;

    order = floor(sqrt(output_channel_count - 1));
    /* incomplete order - some harmonics are missing */
    if ((order + 1) * (order + 1) != output_channel_count)
        return AVERROR_INVALIDDATA;

    audio_element->layers = av_mallocz(sizeof(*audio_element->layers));
    if (!audio_element->layers)
        return AVERROR(ENOMEM);

    audio_element->nb_layers = 1;
    audio_element->layers->substream_count = substream_count;

    layer = av_iamf_audio_element_add_layer(audio_element->element);
    if (!layer)
        return AVERROR(ENOMEM);

    layer->ambisonics_mode = ambisonics_mode;
    if (ambisonics_mode == 0) {
        for (int i = 0; i < substream_count; i++) {
            IAMFSubStream *substream = &audio_element->substreams[i];

            substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;

            ret = update_extradata(substream->codecpar);
            if (ret < 0)
                return ret;
        }

        layer->ch_layout.order = AV_CHANNEL_ORDER_CUSTOM;
        layer->ch_layout.nb_channels = output_channel_count;
        layer->ch_layout.u.map = av_calloc(output_channel_count, sizeof(*layer->ch_layout.u.map));
        if (!layer->ch_layout.u.map)
            return AVERROR(ENOMEM);

        // one channel mapping byte per output channel
        for (int i = 0; i < output_channel_count; i++)
            layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;
    } else {
        int coupled_substream_count = avio_r8(pb);  // M
        int nb_demixing_matrix = substream_count + coupled_substream_count;
        int demixing_matrix_size = nb_demixing_matrix * output_channel_count;

        audio_element->layers->coupled_substream_count = coupled_substream_count;

        layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
        layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix));
        if (!layer->demixing_matrix)
            return AVERROR(ENOMEM);

        // matrix entries are signed 16-bit values scaled by 1/256
        for (int i = 0; i < demixing_matrix_size; i++)
            layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);

        for (int i = 0; i < substream_count; i++) {
            IAMFSubStream *substream = &audio_element->substreams[i];

            substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
                                                                             (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;

            ret = update_extradata(substream->codecpar);
            if (ret < 0)
                return ret;
        }
    }

    return 0;
}
/**
 * Parse one parameter definition and hand the result to the caller.
 *
 * A new AVIAMFParamDefinition is always allocated. If a definition with the
 * same parameter_id is already registered in c->param_definitions, the new
 * one must be byte-identical to it; otherwise a new registry entry is added
 * that points at the same AVIAMFParamDefinition that is returned through
 * out_param_definition.
 *
 * @param s                    logging context
 * @param c                    IAMF context holding the parameter registry
 * @param pb                   reader positioned at the parameter definition
 * @param type                 one of the AV_IAMF_PARAMETER_DEFINITION_* values
 * @param audio_element        element the parameter belongs to; must be
 *                             non-NULL when a demixing subblock is parsed
 * @param out_param_definition receives the newly allocated definition; must
 *                             not be NULL
 * @return 0 on success, a negative AVERROR code on failure
 */
static int param_parse(void *s, IAMFContext *c, AVIOContext *pb,
unsigned int type,
const IAMFAudioElement *audio_element,
AVIAMFParamDefinition **out_param_definition)
{
IAMFParamDefinition *param_definition = NULL;
AVIAMFParamDefinition *param;
unsigned int parameter_id, parameter_rate, mode;
unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
size_t param_size;
parameter_id = ffio_read_leb(pb);
// Look for an already registered definition with this id.
for (int i = 0; i < c->nb_param_definitions; i++)
if (c->param_definitions[i]->param->parameter_id == parameter_id) {
param_definition = c->param_definitions[i];
break;
}
parameter_rate = ffio_read_leb(pb);
// Only the top bit of this byte is used.
mode = avio_r8(pb) >> 7;
// Durations and subblocks are only coded when mode is 0; otherwise they
// keep their zero initial values.
if (mode == 0) {
duration = ffio_read_leb(pb);
if (!duration)
return AVERROR_INVALIDDATA;
constant_subblock_duration = ffio_read_leb(pb);
// A zero constant duration means the subblock count, and later each
// subblock's duration, are coded explicitly.
if (constant_subblock_duration == 0)
nb_subblocks = ffio_read_leb(pb);
else
nb_subblocks = duration / constant_subblock_duration;
}
param = av_iamf_param_definition_alloc(type, nb_subblocks, &param_size);
if (!param)
return AVERROR(ENOMEM);
for (int i = 0; i < nb_subblocks; i++) {
void *subblock = av_iamf_param_definition_get_subblock(param, i);
unsigned int subblock_duration = constant_subblock_duration;
if (constant_subblock_duration == 0)
subblock_duration = ffio_read_leb(pb);
switch (type) {
case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
AVIAMFMixGain *mix = subblock;
mix->subblock_duration = subblock_duration;
break;
}
case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
AVIAMFDemixingInfo *demix = subblock;
demix->subblock_duration = subblock_duration;
// DefaultDemixingInfoParameterData
av_assert0(audio_element);
// dmixp_mode is the top 3 bits of one byte, default_w the top 4 bits of
// the next; default_w is stored on the audio element itself.
demix->dmixp_mode = avio_r8(pb) >> 5;
audio_element->element->default_w = avio_r8(pb) >> 4;
break;
}
case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
AVIAMFReconGain *recon = subblock;
recon->subblock_duration = subblock_duration;
break;
}
default:
// Unknown types are only rejected here, i.e. when there is at least one
// subblock to fill.
av_free(param);
return AVERROR_INVALIDDATA;
}
}
param->parameter_id = parameter_id;
param->parameter_rate = parameter_rate;
param->duration = duration;
param->constant_subblock_duration = constant_subblock_duration;
param->nb_subblocks = nb_subblocks;
if (param_definition) {
// A repeated parameter_id must describe exactly the same definition:
// same allocation size and same bytes.
if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
av_log(s, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n", parameter_id);
av_free(param);
return AVERROR_INVALIDDATA;
}
} else {
// First time this id is seen: grow the registry and add an entry.
IAMFParamDefinition **tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1,
sizeof(*c->param_definitions));
if (!tmp) {
av_free(param);
return AVERROR(ENOMEM);
}
c->param_definitions = tmp;
param_definition = av_mallocz(sizeof(*param_definition));
if (!param_definition) {
av_free(param);
return AVERROR(ENOMEM);
}
// The registry entry stores the same pointer that is returned to the
// caller below; no copy is made.
param_definition->param = param;
// The stored mode is the logical inverse of the coded mode bit.
param_definition->mode = !mode;
param_definition->param_size = param_size;
param_definition->audio_element = audio_element;
c->param_definitions[c->nb_param_definitions++] = param_definition;
}
av_assert0(out_param_definition);
*out_param_definition = param;
return 0;
}
/**
 * Parse one Audio Element OBU and append it to c->audio_elements.
 *
 * The whole OBU payload is first read into a temporary buffer and parsed
 * through an in-memory AVIOContext. Elements with an unknown
 * audio_element_type, or that reference a codec config whose codec id is
 * AV_CODEC_ID_NONE, are skipped and 0 is returned. For AAC, FLAC and Opus
 * every substream receives its own padded copy of the codec config
 * extradata, which the layout parsers then patch per substream.
 *
 * At most one demixing and one recon gain parameter definition is accepted
 * per element: each parsed definition is stored directly in the element, so
 * a second one of the same type would overwrite, and thereby leak, the first.
 *
 * @param s   logging context
 * @param c   IAMF context receiving the new audio element
 * @param pb  reader positioned at the start of the OBU payload
 * @param len payload size in bytes
 * @return 0 on success or when the element is skipped, a negative AVERROR
 *         code on failure
 */
static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
{
    const IAMFCodecConfig *codec_config;
    AVIAMFAudioElement *element;
    IAMFAudioElement **tmp, *audio_element = NULL;
    FFIOContext b;
    AVIOContext *pbc;
    uint8_t *buf;
    unsigned audio_element_id, nb_substreams, codec_config_id, num_parameters;
    int audio_element_type, ret;

    buf = av_malloc(len);
    if (!buf)
        return AVERROR(ENOMEM);

    ret = avio_read(pb, buf, len);
    if (ret != len) {
        if (ret >= 0)
            ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
    pbc = &b.pub;

    audio_element_id = ffio_read_leb(pbc);

    for (int i = 0; i < c->nb_audio_elements; i++)
        if (c->audio_elements[i]->audio_element_id == audio_element_id) {
            av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }

    audio_element_type = avio_r8(pbc) >> 5;
    if (audio_element_type > AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
        av_log(s, AV_LOG_DEBUG, "Unknown audio_element_type referenced in an audio element. Ignoring\n");
        ret = 0;
        goto fail;
    }

    codec_config_id = ffio_read_leb(pbc);

    codec_config = ff_iamf_get_codec_config(c, codec_config_id);
    if (!codec_config) {
        av_log(s, AV_LOG_ERROR, "Non existant codec config id %d referenced in an audio element\n", codec_config_id);
        ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    if (codec_config->codec_id == AV_CODEC_ID_NONE) {
        av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
        ret = 0;
        goto fail;
    }

    tmp = av_realloc_array(c->audio_elements, c->nb_audio_elements + 1, sizeof(*c->audio_elements));
    if (!tmp) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    c->audio_elements = tmp;

    audio_element = av_mallocz(sizeof(*audio_element));
    if (!audio_element) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    nb_substreams = ffio_read_leb(pbc);
    audio_element->codec_config_id = codec_config_id;
    audio_element->audio_element_id = audio_element_id;
    audio_element->substreams = av_calloc(nb_substreams, sizeof(*audio_element->substreams));
    if (!audio_element->substreams) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    // Only published once the array exists.
    audio_element->nb_substreams = nb_substreams;

    element = audio_element->element = av_iamf_audio_element_alloc();
    if (!element) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    audio_element->celement = element;

    element->audio_element_type = audio_element_type;

    for (int i = 0; i < audio_element->nb_substreams; i++) {
        IAMFSubStream *substream = &audio_element->substreams[i];

        substream->codecpar = avcodec_parameters_alloc();
        if (!substream->codecpar) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        substream->audio_substream_id = ffio_read_leb(pbc);

        substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
        substream->codecpar->codec_id   = codec_config->codec_id;
        substream->codecpar->frame_size = codec_config->nb_samples;
        substream->codecpar->sample_rate = codec_config->sample_rate;
        substream->codecpar->seek_preroll = -codec_config->audio_roll_distance * codec_config->nb_samples;

        switch(substream->codecpar->codec_id) {
        case AV_CODEC_ID_AAC:
        case AV_CODEC_ID_FLAC:
        case AV_CODEC_ID_OPUS:
            // Private, zero padded copy: update_extradata() rewrites it
            // per substream.
            substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!substream->codecpar->extradata) {
                ret = AVERROR(ENOMEM);
                goto fail;
            }
            memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
            memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
            substream->codecpar->extradata_size = codec_config->extradata_size;
            break;
        }
    }

    num_parameters = ffio_read_leb(pbc);
    if (num_parameters && audio_element_type != 0) {
        av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
                                " for Scene representations\n", num_parameters);
        ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    for (int i = 0; i < num_parameters; i++) {
        unsigned type;

        type = ffio_read_leb(pbc);
        if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
            ret = AVERROR_INVALIDDATA;
        else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
            // A second definition would overwrite and leak the first one.
            if (element->demixing_info) {
                av_log(s, AV_LOG_ERROR, "Duplicate demixing parameter definition in audio element %u\n",
                       audio_element_id);
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
            ret = param_parse(s, c, pbc, type, audio_element, &element->demixing_info);
        } else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
            if (element->recon_gain_info) {
                av_log(s, AV_LOG_ERROR, "Duplicate recon gain parameter definition in audio element %u\n",
                       audio_element_id);
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
            ret = param_parse(s, c, pbc, type, audio_element, &element->recon_gain_info);
        } else {
            // Unknown parameter types carry their size and are skipped.
            unsigned param_definition_size = ffio_read_leb(pbc);
            avio_skip(pbc, param_definition_size);
        }
        if (ret < 0)
            goto fail;
    }

    if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
        ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config);
        if (ret < 0)
            goto fail;
    } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
        ret = ambisonics_config(s, pbc, audio_element, codec_config);
        if (ret < 0)
            goto fail;
    } else {
        av_assert0(0);
    }

    c->audio_elements[c->nb_audio_elements++] = audio_element;

    len -= avio_tell(pbc);
    if (len)
        av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len);

    ret = 0;
fail:
    av_free(buf);
    if (ret < 0)
        ff_iamf_free_audio_element(&audio_element);
    return ret;
}
/**
 * Read one string from pb into a newly allocated copy.
 *
 * The string is read through a 128-byte scratch buffer.
 *
 * @param pb    reader positioned at the string
 * @param label receives the av_strdup()ed string, or NULL if duplication fails;
 *              left untouched on read errors
 * @return 0 on success, pb->error if the reader reported one,
 *         AVERROR_INVALIDDATA if the end of input was reached,
 *         AVERROR(ENOMEM) if the copy could not be allocated
 */
static int label_string(AVIOContext *pb, char **label)
{
    uint8_t text[128];
    char *copy;

    avio_get_str(pb, sizeof(text), text, sizeof(text));

    if (pb->error)
        return pb->error;
    if (pb->eof_reached)
        return AVERROR_INVALIDDATA;

    copy   = av_strdup(text);
    *label = copy;

    return copy ? 0 : AVERROR(ENOMEM);
}
/**
 * Parse one Mix Presentation OBU and append it to c->mix_presentations.
 *
 * The whole OBU payload is first read into a temporary buffer and parsed
 * through an in-memory AVIOContext. The OBU carries count_label language
 * labels, one presentation annotation per label, and a list of submixes.
 * Each submix lists its audio elements (which must already be known to c),
 * their mix gain parameters, an output mix gain and a list of layouts with
 * loudness information.
 *
 * @param s   logging context
 * @param c   IAMF context receiving the new mix presentation
 * @param pb  reader positioned at the start of the OBU payload
 * @param len payload size in bytes
 * @return 0 on success, a negative AVERROR code on failure
 */
static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
{
    AVIAMFMixPresentation *mix;
    IAMFMixPresentation **tmp, *mix_presentation = NULL;
    FFIOContext b;
    AVIOContext *pbc;
    uint8_t *buf;
    unsigned nb_submixes, mix_presentation_id;
    int ret;

    buf = av_malloc(len);
    if (!buf)
        return AVERROR(ENOMEM);

    ret = avio_read(pb, buf, len);
    if (ret != len) {
        if (ret >= 0)
            ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
    pbc = &b.pub;

    mix_presentation_id = ffio_read_leb(pbc);

    for (int i = 0; i < c->nb_mix_presentations; i++)
        if (c->mix_presentations[i]->mix_presentation_id == mix_presentation_id) {
            av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }

    tmp = av_realloc_array(c->mix_presentations, c->nb_mix_presentations + 1, sizeof(*c->mix_presentations));
    if (!tmp) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    c->mix_presentations = tmp;

    mix_presentation = av_mallocz(sizeof(*mix_presentation));
    if (!mix_presentation) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    mix_presentation->mix_presentation_id = mix_presentation_id;
    mix = mix_presentation->mix = av_iamf_mix_presentation_alloc();
    if (!mix) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    mix_presentation->cmix = mix;

    mix_presentation->count_label = ffio_read_leb(pbc);
    mix_presentation->language_label = av_calloc(mix_presentation->count_label,
                                                 sizeof(*mix_presentation->language_label));
    if (!mix_presentation->language_label) {
        // Keep count and array consistent: the object is passed to
        // ff_iamf_free_mix_presentation() below, which presumably walks
        // count_label entries of language_label.
        mix_presentation->count_label = 0;
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    for (int i = 0; i < mix_presentation->count_label; i++) {
        ret = label_string(pbc, &mix_presentation->language_label[i]);
        if (ret < 0)
            goto fail;
    }

    // One presentation annotation per language label, keyed by that label.
    for (int i = 0; i < mix_presentation->count_label; i++) {
        char *annotation = NULL;
        ret = label_string(pbc, &annotation);
        if (ret < 0)
            goto fail;
        ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation,
                          AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
        if (ret < 0)
            goto fail;
    }

    nb_submixes = ffio_read_leb(pbc);
    for (int i = 0; i < nb_submixes; i++) {
        AVIAMFSubmix *sub_mix;
        unsigned nb_elements, nb_layouts;

        sub_mix = av_iamf_mix_presentation_add_submix(mix);
        if (!sub_mix) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        nb_elements = ffio_read_leb(pbc);
        for (int j = 0; j < nb_elements; j++) {
            AVIAMFSubmixElement *submix_element;
            IAMFAudioElement *audio_element = NULL;
            unsigned int rendering_config_extension_size;

            submix_element = av_iamf_submix_add_element(sub_mix);
            if (!submix_element) {
                ret = AVERROR(ENOMEM);
                goto fail;
            }

            submix_element->audio_element_id = ffio_read_leb(pbc);

            // The referenced audio element must have been parsed already.
            for (int k = 0; k < c->nb_audio_elements; k++)
                if (c->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
                    audio_element = c->audio_elements[k];
                    break;
                }

            if (!audio_element) {
                av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
                       submix_element->audio_element_id, mix_presentation_id);
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }

            for (int k = 0; k < mix_presentation->count_label; k++) {
                char *annotation = NULL;
                ret = label_string(pbc, &annotation);
                if (ret < 0)
                    goto fail;
                ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation,
                                  AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
                if (ret < 0)
                    goto fail;
            }

            submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6;

            rendering_config_extension_size = ffio_read_leb(pbc);
            avio_skip(pbc, rendering_config_extension_size);

            ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
                              NULL,
                              &submix_element->element_mix_config);
            if (ret < 0)
                goto fail;
            // gains are signed 16-bit values scaled by 1/256
            submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
        }

        ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
        if (ret < 0)
            goto fail;
        sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);

        nb_layouts = ffio_read_leb(pbc);
        for (int j = 0; j < nb_layouts; j++) {
            AVIAMFSubmixLayout *submix_layout;
            int info_type;
            int byte = avio_r8(pbc);

            submix_layout = av_iamf_submix_add_layout(sub_mix);
            if (!submix_layout) {
                ret = AVERROR(ENOMEM);
                goto fail;
            }

            submix_layout->layout_type = byte >> 6;
            if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
                submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
                av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
                       submix_layout->layout_type, mix_presentation_id);
                ret = AVERROR_INVALIDDATA;
                goto fail;
            }
            if (submix_layout->layout_type == 2) {
                int sound_system;
                sound_system = (byte >> 2) & 0xF;
                if (sound_system >= FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
                    ret = AVERROR_INVALIDDATA;
                    goto fail;
                }
                av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout);
            }

            // info_type bit 0: true peak present, bit 1: anchored loudness
            // present, bits 2-7: a sized extension follows and is skipped.
            info_type = avio_r8(pbc);
            submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
            submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);

            if (info_type & 1)
                submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
            if (info_type & 2) {
                unsigned int num_anchored_loudness = avio_r8(pbc);

                for (int k = 0; k < num_anchored_loudness; k++) {
                    unsigned int anchor_element = avio_r8(pbc);
                    AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
                    if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE)
                        submix_layout->dialogue_anchored_loudness = anchored_loudness;
                    else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM)
                        submix_layout->album_anchored_loudness = anchored_loudness;
                    else
                        av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n");
                }
            }

            if (info_type & 0xFC) {
                unsigned int info_type_size = ffio_read_leb(pbc);
                avio_skip(pbc, info_type_size);
            }
        }
    }

    c->mix_presentations[c->nb_mix_presentations++] = mix_presentation;

    len -= avio_tell(pbc);
    if (len)
        av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len);

    ret = 0;
fail:
    av_free(buf);
    if (ret < 0)
        ff_iamf_free_mix_presentation(&mix_presentation);
    return ret;
}
/**
 * Parse an OBU header from a byte buffer.
 *
 * At most MAX_IAMF_OBU_HEADER_SIZE bytes of buf are examined. The header
 * consists of one byte of type and flags, the LEB-coded obu_size, and the
 * optional trimming and extension fields, which are counted in obu_size.
 *
 * @param buf             start of the OBU
 * @param buf_size        number of readable bytes in buf
 * @param obu_size        on success, the number of payload bytes that follow
 *                        the complete header
 * @param start_pos       on success, the offset of the payload from buf
 * @param type            receives the OBU type
 * @param skip_samples    if non-NULL, receives num_samples_to_trim_at_start
 *                        (0 when no trimming info is coded)
 * @param discard_padding if non-NULL, receives num_samples_to_trim_at_end
 *                        (0 when no trimming info is coded)
 * @return the total OBU size in bytes (header plus payload) on success,
 *         a negative AVERROR code on failure. Output parameters may have
 *         been written even when an error is returned.
 */
int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
                             unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
                             unsigned *skip_samples, unsigned *discard_padding)
{
    GetBitContext gb;
    int ret, extension_flag, trimming, start, consumed;
    unsigned skip = 0, discard = 0;
    unsigned size;

    ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
    if (ret < 0)
        return ret;

    *type          = get_bits(&gb, 5);
    /*redundant      =*/ get_bits1(&gb);
    trimming       = get_bits1(&gb);
    extension_flag = get_bits1(&gb);

    *obu_size = get_leb(&gb);
    if (*obu_size > INT_MAX)
        return AVERROR_INVALIDDATA;

    // Everything after this point is counted in obu_size.
    start = get_bits_count(&gb) / 8;

    if (trimming) {
        discard = get_leb(&gb); // num_samples_to_trim_at_end
        skip = get_leb(&gb); // num_samples_to_trim_at_start
    }

    if (skip_samples)
        *skip_samples = skip;
    if (discard_padding)
        *discard_padding = discard;

    if (extension_flag) {
        unsigned int extension_bytes;
        extension_bytes = get_leb(&gb);
        if (extension_bytes > INT_MAX / 8)
            return AVERROR_INVALIDDATA;
        skip_bits_long(&gb, extension_bytes * 8);
    }

    if (get_bits_left(&gb) < 0)
        return AVERROR_INVALIDDATA;

    // The trimming/extension bytes just parsed must fit inside the declared
    // obu_size, otherwise the subtraction below would wrap around.
    consumed = get_bits_count(&gb) / 8 - start;
    if (*obu_size < (unsigned)consumed)
        return AVERROR_INVALIDDATA;

    size = *obu_size + start;
    if (size > INT_MAX)
        return AVERROR_INVALIDDATA;

    *obu_size -= consumed;
    *start_pos = size - *obu_size;

    return size;
}
/**
 * Read descriptor OBUs from pb into c.
 *
 * OBUs are consumed one after another. Codec Config, Audio Element and Mix
 * Presentation OBUs are parsed; other descriptor types are skipped. Reading
 * stops successfully when max_size bytes have been consumed, or when an OBU
 * whose type lies in [IAMF_OBU_IA_PARAMETER_BLOCK,
 * IAMF_OBU_IA_SEQUENCE_HEADER) is found, in which case pb is rewound to the
 * start of that OBU.
 *
 * @param c        IAMF context to fill
 * @param pb       reader positioned at the first OBU
 * @param max_size maximum number of bytes that may be consumed
 * @param log_ctx  logging context
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
                                int max_size, void *log_ctx)
{
    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    int ret;

    while (1) {
        unsigned obu_size;
        enum IAMF_OBU_Type type;
        int start_pos, len, size;

        // The header is peeked at and then (partially) rewound, so make sure
        // seeking back over it is possible.
        if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
            return ret;
        size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
        if (size < 0)
            return size;
        memset(header + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);

        len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL);
        if (len < 0 || obu_size > max_size) {
            av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu header\n");
            avio_seek(pb, -size, SEEK_CUR);
            // An oversized OBU has a valid (positive) len; it must still be
            // reported as an error, not as success.
            return len < 0 ? len : AVERROR_INVALIDDATA;
        }

        // First non-descriptor OBU: leave it unread for the caller.
        if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) {
            avio_seek(pb, -size, SEEK_CUR);
            break;
        }

        // Rewind to the first payload byte of this OBU.
        avio_seek(pb, -(size - start_pos), SEEK_CUR);
        switch (type) {
        case IAMF_OBU_IA_CODEC_CONFIG:
            ret = codec_config_obu(log_ctx, c, pb, obu_size);
            break;
        case IAMF_OBU_IA_AUDIO_ELEMENT:
            ret = audio_element_obu(log_ctx, c, pb, obu_size);
            break;
        case IAMF_OBU_IA_MIX_PRESENTATION:
            ret = mix_presentation_obu(log_ctx, c, pb, obu_size);
            break;
        default: {
            int64_t offset = avio_skip(pb, obu_size);
            if (offset < 0)
                ret = offset;
            break;
        }
        }
        if (ret < 0) {
            av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu type %d\n", type);
            return ret;
        }
        max_size -= obu_size + start_pos;
        if (max_size < 0)
            return AVERROR_INVALIDDATA;
        if (!max_size)
            break;
    }

    return 0;
}