mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-03-23 04:24:35 +02:00
matroska: redo seekhead handling
In particular, this reads chained seekheads. This makes seeking faster in files which have the index indirectly linked through 2 seekheads. As a side-effect, this warns when level-1 (top-level) elements are read multiple times (other than seekheads, clusters, and void/crc). Such duplicated elements are not valid and are likely to break everything. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
a5c9befbf4
commit
cac2295b21
@ -71,6 +71,7 @@ typedef enum {
|
|||||||
EBML_UTF8,
|
EBML_UTF8,
|
||||||
EBML_BIN,
|
EBML_BIN,
|
||||||
EBML_NEST,
|
EBML_NEST,
|
||||||
|
EBML_LEVEL1,
|
||||||
EBML_PASS,
|
EBML_PASS,
|
||||||
EBML_STOP,
|
EBML_STOP,
|
||||||
EBML_SINT,
|
EBML_SINT,
|
||||||
@ -252,6 +253,12 @@ typedef struct {
|
|||||||
EbmlList blocks;
|
EbmlList blocks;
|
||||||
} MatroskaCluster;
|
} MatroskaCluster;
|
||||||
|
|
||||||
|
/* Bookkeeping entry for one level1 element of the segment. */
typedef struct {
|
||||||
|
uint64_t id; /* EBML ID of the level1 element this entry tracks */
|
||||||
|
uint64_t pos; /* position copied from the SeekHead entry; segment_start is added to it before seeking */
|
||||||
|
int parsed; /* set to 1 once the element has been parsed */
|
||||||
|
} MatroskaLevel1Element;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
AVFormatContext *ctx;
|
AVFormatContext *ctx;
|
||||||
|
|
||||||
@ -290,6 +297,10 @@ typedef struct {
|
|||||||
/* File has a CUES element, but we defer parsing until it is needed. */
|
/* File has a CUES element, but we defer parsing until it is needed. */
|
||||||
int cues_parsing_deferred;
|
int cues_parsing_deferred;
|
||||||
|
|
||||||
|
/* Level1 elements and whether they were read yet */
|
||||||
|
MatroskaLevel1Element level1_elems[64];
|
||||||
|
int num_level1_elems;
|
||||||
|
|
||||||
int current_cluster_num_blocks;
|
int current_cluster_num_blocks;
|
||||||
int64_t current_cluster_pos;
|
int64_t current_cluster_pos;
|
||||||
MatroskaCluster current_cluster;
|
MatroskaCluster current_cluster;
|
||||||
@ -551,13 +562,13 @@ static EbmlSyntax matroska_seekhead[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static EbmlSyntax matroska_segment[] = {
|
static EbmlSyntax matroska_segment[] = {
|
||||||
{ MATROSKA_ID_INFO, EBML_NEST, 0, 0, { .n = matroska_info } },
|
{ MATROSKA_ID_INFO, EBML_LEVEL1, 0, 0, { .n = matroska_info } },
|
||||||
{ MATROSKA_ID_TRACKS, EBML_NEST, 0, 0, { .n = matroska_tracks } },
|
{ MATROSKA_ID_TRACKS, EBML_LEVEL1, 0, 0, { .n = matroska_tracks } },
|
||||||
{ MATROSKA_ID_ATTACHMENTS, EBML_NEST, 0, 0, { .n = matroska_attachments } },
|
{ MATROSKA_ID_ATTACHMENTS, EBML_LEVEL1, 0, 0, { .n = matroska_attachments } },
|
||||||
{ MATROSKA_ID_CHAPTERS, EBML_NEST, 0, 0, { .n = matroska_chapters } },
|
{ MATROSKA_ID_CHAPTERS, EBML_LEVEL1, 0, 0, { .n = matroska_chapters } },
|
||||||
{ MATROSKA_ID_CUES, EBML_NEST, 0, 0, { .n = matroska_index } },
|
{ MATROSKA_ID_CUES, EBML_LEVEL1, 0, 0, { .n = matroska_index } },
|
||||||
{ MATROSKA_ID_TAGS, EBML_NEST, 0, 0, { .n = matroska_tags } },
|
{ MATROSKA_ID_TAGS, EBML_LEVEL1, 0, 0, { .n = matroska_tags } },
|
||||||
{ MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, { .n = matroska_seekhead } },
|
{ MATROSKA_ID_SEEKHEAD, EBML_LEVEL1, 0, 0, { .n = matroska_seekhead } },
|
||||||
{ MATROSKA_ID_CLUSTER, EBML_STOP },
|
{ MATROSKA_ID_CLUSTER, EBML_STOP },
|
||||||
{ 0 }
|
{ 0 }
|
||||||
};
|
};
|
||||||
@ -976,6 +987,42 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate and return the entry for the level1 element with the given ID. If
|
||||||
|
* an entry already exists, return the existing entry.
|
||||||
|
*/
|
||||||
|
static MatroskaLevel1Element *matroska_find_level1_elem(MatroskaDemuxContext *matroska,
|
||||||
|
uint32_t id)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
MatroskaLevel1Element *elem;
|
||||||
|
|
||||||
|
// Some files link to all clusters; useless.
|
||||||
|
if (id == MATROSKA_ID_CLUSTER)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
// There can be multiple seekheads.
|
||||||
|
if (id != MATROSKA_ID_SEEKHEAD) {
|
||||||
|
for (i = 0; i < matroska->num_level1_elems; i++) {
|
||||||
|
if (matroska->level1_elems[i].id == id)
|
||||||
|
return &matroska->level1_elems[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only a completely broken file would have more elements.
|
||||||
|
// It also provides a low-effort way to escape from circular seekheads
|
||||||
|
// (every iteration will add a level1 entry).
|
||||||
|
if (matroska->num_level1_elems >= FF_ARRAY_ELEMS(matroska->level1_elems)) {
|
||||||
|
av_log(matroska->ctx, AV_LOG_ERROR, "Too many level1 elements or circular seekheads.\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
elem = &matroska->level1_elems[matroska->num_level1_elems++];
|
||||||
|
*elem = (MatroskaLevel1Element){.id = id};
|
||||||
|
|
||||||
|
return elem;
|
||||||
|
}
|
||||||
|
|
||||||
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
|
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
|
||||||
EbmlSyntax *syntax, void *data)
|
EbmlSyntax *syntax, void *data)
|
||||||
{
|
{
|
||||||
@ -994,6 +1041,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
|
|||||||
uint64_t length;
|
uint64_t length;
|
||||||
int res;
|
int res;
|
||||||
void *newelem;
|
void *newelem;
|
||||||
|
MatroskaLevel1Element *level1_elem;
|
||||||
|
|
||||||
data = (char *) data + syntax->data_offset;
|
data = (char *) data + syntax->data_offset;
|
||||||
if (syntax->list_elem_size) {
|
if (syntax->list_elem_size) {
|
||||||
@ -1036,11 +1084,20 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
|
|||||||
case EBML_BIN:
|
case EBML_BIN:
|
||||||
res = ebml_read_binary(pb, length, data);
|
res = ebml_read_binary(pb, length, data);
|
||||||
break;
|
break;
|
||||||
|
case EBML_LEVEL1:
|
||||||
case EBML_NEST:
|
case EBML_NEST:
|
||||||
if ((res = ebml_read_master(matroska, length)) < 0)
|
if ((res = ebml_read_master(matroska, length)) < 0)
|
||||||
return res;
|
return res;
|
||||||
if (id == MATROSKA_ID_SEGMENT)
|
if (id == MATROSKA_ID_SEGMENT)
|
||||||
matroska->segment_start = avio_tell(matroska->ctx->pb);
|
matroska->segment_start = avio_tell(matroska->ctx->pb);
|
||||||
|
if (id == MATROSKA_ID_CUES)
|
||||||
|
matroska->cues_parsing_deferred = 0;
|
||||||
|
if (syntax->type == EBML_LEVEL1 &&
|
||||||
|
(level1_elem = matroska_find_level1_elem(matroska, syntax->id))) {
|
||||||
|
if (level1_elem->parsed)
|
||||||
|
av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
|
||||||
|
level1_elem->parsed = 1;
|
||||||
|
}
|
||||||
return ebml_parse_nest(matroska, syntax->def.n, data);
|
return ebml_parse_nest(matroska, syntax->def.n, data);
|
||||||
case EBML_PASS:
|
case EBML_PASS:
|
||||||
return ebml_parse_id(matroska, syntax->def.n, id, data);
|
return ebml_parse_id(matroska, syntax->def.n, id, data);
|
||||||
@ -1071,6 +1128,7 @@ static void ebml_free(EbmlSyntax *syntax, void *data)
|
|||||||
case EBML_BIN:
|
case EBML_BIN:
|
||||||
av_freep(&((EbmlBin *) data_off)->data);
|
av_freep(&((EbmlBin *) data_off)->data);
|
||||||
break;
|
break;
|
||||||
|
case EBML_LEVEL1:
|
||||||
case EBML_NEST:
|
case EBML_NEST:
|
||||||
if (syntax[i].list_elem_size) {
|
if (syntax[i].list_elem_size) {
|
||||||
EbmlList *list = data_off;
|
EbmlList *list = data_off;
|
||||||
@ -1356,24 +1414,17 @@ static void matroska_convert_tags(AVFormatContext *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
|
static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
|
||||||
int idx)
|
uint64_t pos)
|
||||||
{
|
{
|
||||||
EbmlList *seekhead_list = &matroska->seekhead;
|
|
||||||
uint32_t level_up = matroska->level_up;
|
uint32_t level_up = matroska->level_up;
|
||||||
uint32_t saved_id = matroska->current_id;
|
uint32_t saved_id = matroska->current_id;
|
||||||
MatroskaSeekhead *seekhead = seekhead_list->elem;
|
|
||||||
int64_t before_pos = avio_tell(matroska->ctx->pb);
|
int64_t before_pos = avio_tell(matroska->ctx->pb);
|
||||||
MatroskaLevel level;
|
MatroskaLevel level;
|
||||||
int64_t offset;
|
int64_t offset;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (idx >= seekhead_list->nb_elem ||
|
|
||||||
seekhead[idx].id == MATROSKA_ID_SEEKHEAD ||
|
|
||||||
seekhead[idx].id == MATROSKA_ID_CLUSTER)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* seek */
|
/* seek */
|
||||||
offset = seekhead[idx].pos + matroska->segment_start;
|
offset = pos + matroska->segment_start;
|
||||||
if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) {
|
if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) {
|
||||||
/* We don't want to lose our seekhead level, so we add
|
/* We don't want to lose our seekhead level, so we add
|
||||||
* a dummy. This is a crude hack. */
|
* a dummy. This is a crude hack. */
|
||||||
@ -1410,37 +1461,35 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
|
|||||||
static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
|
static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
|
||||||
{
|
{
|
||||||
EbmlList *seekhead_list = &matroska->seekhead;
|
EbmlList *seekhead_list = &matroska->seekhead;
|
||||||
int64_t before_pos = avio_tell(matroska->ctx->pb);
|
|
||||||
int i;
|
int i;
|
||||||
int nb_elem;
|
|
||||||
|
|
||||||
// we should not do any seeking in the streaming case
|
// we should not do any seeking in the streaming case
|
||||||
if (!matroska->ctx->pb->seekable ||
|
if (!matroska->ctx->pb->seekable ||
|
||||||
(matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
|
(matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// do not read entries that are added while parsing seekhead entries
|
for (i = 0; i < seekhead_list->nb_elem; i++) {
|
||||||
nb_elem = seekhead_list->nb_elem;
|
MatroskaSeekhead *seekheads = seekhead_list->elem;
|
||||||
|
uint32_t id = seekheads[i].id;
|
||||||
|
uint64_t pos = seekheads[i].pos;
|
||||||
|
|
||||||
for (i = 0; i < nb_elem; i++) {
|
MatroskaLevel1Element *elem = matroska_find_level1_elem(matroska, id);
|
||||||
MatroskaSeekhead *seekhead = seekhead_list->elem;
|
if (!elem || elem->parsed)
|
||||||
if (seekhead[i].pos <= before_pos)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
elem->pos = pos;
|
||||||
|
|
||||||
// defer cues parsing until we actually need cue data.
|
// defer cues parsing until we actually need cue data.
|
||||||
if (seekhead[i].id == MATROSKA_ID_CUES) {
|
if (id == MATROSKA_ID_CUES)
|
||||||
matroska->cues_parsing_deferred = 1;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
if (matroska_parse_seekhead_entry(matroska, i) < 0) {
|
if (matroska_parse_seekhead_entry(matroska, pos) < 0) {
|
||||||
// mark index as broken
|
// mark index as broken
|
||||||
matroska->cues_parsing_deferred = -1;
|
matroska->cues_parsing_deferred = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (nb_elem != seekhead_list->nb_elem) {
|
elem->parsed = 1;
|
||||||
avpriv_request_sample(matroska->ctx, "recursive SeekHead elements");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1474,17 +1523,18 @@ static void matroska_add_index_entries(MatroskaDemuxContext *matroska)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
|
static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
|
||||||
EbmlList *seekhead_list = &matroska->seekhead;
|
|
||||||
MatroskaSeekhead *seekhead = seekhead_list->elem;
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < seekhead_list->nb_elem; i++)
|
for (i = 0; i < matroska->num_level1_elems; i++) {
|
||||||
if (seekhead[i].id == MATROSKA_ID_CUES)
|
MatroskaLevel1Element *elem = &matroska->level1_elems[i];
|
||||||
|
if (elem->id == MATROSKA_ID_CUES && !elem->parsed) {
|
||||||
|
if (matroska_parse_seekhead_entry(matroska, elem->pos) < 0)
|
||||||
|
matroska->cues_parsing_deferred = -1;
|
||||||
|
elem->parsed = 1;
|
||||||
break;
|
break;
|
||||||
av_assert1(i <= seekhead_list->nb_elem);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (matroska_parse_seekhead_entry(matroska, i) < 0)
|
|
||||||
matroska->cues_parsing_deferred = -1;
|
|
||||||
matroska_add_index_entries(matroska);
|
matroska_add_index_entries(matroska);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2014,6 +2064,7 @@ static int matroska_read_header(AVFormatContext *s)
|
|||||||
int i, j, res;
|
int i, j, res;
|
||||||
|
|
||||||
matroska->ctx = s;
|
matroska->ctx = s;
|
||||||
|
matroska->cues_parsing_deferred = 1;
|
||||||
|
|
||||||
/* First read the EBML header. */
|
/* First read the EBML header. */
|
||||||
if (ebml_parse(matroska, ebml_syntax, &ebml) ||
|
if (ebml_parse(matroska, ebml_syntax, &ebml) ||
|
||||||
|
Loading…
x
Reference in New Issue
Block a user