[FFmpeg-devel] [PATCH 23/37] avformat/matroskadec: Redo level handling
Andreas Rheinhardt
andreas.rheinhardt at gmail.com
Fri May 17 01:30:07 EEST 2019
This commit changes how levels are handled: If the level used for
ebml_parse ends directly after an element that has been consumed, then
ebml_parse ends the level itself (and any known-length levels that end
there as well) and informs the caller via the return value; if the
current level is of unknown-length, then the level is ended as soon as
an element that is not valid on the current level, but on a higher
level is encountered (or if EOF has been encountered).
This is designed for situations where one wants to parse master elements
incrementally, i.e. not in one go via ebml_parse_nest.
The (incremental) parsing of clusters still mixes levels by using a
syntax list that contains elements from different levels and the level
is still ended manually via a call to ebml_level_end if the last cluster
was an unknown-length cluster (known-length clusters are already ended
when their last element is read), but only if the next element is a
cluster, too. A different level 1 element following an unknown-length
cluster will currently simply be presumed to be part of the earlier
cluster. Fixing this will be done in a future patch. The modifications
to matroska_parse_cluster contained in this patch are only intended not
to cause regressions.
Nevertheless, the fact that known-length levels are automatically ended
in ebml_parse when their last element has been read already fixes a bogus
error message introduced in 9326117b that was emitted when a known-length
cluster is followed by another level 1 element other than a cluster in
which case the cluster's level was not ended (which only happened when
a new cluster has been encountered) so that the length check (introduced
in 9326117b) failed for the level 1 element as it is of course not
contained in the previous cluster. Most Matroska files were affected by
this.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
---
libavformat/matroskadec.c | 103 ++++++++++++++++++++++++++++++--------
1 file changed, 82 insertions(+), 21 deletions(-)
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 31c70c84d4..97e25c4c99 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -71,6 +71,8 @@
#define EBML_UNKNOWN_LENGTH UINT64_MAX /* EBML unknown length, in uint64_t */
#define NEEDS_CHECKING 2 /* Indicates that some error checks
* still need to be performed */
+#define LEVEL_ENDED 3 /* return value of ebml_parse when the
+ * syntax level used for parsing ended. */
typedef enum {
EBML_NONE,
@@ -1083,7 +1085,7 @@ static EbmlSyntax *ebml_parse_id(EbmlSyntax *syntax, uint32_t id)
static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
void *data)
{
- int i, res = 0;
+ int i, res;
for (i = 0; syntax[i].id; i++)
switch (syntax[i].type) {
@@ -1108,10 +1110,16 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
break;
}
- while (!res && !ebml_level_end(matroska))
+ if (!matroska->levels[matroska->num_levels - 1].length) {
+ matroska->num_levels--;
+ return 0;
+ }
+
+ do {
res = ebml_parse(matroska, syntax, data);
+ } while (!res);
- return res;
+ return res == LEVEL_ENDED ? 0 : res;
}
static int is_ebml_id_valid(uint32_t id)
@@ -1180,17 +1188,26 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
uint32_t id;
uint64_t length;
int64_t pos = avio_tell(pb);
- int res, update_pos = 1;
+ int res, update_pos = 1, level_check;
void *newelem;
MatroskaLevel1Element *level1_elem;
+ MatroskaLevel *level = matroska->num_levels ? &matroska->levels[matroska->num_levels - 1] : NULL;
if (!matroska->current_id) {
uint64_t id;
res = ebml_read_num(matroska, pb, 4, &id, 0);
if (res < 0) {
- // in live mode, finish parsing if EOF is reached.
- return (matroska->is_live && pb->eof_reached &&
- res == AVERROR_EOF) ? 1 : res;
+ if (pb->eof_reached && res == AVERROR_EOF) {
+ if (matroska->is_live)
+ // in live mode, finish parsing if EOF is reached.
+ return 1;
+ if (level && level->length == EBML_UNKNOWN_LENGTH && pos == avio_tell(pb)) {
+ // Unknown-length levels automatically end at EOF.
+ matroska->num_levels--;
+ return LEVEL_ENDED;
+ }
+ }
+ return res;
}
matroska->current_id = id | 1 << 7 * res;
} else
@@ -1199,13 +1216,22 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
id = matroska->current_id;
syntax = ebml_parse_id(syntax, id);
- if (!syntax->id && id == MATROSKA_ID_CLUSTER &&
- matroska->num_levels > 0 &&
- matroska->levels[matroska->num_levels - 1].length == EBML_UNKNOWN_LENGTH)
- return 0; // we reached the end of an unknown size cluster
if (!syntax->id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
- av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
- update_pos = 0;
+ if (level && level->length == EBML_UNKNOWN_LENGTH) {
+ // Unknown-length levels end when an element from an upper level
+ // in the hierarchy is encountered.
+ while (syntax->def.n) {
+ syntax = ebml_parse_id(syntax->def.n, id);
+ if (syntax->id) {
+ matroska->num_levels--;
+ return LEVEL_ENDED;
+ }
+ };
+ }
+
+ av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32" at pos. "
+ "%"PRId64"\n", id, pos);
+ update_pos = 0; /* Don't update resync_pos as an error might have happened. */
}
data = (char *) data + syntax->data_offset;
@@ -1240,26 +1266,34 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
uint64_t elem_end = pos + length,
level_end = level->start + level->length;
- if (level_end < elem_end) {
+ if (elem_end < level_end) {
+ level_check = 0;
+ } else if (elem_end == level_end) {
+ level_check = LEVEL_ENDED;
+ } else {
av_log(matroska->ctx, AV_LOG_ERROR,
"Element at 0x%"PRIx64" ending at 0x%"PRIx64" exceeds "
"containing master element ending at 0x%"PRIx64"\n",
pos, elem_end, level_end);
return AVERROR_INVALIDDATA;
}
+ } else if (length != EBML_UNKNOWN_LENGTH) {
+ level_check = 0;
} else if (level->length != EBML_UNKNOWN_LENGTH) {
av_log(matroska->ctx, AV_LOG_ERROR, "Unknown-sized element "
"at 0x%"PRIx64" inside parent with finite size\n", pos);
return AVERROR_INVALIDDATA;
- } else if (length == EBML_UNKNOWN_LENGTH && id != MATROSKA_ID_CLUSTER) {
+ } else if (id != MATROSKA_ID_CLUSTER) {
// According to the specifications only clusters and segments
// are allowed to be unknown-sized.
av_log(matroska->ctx, AV_LOG_ERROR,
"Found unknown-sized element other than a cluster at "
"0x%"PRIx64". Dropping the invalid element.\n", pos);
return AVERROR_INVALIDDATA;
- }
- }
+ } else
+ level_check = 0;
+ } else
+ level_check = 0;
if (update_pos) {
// We have found an element that is allowed at this place
@@ -1300,7 +1334,9 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
level1_elem->parsed = 1;
}
- return ebml_parse_nest(matroska, syntax->def.n, data);
+ if (res = ebml_parse_nest(matroska, syntax->def.n, data))
+ return res;
+ break;
case EBML_STOP:
return 1;
default:
@@ -1330,7 +1366,7 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
else
res = AVERROR_EOF;
} else
- res = 0;
+ goto level_check;
}
if (res == AVERROR_INVALIDDATA)
@@ -1339,8 +1375,24 @@ static int ebml_parse(MatroskaDemuxContext *matroska,
av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
else if (res == AVERROR_EOF)
av_log(matroska->ctx, AV_LOG_ERROR, "File ended prematurely\n");
+
+ return res;
}
- return res;
+
+level_check:
+ if (level_check == LEVEL_ENDED && matroska->num_levels) {
+ level = &matroska->levels[matroska->num_levels - 1];
+ pos = avio_tell(pb);
+
+ // Given that pos >= level->start no check for
+ // level->length != EBML_UNKNOWN_LENGTH is necessary.
+ while (matroska->num_levels && pos == level->start + level->length) {
+ matroska->num_levels--;
+ level--;
+ }
+ }
+
+ return level_check;
}
static void ebml_free(EbmlSyntax *syntax, void *data)
@@ -1699,6 +1751,10 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
matroska->current_id = 0;
ret = ebml_parse(matroska, matroska_segment, matroska);
+ if (ret == LEVEL_ENDED) {
+ /* This can only happen if the seek brought us beyond EOF. */
+ ret = AVERROR_EOF;
+ }
}
}
/* Seek back - notice that in all instances where this is used it is safe
@@ -3559,9 +3615,11 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
res = ebml_parse(matroska,
matroska_cluster_parsing,
cluster);
+ else
+ cluster->pos = 0;
}
- if (!res && block->bin.size > 0) {
+ if (res >= 0 && block->bin.size > 0) {
int is_keyframe = block->non_simple ? block->reference == INT64_MIN : -1;
uint8_t* additional = block->additional.size > 0 ?
block->additional.data : NULL;
@@ -3576,6 +3634,9 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
block->discard_padding);
}
+ if (res == LEVEL_ENDED)
+ cluster->pos = 0;
+
ebml_free(matroska_blockgroup, block);
memset(block, 0, sizeof(*block));
--
2.21.0
More information about the ffmpeg-devel
mailing list