[FFmpeg-devel] [PATCH 15/21] avformat/matroskadec: Redo level handling
Steve Lhomme
robux4 at ycbcr.xyz
Sun Apr 7 10:55:00 EEST 2019
On 3/27/2019 12:18 PM, Andreas Rheinhardt via ffmpeg-devel wrote:
> This commit changes how levels are handled: If the level used for
> ebml_parse ends directly after an element that has been consumed, then
> ebml_parse ends the level itself (and any finite-sized levels that end
> there as well) and informs the caller via the return value; if the
> current level is unknown-sized, then the level is ended as soon as
> an element that is not valid on the current level is encountered.
>
> This is designed for situations where one wants to parse master elements
> incrementally, i.e. not in one go via ebml_parse_nest.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at googlemail.com>
> ---
> libavformat/matroskadec.c | 104 +++++++++++++++++++++++++++++++-------
> 1 file changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 42f1c21042..32cf57685f 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -69,6 +69,8 @@
> #include "qtpalette.h"
>
> #define EBML_UNKNOWN_LENGTH UINT64_MAX /* EBML unknown length, in uint64_t */
> +#define LEVEL_ENDED 2 /* return value of ebml_parse when the
> + * syntax level used for parsing ended. */
>
> typedef enum {
> EBML_NONE,
> @@ -1041,16 +1043,32 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
> uint32_t id, void *data)
> {
> int i;
> +
> for (i = 0; syntax[i].id; i++)
> if (id == syntax[i].id)
> break;
> - if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
> - matroska->num_levels > 0 &&
> - matroska->levels[matroska->num_levels - 1].length == EBML_UNKNOWN_LENGTH)
> - return 0; // we reached the end of an unknown size cluster
> +
> if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
> - av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
> + if (!matroska->num_levels || matroska->levels[matroska->num_levels - 1].length
> + != EBML_UNKNOWN_LENGTH) {
> + av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
Would this mean a Segment with unknown length would produce this log ?
Or Segments are not part of the levels at all ? If so, same question
with level 1 elements which have an unknown length.
> + } else if (matroska->num_levels > 1) {
> + // Unknown-sized master elements (except root elements) end
> + // when an id not known to be allowed in them is encountered.
> + matroska->num_levels--;
> + return LEVEL_ENDED;
> + } else if (matroska->num_levels == 1) {
> + AVIOContext *pb = matroska->ctx->pb;
> + int64_t pos = avio_tell(pb);
> + // An unkown level 1 element inside an unknown-sized segment
> + // is considered an error.
> + av_log(matroska->ctx, AV_LOG_ERROR, "Found unknown element id 0x%"PRIX32
> + " at pos. %"PRIu64" (0x%"PRIx64") in an unknown-sized segment. "
> + "Will be treated as error.\n", id, pos, pos);
> + return AVERROR_INVALIDDATA;
> + }
> }
> +
> return ebml_parse_elem(matroska, &syntax[i], data);
> }
>
> @@ -1061,9 +1079,24 @@ static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
> uint64_t id;
> int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id, 0);
> if (res < 0) {
> - // in live mode, finish parsing if EOF is reached.
> - return (matroska->is_live && matroska->ctx->pb->eof_reached &&
> - res == AVERROR_EOF) ? 1 : res;
> + if (matroska->ctx->pb->eof_reached && res == AVERROR_EOF) {
> + if (matroska->is_live)
> + // in live mode, finish parsing if EOF is reached.
> + return 1;
> + if (matroska->num_levels) {
> + MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
> +
> + if (level->length == EBML_UNKNOWN_LENGTH) {
> + // Unknown-sized elements automatically end at EOF.
> + matroska->num_levels = 0;
> + return LEVEL_ENDED;
> + } else if (avio_tell(matroska->ctx->pb) < level->start + level->length) {
> + av_log(matroska->ctx, AV_LOG_ERROR, "File ended before "
> + "its declared end\n");
> + }
> + }
> + }
> + return res;
> }
> matroska->current_id = id | 1 << 7 * res;
> }
> @@ -1098,10 +1131,15 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
> break;
> }
>
> - while (!res && !ebml_level_end(matroska))
> + if (!matroska->levels[matroska->num_levels - 1].length) {
> + matroska->num_levels--;
> + return 0;
> + }
> +
> + while (!res)
> res = ebml_parse(matroska, syntax, data);
>
> - return res;
> + return res == LEVEL_ENDED ? 0 : res;
> }
>
> static int is_ebml_id_valid(uint32_t id)
> @@ -1169,7 +1207,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
> AVIOContext *pb = matroska->ctx->pb;
> uint32_t id = syntax->id;
> uint64_t length;
> - int res;
> + int res, level_check;
> void *newelem;
> MatroskaLevel1Element *level1_elem;
>
> @@ -1195,7 +1233,8 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
> length, max_lengths[syntax->type], syntax->type);
> return AVERROR_INVALIDDATA;
> }
> - if (matroska->num_levels > 0) {
> +
> + if (matroska->num_levels) {
> MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
> int64_t pos = avio_tell(pb);
>
> @@ -1204,18 +1243,24 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
> uint64_t elem_end = pos + length,
> level_end = level->start + level->length;
>
> - if (level_end < elem_end) {
> + if (elem_end < level_end) {
> + level_check = 0;
> + } else if (elem_end == level_end) {
> + level_check = LEVEL_ENDED;
> + } else {
> av_log(matroska->ctx, AV_LOG_ERROR,
> "Element at 0x%"PRIx64" ending at 0x%"PRIx64" exceeds "
> "containing master element ending at 0x%"PRIx64"\n",
> pos, elem_end, level_end);
> return AVERROR_INVALIDDATA;
> }
> + } else if (length != EBML_UNKNOWN_LENGTH) {
> + level_check = 0;
> } else if (level->length != EBML_UNKNOWN_LENGTH) {
> av_log(matroska->ctx, AV_LOG_ERROR, "Unknown-sized element "
> "at 0x%"PRIx64" inside parent with finite size\n", pos);
> return AVERROR_INVALIDDATA;
> - } else if (length == EBML_UNKNOWN_LENGTH && id != MATROSKA_ID_CLUSTER) {
> + } else if (id != MATROSKA_ID_CLUSTER) {
> // According to the specifications only clusters and segments
> // are allowed to be unknown-sized.
> av_log(matroska->ctx, AV_LOG_ERROR,
> @@ -1223,7 +1268,8 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
> "0x%"PRIx64". Dropping the invalid element.\n", pos);
> return AVERROR_INVALIDDATA;
> }
> - }
> + } else
> + level_check = 0;
> }
>
> switch (syntax->type) {
> @@ -1257,19 +1303,36 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
> av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
> level1_elem->parsed = 1;
> }
> - return ebml_parse_nest(matroska, syntax->def.n, data);
> + res = ebml_parse_nest(matroska, syntax->def.n, data);
> + break;
> case EBML_STOP:
> return 1;
> default:
> if (ffio_limit(pb, length) != length)
> return AVERROR(EIO);
> - return avio_skip(pb, length) < 0 ? AVERROR(EIO) : 0;
> + res = avio_skip(pb, length) < 0 ? AVERROR(EIO) : 0;
> }
> if (res == AVERROR_INVALIDDATA)
> av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
> else if (res == AVERROR(EIO))
> av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
> - return res;
> +
> + if (res < 0 || res == 1)
> + return res;
> +
> + if (level_check == LEVEL_ENDED && matroska->num_levels) {
> + MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
> + int64_t pos = avio_tell(pb);
> +
> + // Given that pos >= level->start no check for
> + // level->length != EBML_UNKNOWN_LENGTH is necessary.
> + while (matroska->num_levels && pos == level->start + level->length) {
> + matroska->num_levels--;
> + level--;
> + }
It seems you're assuming that unknown length can only return to level 0
(Segment). But an unknown length Cluster could be followed by another
unknown length Cluster.
> + }
> +
> + return level_check;
> }
>
> static void ebml_free(EbmlSyntax *syntax, void *data)
> @@ -3491,7 +3554,7 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
> cluster);
> }
>
> - if (!res && block->bin.size > 0) {
> + if ((!res || res == LEVEL_ENDED) && block->bin.size > 0) {
> int is_keyframe = block->non_simple ? block->reference == INT64_MIN : -1;
> uint8_t* additional = block->additional.size > 0 ?
> block->additional.data : NULL;
> @@ -3506,6 +3569,9 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
> block->discard_padding);
> }
>
> + if (res == LEVEL_ENDED)
> + cluster->pos = 0;
This seems odd.
> +
> ebml_free(matroska_blockgroup, block);
> memset(block, 0, sizeof(*block));
>
> --
> 2.19.2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list