[FFmpeg-devel] [PATCH 3/5] lavf/matroskadec: support standard (non-WebM) WebVTT formatting
Andreas Rheinhardt
andreas.rheinhardt at gmail.com
Sat Jun 20 12:14:39 EEST 2020
rcombs:
> Fixes ticket #5641
> ---
> libavformat/matroska.c | 11 ++--
> libavformat/matroskadec.c | 111 ++++++++++++++++++++++++--------------
> 2 files changed, 77 insertions(+), 45 deletions(-)
>
> diff --git a/libavformat/matroska.c b/libavformat/matroska.c
> index 7c56aba403..962fa496f4 100644
> --- a/libavformat/matroska.c
> +++ b/libavformat/matroska.c
> @@ -60,16 +60,12 @@ const CodecTags ff_mkv_codec_tags[]={
> {"A_VORBIS" , AV_CODEC_ID_VORBIS},
> {"A_WAVPACK4" , AV_CODEC_ID_WAVPACK},
>
> - {"D_WEBVTT/SUBTITLES" , AV_CODEC_ID_WEBVTT},
> - {"D_WEBVTT/CAPTIONS" , AV_CODEC_ID_WEBVTT},
> - {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
> - {"D_WEBVTT/METADATA" , AV_CODEC_ID_WEBVTT},
> -
> {"S_TEXT/UTF8" , AV_CODEC_ID_SUBRIP},
> {"S_TEXT/UTF8" , AV_CODEC_ID_TEXT},
> {"S_TEXT/ASCII" , AV_CODEC_ID_TEXT},
> {"S_TEXT/ASS" , AV_CODEC_ID_ASS},
> {"S_TEXT/SSA" , AV_CODEC_ID_ASS},
> + {"S_TEXT/WEBVTT" , AV_CODEC_ID_WEBVTT},
> {"S_ASS" , AV_CODEC_ID_ASS},
> {"S_SSA" , AV_CODEC_ID_ASS},
> {"S_VOBSUB" , AV_CODEC_ID_DVD_SUBTITLE},
> @@ -77,6 +73,11 @@ const CodecTags ff_mkv_codec_tags[]={
> {"S_HDMV/PGS" , AV_CODEC_ID_HDMV_PGS_SUBTITLE},
> {"S_HDMV/TEXTST" , AV_CODEC_ID_HDMV_TEXT_SUBTITLE},
>
> + {"D_WEBVTT/SUBTITLES" , AV_CODEC_ID_WEBVTT},
> + {"D_WEBVTT/CAPTIONS" , AV_CODEC_ID_WEBVTT},
> + {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
> + {"D_WEBVTT/METADATA" , AV_CODEC_ID_WEBVTT},
> +
> {"V_AV1" , AV_CODEC_ID_AV1},
> {"V_DIRAC" , AV_CODEC_ID_DIRAC},
> {"V_FFV1" , AV_CODEC_ID_FFV1},
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index cff7f0cb54..1e4947e209 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -3305,62 +3305,81 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
> uint8_t *data, int data_len,
> uint64_t timecode,
> uint64_t duration,
> - int64_t pos)
> + int64_t pos,
> + uint8_t *additional, uint64_t additional_id, int additional_size)
> {
> AVPacket pktl, *pkt = &pktl;
> - uint8_t *id, *settings, *text, *buf;
> - int id_len, settings_len, text_len;
> + uint8_t *id, *settings, *comment, *text, *buf;
> + int id_len = 0, settings_len = 0, comment_len = 0, text_len;
> uint8_t *p, *q;
> int err;
> + int webm_style = !strncmp(track->codec_id, "D_WEBVTT/", 9);
>
> if (data_len <= 0)
> return AVERROR_INVALIDDATA;
>
> - p = data;
> - q = data + data_len;
> -
> - id = p;
> - id_len = -1;
> - while (p < q) {
> - if (*p == '\r' || *p == '\n') {
> - id_len = p - id;
> - if (*p == '\r')
> - p++;
> - break;
> + p = webm_style ? data : additional;
> + q = webm_style ? (data + data_len) : (additional + additional_size);
> +
Unfortunately all pointer arithmetic involving NULL is undefined
behaviour in C, even NULL + 0. So the above is ub when not using
webm-style and when there is no BlockAddition.
> + if (p) {
> + id = p;
> + id_len = -1;
This is unnecessary. Initializing to zero (as is already done above)
works just fine.
> + while (p < q) {
> + if (*p == '\r' || *p == '\n') {
> + id_len = p - id;
> + if (*p == '\r')
> + p++;
> + break;
> + }
> + p++;
> }
> +
> + if (p >= q || *p != '\n')
> + return AVERROR_INVALIDDATA;
> p++;
> - }
>
> - if (p >= q || *p != '\n')
> - return AVERROR_INVALIDDATA;
> - p++;
> -
> - settings = p;
> - settings_len = -1;
> - while (p < q) {
> - if (*p == '\r' || *p == '\n') {
> - settings_len = p - settings;
> - if (*p == '\r')
> - p++;
> - break;
> + settings = p;
> + settings_len = -1;
> + while (p < q) {
> + if (*p == '\r' || *p == '\n') {
> + settings_len = p - settings;
> + if (*p == '\r')
> + p++;
> + break;
> + }
> + p++;
> }
> +
> + if (p >= q || *p != '\n')
> + return AVERROR_INVALIDDATA;
You're repeating yourself.
> p++;
> +
> + if (!webm_style && p < q) {
> + if (q[-1] != '\r' && q[-1] != '\n')
The current Matroska codec mapping indeed requires the terminating
WebVTT line terminator to be retained, yet mkvmerge doesn't do so, so we
should not be so picky with this.
> + return AVERROR_INVALIDDATA;
> +
> + comment = p;
> + comment_len = q - p;
> + }
> }
>
> - if (p >= q || *p != '\n')
> - return AVERROR_INVALIDDATA;
> - p++;
> -
> - text = p;
> - text_len = q - p;
> - while (text_len > 0) {
> - const int len = text_len - 1;
> - const uint8_t c = p[len];
> - if (c != '\r' && c != '\n')
> - break;
> - text_len = len;
> + if (webm_style) {
> + text = p;
> + text_len = q - p;
> +
> + while (text_len > 0) {
> + const int len = text_len - 1;
> + const uint8_t c = p[len];
> + if (c != '\r' && c != '\n')
> + break;
> + text_len = len;
> + }
> + } else {
> + text = data;
> + text_len = data_len;
> }
>
> +
> if (text_len <= 0)
> return AVERROR_INVALIDDATA;
>
> @@ -3393,6 +3412,17 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
> memcpy(buf, settings, settings_len);
> }
>
> + if (comment_len > 0) {
> + buf = av_packet_new_side_data(pkt,
> + AV_PKT_DATA_WEBVTT_COMMENT,
> + comment_len);
> + if (!buf) {
> + av_packet_unref(pkt);
> + return AVERROR(ENOMEM);
> + }
> + memcpy(buf, comment, comment_len);
> + }
> +
> // Do we need this for subtitles?
> // pkt->flags = AV_PKT_FLAG_KEY;
>
> @@ -3656,7 +3686,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
> res = matroska_parse_webvtt(matroska, track, st,
> out_data, out_size,
> timecode, lace_duration,
> - pos);
> + pos,
> + additional, additional_id, additional_size);
> if (!buf)
> av_free(out_data);
> if (res)
>
More information about the ffmpeg-devel
mailing list