[FFmpeg-devel] [PATCH v2 1/2] avformat/hls demuxer: Add WebVTT subtitle support
softworkz
ffmpegagent at gmail.com
Fri Feb 21 17:13:21 EET 2025
From: softworkz <softworkz at hotmail.com>
This adds support for WebVTT subtitles in HLS streams.
Just like for separate audio streams, it supports all available
WebVTT streams in all renditions.
No new options are added; it just works and provides subtitle streams
like any other demuxer.
The code prevents downloading subtitle segments which are farther
in the future than the main segments, to avoid loading hundreds
of subtitle segments in advance.
Signed-off-by: softworkz <softworkz at hotmail.com>
---
libavformat/hls.c | 213 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 176 insertions(+), 37 deletions(-)
diff --git a/libavformat/hls.c b/libavformat/hls.c
index 3bdc1bc848..d155202ac9 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c
@@ -173,6 +173,7 @@ struct playlist {
* playlist, if any. */
int n_init_sections;
struct segment **init_sections;
+ int is_subtitle; /* Indicates if it's a subtitle playlist */
};
/*
@@ -515,13 +516,6 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
return NULL;
}
- /* TODO: handle subtitles (each segment has to parsed separately) */
- if (c->ctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL)
- if (type == AVMEDIA_TYPE_SUBTITLE) {
- av_log(c->ctx, AV_LOG_WARNING, "Can't support the subtitle(uri: %s)\n", info->uri);
- return NULL;
- }
-
rend = av_mallocz(sizeof(struct rendition));
if (!rend)
return NULL;
@@ -536,9 +530,14 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
/* add the playlist if this is an external rendition */
if (info->uri[0]) {
rend->playlist = new_playlist(c, info->uri, url_base);
- if (rend->playlist)
+ if (rend->playlist) {
+ if (type == AVMEDIA_TYPE_SUBTITLE) {
+ rend->playlist->is_subtitle = 1;
+ rend->playlist->is_id3_timestamped = 0;
+ }
dynarray_add(&rend->playlist->renditions,
&rend->playlist->n_renditions, rend);
+ }
}
if (info->assoc_language[0]) {
@@ -761,6 +760,9 @@ static int test_segment(AVFormatContext *s, const AVInputFormat *in_fmt, struct
} else if (!strcmp(in_fmt->name, "mpegts")) {
matchF = av_match_ext( seg->url, "ts,m2t,m2ts,mts,mpg,m4s,mpeg,mpegts")
+ 2*(ff_match_url_ext(seg->url, "ts,m2t,m2ts,mts,mpg,m4s,mpeg,mpegts") > 0);
+ } else if (!strcmp(in_fmt->name, "webvtt")) {
+ matchF = av_match_ext( seg->url, "vtt,webvtt")
+ + 2*(ff_match_url_ext(seg->url, "vtt,webvtt") > 0);
}
if (!(matchA & matchF)) {
@@ -969,7 +971,7 @@ static int parse_playlist(HLSContext *c, const char *url,
if (ptr)
seg_offset = strtoll(ptr+1, NULL, 10);
} else if (av_strstart(line, "#", NULL)) {
- av_log(c->ctx, AV_LOG_INFO, "Skip ('%s')\n", line);
+ av_log(c->ctx, AV_LOG_VERBOSE, "Skip ('%s')\n", line);
continue;
} else if (line[0]) {
if (is_variant) {
@@ -1484,7 +1486,7 @@ static int playlist_needed(struct playlist *pls)
int first_st;
/* If there is no context or streams yet, the playlist is needed */
- if (!pls->ctx || !pls->n_main_streams)
+ if ((!pls->ctx || !pls->n_main_streams) && !pls->is_subtitle)
return 1;
/* check if any of the streams in the playlist are needed */
@@ -1522,17 +1524,13 @@ static int playlist_needed(struct playlist *pls)
return 0;
}
-static int read_data(void *opaque, uint8_t *buf, int buf_size)
+static int reload_playlist(struct playlist *v, HLSContext *c)
{
- struct playlist *v = opaque;
- HLSContext *c = v->parent->priv_data;
- int ret;
- int just_opened = 0;
+ int ret = 0;
int reload_count = 0;
- int segment_retries = 0;
- struct segment *seg;
-restart:
+ v->needed = playlist_needed(v);
+
if (!v->needed)
return AVERROR_EOF;
@@ -1588,7 +1586,7 @@ reload:
av_log(v->parent, AV_LOG_WARNING, "The m3u8 list sequence may have been wrapped.\n");
}
if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
- if (v->finished)
+ if (v->finished || v->is_subtitle)
return AVERROR_EOF;
while (av_gettime_relative() - v->last_load_time < reload_interval) {
if (ff_check_interrupt(c->interrupt_callback))
@@ -1599,9 +1597,35 @@ reload:
goto reload;
}
- v->input_read_done = 0;
- seg = current_segment(v);
+ }
+ return ret;
+}
+static int read_data_continuous(void *opaque, uint8_t *buf, int buf_size)
+{
+ struct playlist *v = opaque;
+ HLSContext *c = v->parent->priv_data;
+ int ret;
+ int just_opened = 0;
+ int segment_retries = 0;
+ struct segment *seg;
+
+ if (c->http_persistent && v->input_read_done) {
+ ret = reload_playlist(v, c);
+ if (ret < 0)
+ return ret;
+ }
+
+ v->input_read_done = 0;
+
+restart:
+ ret = reload_playlist(v, c);
+ if (ret < 0)
+ return ret;
+
+ seg = current_segment(v);
+
+ if (!v->input || (c->http_persistent && v->input_read_done)) {
/* load/update Media Initialization Section, if any */
ret = update_init_section(v, seg);
if (ret)
@@ -1630,7 +1654,7 @@ reload:
} else {
segment_retries++;
}
- goto reload;
+ goto restart;
}
segment_retries = 0;
just_opened = 1;
@@ -1692,6 +1716,110 @@ reload:
goto restart;
}
+/**
+ * AVIO read callback that serves the bytes of the playlist's current
+ * subtitle segment.
+ *
+ * @param opaque   the struct playlist this I/O context belongs to
+ * @param buf      destination buffer
+ * @param buf_size capacity of buf in bytes
+ * @return whatever read_from_url() returns for the current segment,
+ *         AVERROR_EOF if the playlist is not needed or cur_seq_no lies
+ *         past the last known segment, AVERROR_EXIT if opening failed
+ *         while an interrupt is pending, or the open_input() error code.
+ */
+static int read_data_subtitle_segment(void *opaque, uint8_t *buf, int buf_size)
+{
+    struct playlist *pls = opaque;
+    HLSContext *hls = pls->parent->priv_data;
+    struct segment *cur;
+
+    /* Nothing to deliver for an unused playlist. */
+    if (!pls->needed)
+        return AVERROR_EOF;
+
+    /* The current sequence number is beyond the segments we know about. */
+    if (pls->cur_seq_no - pls->start_seq_no >= pls->n_segments)
+        return AVERROR_EOF;
+
+    cur = current_segment(pls);
+
+    /* Open the segment lazily, on the first read after it became current. */
+    if (!pls->input) {
+        int err = open_input(hls, pls, cur, &pls->input);
+
+        if (err < 0 && ff_check_interrupt(hls->interrupt_callback))
+            return AVERROR_EXIT;
+
+        if (err < 0) {
+            av_log(pls->parent, AV_LOG_WARNING, "Failed to open segment of playlist %d\n",
+                   pls->index);
+            return err;
+        }
+    }
+
+    return read_from_url(pls, cur, buf, buf_size);
+}
+
+/**
+ * io_open callback installed on sub-demuxer contexts.
+ *
+ * It never opens anything: it logs that the playlist item tried to reach
+ * an external file and rejects the request with AVERROR(EPERM).
+ * The pb, flags and opts arguments are not used.
+ */
+static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
+                          int flags, AVDictionary **opts)
+{
+    const int denied = AVERROR(EPERM);
+
+    av_log(s, AV_LOG_ERROR,
+           "A HLS playlist item '%s' referred to an external file '%s'. "
+           "Opening this file was forbidden for security reasons\n",
+           s->url, url);
+
+    return denied;
+}
+
+/**
+ * Create and open a WebVTT sub-demuxer for the current segment of a
+ * subtitle playlist.
+ *
+ * A new AVFormatContext is stored in pls->ctx and attached to pls->pb,
+ * which is (re)initialised as a non-seekable custom I/O context reading
+ * through read_data_subtitle_segment(). External file access from the
+ * sub-demuxer is blocked via nested_io_open().
+ *
+ * @param pls subtitle playlist; pls->ctx, pls->read_buffer and pls->pb
+ *            are overwritten
+ * @return the result of avformat_open_input() on the normal path, or a
+ *         negative AVERROR code if setup failed earlier. On the setup
+ *         failures handled here pls->ctx is freed and reset to NULL so a
+ *         later call can start over instead of reading from a context
+ *         that was never opened.
+ */
+static int init_subtitle_context(struct playlist *pls)
+{
+    HLSContext *c = pls->parent->priv_data;
+    const AVInputFormat *in_fmt;
+    AVDictionary *opts = NULL;
+    int ret;
+
+    if (!(pls->ctx = avformat_alloc_context()))
+        return AVERROR(ENOMEM);
+
+    /* NOTE(review): the previous pls->read_buffer value and the buffer
+     * currently attached to pls->pb are overwritten below without being
+     * freed in this function - confirm they are released elsewhere,
+     * otherwise one buffer is lost per opened segment. */
+    pls->read_buffer = av_malloc(INITIAL_BUFFER_SIZE);
+    if (!pls->read_buffer) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
+                      read_data_subtitle_segment, NULL, NULL);
+    pls->pb.pub.seekable = 0;
+    pls->ctx->pb = &pls->pb.pub;
+    pls->ctx->io_open = nested_io_open;
+
+    ret = ff_copy_whiteblacklists(pls->ctx, pls->parent);
+    if (ret < 0)
+        goto fail; /* do not leave an unopened context behind in pls->ctx */
+
+    in_fmt = av_find_input_format("webvtt");
+    av_dict_copy(&opts, c->seg_format_opts, 0);
+    /* avformat_open_input() is documented to free a user-supplied context
+     * on failure, so no extra cleanup of pls->ctx is done for that case. */
+    ret = avformat_open_input(&pls->ctx, current_segment(pls)->url, in_fmt, &opts);
+    av_dict_free(&opts);
+
+    return ret;
+
+fail:
+    avformat_free_context(pls->ctx);
+    pls->ctx = NULL;
+    return ret;
+}
+
+/**
+ * Read the next packet of a subtitle playlist into v->pkt, moving on to
+ * the following segment whenever the current one yields no more packets.
+ *
+ * @param v   subtitle playlist
+ * @param pkt not used by this function; the packet is stored in v->pkt
+ * @return 0 once a packet has been read, otherwise the negative error
+ *         code returned by reload_playlist() or init_subtitle_context().
+ */
+static int read_subtitle_packet(struct playlist *v, AVPacket *pkt)
+{
+    HLSContext *hls = v->parent->priv_data;
+
+    for (;;) {
+        int err = reload_playlist(v, hls);
+
+        if (err < 0)
+            return err;
+
+        if (!v->ctx) {
+            /* Drop an input that is still open although no demuxer uses it. */
+            if (v->input)
+                ff_format_io_close(v->parent, &v->input);
+
+            /* No demuxer and no open input: set one up for this segment. */
+            if (!v->input) {
+                err = init_subtitle_context(v);
+                if (err < 0)
+                    return err;
+            }
+        }
+
+        err = av_read_frame(v->ctx, v->pkt);
+        if (err == 0)
+            return 0;
+
+        /* Any read failure ends this segment: close it, step to the next
+         * sequence number and loop to try again. */
+        ff_format_io_close(v->parent, &v->input);
+        v->cur_seq_no++;
+        hls->cur_seq_no = v->cur_seq_no;
+
+        avformat_close_input(&v->ctx);
+    }
+}
+
static void add_renditions_to_variant(HLSContext *c, struct variant *var,
enum AVMediaType type, const char *group_id)
{
@@ -1853,16 +1981,6 @@ static int64_t select_cur_seq_no(HLSContext *c, struct playlist *pls)
return pls->start_seq_no;
}
-static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
- int flags, AVDictionary **opts)
-{
- av_log(s, AV_LOG_ERROR,
- "A HLS playlist item '%s' referred to an external file '%s'. "
- "Opening this file was forbidden for security reasons\n",
- s->url, url);
- return AVERROR(EPERM);
-}
-
static void add_stream_to_programs(AVFormatContext *s, struct playlist *pls, AVStream *stream)
{
HLSContext *c = s->priv_data;
@@ -2070,6 +2188,8 @@ static int hls_read_header(AVFormatContext *s)
highest_cur_seq_no = FFMAX(highest_cur_seq_no, pls->cur_seq_no);
}
+ av_dict_set(&c->seg_format_opts, "prefer_hls_mpegts_pts", "1", 0);
+
/* Open the demuxer for each playlist */
for (i = 0; i < c->n_playlists; i++) {
struct playlist *pls = c->playlists[i];
@@ -2107,8 +2227,12 @@ static int hls_read_header(AVFormatContext *s)
return AVERROR(ENOMEM);
}
- ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
- read_data, NULL, NULL);
+ if (pls->is_subtitle)
+ ffio_init_context(&pls->pb, (unsigned char*)av_strdup("WEBVTT\n"), (int)strlen("WEBVTT\n"), 0, pls,
+ NULL, NULL, NULL);
+ else
+ ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
+ read_data_continuous, NULL, NULL);
/*
* If encryption scheme is SAMPLE-AES, try to read ID3 tags of
@@ -2254,6 +2378,13 @@ static int hls_read_header(AVFormatContext *s)
if (pls->n_main_streams)
av_dict_copy(&pls->main_streams[0]->metadata, pls->ctx->metadata, 0);
+ if (pls->is_subtitle) {
+ avformat_free_context(pls->ctx);
+ pls->ctx = NULL;
+ pls->needed = 0;
+ pls->main_streams[0]->discard = AVDISCARD_ALL;
+ }
+
add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_AUDIO);
add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_VIDEO);
add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_SUBTITLE);
@@ -2296,6 +2427,8 @@ static int recheck_discard_flags(AVFormatContext *s, int first)
pls->input_read_done = 0;
ff_format_io_close(pls->parent, &pls->input_next);
pls->input_next_requested = 0;
+ if (pls->is_subtitle)
+ avformat_close_input(&pls->ctx);
pls->needed = 0;
changed = 1;
av_log(s, AV_LOG_INFO, "No longer receiving playlist %d\n", i);
@@ -2363,7 +2496,10 @@ static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
int64_t ts_diff;
AVRational tb;
struct segment *seg = NULL;
- ret = av_read_frame(pls->ctx, pls->pkt);
+ if (pls->is_subtitle)
+ ret = read_subtitle_packet(pls, pls->pkt);
+ else
+ ret = av_read_frame(pls->ctx, pls->pkt);
if (ret < 0) {
if (!avio_feof(&pls->pb.pub) && ret != AVERROR_EOF)
return ret;
@@ -2559,7 +2695,10 @@ static int hls_read_seek(AVFormatContext *s, int stream_index,
/* Reset the pos, to let the mpegts/mov demuxer know we've seeked. */
pb->pos = 0;
/* Flush the packet queue of the subdemuxer. */
- ff_read_frame_flush(pls->ctx);
+ if (pls->ctx)
+ ff_read_frame_flush(pls->ctx);
+ if (pls->is_subtitle)
+ avformat_close_input(&pls->ctx);
/* Reset the init segment so it's re-fetched and served appropiately */
pls->cur_init_section = NULL;
@@ -2628,7 +2767,7 @@ static const AVOption hls_options[] = {
OFFSET(prefer_x_start), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS},
{"allowed_extensions", "List of file extensions that hls is allowed to access",
OFFSET(allowed_extensions), AV_OPT_TYPE_STRING,
- {.str = "3gp,aac,avi,ac3,eac3,flac,mkv,m3u8,m4a,m4s,m4v,mpg,mov,mp2,mp3,mp4,mpeg,mpegts,ogg,ogv,oga,ts,vob,wav"},
+ {.str = "3gp,aac,avi,ac3,eac3,flac,mkv,m3u8,m4a,m4s,m4v,mpg,mov,mp2,mp3,mp4,mpeg,mpegts,ogg,ogv,oga,ts,vob,vtt,wav,webvtt"},
INT_MIN, INT_MAX, FLAGS},
{"extension_picky", "Be picky with all extensions matching",
OFFSET(extension_picky), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
--
ffmpeg-codebot
More information about the ffmpeg-devel
mailing list