[FFmpeg-devel] [PATCH 2/2] webmdashenc: Support for live stream manifests
Vignesh Venkatasubramanian
vigneshv at google.com
Wed Apr 1 01:54:11 CEST 2015
On Tue, Mar 31, 2015 at 4:51 PM, Vignesh Venkatasubramanian
<vigneshv at google.com> wrote:
> This patch adds support for creating DASH manifests for WebM Live
> Streams. It also updates the documentation and adds a fate test to
> verify the behavior of the new muxer flag.
>
> Signed-off-by: Vignesh Venkatasubramanian <vigneshv at google.com>
> ---
> doc/muxers.texi | 27 ++++-
> libavformat/webmdashenc.c | 187 ++++++++++++++++++++++++++++-----
> tests/fate/vpx.mak | 3 +
> tests/ref/fate/webm-dash-manifest-live | 26 +++++
> 4 files changed, 216 insertions(+), 27 deletions(-)
> create mode 100644 tests/ref/fate/webm-dash-manifest-live
>
> diff --git a/doc/muxers.texi b/doc/muxers.texi
> index a8225fc..089af06 100644
> --- a/doc/muxers.texi
> +++ b/doc/muxers.texi
> @@ -1210,7 +1210,11 @@ is the @option{global_header} flag.
>
> WebM DASH Manifest muxer.
>
> -This muxer implements the WebM DASH Manifest specification to generate the DASH manifest XML.
> +This muxer implements the WebM DASH Manifest specification to generate the DASH
> +manifest XML. It also supports manifest generation for DASH live streams.
> +
> +WebM DASH Specification: @url{https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification}
> +ISO DASH Specification: @url{http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip}
>
> @subsection Options
>
> @@ -1221,6 +1225,27 @@ This muxer supports the following options:
> This option has the following syntax: "id=x,streams=a,b,c id=y,streams=d,e" where x and y are the
> unique identifiers of the adaptation sets and a,b,c,d and e are the indices of the corresponding
> audio and video streams. Any number of adaptation sets can be added using this option.
> +
> +@item live
> +Set this to 1 to create a live stream DASH Manifest. Default: 0.
> +
> +@item chunk_start_index
> +Start index of the first chunk. This will go in the "startNumber" attribute of
> +the "SegmentTemplate" element in the manifest. Default: 0.
> +
> +@item chunk_duration_ms
> +Duration of each chunk in milliseconds. This will go in the "duration" attribute
> +of the "SegmentTemplate" element in the manifest. Default: 1000.
> +
> +@item utc_timing_url
> +URL of the page that will return the UTC timestamp in ISO format. This will go
> +in the "value" attribute of the "UTCTiming" element in the manifest. Default:
> +None.
> +
> +@item time_shift_buffer_depth
> +Smallest time (in seconds) shifting buffer for which any Representation is
> +guaranteed to be available. Default: 60.
> +
> @end table
>
> @subsection Example
> diff --git a/libavformat/webmdashenc.c b/libavformat/webmdashenc.c
> index 4536b7d..84cc9d8 100644
> --- a/libavformat/webmdashenc.c
> +++ b/libavformat/webmdashenc.c
> @@ -22,10 +22,14 @@
> /*
> * WebM DASH Specification:
> * https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
> + * ISO DASH Specification:
> + * http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
> */
>
> +#include <float.h>
> #include <stdint.h>
> #include <string.h>
> +#include <time.h>
>
> #include "avformat.h"
> #include "avio_internal.h"
> @@ -47,6 +51,12 @@ typedef struct WebMDashMuxContext {
> AdaptationSet *as;
> int nb_as;
> int representation_id;
> + int is_live;
> + int chunk_start_index;
> + int chunk_duration;
> + char *utc_timing_url;
> + double time_shift_buffer_depth;
> + int debug_mode;
> } WebMDashMuxContext;
>
> static const char *get_codec_name(int codec_id)
> @@ -79,19 +89,42 @@ static double get_duration(AVFormatContext *s)
>
> static void write_header(AVFormatContext *s)
> {
> + WebMDashMuxContext *w = s->priv_data;
> double min_buffer_time = 1.0;
> + time_t local_time;
> + struct tm* gmt;
> + char* gmt_iso = av_malloc(21);
> avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
> avio_printf(s->pb, "<MPD\n");
> avio_printf(s->pb, " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
> avio_printf(s->pb, " xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
> avio_printf(s->pb, " xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
> - avio_printf(s->pb, " type=\"static\"\n");
> - avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n",
> - get_duration(s));
> - avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n",
> - min_buffer_time);
> - avio_printf(s->pb, " profiles=\"urn:webm:dash:profile:webm-on-demand:2012\"");
> - avio_printf(s->pb, ">\n");
> + avio_printf(s->pb, " type=\"%s\"\n", w->is_live ? "dynamic" : "static");
> + if (!w->is_live) {
> + avio_printf(s->pb, " mediaPresentationDuration=\"PT%gS\"\n",
> + get_duration(s));
> + }
> + avio_printf(s->pb, " minBufferTime=\"PT%gS\"\n", min_buffer_time);
> + avio_printf(s->pb, " profiles=\"%s\"%s",
> + w->is_live ? "urn:mpeg:dash:profile:isoff-live:2011" : "urn:webm:dash:profile:webm-on-demand:2012",
> + w->is_live ? "\n" : ">\n");
> + time(&local_time);
> + gmt = gmtime(&local_time);
> + strftime(gmt_iso, 21, "%FT%TZ", gmt);
> + if (w->debug_mode) {
> + av_strlcpy(gmt_iso, "", 1);
> + }
> + if (w->is_live) {
> + avio_printf(s->pb, " availabilityStartTime=\"%s\"\n", gmt_iso);
> + avio_printf(s->pb, " timeShiftBufferDepth=\"PT%gS\"", w->time_shift_buffer_depth);
> + avio_printf(s->pb, ">\n");
> + avio_printf(s->pb, "<UTCTiming\n");
> + avio_printf(s->pb, " schemeIdUri=\"%s\"\n",
> + w->utc_timing_url ? "urn:mpeg:dash:utc:http-iso:2014" : "urn:mpeg:dash:utc:direct:2012");
> + avio_printf(s->pb, " value=\"%s\"/>\n",
> + w->utc_timing_url ? w->utc_timing_url : gmt_iso);
> + }
> + av_free(gmt_iso);
> }
>
> static void write_footer(AVFormatContext *s)
> @@ -137,33 +170,47 @@ static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) {
> * Writes a Representation within an Adaptation Set. Returns 0 on success and
> * < 0 on failure.
> */
> -static int write_representation(AVFormatContext *s, AVStream *stream, int id,
> +static int write_representation(AVFormatContext *s, AVStream *stream, char *id,
> int output_width, int output_height,
> int output_sample_rate) {
> + WebMDashMuxContext *w = s->priv_data;
> AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0);
> AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0);
> AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0);
> AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0);
> AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0);
> - if (!irange || cues_start == NULL || cues_end == NULL || filename == NULL ||
> - !bandwidth) {
> + if ((w->is_live && (!filename)) ||
> + (!w->is_live && (!irange || !cues_start || !cues_end || !filename || !bandwidth))) {
> return -1;
> }
> - avio_printf(s->pb, "<Representation id=\"%d\"", id);
> - avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value);
> + avio_printf(s->pb, "<Representation id=\"%s\"", id);
> + // FIXME: For live, this should be obtained from the input file or as an AVOption.
> + avio_printf(s->pb, " bandwidth=\"%s\"",
> + w->is_live ? (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO ? "128000" : "1000000") : bandwidth->value);
> if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_width)
> avio_printf(s->pb, " width=\"%d\"", stream->codec->width);
> if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height)
> avio_printf(s->pb, " height=\"%d\"", stream->codec->height);
> if (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO && output_sample_rate)
> avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate);
> - avio_printf(s->pb, ">\n");
> - avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
> - avio_printf(s->pb, "<SegmentBase\n");
> - avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
> - avio_printf(s->pb, "<Initialization\n");
> - avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value);
> - avio_printf(s->pb, "</SegmentBase>\n");
> + if (w->is_live) {
> + // For live streams, Codec and Mime Type always go in the Representation tag.
> + avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(stream->codec->codec_id));
> + avio_printf(s->pb, " mimeType=\"%s/webm\"",
> + stream->codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
> + // For live streams, subsegments always start with key frames. So this
> + // is always 1.
> + avio_printf(s->pb, " startsWithSAP=\"1\"");
> + avio_printf(s->pb, ">");
> + } else {
> + avio_printf(s->pb, ">\n");
> + avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
> + avio_printf(s->pb, "<SegmentBase\n");
> + avio_printf(s->pb, " indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
> + avio_printf(s->pb, "<Initialization\n");
> + avio_printf(s->pb, " range=\"0-%s\" />\n", irange->value);
> + avio_printf(s->pb, "</SegmentBase>\n");
> + }
> avio_printf(s->pb, "</Representation>\n");
> return 0;
> }
> @@ -208,6 +255,53 @@ static int check_matching_sample_rate(AVFormatContext *s, AdaptationSet *as) {
> }
>
> /*
> + * Parses a live header filename and computes the representation id,
> + * initialization pattern and the media pattern. Pass NULL if you don't want to
> + * compute any of those 3. Returns 0 on success and non-zero on failure.
> + *
> + * Name of the header file should conform to the following pattern:
> + * <file_description>_<representation_id>.hdr where <file_description> can be
> + * anything. The chunks should be named according to the following pattern:
> + * <file_description>_<representation_id>_<chunk_number>.chk
> + */
> +static int parse_filename(char *filename, char **representation_id,
> + char **initialization_pattern, char **media_pattern) {
> + char *filename_str;
> + char *underscore_pos = NULL;
> + char *period_pos = NULL;
> + char *temp_pos = NULL;
> + filename_str = av_mallocz(strlen(filename) + 1);
> + if (!filename_str) return AVERROR(ENOMEM);
> + strncpy(filename_str, filename, strlen(filename));
> + temp_pos = av_stristr(filename_str, "_");
> + while (temp_pos) {
> + underscore_pos = temp_pos + 1;
> + temp_pos = av_stristr(temp_pos + 1, "_");
> + }
> + if (!underscore_pos) {
> + av_free(filename_str);
> + return -1;
> + }
> + period_pos = av_stristr(underscore_pos, ".");
> + if (!period_pos) {
> + av_free(filename_str);
> + return -1;
> + }
> + *(underscore_pos - 1) = 0;
> + if (representation_id) {
> + *representation_id = av_malloc(period_pos - underscore_pos + 1);
> + if (!(*representation_id)) {
> + av_free(filename_str);
> + return AVERROR(ENOMEM);
> + }
> + av_strlcpy(*representation_id, underscore_pos, period_pos - underscore_pos + 1);
> + }
> + if (initialization_pattern) {
> + *initialization_pattern = av_asprintf("%s_$RepresentationID$.hdr",
> + filename_str);
> + if (!(*initialization_pattern)) {
> + av_free(filename_str);
> + return AVERROR(ENOMEM);
> + }
> + }
> + if (media_pattern) {
> + *media_pattern = av_asprintf("%s_$RepresentationID$_$Number$.chk",
> + filename_str);
> + if (!(*media_pattern)) {
> + av_free(filename_str);
> + return AVERROR(ENOMEM);
> + }
> + }
> + av_free(filename_str);
> + return 0;
> +}
> +
> +/*
> * Writes an Adaptation Set. Returns 0 on success and < 0 on failure.
> */
> static int write_adaptation_set(AVFormatContext *s, int as_index)
> @@ -222,13 +316,14 @@ static int write_adaptation_set(AVFormatContext *s, int as_index)
>
> // Width, Height and Sample Rate will go in the AdaptationSet tag if they
> // are the same for all contained Representations. otherwise, they will go
> - // on their respective Representation tag.
> + // on their respective Representation tag. For live streams, they always go
> + // in the Representation tag.
> int width_in_as = 1, height_in_as = 1, sample_rate_in_as = 1;
> if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
> - width_in_as = check_matching_width(s, as);
> - height_in_as = check_matching_height(s, as);
> + width_in_as = !w->is_live && check_matching_width(s, as);
> + height_in_as = !w->is_live && check_matching_height(s, as);
> } else {
> - sample_rate_in_as = check_matching_sample_rate(s, as);
> + sample_rate_in_as = !w->is_live && check_matching_sample_rate(s, as);
> }
>
> avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id);
> @@ -249,19 +344,53 @@ static int write_adaptation_set(AVFormatContext *s, int as_index)
> avio_printf(s->pb, " bitstreamSwitching=\"%s\"",
> boolean[bitstream_switching(s, as)]);
> avio_printf(s->pb, " subsegmentAlignment=\"%s\"",
> - boolean[subsegment_alignment(s, as)]);
> + boolean[w->is_live || subsegment_alignment(s, as)]);
>
> for (i = 0; i < as->nb_streams; i++) {
> AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata,
> CLUSTER_KEYFRAME, NULL, 0);
> - if (!kf || !strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0;
> + if (!w->is_live && (!kf || !strncmp(kf->value, "0", 1))) subsegmentStartsWithSAP = 0;
> }
> avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP);
> avio_printf(s->pb, ">\n");
>
> + if (w->is_live) {
> + AVDictionaryEntry *filename =
> + av_dict_get(s->streams[as->streams[0]]->metadata, FILENAME, NULL, 0);
> + char *initialization_pattern = NULL;
> + char *media_pattern = NULL;
> + int ret = parse_filename(filename->value, NULL, &initialization_pattern,
> + &media_pattern);
> + if (ret) return ret;
> + avio_printf(s->pb, "<ContentComponent id=\"1\" type=\"%s\"/>\n",
> + codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
> + avio_printf(s->pb, "<SegmentTemplate");
> + avio_printf(s->pb, " timescale=\"1000\"");
> + avio_printf(s->pb, " duration=\"%d\"", w->chunk_duration);
> + avio_printf(s->pb, " media=\"%s\"", media_pattern);
> + avio_printf(s->pb, " startNumber=\"%d\"", w->chunk_start_index);
> + avio_printf(s->pb, " initialization=\"%s\"", initialization_pattern);
> + avio_printf(s->pb, "/>\n");
> + av_free(initialization_pattern);
> + av_free(media_pattern);
> + }
> +
> for (i = 0; i < as->nb_streams; i++) {
> - write_representation(s, s->streams[as->streams[i]], w->representation_id++,
> + char *representation_id = NULL;
> + if (w->is_live) {
> + AVDictionaryEntry *filename =
> + av_dict_get(s->streams[as->streams[i]]->metadata, FILENAME, NULL, 0);
> + if (!filename ||
> + parse_filename(filename->value, &representation_id, NULL, NULL)) {
> + return -1;
> + }
> + } else {
> + representation_id = av_asprintf("%d", w->representation_id++);
> + if (!representation_id) return -1;
> + }
> + write_representation(s, s->streams[as->streams[i]], representation_id,
> !width_in_as, !height_in_as, !sample_rate_in_as);
> + av_free(representation_id);
> }
> avio_printf(s->pb, "</AdaptationSet>\n");
> return 0;
> @@ -361,6 +490,12 @@ static int webm_dash_manifest_write_trailer(AVFormatContext *s)
> #define OFFSET(x) offsetof(WebMDashMuxContext, x)
> static const AVOption options[] = {
> { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
> + { "debug_mode", "[private option - users should never set this]. set this to 1 to create deterministic output", OFFSET(debug_mode), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
> + { "live", "set this to 1 to create a live stream manifest", OFFSET(is_live), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
> + { "chunk_start_index", "start index of the chunk", OFFSET(chunk_start_index), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
> + { "chunk_duration_ms", "duration of each chunk (in milliseconds)", OFFSET(chunk_duration), AV_OPT_TYPE_INT, {.i64 = 1000}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
> + { "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
> + { "time_shift_buffer_depth", "Smallest time (in seconds) shifting buffer for which any Representation is guaranteed to be available.", OFFSET(time_shift_buffer_depth), AV_OPT_TYPE_DOUBLE, { .dbl = 60.0 }, 1.0, DBL_MAX, AV_OPT_FLAG_ENCODING_PARAM },
> { NULL },
> };
>
> diff --git a/tests/fate/vpx.mak b/tests/fate/vpx.mak
> index 83cda9c..5eaf8fb 100644
> --- a/tests/fate/vpx.mak
> +++ b/tests/fate/vpx.mak
> @@ -43,6 +43,9 @@ fate-webm-dash-manifest-unaligned-audio-streams: CMD = run ffmpeg -f webm_dash_m
> FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-representations
> fate-webm-dash-manifest-representations: CMD = run ffmpeg -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video4.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
>
> +FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-live
> +fate-webm-dash-manifest-live: CMD = run ffmpeg -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_video_360.hdr -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_audio_171.hdr -c copy -map 0 -map 1 -f webm_dash_manifest -live 1 -adaptation_sets "id=0,streams=0 id=1,streams=1" -chunk_start_index 1 -chunk_duration_ms 5000 -time_shift_buffer_depth 7200 -debug_mode 1 -
> +
> FATE_SAMPLES_AVCONV += $(FATE_VP6-yes)
> fate-vp6: $(FATE_VP6-yes)
>
> diff --git a/tests/ref/fate/webm-dash-manifest-live b/tests/ref/fate/webm-dash-manifest-live
> new file mode 100644
> index 0000000..797cced
> --- /dev/null
> +++ b/tests/ref/fate/webm-dash-manifest-live
> @@ -0,0 +1,26 @@
> +<?xml version="1.0" encoding="UTF-8"?>
> +<MPD
> + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> + xmlns="urn:mpeg:DASH:schema:MPD:2011"
> + xsi:schemaLocation="urn:mpeg:DASH:schema:MPD:2011"
> + type="dynamic"
> + minBufferTime="PT1S"
> + profiles="urn:mpeg:dash:profile:isoff-live:2011"
> + availabilityStartTime=""
> + timeShiftBufferDepth="PT7200S">
> +<UTCTiming
> + schemeIdUri="urn:mpeg:dash:utc:direct:2012"
> + value=""/>
> +<Period id="0" start="PT0S" duration="PT0S" >
> +<AdaptationSet id="0" mimeType="video/webm" codecs="vp9" bitstreamSwitching="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
> +<ContentComponent id="1" type="video"/>
> +<SegmentTemplate timescale="1000" duration="5000" media="dash_live_video_$RepresentationID$_$Number$.chk" startNumber="1" initialization="dash_live_video_$RepresentationID$.hdr"/>
> +<Representation id="360" bandwidth="1000000" width="640" height="360" codecs="vp9" mimeType="video/webm" startsWithSAP="1"></Representation>
> +</AdaptationSet>
> +<AdaptationSet id="1" mimeType="audio/webm" codecs="vorbis" bitstreamSwitching="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
> +<ContentComponent id="1" type="audio"/>
> +<SegmentTemplate timescale="1000" duration="5000" media="dash_live_audio_$RepresentationID$_$Number$.chk" startNumber="1" initialization="dash_live_audio_$RepresentationID$.hdr"/>
> +<Representation id="171" bandwidth="128000" audioSamplingRate="32000" codecs="vorbis" mimeType="audio/webm" startsWithSAP="1"></Representation>
> +</AdaptationSet>
> +</Period>
> +</MPD>
> --
> 2.2.0.rc0.207.ga3a616c
>
The newly added fate test requires these two files to be present in
"vp8/" directory. Please put them there before running them:
1) https://drive.google.com/file/d/0Bx8Q1nhO9b6MSExHUEM5U1pyWW8/view?usp=sharing
2) https://drive.google.com/file/d/0Bx8Q1nhO9b6MbG5mRWZhREhpNlU/view?usp=sharing
--
Vignesh
More information about the ffmpeg-devel
mailing list