[FFmpeg-devel] [PATCH v2 2/2] avformat/movenc: add support for TTML muxing
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Wed Jul 21 16:54:19 EEST 2021
Jan Ekström:
> From: Jan Ekström <jan.ekstrom at 24i.com>
>
> Includes basic support for both the ISMV ('dfxp') and MP4 ('stpp')
> methods. This initial version also foregoes fragmentation support
> in case the built-in sample squashing is to be utilized, as this
> eases the initial review.
>
> Additionally, add basic tests for both muxing modes in MP4.
>
> Signed-off-by: Jan Ekström <jan.ekstrom at 24i.com>
> ---
> libavformat/Makefile | 2 +-
> libavformat/isom.h | 3 +
> libavformat/movenc.c | 167 +++++++++++++++++++++++++-
> libavformat/movenc.h | 5 +
> libavformat/movenc_ttml.c | 195 +++++++++++++++++++++++++++++++
> libavformat/movenc_ttml.h | 31 +++++
> tests/fate/subtitles.mak | 4 +
> tests/ref/fate/sub-ttml-mp4-dfxp | 44 +++++++
> tests/ref/fate/sub-ttml-mp4-stpp | 44 +++++++
> 9 files changed, 492 insertions(+), 3 deletions(-)
> create mode 100644 libavformat/movenc_ttml.c
> create mode 100644 libavformat/movenc_ttml.h
> create mode 100644 tests/ref/fate/sub-ttml-mp4-dfxp
> create mode 100644 tests/ref/fate/sub-ttml-mp4-stpp
>
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index 813ddd3c20..7e0f587b41 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -337,7 +337,7 @@ OBJS-$(CONFIG_MOV_DEMUXER) += mov.o mov_chan.o mov_esds.o \
> qtpalette.o replaygain.o
> OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o hevc.o vpcc.o \
> movenchint.o mov_chan.o rtp.o \
> - movenccenc.o rawutils.o
> + movenccenc.o movenc_ttml.o rawutils.o
> OBJS-$(CONFIG_MP2_MUXER) += rawenc.o
> OBJS-$(CONFIG_MP3_DEMUXER) += mp3dec.o replaygain.o
> OBJS-$(CONFIG_MP3_MUXER) += mp3enc.o rawenc.o id3v2enc.o
> diff --git a/libavformat/isom.h b/libavformat/isom.h
> index ac1b3f3d56..34a58c79b7 100644
> --- a/libavformat/isom.h
> +++ b/libavformat/isom.h
> @@ -387,4 +387,7 @@ static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
> return ff_get_pcm_codec_id(bps, flags & 1, flags & 2, flags & 4 ? -1 : 0);
> }
>
> +#define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p')
> +#define MOV_MP4_TTML_TAG MKTAG('s', 't', 'p', 'p')
> +
> #endif /* AVFORMAT_ISOM_H */
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index c85efe8748..fcfb2a1775 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -56,6 +56,8 @@
> #include "hevc.h"
> #include "rtpenc.h"
> #include "mov_chan.h"
> +#include "movenc_ttml.h"
> +#include "ttmlenc.h"
> #include "vpcc.h"
>
> static const AVOption options[] = {
> @@ -119,6 +121,7 @@ static const AVClass mov_isobmff_muxer_class = {
> };
>
> static int get_moov_size(AVFormatContext *s);
> +static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt);
>
> static int utf8len(const uint8_t *b)
> {
> @@ -1787,7 +1790,29 @@ static int mov_write_subtitle_tag(AVIOContext *pb, MOVTrack *track)
>
> if (track->par->codec_id == AV_CODEC_ID_DVD_SUBTITLE)
> mov_write_esds_tag(pb, track);
> - else if (track->par->extradata_size)
> + else if (track->par->codec_id == AV_CODEC_ID_TTML) {
> + switch (track->par->codec_tag) {
> + case MOV_ISMV_TTML_TAG:
> + // ISMV dfxp requires no extradata.
> + break;
> + case MOV_MP4_TTML_TAG:
> + // As specified in 14496-30, XMLSubtitleSampleEntry
> + // Namespace
> + avio_put_str(pb, "http://www.w3.org/ns/ttml");
> + // Empty schema_location
> + avio_w8(pb, 0);
> + // Empty auxiliary_mime_types
> + avio_w8(pb, 0);
> + break;
> + default:
> + av_log(NULL, AV_LOG_ERROR,
> + "Unknown codec tag '%s' utilized for TTML stream with "
> + "index %d (track id %d)!\n",
> + av_fourcc2str(track->par->codec_tag), track->st->index,
> + track->track_id);
> + return AVERROR(EINVAL);
> + }
> + } else if (track->par->extradata_size)
> avio_write(pb, track->par->extradata, track->par->extradata_size);
>
> if (track->mode == MODE_MP4 &&
> @@ -5253,6 +5278,68 @@ static int mov_flush_fragment_interleaving(AVFormatContext *s, MOVTrack *track)
> return 0;
> }
>
> +static int mov_write_squashed_packet(AVFormatContext *s, MOVTrack *track)
> +{
> + MOVMuxContext *mov = s->priv_data;
> + AVPacket *squashed_packet = mov->pkt;
> + int ret = AVERROR_BUG;
> +
> + switch (track->st->codecpar->codec_id) {
> + case AV_CODEC_ID_TTML: {
> + int had_packets = !!track->squashed_packet_queue;
> +
> + if ((ret = ff_mov_generate_squashed_ttml_packet(s, track, squashed_packet)) < 0) {
> + goto finish_squash;
> + }
> +
> + // We have generated a padding packet (no actual input packets in
> + // queue) and its duration is zero. Skipping writing it.
> + if (!had_packets && squashed_packet->duration == 0) {
> + goto finish_squash;
> + }
> +
> + track->end_reliable = 1;
> + break;
> + }
> + default:
> + ret = AVERROR(EINVAL);
> + goto finish_squash;
> + }
> +
> + squashed_packet->stream_index = track->st->index;
> +
> + ret = mov_write_single_packet(s, squashed_packet);
> +
> +finish_squash:
> + av_packet_unref(squashed_packet);
> +
> + return ret;
> +}
> +
> +static int mov_write_squashed_packets(AVFormatContext *s)
> +{
> + MOVMuxContext *mov = s->priv_data;
> +
> + for (int i = 0; i < s->nb_streams; i++) {
> + MOVTrack *track = &mov->tracks[i];
> + int ret = AVERROR_BUG;
> +
> + if (track->squash_fragment_samples_to_one && !track->entry) {
> + if ((ret = mov_write_squashed_packet(s, track)) < 0) {
> + av_log(s, AV_LOG_ERROR,
> + "Failed to write squashed packet for %s stream with "
> + "index %d and track id %d. Error: %s\n",
> + avcodec_get_name(track->st->codecpar->codec_id),
> + track->st->index, track->track_id,
> + av_err2str(ret));
> + return ret;
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> static int mov_flush_fragment(AVFormatContext *s, int force)
> {
> MOVMuxContext *mov = s->priv_data;
> @@ -5264,6 +5351,11 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
> if (!(mov->flags & FF_MOV_FLAG_FRAGMENT))
> return 0;
>
> + // Check if we have any tracks that require squashing.
> + // In that case, we'll have to write the packet here.
> + if ((ret = mov_write_squashed_packets(s)) < 0)
> + return ret;
> +
> // Try to fill in the duration of the last packet in each stream
> // from queued packets in the interleave queues. If the flushing
> // of fragments was triggered automatically by an AVPacket, we
> @@ -5739,7 +5831,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> trk->cluster[trk->entry].entries = samples_in_chunk;
> trk->cluster[trk->entry].dts = pkt->dts;
> trk->cluster[trk->entry].pts = pkt->pts;
> - if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
> + if (!trk->squash_fragment_samples_to_one &&
> + !trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
> if (!trk->frag_discont) {
> /* First packet of a new fragment. We already wrote the duration
> * of the last packet of the previous fragment based on track_duration,
> @@ -6032,6 +6125,33 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> }
> }
>
> + if (trk->squash_fragment_samples_to_one) {
> + /*
> + * If the track has to have its samples squashed into one sample,
> + * we just take it into the track's queue.
> + * This will then be utilized as the samples get written in either
> + * mov_flush_fragment or when the mux is finalized in
> + * mov_write_trailer.
> + */
> + int ret = AVERROR_BUG;
> +
> + if (pkt->pts == AV_NOPTS_VALUE) {
> + av_log(s, AV_LOG_ERROR,
> + "Packets without a valid presentation timestamp are "
> + "not supported with packet squashing!\n");
> + return AVERROR(EINVAL);
> + }
> +
> + if ((ret = avpriv_packet_list_put(&trk->squashed_packet_queue,
> + &trk->squashed_packet_queue_end,
> + pkt, av_packet_ref, 0)) < 0) {
> + return ret;
> + }
> +
> + return 0;
> + }
> +
> +
> if (trk->mode == MODE_MOV && trk->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> AVPacket *opkt = pkt;
> int reshuffle_ret, ret;
> @@ -6310,6 +6430,11 @@ static void mov_free(AVFormatContext *s)
>
> ff_mov_cenc_free(&mov->tracks[i].cenc);
> ffio_free_dyn_buf(&mov->tracks[i].mdat_buf);
> +
> + if (mov->tracks[i].squashed_packet_queue) {
> + avpriv_packet_list_free(&(mov->tracks[i].squashed_packet_queue),
> + &(mov->tracks[i].squashed_packet_queue_end));
> + }
> }
>
> av_freep(&mov->tracks);
> @@ -6700,6 +6825,36 @@ static int mov_init(AVFormatContext *s)
> }
> } else if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> track->timescale = st->time_base.den;
> +
> + if (track->par->codec_id == AV_CODEC_ID_TTML) {
> + /* 14496-30 requires us to use a single sample per fragment
> + for TTML, for which we define a per-track flag.
> +
> + We set the flag in case we are receiving TTML paragraphs
> + from the input, in other words in case we are not doing
> + stream copy. */
> + track->squash_fragment_samples_to_one =
> + ff_is_ttml_stream_paragraph_based(track->par);
> +
> + if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
> + track->squash_fragment_samples_to_one) {
> + av_log(s, AV_LOG_ERROR,
> + "Fragmentation is not currently supported for "
> + "TTML in MP4/ISMV (track synchronization between "
> + "subtitles and other media is not yet implemented)!\n");
> + return AVERROR(EINVAL);
This is a missing feature rather than invalid input, so return
AVERROR_PATCHWELCOME here instead of AVERROR(EINVAL).
> + }
> +
> + if (track->mode == MODE_MP4 &&
> + track->par->codec_tag == MOV_ISMV_TTML_TAG &&
> + s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
> + av_log(s, AV_LOG_ERROR,
> + "ISMV style TTML support with the 'dfxp' tag in MP4 "
> + "is not officially supported, add "
> + "'-strict unofficial' if you want to use it.\n");
> + return AVERROR_EXPERIMENTAL;
> + }
> + }
> } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
> track->timescale = st->time_base.den;
> } else {
> @@ -7046,6 +7201,11 @@ static int mov_write_trailer(AVFormatContext *s)
> }
> }
>
> + // Check if we have any tracks that require squashing.
> + // In that case, we'll have to write the packet here.
> + if ((res = mov_write_squashed_packets(s)) < 0)
> + return res;
> +
> // If there were no chapters when the header was written, but there
> // are chapters now, write them in the trailer. This only works
> // when we are not doing fragments.
> @@ -7190,6 +7350,8 @@ static const AVCodecTag codec_mp4_tags[] = {
> { AV_CODEC_ID_MOV_TEXT, MKTAG('t', 'x', '3', 'g') },
> { AV_CODEC_ID_BIN_DATA, MKTAG('g', 'p', 'm', 'd') },
> { AV_CODEC_ID_MPEGH_3D_AUDIO, MKTAG('m', 'h', 'm', '1') },
> + { AV_CODEC_ID_TTML, MOV_MP4_TTML_TAG },
> + { AV_CODEC_ID_TTML, MOV_ISMV_TTML_TAG },
> { AV_CODEC_ID_NONE, 0 },
> };
> #if CONFIG_MP4_MUXER || CONFIG_PSP_MUXER
> @@ -7198,6 +7360,7 @@ static const AVCodecTag *const mp4_codec_tags_list[] = { codec_mp4_tags, NULL };
>
> static const AVCodecTag codec_ism_tags[] = {
> { AV_CODEC_ID_WMAPRO , MKTAG('w', 'm', 'a', ' ') },
> + { AV_CODEC_ID_TTML , MOV_ISMV_TTML_TAG },
> { AV_CODEC_ID_NONE , 0 },
> };
>
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index af1ea0bce6..95db1bf46d 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -26,6 +26,7 @@
>
> #include "avformat.h"
> #include "movenccenc.h"
> +#include "libavcodec/packet_internal.h"
>
> #define MOV_FRAG_INFO_ALLOC_INCREMENT 64
> #define MOV_INDEX_CLUSTER_SIZE 1024
> @@ -164,6 +165,10 @@ typedef struct MOVTrack {
> int pal_done;
>
> int is_unaligned_qt_rgb;
> +
> + unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed
> +
> + PacketList *squashed_packet_queue, *squashed_packet_queue_end;
> } MOVTrack;
>
> typedef enum {
> diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
> new file mode 100644
> index 0000000000..9943830a20
> --- /dev/null
> +++ b/libavformat/movenc_ttml.c
> @@ -0,0 +1,195 @@
> +/*
> + * MP4, ISMV Muxer TTML helpers
> + * Copyright (c) 2021 24i
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avformat.h"
> +#include "avio_internal.h"
> +#include "isom.h"
> +#include "movenc.h"
> +#include "movenc_ttml.h"
> +#include "libavcodec/packet_internal.h"
> +
> +static const unsigned char empty_ttml_document[] =
> + "<tt xml:lang=\"\" xmlns=\"http://www.w3.org/ns/ttml\" />";
> +
> +static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
> +{
> + AVStream *movenc_stream = track->st, *ttml_stream = NULL;
> + AVFormatContext *ttml_ctx = NULL;
If you wrote to *out_ctx directly instead of going through the local
ttml_ctx, you could remove the cleanup code in this function and instead
rely on the cleanup code in ff_mov_generate_squashed_ttml_packet().
> + int ret = AVERROR_BUG;
> + if ((ret = avformat_alloc_output_context2(&ttml_ctx, NULL,
> + "ttml", NULL)) < 0)
> + goto fail;
> +
> + if ((ret = avio_open_dyn_buf(&ttml_ctx->pb)) < 0)
> + goto fail;
> +
> + if (!(ttml_stream = avformat_new_stream(ttml_ctx, NULL))) {
> + ret = AVERROR(ENOMEM);
> + goto fail;
> + }
> +
> + if ((ret = avcodec_parameters_copy(ttml_stream->codecpar,
> + movenc_stream->codecpar)) < 0)
> + goto fail;
> +
> + ttml_stream->time_base = movenc_stream->time_base;
> +
> + *out_ctx = ttml_ctx;
> +
> + return 0;
> +
> +fail:
> + if (ttml_ctx) {
This check is unnecessary: Just return immediately if
avformat_alloc_output_context2() returns an error (without goto fail).
> + uint8_t *buf = NULL;
> + avio_close_dyn_buf(ttml_ctx->pb, &buf);
> + av_freep(&buf);
Use ffio_free_dyn_buf(&ttml_ctx->pb) here instead of closing the dynamic
buffer and freeing its contents by hand.
> + }
> +
> + avformat_free_context(ttml_ctx);
> +
> + return ret;
> +}
> +
> +static int mov_write_ttml_document_from_queue(AVFormatContext *s,
> + AVFormatContext *ttml_ctx,
> + MOVTrack *track,
> + int64_t *out_start_ts,
> + int64_t *out_duration)
> +{
> + int ret = AVERROR_BUG;
> + int64_t start_ts = track->start_dts == AV_NOPTS_VALUE ?
> + 0 : (track->start_dts + track->track_duration);;
> + int64_t end_ts = start_ts;
> + AVPacket *looped_pkt = av_packet_alloc();
Why don't you just reuse the pkt given to
ff_mov_generate_squashed_ttml_packet() to go through the queue instead
of allocating a new one? That would also make the allocation check and
its long error message below unnecessary.
> + if (!looped_pkt) {
> + av_log(s, AV_LOG_ERROR,
> + "Failed to allocate AVPacket for going through packet queue!\n");
> + return AVERROR(ENOMEM);
> + }
> +
> + if ((ret = avformat_write_header(ttml_ctx, NULL)) < 0) {
looped_pkt currently leaks on this error path; use "goto cleanup"
instead of returning directly.
> + return ret;
> + }
> +
> + while (!avpriv_packet_list_get(&track->squashed_packet_queue,
> + &track->squashed_packet_queue_end,
> + looped_pkt)) {
> + end_ts = FFMAX(end_ts, looped_pkt->pts + looped_pkt->duration);
> +
> + // in case of the 'dfxp' muxing mode, each written document is offset
> + // to its containing sample's beginning.
> + if (track->par->codec_tag == MOV_ISMV_TTML_TAG) {
> + looped_pkt->dts = looped_pkt->pts = (looped_pkt->pts - start_ts);
> + }
> +
> + looped_pkt->stream_index = 0;
> +
> + av_packet_rescale_ts(looped_pkt, track->st->time_base,
> + ttml_ctx->streams[looped_pkt->stream_index]->time_base);
> +
> + if ((ret = av_write_frame(ttml_ctx, looped_pkt)) < 0) {
> + goto cleanup;
> + }
> +
> + av_packet_unref(looped_pkt);
> + }
> +
> + if ((ret = av_write_trailer(ttml_ctx)) < 0)
> + goto cleanup;
> +
> + *out_start_ts = start_ts;
> + *out_duration = end_ts - start_ts;
> +
> + ret = 0;
> +
> +cleanup:
> + av_packet_free(&looped_pkt);
> +
> + return ret;
> +}
> +
> +int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
> + MOVTrack *track, AVPacket *pkt)
> +{
> + AVFormatContext *ttml_ctx = NULL;
> + // values for the generated AVPacket
> + int64_t start_ts = 0;
> + int64_t duration = 0;
> +
> + int ret = AVERROR_BUG;
> +
> + if ((ret = mov_init_ttml_writer(track, &ttml_ctx)) < 0) {
> + av_log(s, AV_LOG_ERROR, "Failed to initialize the TTML writer: %s\n",
> + av_err2str(ret));
> + goto cleanup;
> + }
> +
> + if (!track->squashed_packet_queue) {
> + // empty queue, write minimal empty document with zero duration
> + avio_write(ttml_ctx->pb, empty_ttml_document,
> + sizeof(empty_ttml_document) - 1);
> + start_ts = 0;
> + duration = 0;
> + goto generate_packet;
> + }
> +
> + if ((ret = mov_write_ttml_document_from_queue(s, ttml_ctx, track,
> + &start_ts,
> + &duration)) < 0) {
> + av_log(s, AV_LOG_ERROR,
> + "Failed to generate a squashed TTML packet from the packet "
> + "queue: %s\n",
> + av_err2str(ret));
> + goto cleanup;
> + }
> +
> +generate_packet:
> + {
> + // Generate an AVPacket from the data written into the dynamic buffer.
> + uint8_t *buf = NULL;
> + int buf_len = avio_close_dyn_buf(ttml_ctx->pb, &buf);
> + ttml_ctx->pb = NULL;
> +
> + if ((ret = av_packet_from_data(pkt, buf, buf_len)) < 0) {
> + av_log(s, AV_LOG_ERROR,
> + "Failed to create a TTML AVPacket from AVIO data: %s\n",
> + av_err2str(ret));
> + av_freep(&buf);
> + goto cleanup;
> + }
> +
> + pkt->pts = pkt->dts = start_ts;
> + pkt->duration = duration;
> + pkt->flags |= AV_PKT_FLAG_KEY;
> + }
> +
> + ret = 0;
> +
> +cleanup:
> + if (ttml_ctx && ttml_ctx->pb) {
> + uint8_t *buf = NULL;
> + avio_close_dyn_buf(ttml_ctx->pb, &buf);
> + av_freep(&buf);
Use ffio_free_dyn_buf(&ttml_ctx->pb) here as well.
> + }
> +
> + avformat_free_context(ttml_ctx);
Generally, I do not like that you are allocating and initializing a
whole AVFormatContext, AVStream, AVCodecParameters, a dynamic buffer
etc. for each squashed packet. Looking at ttmlenc, it seems that your
approach ensures that ttml_footer_text is added to every packet if the
type is PACKET_TYPE_PARAGRAPH. Is this really intended? It seems odd. If
it is not, then the AVFormatContext, AVStream and dynamic buffer could be
allocated once and reused across packets.
> + return ret;
> +}
> diff --git a/libavformat/movenc_ttml.h b/libavformat/movenc_ttml.h
> new file mode 100644
> index 0000000000..c71ecd0997
> --- /dev/null
> +++ b/libavformat/movenc_ttml.h
> @@ -0,0 +1,31 @@
> +/*
> + * MP4, ISMV Muxer TTML helpers
> + * Copyright (c) 2021 24i
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFORMAT_MOVENC_TTML_H
> +#define AVFORMAT_MOVENC_TTML_H
> +
> +#include "avformat.h"
> +#include "movenc.h"
> +
> +int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
> + MOVTrack *track, AVPacket *pkt);
> +
> +#endif /* AVFORMAT_MOVENC_TTML_H */
More information about the ffmpeg-devel
mailing list