[FFmpeg-devel] [PATCH v4 4/4] {avcodec, avformat}: add TTML encoder and muxer

Fri Feb 19 15:45:53 EET 2021

Quoting Jan Ekström (2021-01-22 12:20:58)
> From: Jan Ekström <jan.ekstrom at 24i.com>
> 
> Enables encoding of other subtitle formats into TTML and writing
> them out as such documents.
> 
> Signed-off-by: Jan Ekström <jan.ekstrom at 24i.com>
> ---
>  Changelog                  |   1 +
>  doc/general_contents.texi  |   1 +
>  libavcodec/Makefile        |   1 +
>  libavcodec/allcodecs.c     |   1 +
>  libavcodec/ttmlenc.c       | 179 +++++++++++++++++++++++++++++++++++++
>  libavcodec/version.h       |   4 +-
>  libavformat/Makefile       |   1 +
>  libavformat/allformats.c   |   1 +
>  libavformat/ttmlenc.c      | 166 ++++++++++++++++++++++++++++++++++
>  libavformat/version.h      |   4 +-
>  tests/fate/subtitles.mak   |   3 +
>  tests/ref/fate/sub-ttmlenc | 122 +++++++++++++++++++++++++
>  12 files changed, 480 insertions(+), 4 deletions(-)
>  create mode 100644 libavcodec/ttmlenc.c
>  create mode 100644 libavformat/ttmlenc.c
>  create mode 100644 tests/ref/fate/sub-ttmlenc
> 
> diff --git a/Changelog b/Changelog
> index 0b27c15122..9a6aeb4128 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -56,6 +56,7 @@ version <next>:
>  - shufflepixels filter
>  - tmidequalizer filter
>  - estdif filter
> +- TTML subtitle encoder and muxer
>  
>  
>  version 4.3:
> diff --git a/doc/general_contents.texi b/doc/general_contents.texi
> index 443e8ed8d1..d799382f84 100644
> --- a/doc/general_contents.texi
> +++ b/doc/general_contents.texi
> @@ -1334,6 +1334,7 @@ performance on systems without hardware floating point support).
>  @item SubViewer v1     @tab   @tab X @tab   @tab X
>  @item SubViewer        @tab   @tab X @tab   @tab X
>  @item TED Talks captions @tab @tab X @tab   @tab X
> +@item TTML             @tab X @tab   @tab X @tab
>  @item VobSub (IDX+SUB) @tab   @tab X @tab   @tab X
>  @item VPlayer          @tab   @tab X @tab   @tab X
>  @item WebVTT           @tab X @tab X @tab X @tab X
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 5ce3ee0ec9..d26e0264de 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -669,6 +669,7 @@ OBJS-$(CONFIG_TSCC_DECODER)            += tscc.o msrledec.o
>  OBJS-$(CONFIG_TSCC2_DECODER)           += tscc2.o
>  OBJS-$(CONFIG_TTA_DECODER)             += tta.o ttadata.o ttadsp.o
>  OBJS-$(CONFIG_TTA_ENCODER)             += ttaenc.o ttaencdsp.o ttadata.o
> +OBJS-$(CONFIG_TTML_ENCODER)            += ttmlenc.o ass_split.o
>  OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvqdec.o twinvq.o metasound_data.o
>  OBJS-$(CONFIG_TXD_DECODER)             += txd.o
>  OBJS-$(CONFIG_ULTI_DECODER)            += ulti.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index f00d524747..81d20c44ec 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -686,6 +686,7 @@ extern AVCodec ff_subviewer_decoder;
>  extern AVCodec ff_subviewer1_decoder;
>  extern AVCodec ff_text_encoder;
>  extern AVCodec ff_text_decoder;
> +extern AVCodec ff_ttml_encoder;
>  extern AVCodec ff_vplayer_decoder;
>  extern AVCodec ff_webvtt_encoder;
>  extern AVCodec ff_webvtt_decoder;
> diff --git a/libavcodec/ttmlenc.c b/libavcodec/ttmlenc.c
> new file mode 100644
> index 0000000000..a9b1411467
> --- /dev/null
> +++ b/libavcodec/ttmlenc.c
> +static int ttml_encode_frame(AVCodecContext *avctx, uint8_t *buf,
> +                             int bufsize, const AVSubtitle *sub)
> +{
> +    TTMLContext *s = avctx->priv_data;
> +    ASSDialog *dialog;
> +    int i;
> +
> +    av_bprint_clear(&s->buffer);
> +
> +    for (i=0; i<sub->num_rects; i++) {
> +        const char *ass = sub->rects[i]->ass;
> +
> +        if (sub->rects[i]->type != SUBTITLE_ASS) {
> +            av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
> +            return AVERROR(EINVAL);
> +        }
> +
> +#if FF_API_ASS_TIMING
> +        if (!strncmp(ass, "Dialogue: ", 10)) {
> +            int num;
> +            dialog = ff_ass_split_dialog(s->ass_ctx, ass, 0, &num);
> +
> +            for (; dialog && num--; dialog++) {
> +                ff_ass_split_override_codes(&ttml_callbacks, s, dialog->text);

Should the result of ff_ass_split_override_codes() be checked for an error here?

> +            }
> +        } else {
> +#endif
> +            dialog = ff_ass_split_dialog2(s->ass_ctx, ass);
> +            if (!dialog)
> +                return AVERROR(ENOMEM);
> +
> +            ff_ass_split_override_codes(&ttml_callbacks, s, dialog->text);
> +            ff_ass_free_dialog(&dialog);
> +#if FF_API_ASS_TIMING
> +        }
> +#endif
> +    }
> +
> +    if (!av_bprint_is_complete(&s->buffer))
> +        return AVERROR(ENOMEM);
> +    if (!s->buffer.len)
> +        return 0;
> +
> +    // force null-termination, so in case our destination buffer is
> +    // too small, the return value is larger than bufsize minus null.
> +    if (av_strlcpy(buf, s->buffer.str, bufsize) > bufsize - 1) {

Is this guaranteed to not overread s->buffer.str?

> +        av_log(avctx, AV_LOG_ERROR, "Buffer too small for TTML event.\n");
> +        return AVERROR_BUFFER_TOO_SMALL;
> +    }
> +
> +    return s->buffer.len;
> +}
> +
> +static av_cold int ttml_encode_close(AVCodecContext *avctx)
> +{
> +    TTMLContext *s = avctx->priv_data;
> +
> +    ff_ass_split_free(s->ass_ctx);
> +
> +    av_bprint_finalize(&s->buffer, NULL);
> +
> +    return 0;
> +}
> +
> +static av_cold int ttml_encode_init(AVCodecContext *avctx)
> +{
> +    int ret = AVERROR_BUG;
> +    TTMLContext *s = avctx->priv_data;
> +
> +    s->avctx   = avctx;
> +
> +    if (!(s->ass_ctx = ff_ass_split(avctx->subtitle_header))) {
> +        ret = AVERROR_INVALIDDATA;
> +        goto failure;
> +    }
> +
> +    if (!(avctx->extradata = av_malloc(4 + AV_INPUT_BUFFER_PADDING_SIZE))) {
> +        ret = AVERROR(ENOMEM);
> +        goto failure;
> +    }
> +    avctx->extradata_size = 4;
> +
> +    av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
> +
> +    return 0;
> +
> +failure:
> +    ff_ass_split_free(s->ass_ctx);
> +    av_bprint_finalize(&s->buffer, NULL);

Mark the encoder as FF_CODEC_CAP_INIT_CLEANUP and drop this manual cleanup under the failure label?

> +
> +    return ret;
> +}
> +
> +AVCodec ff_ttml_encoder = {
> +    .name           = "ttml",
> +    .long_name      = NULL_IF_CONFIG_SMALL("TTML subtitle"),
> +    .type           = AVMEDIA_TYPE_SUBTITLE,
> +    .id             = AV_CODEC_ID_TTML,
> +    .priv_data_size = sizeof(TTMLContext),
> +    .init           = ttml_encode_init,
> +    .encode_sub     = ttml_encode_frame,
> +    .close          = ttml_encode_close,
> +};
> +static int ttml_write_header(AVFormatContext *ctx)
> +{
> +    TTMLMuxContext *ttml_ctx = ctx->priv_data;
> +    ttml_ctx->document_written = 0;
> +
> +    if (ctx->nb_streams != 1 ||
> +        ctx->streams[0]->codecpar->codec_id != AV_CODEC_ID_TTML) {
> +        av_log(ctx, AV_LOG_ERROR, "Exactly one TTML stream is required!\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    {
> +        AVStream    *st = ctx->streams[0];
> +        AVIOContext *pb = ctx->pb;
> +
> +        AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,
> +                                              0);
> +        const char *printed_lang = (lang && lang->value) ? lang->value : "";
> +
> +        // Not perfect, but decide whether the packet is a document or not
> +        // by the existence of extradata.

I'd prefer extradata to contain something. Doesn't matter much what
exactly, as long as it's unique enough.

> +        ttml_ctx->input_type = st->codecpar->extradata ?
> +                               PACKET_TYPE_PARAGRAPH :
> +                               PACKET_TYPE_DOCUMENT;
> +
> +        avpriv_set_pts_info(st, 64, 1, 1000);
> +
> +        if (ttml_ctx->input_type == PACKET_TYPE_PARAGRAPH)
> +            avio_printf(pb, ttml_header_text, printed_lang);
> +    }
> +
> +    return 0;
> +}
> +
> +static int ttml_write_packet(AVFormatContext *ctx, AVPacket *pkt)
> +{
> +    TTMLMuxContext *ttml_ctx = ctx->priv_data;
> +    AVIOContext    *pb       = ctx->pb;
> +
> +    switch (ttml_ctx->input_type) {
> +    case PACKET_TYPE_PARAGRAPH:
> +        // write out a paragraph element with the given contents.
> +        avio_printf(pb,     "      <p\n");
> +        ttml_write_time(pb, "        begin", pkt->pts);
> +        avio_w8(pb, '\n');
> +        ttml_write_time(pb, "        end",   pkt->pts + pkt->duration);
> +        avio_printf(pb, ">");
> +        avio_write(pb, pkt->data, pkt->size);
> +        avio_printf(pb, "</p>\n");
> +        break;
> +    case PACKET_TYPE_DOCUMENT:
> +        // dump the given document out as-is.
> +        if (ttml_ctx->document_written) {
> +            av_log(ctx, AV_LOG_ERROR,
> +                   "Attempting to write multiple TTML documents into a "
> +                   "single document! The XML specification forbids this "
> +                   "as there has to be a single root tag.\n");
> +            return AVERROR(EINVAL);
> +        }
> +        avio_write(pb, pkt->data, pkt->size);
> +        ttml_ctx->document_written = 1;
> +        break;
> +    default:
> +        av_log(ctx, AV_LOG_ERROR, "Invalid TTML input packet type!\n");
> +        return AVERROR(EINVAL);

This is an internal error, so it should be AVERROR_BUG or an assert.

-- 
Anton Khirnov