[FFmpeg-devel] [PATCH 2/2] avformat/movenc: add support for fragmented TTML muxing

Fri Dec 23 16:41:06 EET 2022

Jan Ekström:
> From: Jan Ekström <jan.ekstrom at 24i.com>
> 
> Attempts to base the fragmentation timing on other streams
> as most receivers expect media fragments to be more or less
> aligned.
> 
> Currently does not support fragmentation on subtitle track
> only, as the subtitle packet queue timings would have to be
> checked in addition to the current fragmentation timing logic.
> 
> Signed-off-by: Jan Ekström <jan.ekstrom at 24i.com>
> ---
>  libavformat/movenc.c      |   9 ---
>  libavformat/movenc_ttml.c | 163 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 159 insertions(+), 13 deletions(-)
> 
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 7d49892283..e9a7984f8a 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -7188,15 +7188,6 @@ static int mov_init(AVFormatContext *s)
>                  track->squash_fragment_samples_to_one =
>                      ff_is_ttml_stream_paragraph_based(track->par);
>  
> -                if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
> -                    track->squash_fragment_samples_to_one) {
> -                    av_log(s, AV_LOG_ERROR,
> -                           "Fragmentation is not currently supported for "
> -                           "TTML in MP4/ISMV (track synchronization between "
> -                           "subtitles and other media is not yet implemented)!\n");
> -                    return AVERROR_PATCHWELCOME;
> -                }
> -
>                  if (track->mode != MODE_ISM &&
>                      track->par->codec_tag == MOV_ISMV_TTML_TAG &&
>                      s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
> diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
> index 6deae49657..27ec7d9487 100644
> --- a/libavformat/movenc_ttml.c
> +++ b/libavformat/movenc_ttml.c
> @@ -54,6 +54,50 @@ static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
>      return 0;
>  }
>  
> +static void mov_calculate_start_and_end_of_other_tracks(
> +    AVFormatContext *s, MOVTrack *track, int64_t *start_pts, int64_t *end_pts)
> +{
> +    MOVMuxContext *mov = s->priv_data;
> +
> +    // Initialize at the end of the previous document/fragment, which is NOPTS
> +    // until the first fragment is created.
> +    int64_t max_track_end_dts = *start_pts = track->end_pts;
> +
> +    for (unsigned int i = 0; i < s->nb_streams; i++) {
> +        MOVTrack *other_track = &mov->tracks[i];
> +
> +        // Skip our own track, any other track that needs squashing,
> +        // or any track which still has its start_dts at NOPTS or
> +        // any track that did not yet get any packets.
> +        if (track == other_track ||
> +            other_track->squash_fragment_samples_to_one ||
> +            other_track->start_dts == AV_NOPTS_VALUE ||
> +            !other_track->entry) {
> +            continue;
> +        }
> +
> +        {
> +            int64_t picked_start = av_rescale_q_rnd(other_track->cluster[0].dts + other_track->cluster[0].cts,
> +                                                    other_track->st->time_base,
> +                                                    track->st->time_base,
> +                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
> +            int64_t picked_end   = av_rescale_q_rnd(other_track->end_pts,
> +                                                    other_track->st->time_base,
> +                                                    track->st->time_base,
> +                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
> +
> +            if (*start_pts == AV_NOPTS_VALUE)
> +                *start_pts = picked_start;
> +            else if (picked_start >= track->end_pts)
> +                *start_pts = FFMIN(*start_pts, picked_start);
> +
> +            max_track_end_dts = FFMAX(max_track_end_dts, picked_end);
> +        }
> +    }
> +
> +    *end_pts = max_track_end_dts;
> +}
> +
>  static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>                                                AVFormatContext *ttml_ctx,
>                                                MOVTrack *track,
> @@ -65,13 +109,87 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>      int64_t start_ts = track->start_dts == AV_NOPTS_VALUE ?
>                         0 : (track->start_dts + track->track_duration);
>      int64_t end_ts   = start_ts;
> +    unsigned int time_limited = 0;
> +    PacketList back_to_queue_list = { 0 };
> +
> +    if (*out_start_ts != AV_NOPTS_VALUE) {
> +        // we have non-nopts values here, thus we have been given a time range
> +        time_limited = 1;
> +        start_ts = *out_start_ts;
> +        end_ts   = *out_start_ts + *out_duration;
> +    }
>  
>      if ((ret = avformat_write_header(ttml_ctx, NULL)) < 0) {
>          return ret;
>      }
>  
>      while (!avpriv_packet_list_get(&track->squashed_packet_queue, pkt)) {
> -        end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
> +        unsigned int stop_at_current_packet = 0;
> +        int64_t pts_before      = pkt->pts;
> +        int64_t duration_before = pkt->duration;
> +
> +        if (time_limited) {
> +            // special cases first:
> +            if (pkt->pts + pkt->duration < start_ts) {
> +                // too late for our fragment, unfortunately
> +                // unref and proceed to next packet in queue.
> +                av_log(s, AV_LOG_WARNING,
> +                       "Very late TTML packet in queue, dropping packet with "
> +                       "pts: %"PRId64", duration: %"PRId64"\n",
> +                       pkt->pts, pkt->duration);
> +                av_packet_unref(pkt);
> +                goto next_iteration;
> +            } else if (pkt->pts >= end_ts) {
> +                // starts after this fragment, put back to original queue
> +                ret = avpriv_packet_list_put(&track->squashed_packet_queue,
> +                                             pkt, av_packet_ref,
> +                                             FF_PACKETLIST_FLAG_PREPEND);
> +                if (ret < 0)
> +                    goto cleanup;
> +
> +                stop_at_current_packet = 1;
> +                goto next_iteration;

You can just break here and remove stop_at_current_packet.

> +            }
> +
> +            // limit packet pts to start_ts
> +            if (pkt->pts < start_ts) {
> +                pkt->duration -= start_ts - pkt->pts;
> +                pkt->pts = start_ts;
> +            }
> +
> +            if (pkt->pts + pkt->duration > end_ts) {
> +                // goes over our current fragment, create duplicate and
> +                // put it back to list after iteration has finished in
> +                // order to handle multiple subtitles at the same time.
> +                int64_t offset = end_ts - pkt->pts;
> +
> +                ret = avpriv_packet_list_put(&back_to_queue_list,
> +                                             pkt, av_packet_ref,
> +                                             FF_PACKETLIST_FLAG_PREPEND);
> +                if (ret < 0)
> +                    goto cleanup;
> +
> +                back_to_queue_list.head->pkt.pts =
> +                back_to_queue_list.head->pkt.dts =
> +                back_to_queue_list.head->pkt.pts + offset;
> +                back_to_queue_list.head->pkt.duration -= offset;
> +
> +                // and for our normal packet we just set duration to offset
> +                pkt->duration = offset;
> +            }
> +        } else {
> +            end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
> +        }
> +
> +        av_log(s, AV_LOG_TRACE,
> +               "TTML packet writeout: pts: %"PRId64" (%"PRId64"), "
> +               "duration: %"PRId64"\n",
> +               pkt->pts, pkt->pts - start_ts, pkt->duration);
> +        if (pkt->pts != pts_before || pkt->duration != duration_before) {
> +            av_log(s, AV_LOG_TRACE,
> +                   "Adjustments: pts: %"PRId64", duration: %"PRId64"\n",
> +                   pkt->pts - pts_before, pkt->duration - duration_before);
> +        }
>  
>          // in case of the 'dfxp' muxing mode, each written document is offset
>          // to its containing sample's beginning.
> @@ -89,6 +207,10 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>          }
>  
>          av_packet_unref(pkt);
> +
> +next_iteration:
> +        if (stop_at_current_packet)
> +            break;
>      }
>  
>      if ((ret = av_write_trailer(ttml_ctx)) < 0)
> @@ -100,15 +222,30 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>      ret = 0;
>  
>  cleanup:
> +    while (!avpriv_packet_list_get(&back_to_queue_list, pkt)) {

You are removing some packets from the list and then readding some of
these in a way that keeps the order of the list. Makes me wonder whether
it would not be better to iterate through the list and process the
packets without moving them from the list (and remove the packets from
the list that are not supposed to be kept there even if said packets are
in the middle of the list). The only mildly inelegant thing is that one
would need to duplicate and restore the time-related fields in the loop.

> +        ret = avpriv_packet_list_put(&track->squashed_packet_queue,
> +                                     pkt, av_packet_ref,
> +                                     FF_PACKETLIST_FLAG_PREPEND);
> +
> +        // unrelated to whether we succeed or not, we unref the packet
> +        // received from the temporary list.
> +        av_packet_unref(pkt);

In this case, you should not use av_packet_ref() above at all, but move
the packet to the list; then you only need to unref the packet in the
error case.

> +
> +        if (ret < 0) {
> +            avpriv_packet_list_free(&back_to_queue_list);
> +            break;

If you returned ret directly here, the effective scope of ret would be
the loop body.

> +        }
> +    }
>      return ret;
>  }
>  
>  int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
>                                           MOVTrack *track, AVPacket *pkt)
>  {
> +    MOVMuxContext *mov = s->priv_data;
>      AVFormatContext *ttml_ctx = NULL;
>      // values for the generated AVPacket
> -    int64_t start_ts = 0;
> +    int64_t start_ts = AV_NOPTS_VALUE;
>      int64_t duration = 0;
>  
>      int ret = AVERROR_BUG;
> @@ -119,12 +256,30 @@ int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
>          goto cleanup;
>      }
>  
> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> +        int64_t calculated_start = AV_NOPTS_VALUE;
> +        int64_t calculated_end = AV_NOPTS_VALUE;
> +
> +        mov_calculate_start_and_end_of_other_tracks(s, track, &calculated_start, &calculated_end);
> +
> +        if (calculated_start != AV_NOPTS_VALUE) {
> +            start_ts = calculated_start;
> +            duration = calculated_end - calculated_start;
> +            av_log(s, AV_LOG_VERBOSE,
> +                   "Calculated subtitle fragment start: %"PRId64", "
> +                   "duration: %"PRId64"\n",
> +                   start_ts, duration);
> +        }
> +    }
> +
>      if (!track->squashed_packet_queue.head) {
>          // empty queue, write minimal empty document with zero duration
>          avio_write(ttml_ctx->pb, empty_ttml_document,
>                     sizeof(empty_ttml_document) - 1);
> -        start_ts = 0;
> -        duration = 0;
> +        if (start_ts == AV_NOPTS_VALUE) {
> +            start_ts = 0;
> +            duration = 0;
> +        }
>          goto generate_packet;
>      }
>