[FFmpeg-devel] [PATCH v16 08/16] fftools/ffmpeg: Replace sub2video with subtitle frame filtering

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Fri Nov 26 15:02:26 EET 2021


Soft Works:
> This commit actually enables subtitle filtering in ffmpeg by
> sending and receiving subtitle frames to and from a filtergraph.
> 
> The heartbeat functionality from the previous sub2video implementation
> is retained and applied to all subtitle frames (bitmap, text, ..).
> 
> The other part of sub2video functionality is retained by
> auto-insertion of the new graphicsub2video filter.
> 
> Justification for changed test refs:
> 
> - sub2video
>   The new results are identical excepting the last frame which
>   is due to the implementation changes
> 
> - sub2video_basic
>   The previous results had some incorrect output because multiple
>   frames had the same dts
>   The non-empty content frames are visually identical, the different
>   CRC is due to the different blending algorithm that is being used.
> 
> - sub2video_time_limited
>   The third frame in the previous ref was a repetition, which doesn't
>   happen anymore with the new subtitle filtering.
> 
> - sub-dvb
>   Running ffprobe -show_frames on the source file shows that there
>   are 7 subtitle frames with 0 rects in the source at the start
>   and 2 at the end. This translates to the 14 and 4 additional
>   entries in the new test results.
> 
> - filter-overlay-dvdsub-2397
>   Overlay results have slightly different CRCs due to different
>   blending implementation
> 
> Signed-off-by: softworkz <softworkz at hotmail.com>
> ---
>  fftools/ffmpeg.c                          | 523 +++++++++++-----------
>  fftools/ffmpeg.h                          |  15 +-
>  fftools/ffmpeg_filter.c                   | 217 ++++++---
>  fftools/ffmpeg_hw.c                       |   2 +-
>  fftools/ffmpeg_opt.c                      |   3 +-
>  tests/ref/fate/filter-overlay-dvdsub-2397 | 181 ++++----
>  tests/ref/fate/sub-dvb                    | 162 ++++---
>  tests/ref/fate/sub2video                  | 116 ++---
>  tests/ref/fate/sub2video_basic            | 135 ++----
>  tests/ref/fate/sub2video_time_limited     |   4 +-
>  10 files changed, 684 insertions(+), 674 deletions(-)
> 
> diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
> index 3761ea0c38..c697c12777 100644
> --- a/fftools/ffmpeg.c
> +++ b/fftools/ffmpeg.c
> @@ -169,163 +169,6 @@ static int restore_tty;
>  static void free_input_threads(void);
>  #endif
>  
> -/* sub2video hack:
> -   Convert subtitles to video with alpha to insert them in filter graphs.
> -   This is a temporary solution until libavfilter gets real subtitles support.
> - */
> -
> -static int sub2video_get_blank_frame(InputStream *ist)
> -{
> -    int ret;
> -    AVFrame *frame = ist->sub2video.frame;
> -
> -    av_frame_unref(frame);
> -    ist->sub2video.frame->width  = ist->dec_ctx->width  ? ist->dec_ctx->width  : ist->sub2video.w;
> -    ist->sub2video.frame->height = ist->dec_ctx->height ? ist->dec_ctx->height : ist->sub2video.h;
> -    ist->sub2video.frame->format = AV_PIX_FMT_RGB32;
> -    if ((ret = av_frame_get_buffer(frame, 0)) < 0)
> -        return ret;
> -    memset(frame->data[0], 0, frame->height * frame->linesize[0]);
> -    return 0;
> -}
> -
> -static void sub2video_copy_rect(uint8_t *dst, int dst_linesize, int w, int h,
> -                                AVSubtitleRect *r)
> -{
> -    uint32_t *pal, *dst2;
> -    uint8_t *src, *src2;
> -    int x, y;
> -
> -    if (r->type != SUBTITLE_BITMAP) {
> -        av_log(NULL, AV_LOG_WARNING, "sub2video: non-bitmap subtitle\n");
> -        return;
> -    }
> -    if (r->x < 0 || r->x + r->w > w || r->y < 0 || r->y + r->h > h) {
> -        av_log(NULL, AV_LOG_WARNING, "sub2video: rectangle (%d %d %d %d) overflowing %d %d\n",
> -            r->x, r->y, r->w, r->h, w, h
> -        );
> -        return;
> -    }
> -
> -    dst += r->y * dst_linesize + r->x * 4;
> -    src = r->data[0];
> -    pal = (uint32_t *)r->data[1];
> -    for (y = 0; y < r->h; y++) {
> -        dst2 = (uint32_t *)dst;
> -        src2 = src;
> -        for (x = 0; x < r->w; x++)
> -            *(dst2++) = pal[*(src2++)];
> -        dst += dst_linesize;
> -        src += r->linesize[0];
> -    }
> -}
> -
> -static void sub2video_push_ref(InputStream *ist, int64_t pts)
> -{
> -    AVFrame *frame = ist->sub2video.frame;
> -    int i;
> -    int ret;
> -
> -    av_assert1(frame->data[0]);
> -    ist->sub2video.last_pts = frame->pts = pts;
> -    for (i = 0; i < ist->nb_filters; i++) {
> -        ret = av_buffersrc_add_frame_flags(ist->filters[i]->filter, frame,
> -                                           AV_BUFFERSRC_FLAG_KEEP_REF |
> -                                           AV_BUFFERSRC_FLAG_PUSH);
> -        if (ret != AVERROR_EOF && ret < 0)
> -            av_log(NULL, AV_LOG_WARNING, "Error while add the frame to buffer source(%s).\n",
> -                   av_err2str(ret));
> -    }
> -}
> -
> -void sub2video_update(InputStream *ist, int64_t heartbeat_pts, AVSubtitle *sub)
> -{
> -    AVFrame *frame = ist->sub2video.frame;
> -    int8_t *dst;
> -    int     dst_linesize;
> -    int num_rects, i;
> -    int64_t pts, end_pts;
> -
> -    if (!frame)
> -        return;
> -    if (sub) {
> -        pts       = av_rescale_q(sub->pts + sub->start_display_time * 1000LL,
> -                                 AV_TIME_BASE_Q, ist->st->time_base);
> -        end_pts   = av_rescale_q(sub->pts + sub->end_display_time   * 1000LL,
> -                                 AV_TIME_BASE_Q, ist->st->time_base);
> -        num_rects = sub->num_rects;
> -    } else {
> -        /* If we are initializing the system, utilize current heartbeat
> -           PTS as the start time, and show until the following subpicture
> -           is received. Otherwise, utilize the previous subpicture's end time
> -           as the fall-back value. */
> -        pts       = ist->sub2video.initialize ?
> -                    heartbeat_pts : ist->sub2video.end_pts;
> -        end_pts   = INT64_MAX;
> -        num_rects = 0;
> -    }
> -    if (sub2video_get_blank_frame(ist) < 0) {
> -        av_log(ist->dec_ctx, AV_LOG_ERROR,
> -               "Impossible to get a blank canvas.\n");
> -        return;
> -    }
> -    dst          = frame->data    [0];
> -    dst_linesize = frame->linesize[0];
> -    for (i = 0; i < num_rects; i++)
> -        sub2video_copy_rect(dst, dst_linesize, frame->width, frame->height, sub->rects[i]);
> -    sub2video_push_ref(ist, pts);
> -    ist->sub2video.end_pts = end_pts;
> -    ist->sub2video.initialize = 0;
> -}
> -
> -static void sub2video_heartbeat(InputStream *ist, int64_t pts)
> -{
> -    InputFile *infile = input_files[ist->file_index];
> -    int i, j, nb_reqs;
> -    int64_t pts2;
> -
> -    /* When a frame is read from a file, examine all sub2video streams in
> -       the same file and send the sub2video frame again. Otherwise, decoded
> -       video frames could be accumulating in the filter graph while a filter
> -       (possibly overlay) is desperately waiting for a subtitle frame. */
> -    for (i = 0; i < infile->nb_streams; i++) {
> -        InputStream *ist2 = input_streams[infile->ist_index + i];
> -        if (!ist2->sub2video.frame)
> -            continue;
> -        /* subtitles seem to be usually muxed ahead of other streams;
> -           if not, subtracting a larger time here is necessary */
> -        pts2 = av_rescale_q(pts, ist->st->time_base, ist2->st->time_base) - 1;
> -        /* do not send the heartbeat frame if the subtitle is already ahead */
> -        if (pts2 <= ist2->sub2video.last_pts)
> -            continue;
> -        if (pts2 >= ist2->sub2video.end_pts || ist2->sub2video.initialize)
> -            /* if we have hit the end of the current displayed subpicture,
> -               or if we need to initialize the system, update the
> -               overlayed subpicture and its start/end times */
> -            sub2video_update(ist2, pts2 + 1, NULL);
> -        for (j = 0, nb_reqs = 0; j < ist2->nb_filters; j++)
> -            nb_reqs += av_buffersrc_get_nb_failed_requests(ist2->filters[j]->filter);
> -        if (nb_reqs)
> -            sub2video_push_ref(ist2, pts2);
> -    }
> -}
> -
> -static void sub2video_flush(InputStream *ist)
> -{
> -    int i;
> -    int ret;
> -
> -    if (ist->sub2video.end_pts < INT64_MAX)
> -        sub2video_update(ist, INT64_MAX, NULL);
> -    for (i = 0; i < ist->nb_filters; i++) {
> -        ret = av_buffersrc_add_frame(ist->filters[i]->filter, NULL);
> -        if (ret != AVERROR_EOF && ret < 0)
> -            av_log(NULL, AV_LOG_WARNING, "Flush the frame error.\n");
> -    }
> -}
> -
> -/* end of sub2video hack */
> -
>  static void term_exit_sigsafe(void)
>  {
>  #if HAVE_TERMIOS_H
> @@ -526,7 +369,6 @@ static void ffmpeg_cleanup(int ret)
>          avfilter_graph_free(&fg->graph);
>          for (j = 0; j < fg->nb_inputs; j++) {
>              InputFilter *ifilter = fg->inputs[j];
> -            struct InputStream *ist = ifilter->ist;
>  
>              while (av_fifo_size(ifilter->frame_queue)) {
>                  AVFrame *frame;
> @@ -536,15 +378,6 @@ static void ffmpeg_cleanup(int ret)
>              }
>              av_fifo_freep(&ifilter->frame_queue);
>              av_freep(&ifilter->displaymatrix);
> -            if (ist->sub2video.sub_queue) {
> -                while (av_fifo_size(ist->sub2video.sub_queue)) {
> -                    AVSubtitle sub;
> -                    av_fifo_generic_read(ist->sub2video.sub_queue,
> -                                         &sub, sizeof(sub), NULL);
> -                    avsubtitle_free(&sub);
> -                }
> -                av_fifo_freep(&ist->sub2video.sub_queue);
> -            }
>              av_buffer_unref(&ifilter->hw_frames_ctx);
>              av_freep(&ifilter->name);
>              av_freep(&fg->inputs[j]);
> @@ -635,12 +468,14 @@ static void ffmpeg_cleanup(int ret)
>          av_frame_free(&ist->decoded_frame);
>          av_packet_free(&ist->pkt);
>          av_dict_free(&ist->decoder_opts);
> -        avsubtitle_free(&ist->prev_sub.subtitle);
> -        av_frame_free(&ist->sub2video.frame);
> +        av_frame_free(&ist->prev_sub.subtitle);
>          av_freep(&ist->filters);
>          av_freep(&ist->hwaccel_device);
>          av_freep(&ist->dts_buffer);
>  
> +        av_frame_free(&ist->subtitle_heartbeat.recent_sub);
> +        av_buffer_unref(&ist->subtitle_heartbeat.header);
> +
>          avcodec_free_context(&ist->dec_ctx);
>  
>          av_freep(&input_streams[i]);
> @@ -1058,17 +893,21 @@ error:
>      exit_program(1);
>  }
>  
> -static void do_subtitle_out(OutputFile *of,
> -                            OutputStream *ost,
> -                            AVSubtitle *sub)
> +static void do_subtitle_out(OutputFile *of, OutputStream *ost, AVFrame *frame)
>  {
> -    int subtitle_out_max_size = 1024 * 1024;
> +    const int subtitle_out_max_size = 1024 * 1024;
>      int subtitle_out_size, nb, i;
>      AVCodecContext *enc;
>      AVPacket *pkt = ost->pkt;
> +    AVSubtitle out_sub = { 0 };

You are adding some stuff here which is removed in patch 16. These
patches should be merged.

>      int64_t pts;
>  
> -    if (sub->pts == AV_NOPTS_VALUE) {
> +    if (!frame)
> +        return;
> +
> +    av_log(NULL, AV_LOG_TRACE, "do_subtitle_out: sub->pts: %"PRId64"  frame->pts: %"PRId64"\n", frame->subtitle_pts, frame->pts);
> +
> +    if (frame->subtitle_pts == AV_NOPTS_VALUE) {
>          av_log(NULL, AV_LOG_ERROR, "Subtitle packets must have a pts\n");
>          if (exit_on_error)
>              exit_program(1);
> @@ -1094,51 +933,58 @@ static void do_subtitle_out(OutputFile *of,
>          nb = 1;
>  
>      /* shift timestamp to honor -ss and make check_recording_time() work with -t */
> -    pts = sub->pts;
> +    pts = frame->subtitle_pts;
>      if (output_files[ost->file_index]->start_time != AV_NOPTS_VALUE)
>          pts -= output_files[ost->file_index]->start_time;
> +
> +    ost->sync_opts = av_rescale_q(pts, AV_TIME_BASE_Q, enc->time_base);
> +    if (!check_recording_time(ost))
> +        return;
> +
> +    frame->subtitle_pts = pts;
> +    // subtitle_start_time is required to be 0
> +    frame->subtitle_pts               += av_rescale_q(frame->subtitle_start_time, (AVRational){ 1, 1000 }, AV_TIME_BASE_Q);
> +    frame->subtitle_end_time  -= frame->subtitle_start_time;
> +    frame->subtitle_start_time = 0;
> +
> +    av_frame_get_subtitle(&out_sub, frame);
> +
>      for (i = 0; i < nb; i++) {
> -        unsigned save_num_rects = sub->num_rects;
> +        const unsigned save_num_rects = out_sub.num_rects;
>  
> -        ost->sync_opts = av_rescale_q(pts, AV_TIME_BASE_Q, enc->time_base);
> -        if (!check_recording_time(ost))
> -            return;
> +        ost->frames_encoded++;
>  
> -        sub->pts = pts;
> -        // start_display_time is required to be 0
> -        sub->pts               += av_rescale_q(sub->start_display_time, (AVRational){ 1, 1000 }, AV_TIME_BASE_Q);
> -        sub->end_display_time  -= sub->start_display_time;
> -        sub->start_display_time = 0;
>          if (i == 1)
> -            sub->num_rects = 0;
> +            out_sub.num_rects = 0;
>  
> -        ost->frames_encoded++;
> +        subtitle_out_size = avcodec_encode_subtitle(enc, subtitle_out, subtitle_out_max_size, &out_sub);
>  
> -        subtitle_out_size = avcodec_encode_subtitle(enc, subtitle_out,
> -                                                    subtitle_out_max_size, sub);
>          if (i == 1)
> -            sub->num_rects = save_num_rects;
> +            out_sub.num_rects = save_num_rects;
> +
>          if (subtitle_out_size < 0) {
>              av_log(NULL, AV_LOG_FATAL, "Subtitle encoding failed\n");
>              exit_program(1);
>          }
>  
> -        av_packet_unref(pkt);
> +        //av_packet_unref(pkt);
>          pkt->data = subtitle_out;
>          pkt->size = subtitle_out_size;
> -        pkt->pts  = av_rescale_q(sub->pts, AV_TIME_BASE_Q, ost->mux_timebase);
> -        pkt->duration = av_rescale_q(sub->end_display_time, (AVRational){ 1, 1000 }, ost->mux_timebase);
> +        pkt->pts  = av_rescale_q(frame->subtitle_pts, AV_TIME_BASE_Q, ost->mux_timebase);
> +        pkt->duration = av_rescale_q(frame->subtitle_end_time, (AVRational){ 1, 1000 }, ost->mux_timebase);
>          if (enc->codec_id == AV_CODEC_ID_DVB_SUBTITLE) {
>              /* XXX: the pts correction is handled here. Maybe handling
>                 it in the codec would be better */
>              if (i == 0)
> -                pkt->pts += av_rescale_q(sub->start_display_time, (AVRational){ 1, 1000 }, ost->mux_timebase);
> +                pkt->pts += av_rescale_q(frame->subtitle_start_time, (AVRational){ 1, 1000 }, ost->mux_timebase);
>              else
> -                pkt->pts += av_rescale_q(sub->end_display_time, (AVRational){ 1, 1000 }, ost->mux_timebase);
> +                pkt->pts += av_rescale_q(frame->subtitle_end_time, (AVRational){ 1, 1000 }, ost->mux_timebase);
>          }
>          pkt->dts = pkt->pts;
>          output_packet(of, pkt, ost, 0);
>      }
> +
> +    avsubtitle_free(&out_sub);
>  }
>  
>  static void do_video_out(OutputFile *of,
> @@ -1568,8 +1414,26 @@ static int reap_filters(int flush)
>                  }
>                  do_audio_out(of, ost, filtered_frame);
>                  break;
> +            case AVMEDIA_TYPE_SUBTITLE:
> +
> +                if (filtered_frame->format == AV_SUBTITLE_FMT_ASS && !enc->subtitle_header 
> +                    && filtered_frame->subtitle_header) {
> +                    const char *subtitle_header = (char *)filtered_frame->subtitle_header->data;
> +                    enc->subtitle_header = (uint8_t *)av_strdup(subtitle_header);
> +                    if (!enc->subtitle_header)
> +                        return AVERROR(ENOMEM);
> +                    enc->subtitle_header_size = strlen(subtitle_header);
> +                }
> +
> +                if ((ost->enc_ctx->width == 0 || ost->enc_ctx->height == 0)
> +                    && filter->inputs[0]->w > 0 && filter->inputs[0]->h > 0 ) {
> +                    ost->enc_ctx->width = filter->inputs[0]->w;
> +                    ost->enc_ctx->height = filter->inputs[0]->h;
> +                }
> +                    
> +                do_subtitle_out(of, ost, filtered_frame);
> +                break;
>              default:
> -                // TODO support subtitle filters
>                  av_assert0(0);
>              }
>  
> @@ -1972,6 +1836,9 @@ static void flush_encoders(void)
>              AVPacket *pkt = ost->pkt;
>              int pkt_size;
>  
> +            if (!pkt)
> +                break;
> +

This seems like a rebase error: I added it in
fb215798c7a72b32e889b72efd018f26bb3f88ce and removed it in
21914e7a4e802772cc9cdeec3eec8b30da4fa95a (because it was no longer
necessary).

>              switch (enc->codec_type) {
>              case AVMEDIA_TYPE_AUDIO:
>                  desc   = "audio";
> @@ -2164,7 +2031,8 @@ static int ifilter_has_all_input_formats(FilterGraph *fg)
>      int i;
>      for (i = 0; i < fg->nb_inputs; i++) {
>          if (fg->inputs[i]->format < 0 && (fg->inputs[i]->type == AVMEDIA_TYPE_AUDIO ||
> -                                          fg->inputs[i]->type == AVMEDIA_TYPE_VIDEO))
> +                                          fg->inputs[i]->type == AVMEDIA_TYPE_VIDEO ||
> +                                          fg->inputs[i]->type == AVMEDIA_TYPE_SUBTITLE))
>              return 0;
>      }
>      return 1;
> @@ -2269,7 +2137,7 @@ static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
>          // the filtergraph was never configured
>          if (ifilter->format < 0)
>              ifilter_parameters_from_codecpar(ifilter, ifilter->ist->st->codecpar);
> -        if (ifilter->format < 0 && (ifilter->type == AVMEDIA_TYPE_AUDIO || ifilter->type == AVMEDIA_TYPE_VIDEO)) {
> +        if (ifilter->format < 0 && (ifilter->type == AVMEDIA_TYPE_AUDIO || ifilter->type == AVMEDIA_TYPE_VIDEO || ifilter->type == AVMEDIA_TYPE_SUBTITLE)) {
>              av_log(NULL, AV_LOG_ERROR, "Cannot determine format of input stream %d:%d after EOF\n", ifilter->ist->file_index, ifilter->ist->st->index);
>              return AVERROR_INVALIDDATA;
>          }
> @@ -2307,7 +2175,8 @@ static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacke
>  
>  static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
>  {
> -    int i, ret;
> +    int i, ret = 0;
> +    AVFrame *f;

Unused variable. Probably a rebase error, as it was removed in
a132614bba247afac30d3a8b1378c40bd7f672bc.

>  
>      av_assert1(ist->nb_filters > 0); /* ensure ret is initialized */
>      for (i = 0; i < ist->nb_filters; i++) {
> @@ -2508,81 +2377,214 @@ fail:
>      return err < 0 ? err : ret;
>  }
>  
> -static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
> +static void subtitle_resend_current(InputStream *ist, int64_t heartbeat_pts)
> +{
> +    AVFrame *frame;
> +    int64_t pts, end_pts;
> +
> +    if (ist->subtitle_heartbeat.recent_sub) {
> +        frame = av_frame_clone(ist->subtitle_heartbeat.recent_sub);

Unchecked allocation: av_frame_clone() can return NULL, and frame is
dereferenced unconditionally a few lines below.

> +
> +        pts     = heartbeat_pts; //av_rescale_q(frame->subtitle_pts + frame->subtitle_start_time * 1000LL, AV_TIME_BASE_Q, ist->st->time_base);
> +        end_pts = av_rescale_q(frame->subtitle_pts + frame->subtitle_end_time   * 1000LL, AV_TIME_BASE_Q, ist->st->time_base);
> +    }
> +    else {

Put the "else" on the same line as the closing brace "}".

> +        frame = av_frame_alloc();

Is it actually certain that we need a new frame and can't reuse
decoded_frame like the other functions that ultimately call
send_frame_to_filters() do?

> +        if (!frame) {
> +            av_log(ist->dec_ctx, AV_LOG_ERROR, "Unable to alloc frame (out of memory).\n");
> +            return;
> +        }
> +
> +        frame->type = AVMEDIA_TYPE_SUBTITLE;
> +        frame->format = av_get_subtitle_format_from_codecdesc(ist->dec_ctx->codec_descriptor);
> +
> +        av_frame_get_buffer2(frame, 0);
> +
> +        frame->width = ist->subtitle_heartbeat.w;
> +        frame->height = ist->subtitle_heartbeat.h;
> +
> +        pts       = (ist->subtitle_heartbeat.end_pts < INT64_MAX && ist->subtitle_heartbeat.end_pts > 0)
> +                    ? ist->subtitle_heartbeat.end_pts : heartbeat_pts;
> +        end_pts   = INT64_MAX;
> +
> +        frame->subtitle_pts = av_rescale_q(pts, ist->st->time_base, AV_TIME_BASE_Q);
> +        frame->subtitle_end_time = 1000;
> +    }
> +
> +    ////av_log(NULL, AV_LOG_WARNING, "subtitle_heartbeat: call subtitle_resend_current %"PRId64" \n", pts);

Don't add commented-out logging code.

> +
> +    frame->pts = pts;
> +    ist->subtitle_heartbeat.last_pts = pts;
> +    ist->subtitle_heartbeat.end_pts = end_pts;
> +
> +    send_frame_to_filters(ist, frame);

frame will leak here: the AVFrame structure itself certainly leaks, and in
some scenarios (such as error conditions) its buffers, side data and
metadata leak as well.

> +}
> +
> +static void subtitle_heartbeat(InputStream *ist, int64_t pts)
> +{
> +    int i;
> +    int64_t pts2;
> +
> +    if (ist->st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
> +        return;
> +
> +    /* When a frame is read from a file, examine all subtitle streams in
> +       the same file and send the subtitle frame again. Otherwise, decoded
> +       video frames could be accumulating in the filter graph while a filter
> +       (possibly overlay) is desperately waiting for a subtitle frame. */
> +    for (i = 0; i < nb_input_streams; i++) {
> +        InputStream *ist2 = input_streams[i];
> +        if (!ist2->subtitle_heartbeat.is_active)
> +            continue;
> +        /* subtitles seem to be usually muxed ahead of other streams;
> +           if not, subtracting a larger time here is necessary */
> +        pts2 = av_rescale_q(pts, ist->st->time_base, ist2->st->time_base) - 1;
> +        /* do not send the heartbeat frame if the subtitle is already ahead */
> +        if (pts2 <= ist2->subtitle_heartbeat.last_pts)
> +            continue;
> +        if (pts2 >= ist2->subtitle_heartbeat.end_pts) {
> +            /* if we have hit the end of the current displayed subpicture,
> +               or if we need to initialize the system, update the
> +               overlayed subpicture and its start/end times */
> +            if (ist2->subtitle_heartbeat.recent_sub)
> +                av_frame_free(&ist2->subtitle_heartbeat.recent_sub);
> +
> +            av_log(NULL, AV_LOG_DEBUG, "subtitle_heartbeat: clear + resend - pts: %"PRIi64"\n", pts2 + 1);
> +            subtitle_resend_current(ist2, pts2 + 1);
> +            continue;
> +        }
> +        if (!ist2->subtitle_heartbeat.check_buffer_requests) {
> +            unsigned j, nb_reqs;
> +            for (j = 0, nb_reqs = 0; j < ist2->nb_filters; j++)
> +                nb_reqs += av_buffersrc_get_nb_failed_requests(ist2->filters[j]->filter);
> +            if (nb_reqs) {
> +                av_log(NULL, AV_LOG_DEBUG, "subtitle_heartbeat: resend - pts: %"PRIi64"\n", pts2);
> +                subtitle_resend_current(ist2, pts2);
> +            }
> +        }
> +    }
> +}
> +
> +static InputStream *get_input_stream(OutputStream *ost)
> +{
> +    if (ost->source_index >= 0)
> +        return input_streams[ost->source_index];
> +    return NULL;
> +}
> +
> +static int decode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
>                                 int *decode_failed)
>  {
> -    AVSubtitle subtitle;
> -    int free_sub = 1;
> -    int i, ret = avcodec_decode_subtitle2(ist->dec_ctx,
> -                                          &subtitle, got_output, pkt);
> +    AVFrame *decoded_frame;
> +    AVCodecContext *avctx = ist->dec_ctx;
> +    int i = 0, ret = 0, err = 0;
> +    int64_t pts, end_pts;
> +
> +    if (!ist->decoded_frame && !(ist->decoded_frame = av_frame_alloc()))
> +        return AVERROR(ENOMEM);
> +    decoded_frame = ist->decoded_frame;
> +    decoded_frame->type = AVMEDIA_TYPE_SUBTITLE;
> +    decoded_frame->format = av_get_subtitle_format_from_codecdesc(avctx->codec_descriptor);
>  
> -    check_decode_result(NULL, got_output, ret);
> +    if (!ist->subtitle_heartbeat.header && avctx->subtitle_header && avctx->subtitle_header_size > 0) {
> +        ist->subtitle_heartbeat.header = av_buffer_allocz(avctx->subtitle_header_size + 1);
> +        if (!ist->subtitle_heartbeat.header)
> +            return AVERROR(ENOMEM);
> +        memcpy(ist->subtitle_heartbeat.header->data, avctx->subtitle_header, avctx->subtitle_header_size);
> +    }
> +
> +    ret = decode(avctx, decoded_frame, got_output, pkt);
> +
> +    if (ret != AVERROR_EOF)
> +        check_decode_result(NULL, got_output, ret);
>  
>      if (ret < 0 || !*got_output) {
>          *decode_failed = 1;
> -        if (!pkt->size)
> -            sub2video_flush(ist);
> +        if (!pkt->size) {
> +            // Flush
> +            for (i = 0; i < ist->nb_filters; i++) {
> +                ret = av_buffersrc_add_frame(ist->filters[i]->filter, NULL);
> +                if (ret != AVERROR_EOF && ret < 0)
> +                    av_log(NULL, AV_LOG_WARNING, "Flush the frame error.\n");
> +            }
> +        }
>          return ret;
>      }
>  
>      if (ist->fix_sub_duration) {
>          int end = 1;
> -        if (ist->prev_sub.got_output) {
> -            end = av_rescale(subtitle.pts - ist->prev_sub.subtitle.pts,
> +        if (ist->prev_sub.got_output && ist->prev_sub.subtitle) {
> +            end = av_rescale(decoded_frame->subtitle_pts - ist->prev_sub.subtitle->subtitle_pts,
>                               1000, AV_TIME_BASE);
> -            if (end < ist->prev_sub.subtitle.end_display_time) {
> -                av_log(ist->dec_ctx, AV_LOG_DEBUG,
> +            if (end < ist->prev_sub.subtitle->subtitle_end_time) {
> +                av_log(avctx, AV_LOG_DEBUG,
>                         "Subtitle duration reduced from %"PRId32" to %d%s\n",
> -                       ist->prev_sub.subtitle.end_display_time, end,
> +                       ist->prev_sub.subtitle->subtitle_end_time, end,
>                         end <= 0 ? ", dropping it" : "");
> -                ist->prev_sub.subtitle.end_display_time = end;
> +                ist->prev_sub.subtitle->subtitle_end_time = end;
>              }
>          }
>          FFSWAP(int,        *got_output, ist->prev_sub.got_output);
>          FFSWAP(int,        ret,         ist->prev_sub.ret);
> -        FFSWAP(AVSubtitle, subtitle,    ist->prev_sub.subtitle);
> +        FFSWAP(AVFrame*,   decoded_frame, ist->prev_sub.subtitle);
>          if (end <= 0)
> -            goto out;
> +            return end;
>      }
>  
> -    if (!*got_output)
> +    if (!*got_output || !decoded_frame)
>          return ret;
>  
> -    if (ist->sub2video.frame) {
> -        sub2video_update(ist, INT64_MIN, &subtitle);
> -    } else if (ist->nb_filters) {
> -        if (!ist->sub2video.sub_queue)
> -            ist->sub2video.sub_queue = av_fifo_alloc(8 * sizeof(AVSubtitle));
> -        if (!ist->sub2video.sub_queue)
> -            exit_program(1);
> -        if (!av_fifo_space(ist->sub2video.sub_queue)) {
> -            ret = av_fifo_realloc2(ist->sub2video.sub_queue, 2 * av_fifo_size(ist->sub2video.sub_queue));
> -            if (ret < 0)
> -                exit_program(1);
> -        }
> -        av_fifo_generic_write(ist->sub2video.sub_queue, &subtitle, sizeof(subtitle), NULL);
> -        free_sub = 0;
> -    }
> +    decoded_frame->type = AVMEDIA_TYPE_SUBTITLE;
> +    decoded_frame->format = av_get_subtitle_format_from_codecdesc(avctx->codec_descriptor);
>  
> -    if (!subtitle.num_rects)
> -        goto out;
> +    ////if ((ret = av_frame_get_buffer2(decoded_frame, 0)) < 0)
> +    ////    return ret;
>  
> -    ist->frames_decoded++;
> +    if ((ret = av_buffer_replace(&decoded_frame->subtitle_header, ist->subtitle_heartbeat.header)) < 0)
> +        return ret;
>  
> -    for (i = 0; i < nb_output_streams; i++) {
> -        OutputStream *ost = output_streams[i];
> +    pts     = av_rescale_q(decoded_frame->subtitle_pts + decoded_frame->subtitle_start_time * 1000LL,
> +                           AV_TIME_BASE_Q, ist->st->time_base);
> +    end_pts = av_rescale_q(decoded_frame->subtitle_pts + decoded_frame->subtitle_end_time   * 1000LL,
> +                             AV_TIME_BASE_Q, ist->st->time_base);
>  
> -        if (!check_output_constraints(ist, ost) || !ost->encoding_needed
> -            || ost->enc->type != AVMEDIA_TYPE_SUBTITLE)
> -            continue;
> +    ist->subtitle_heartbeat.last_pts = decoded_frame->pts = pts;
> +    ist->subtitle_heartbeat.end_pts = end_pts;
>  
> -        do_subtitle_out(output_files[ost->file_index], ost, &subtitle);
> +    if (ist->nb_filters > 0) {
> +        AVFrame *filter_frame = av_frame_clone(decoded_frame);

If I see this correctly, then the reason that one has to do something
besides send_frame_to_filters() is that we do not add a dummy
filtergraph, as is done in line 2645 of ffmpeg_opt.c for audio and video?

> +        if (!filter_frame)
> +            err = AVERROR(ENOMEM);
> +        else

Don't use an else as if these were two ordinary cases: one of them is an
error condition.

> +            err = send_frame_to_filters(ist, filter_frame);

filter_frame leaks here.

>      }
>  
> -out:
> -    if (free_sub)
> -        avsubtitle_free(&subtitle);
> -    return ret;
> +    if (err >= 0) {
> +        for (i = 0; i < nb_output_streams; i++) {
> +            OutputStream *ost = output_streams[i];
> +            InputStream *ist_src = get_input_stream(ost);
> +
> +            if (!ist_src || !check_output_constraints(ist, ost) 
> +                || ist_src != ist
> +                || !ost->encoding_needed
> +                || ost->enc->type != AVMEDIA_TYPE_SUBTITLE)
> +                continue;
> +
> +            if (ost->filter && ost->filter->filter->nb_inputs > 0)
> +                continue;
> +
> +            ////if (!ost->pkt && !((ost->pkt = av_packet_alloc())))
> +            ////    exit_program(1);
> +            do_subtitle_out(output_files[ost->file_index], ost, decoded_frame);
> +        }
> +    }
> +
> +    av_frame_free(&ist->subtitle_heartbeat.recent_sub);

You should not constantly free and allocate the AVFrames, but instead
allocate them once (in new_subtitle_stream() or so).

> +    ist->subtitle_heartbeat.recent_sub = av_frame_clone(decoded_frame);
> +
> +
> +    av_frame_unref(decoded_frame);
> +    return err < 0 ? err : ret;
>  }
>  
>  static int send_filter_eof(InputStream *ist)
> @@ -2686,7 +2688,7 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
>          case AVMEDIA_TYPE_SUBTITLE:
>              if (repeating)
>                  break;
> -            ret = transcode_subtitles(ist, avpkt, &got_output, &decode_failed);
> +            ret = decode_subtitles(ist, avpkt, &got_output, &decode_failed);
>              if (!pkt && ret >= 0)
>                  ret = AVERROR_EOF;
>              av_packet_unref(avpkt);
> @@ -2977,13 +2979,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
>      return 0;
>  }
>  
> -static InputStream *get_input_stream(OutputStream *ost)
> -{
> -    if (ost->source_index >= 0)
> -        return input_streams[ost->source_index];
> -    return NULL;
> -}
> -
>  static int compare_int64(const void *a, const void *b)
>  {
>      return FFDIFFSIGN(*(const int64_t *)a, *(const int64_t *)b);
> @@ -3462,7 +3457,7 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame)
>          break;
>      case AVMEDIA_TYPE_SUBTITLE:
>          enc_ctx->time_base = AV_TIME_BASE_Q;
> -        if (!enc_ctx->width) {
> +        if (!enc_ctx->width && ost->source_index >= 0) {
>              enc_ctx->width     = input_streams[ost->source_index]->st->codecpar->width;
>              enc_ctx->height    = input_streams[ost->source_index]->st->codecpar->height;
>          }
> @@ -3515,19 +3510,14 @@ static int init_output_stream(OutputStream *ost, AVFrame *frame,
>          }
>  
>          if (ist && ist->dec->type == AVMEDIA_TYPE_SUBTITLE && ost->enc->type == AVMEDIA_TYPE_SUBTITLE) {
> -            int input_props = 0, output_props = 0;
> -            AVCodecDescriptor const *input_descriptor =
> -                avcodec_descriptor_get(dec->codec_id);
> -            AVCodecDescriptor const *output_descriptor =
> -                avcodec_descriptor_get(ost->enc_ctx->codec_id);
> -            if (input_descriptor)
> -                input_props = input_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
> -            if (output_descriptor)
> -                output_props = output_descriptor->props & (AV_CODEC_PROP_TEXT_SUB | AV_CODEC_PROP_BITMAP_SUB);
> -            if (input_props && output_props && input_props != output_props) {
> -                snprintf(error, error_len,
> -                         "Subtitle encoding currently only possible from text to text "
> -                         "or bitmap to bitmap");
> +            AVCodecDescriptor const *input_descriptor     = avcodec_descriptor_get(dec->codec_id);
> +            AVCodecDescriptor const *output_descriptor    = avcodec_descriptor_get(ost->enc_ctx->codec_id);
> +            const enum AVSubtitleType in_subtitle_format  = output_descriptor ? av_get_subtitle_format_from_codecdesc(input_descriptor) : AV_SUBTITLE_FMT_UNKNOWN;
> +            const enum AVSubtitleType out_subtitle_format = output_descriptor ? av_get_subtitle_format_from_codecdesc(output_descriptor) : AV_SUBTITLE_FMT_UNKNOWN;
> +
> +            if (ist->nb_filters == 0 && in_subtitle_format != AV_SUBTITLE_FMT_UNKNOWN && out_subtitle_format != AV_SUBTITLE_FMT_UNKNOWN
> +                && in_subtitle_format != out_subtitle_format) {
> +                snprintf(error, error_len, "Subtitle encoding is only possible from text to text or bitmap to bitmap");
>                  return AVERROR_INVALIDDATA;
>              }
>          }
> @@ -4528,7 +4518,7 @@ static int process_input(int file_index)
>                 av_ts2timestr(input_files[ist->file_index]->ts_offset, &AV_TIME_BASE_Q));
>      }
>  
> -    sub2video_heartbeat(ist, pkt->pts);
> +    subtitle_heartbeat(ist, pkt->pts);
>  
>      process_input_packet(ist, pkt, 0);
>  
> @@ -4740,6 +4730,7 @@ static int transcode(void)
>      /* at the end of stream, we must flush the decoder buffers */
>      for (i = 0; i < nb_input_streams; i++) {
>          ist = input_streams[i];
> +        ist->subtitle_heartbeat.is_active = 0;
>          if (!input_files[ist->file_index]->eof_reached) {
>              process_input_packet(ist, NULL, 0);
>          }
> diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
> index 1728010f56..8a7080834a 100644
> --- a/fftools/ffmpeg.h
> +++ b/fftools/ffmpeg.h
> @@ -349,17 +349,18 @@ typedef struct InputStream {
>      struct { /* previous decoded subtitle and related variables */
>          int got_output;
>          int ret;
> -        AVSubtitle subtitle;
> +        AVFrame *subtitle;
>      } prev_sub;
>  
> -    struct sub2video {
> +    struct subtitle_heartbeat {
> +        int is_active;
> +        int check_buffer_requests;
>          int64_t last_pts;
>          int64_t end_pts;
> -        AVFifoBuffer *sub_queue;    ///< queue of AVSubtitle* before filter init
> -        AVFrame *frame;
> +        AVFrame *recent_sub;
>          int w, h;
> -        unsigned int initialize; ///< marks if sub2video_update should force an initialization
> -    } sub2video;
> +        AVBufferRef *header;
> +    } subtitle_heartbeat;
>  
>      /* decoded data from this stream goes into all those filters
>       * currently video and audio only */
> @@ -659,8 +660,6 @@ int filtergraph_is_simple(FilterGraph *fg);
>  int init_simple_filtergraph(InputStream *ist, OutputStream *ost);
>  int init_complex_filtergraph(FilterGraph *fg);
>  
> -void sub2video_update(InputStream *ist, int64_t heartbeat_pts, AVSubtitle *sub);
> -
>  int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame);
>  
>  int ffmpeg_parse_options(int argc, char **argv);
> diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
> index 452b689d62..1299529d36 100644
> --- a/fftools/ffmpeg_filter.c
> +++ b/fftools/ffmpeg_filter.c
> @@ -22,6 +22,8 @@
>  
>  #include "ffmpeg.h"
>  
> +#include <libavutil/ass_split_internal.h>
> +

ass_split_internal.h is not an installed header, hence the <> form for
headers is inappropriate. Moreover, see below.

>  #include "libavfilter/avfilter.h"
>  #include "libavfilter/buffersink.h"
>  #include "libavfilter/buffersrc.h"
> @@ -30,11 +32,9 @@
>  #include "libavutil/avstring.h"
>  #include "libavutil/bprint.h"
>  #include "libavutil/channel_layout.h"
> -#include "libavutil/display.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/pixdesc.h"
>  #include "libavutil/pixfmt.h"
> -#include "libavutil/imgutils.h"
>  #include "libavutil/samplefmt.h"
>  
>  // FIXME: YUV420P etc. are actually supported with full color range,
> @@ -221,9 +221,8 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
>      enum AVMediaType type = avfilter_pad_get_type(in->filter_ctx->input_pads, in->pad_idx);
>      int i;
>  
> -    // TODO: support other filter types
> -    if (type != AVMEDIA_TYPE_VIDEO && type != AVMEDIA_TYPE_AUDIO) {
> -        av_log(NULL, AV_LOG_FATAL, "Only video and audio filters supported "
> +    if (type != AVMEDIA_TYPE_VIDEO && type != AVMEDIA_TYPE_AUDIO && type != AVMEDIA_TYPE_SUBTITLE) {
> +        av_log(NULL, AV_LOG_FATAL, "Only video, audio and subtitle filters supported "
>                 "currently.\n");
>          exit_program(1);
>      }
> @@ -244,8 +243,9 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
>          for (i = 0; i < s->nb_streams; i++) {
>              enum AVMediaType stream_type = s->streams[i]->codecpar->codec_type;
>              if (stream_type != type &&
> -                !(stream_type == AVMEDIA_TYPE_SUBTITLE &&
> -                  type == AVMEDIA_TYPE_VIDEO /* sub2video hack */))
> +                // in the followng case we auto-insert the graphicsub2video conversion filter 
> +                // for retaining compatibility with the previous sub2video hack
> +                !(stream_type == AVMEDIA_TYPE_SUBTITLE && type == AVMEDIA_TYPE_VIDEO))
>                  continue;
>              if (check_stream_specifier(s, s->streams[i], *p == ':' ? p + 1 : p) == 1) {
>                  st = s->streams[i];
> @@ -294,6 +294,13 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
>      fg->inputs[fg->nb_inputs - 1]->type = ist->st->codecpar->codec_type;
>      fg->inputs[fg->nb_inputs - 1]->name = describe_filter_link(fg, in, 1);
>  
> +    if (ist->st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE && ist->dec_ctx) {
> +        const AVCodecDescriptor *codec_descriptor = ist->dec_ctx->codec_descriptor;
> +        if (!codec_descriptor)
> +            codec_descriptor = avcodec_descriptor_get(ist->dec_ctx->codec_id);
> +        fg->inputs[fg->nb_inputs - 1]->format = av_get_subtitle_format_from_codecdesc(codec_descriptor);
> +    }
> +
>      fg->inputs[fg->nb_inputs - 1]->frame_queue = av_fifo_alloc(8 * sizeof(AVFrame*));
>      if (!fg->inputs[fg->nb_inputs - 1]->frame_queue)
>          exit_program(1);
> @@ -416,6 +423,39 @@ static int insert_filter(AVFilterContext **last_filter, int *pad_idx,
>      return 0;
>  }
>  
> +static int configure_output_subtitle_filter(FilterGraph *fg, OutputFilter *ofilter, AVFilterInOut *out)
> +{
> +    OutputStream *ost = ofilter->ost;
> +    AVFilterContext *last_filter = out->filter_ctx;
> +    int pad_idx = out->pad_idx;
> +    int ret;
> +    char name[255];
> +
> +    snprintf(name, sizeof(name), "out_%d_%d", ost->file_index, ost->index);
> +    ret = avfilter_graph_create_filter(&ofilter->filter,
> +                                       avfilter_get_by_name("sbuffersink"),
> +                                       name, NULL, NULL, fg->graph);
> +
> +    if (ret < 0) {
> +        av_log(NULL, AV_LOG_ERROR, "Unable to create filter sbuffersink\n");
> +        return ret;
> +    }
> +
> +    ////snprintf(name, sizeof(name), "trim_out_%d_%d",
> +    ////         ost->file_index, ost->index);
> +    ////ret = insert_trim(of->start_time, of->recording_time,
> +    ////                  &last_filter, &pad_idx, name);
> +    ////if (ret < 0)
> +    ////    return ret;
> +
> +    ////ost->st->codecpar->codec_tag = MKTAG('a', 's', 's', 's');
> +
> +    if ((ret = avfilter_link(last_filter, pad_idx, ofilter->filter, 0)) < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
>  static int configure_output_video_filter(FilterGraph *fg, OutputFilter *ofilter, AVFilterInOut *out)
>  {
>      char *pix_fmts;
> @@ -594,7 +634,8 @@ static int configure_output_audio_filter(FilterGraph *fg, OutputFilter *ofilter,
>          int i;
>  
>          for (i=0; i<of->ctx->nb_streams; i++)
> -            if (of->ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
> +            if (of->ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
> +                of->ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE)
>                  break;
>  
>          if (i<of->ctx->nb_streams) {
> @@ -628,6 +669,7 @@ static int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter,
>      switch (avfilter_pad_get_type(out->filter_ctx->output_pads, out->pad_idx)) {
>      case AVMEDIA_TYPE_VIDEO: return configure_output_video_filter(fg, ofilter, out);
>      case AVMEDIA_TYPE_AUDIO: return configure_output_audio_filter(fg, ofilter, out);
> +    case AVMEDIA_TYPE_SUBTITLE: return configure_output_subtitle_filter(fg, ofilter, out);
>      default: av_assert0(0); return 0;
>      }
>  }
> @@ -647,51 +689,112 @@ void check_filter_outputs(void)
>      }
>  }
>  
> -static int sub2video_prepare(InputStream *ist, InputFilter *ifilter)
> +static int configure_input_subtitle_filter(FilterGraph *fg, InputFilter *ifilter,
> +                                        AVFilterInOut *in)
>  {
> -    AVFormatContext *avf = input_files[ist->file_index]->ctx;
> -    int i, w, h;
> +    AVFilterContext *last_filter;
> +    const AVFilter *buffer_filt = avfilter_get_by_name("sbuffer");
> +    InputStream *ist = ifilter->ist;
> +    AVBPrint args;
> +    char name[255];
> +    int ret, pad_idx = 0;
> +    int w, h;
> +    AVBufferSrcParameters *par = av_buffersrc_parameters_alloc();
> +    enum AVMediaType media_type;
> +
> +    if (!par)
> +        return AVERROR(ENOMEM);
> +    memset(par, 0, sizeof(*par));

Unnecessary, as av_buffersrc_parameters_alloc() already initializes *par.

> +    par->format = AV_PIX_FMT_NONE;
> +
> +    if (ist->dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
> +        av_log(NULL, AV_LOG_ERROR, "Cannot connect subtitle filter to audio input\n");
> +        ret = AVERROR(EINVAL);
> +        goto fail;
> +    }
> +
> +    if (ist->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
> +        av_log(NULL, AV_LOG_ERROR, "Cannot connect subtitle filter to video input\n");
> +        ret = AVERROR(EINVAL);
> +        goto fail;
> +    }
> +
> +    if (!ist->subtitle_heartbeat.header && ist->dec_ctx->subtitle_header && ist->dec_ctx->subtitle_header_size > 0) {
> +        ist->subtitle_heartbeat.header = av_buffer_allocz(ist->dec_ctx->subtitle_header_size + 1);
> +        if (!ist->subtitle_heartbeat.header)
> +            return AVERROR(ENOMEM);
> +        memcpy(ist->subtitle_heartbeat.header->data, ist->dec_ctx->subtitle_header, ist->dec_ctx->subtitle_header_size);
> +    }
> +
> +    ist->subtitle_heartbeat.is_active = 1;
>  
> -    /* Compute the size of the canvas for the subtitles stream.
> -       If the subtitles codecpar has set a size, use it. Otherwise use the
> -       maximum dimensions of the video streams in the same file. */
>      w = ifilter->width;
>      h = ifilter->height;
> +
>      if (!(w && h)) {
> -        for (i = 0; i < avf->nb_streams; i++) {
> -            if (avf->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
> -                w = FFMAX(w, avf->streams[i]->codecpar->width);
> -                h = FFMAX(h, avf->streams[i]->codecpar->height);
> -            }
> -        }
> -        if (!(w && h)) {
> -            w = FFMAX(w, 720);
> -            h = FFMAX(h, 576);
> -        }
> -        av_log(avf, AV_LOG_INFO, "sub2video: using %dx%d canvas\n", w, h);
> +        w = ist->dec_ctx->width;
> +        h = ist->dec_ctx->height;
>      }
> -    ist->sub2video.w = ifilter->width  = w;
> -    ist->sub2video.h = ifilter->height = h;
>  
> -    ifilter->width  = ist->dec_ctx->width  ? ist->dec_ctx->width  : ist->sub2video.w;
> -    ifilter->height = ist->dec_ctx->height ? ist->dec_ctx->height : ist->sub2video.h;
> +    if (!(w && h) && ist->dec_ctx->subtitle_header) {
> +        ASSSplitContext *ass_ctx = avpriv_ass_split((char *)ist->dec_ctx->subtitle_header);

avpriv functions must not be used in fftools. And what makes you so
certain that subtitle_header will only be used by ass subtitles?
Furthermore, the return value of avpriv_ass_split() is not checked
for NULL before it is dereferenced.
(Maybe ASS-based subtitle codecs should set AVCodecContext.width and
height based upon play_res_x/y? This would be easy to implement in
ff_ass_subtitle_header_full(). But I don't know where
ASS_DEFAULT_PLAYRES[XY] comes from, and I expect it to be the common
case given that only movtextdec and ass itself use something different.)

> +        ASS *ass = (ASS *)ass_ctx;
> +        w = ass->script_info.play_res_x;
> +        h = ass->script_info.play_res_y;
> +        avpriv_ass_split_free(ass_ctx);
> +    }
>  
> -    /* rectangles are AV_PIX_FMT_PAL8, but we have no guarantee that the
> -       palettes for all rectangles are identical or compatible */
> -    ifilter->format = AV_PIX_FMT_RGB32;
> +    ist->subtitle_heartbeat.w = w;
> +    ist->subtitle_heartbeat.h = h;
> +    av_log(ifilter, AV_LOG_INFO, "subtitle input filter: decoding size %dx%d\n", ist->subtitle_heartbeat.w, ist->subtitle_heartbeat.h);
>  
> -    ist->sub2video.frame = av_frame_alloc();
> -    if (!ist->sub2video.frame)
> -        return AVERROR(ENOMEM);
> -    ist->sub2video.last_pts = INT64_MIN;
> -    ist->sub2video.end_pts  = INT64_MIN;
> +    ifilter->width = w;
> +    ifilter->height = h;
> +    ist->dec_ctx->width = w;
> +    ist->dec_ctx->height = h;
>  
> -    /* sub2video structure has been (re-)initialized.
> -       Mark it as such so that the system will be
> -       initialized with the first received heartbeat. */
> -    ist->sub2video.initialize = 1;
> +    ist->subtitle_heartbeat.last_pts = INT64_MIN;
> +
> +    snprintf(name, sizeof(name), "graph %d subtitle input from stream %d:%d", fg->index,
> +             ist->file_index, ist->st->index);
> +
> +
> +    av_bprint_init(&args, 0, AV_BPRINT_SIZE_AUTOMATIC);
> +    av_bprintf(&args,
> +             "subtitle_type=%d:width=%d:height=%d:time_base=%d/%d:",
> +             ifilter->format, ifilter->width, ifilter->height,
> +             ist->st->time_base.num, ist->st->time_base.den);
> +    if ((ret = avfilter_graph_create_filter(&ifilter->filter, buffer_filt, name,
> +                                            args.str, NULL, fg->graph)) < 0)
> +        goto fail;
> +
> +    par->hw_frames_ctx = ifilter->hw_frames_ctx;
> +    par->format = ifilter->format;
> +    par->width = ifilter->width;
> +    par->height = ifilter->height;
> +
> +    ret = av_buffersrc_parameters_set(ifilter->filter, par);
> +    if (ret < 0)
> +        goto fail;
> +    av_freep(&par);
> +    last_filter = ifilter->filter;
> +
> +    media_type = avfilter_pad_get_type(in->filter_ctx->input_pads, in->pad_idx);
> +    if (media_type == AVMEDIA_TYPE_VIDEO) {
> +        av_log(NULL, AV_LOG_INFO, "Auto-inserting graphicsub2video filter\n");
> +        ret = insert_filter(&last_filter, &pad_idx, "graphicsub2video", NULL);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    if ((ret = avfilter_link(last_filter, 0, in->filter_ctx, in->pad_idx)) < 0)
> +        return ret;
>  
>      return 0;
> +fail:
> +    av_freep(&par);
> +
> +    return ret;
>  }
>  
>  static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
> @@ -710,8 +813,15 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
>      char name[255];
>      int ret, pad_idx = 0;
>      int64_t tsoffset = 0;
> -    AVBufferSrcParameters *par = av_buffersrc_parameters_alloc();
> +    AVBufferSrcParameters *par;
> +
> +    if (ist->dec_ctx->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> +        // Automatically insert conversion filter to retain compatibility
> +        // with sub2video command lines
> +        return configure_input_subtitle_filter(fg, ifilter, in);
> +    }
>  
> +    par = av_buffersrc_parameters_alloc();
>      if (!par)
>          return AVERROR(ENOMEM);
>      memset(par, 0, sizeof(*par));
> @@ -726,12 +836,6 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
>      if (!fr.num)
>          fr = av_guess_frame_rate(input_files[ist->file_index]->ctx, ist->st, NULL);
>  
> -    if (ist->dec_ctx->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> -        ret = sub2video_prepare(ist, ifilter);
> -        if (ret < 0)
> -            goto fail;
> -    }
> -
>      sar = ifilter->sample_aspect_ratio;
>      if(!sar.den)
>          sar = (AVRational){0,1};
> @@ -743,7 +847,7 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
>               tb.num, tb.den, sar.num, sar.den);
>      if (fr.num && fr.den)
>          av_bprintf(&args, ":frame_rate=%d/%d", fr.num, fr.den);
> -    snprintf(name, sizeof(name), "graph %d input from stream %d:%d", fg->index,
> +    snprintf(name, sizeof(name), "graph %d video input from stream %d:%d", fg->index,
>               ist->file_index, ist->st->index);
>  
>  
> @@ -949,6 +1053,7 @@ static int configure_input_filter(FilterGraph *fg, InputFilter *ifilter,
>      switch (avfilter_pad_get_type(in->filter_ctx->input_pads, in->pad_idx)) {
>      case AVMEDIA_TYPE_VIDEO: return configure_input_video_filter(fg, ifilter, in);
>      case AVMEDIA_TYPE_AUDIO: return configure_input_audio_filter(fg, ifilter, in);
> +    case AVMEDIA_TYPE_SUBTITLE: return configure_input_subtitle_filter(fg, ifilter, in);
>      default: av_assert0(0); return 0;
>      }
>  }
> @@ -1116,19 +1221,6 @@ int configure_filtergraph(FilterGraph *fg)
>          }
>      }
>  
> -    /* process queued up subtitle packets */
> -    for (i = 0; i < fg->nb_inputs; i++) {
> -        InputStream *ist = fg->inputs[i]->ist;
> -        if (ist->sub2video.sub_queue && ist->sub2video.frame) {
> -            while (av_fifo_size(ist->sub2video.sub_queue)) {
> -                AVSubtitle tmp;
> -                av_fifo_generic_read(ist->sub2video.sub_queue, &tmp, sizeof(tmp), NULL);
> -                sub2video_update(ist, INT64_MIN, &tmp);
> -                avsubtitle_free(&tmp);
> -            }
> -        }
> -    }
> -
>      return 0;
>  
>  fail:
> @@ -1151,6 +1243,7 @@ int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame)
>      ifilter->sample_rate         = frame->sample_rate;
>      ifilter->channels            = frame->channels;
>      ifilter->channel_layout      = frame->channel_layout;
> +    ifilter->type                = frame->type;
>  
>      av_freep(&ifilter->displaymatrix);
>      sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX);
> diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c
> index 14e702bd92..be69d54aaf 100644
> --- a/fftools/ffmpeg_hw.c
> +++ b/fftools/ffmpeg_hw.c
> @@ -449,7 +449,7 @@ int hw_device_setup_for_encode(OutputStream *ost)
>      AVBufferRef *frames_ref = NULL;
>      int i;
>  
> -    if (ost->filter) {
> +    if (ost->filter && ost->filter->filter) {

I don't think that your patches make it necessary to add this (or have
you already added hardware accelerated subtitle encoding?), so it is
either already necessary in master and should be sent as a separate
commit or it is unnecessary and should be dropped.

>          frames_ref = av_buffersink_get_hw_frames_ctx(ost->filter->filter);
>          if (frames_ref &&
>              ((AVHWFramesContext*)frames_ref->data)->format ==
> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
> index e55b584fd4..d443a5b8c8 100644
> --- a/fftools/ffmpeg_opt.c
> +++ b/fftools/ffmpeg_opt.c
> @@ -2207,8 +2207,9 @@ static void init_output_filter(OutputFilter *ofilter, OptionsContext *o,
>      switch (ofilter->type) {
>      case AVMEDIA_TYPE_VIDEO: ost = new_video_stream(o, oc, -1); break;
>      case AVMEDIA_TYPE_AUDIO: ost = new_audio_stream(o, oc, -1); break;
> +    case AVMEDIA_TYPE_SUBTITLE: ost = new_subtitle_stream(o, oc, -1); break;
>      default:
> -        av_log(NULL, AV_LOG_FATAL, "Only video and audio filters are supported "
> +        av_log(NULL, AV_LOG_FATAL, "Only video, audio and subtitle filters are supported "
>                 "currently.\n");
>          exit_program(1);
>      }




More information about the ffmpeg-devel mailing list