[FFmpeg-devel] [PATCH v5 3/3] ffmpeg: add video heartbeat capability to fix_sub_duration
Jan Ekström
jeebjp at gmail.com
Mon Oct 10 15:44:57 EEST 2022
From: Jan Ekström <jan.ekstrom at 24i.com>
Splits the currently handled subtitle at random access point
packets that can be configured to follow a specific output stream.
This way the subtitle - which is known to be shown at this time -
can be split and passed to the muxer before its full duration is
known.
Co-authored-by: Andrzej Nadachowski <andrzej.nadachowski at 24i.com>
Co-authored-by: Bernard Boulay <bernard.boulay at 24i.com>
Signed-off-by: Jan Ekström <jan.ekstrom at 24i.com>
---
doc/ffmpeg.texi | 11 ++
fftools/ffmpeg.c | 142 ++++++++++++++++++
fftools/ffmpeg.h | 8 +
fftools/ffmpeg_opt.c | 9 ++
tests/fate/ffmpeg.mak | 14 ++
.../fate/ffmpeg-fix_sub_duration_heartbeat | 48 ++++++
6 files changed, 232 insertions(+)
create mode 100644 tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index e9020b30d5..cd957fa826 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -1313,6 +1313,17 @@ List all hardware acceleration components enabled in this build of ffmpeg.
Actual runtime availability depends on the hardware and its suitable driver
being installed.
+@item -fix_sub_duration_heartbeat[:@var{stream_specifier}]
+Set a specific output video stream as the heartbeat stream according to which
+to split and push through the currently in-progress subtitle upon receipt of
+a random access packet.
+
+This lowers the latency of subtitles for which the end packet or the following
+subtitle has not yet been received.
+
+Requires @option{-fix_sub_duration} to be set for the relevant input subtitle
+stream for this to have any effect.
+
@end table
@section Audio Options
diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index a7e6c0f6e0..56fcb994e3 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -128,6 +128,7 @@ typedef struct BenchmarkTimeStamps {
int64_t sys_usec;
} BenchmarkTimeStamps;
+static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt);
static BenchmarkTimeStamps get_benchmark_time_stamps(void);
static int64_t getmaxrss(void);
static int ifilter_has_all_input_formats(FilterGraph *fg);
@@ -978,6 +979,13 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, &enc->time_base));
}
+ if ((ret = trigger_fix_sub_duration_heartbeat(ost, pkt)) < 0) {
+ av_log(NULL, AV_LOG_ERROR,
+ "Subtitle heartbeat logic failed in %s! (%s)\n",
+ __func__, av_err2str(ret));
+ exit_program(1);
+ }
+
ost->data_size_enc += pkt->size;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO)
@@ -1934,6 +1942,16 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
opkt->duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->mux_timebase);
+ {
+ int ret = trigger_fix_sub_duration_heartbeat(ost, pkt);
+ if (ret < 0) {
+ av_log(NULL, AV_LOG_ERROR,
+ "Subtitle heartbeat logic failed in %s! (%s)\n",
+ __func__, av_err2str(ret));
+ exit_program(1);
+ }
+ }
+
output_packet(of, opkt, ost, 0);
ost->streamcopy_started = 1;
@@ -2365,6 +2383,130 @@ out:
return ret;
}
+/**
+ * Deep-copy an AVSubtitle: the scalar fields, every rectangle, and the
+ * buffers each rectangle points to (text, ass and the data planes).
+ *
+ * The copy is built in a local temporary and stored into *dst only once
+ * everything has been duplicated, so *dst is left untouched on failure.
+ * Any previous contents of *dst are overwritten without being freed.
+ *
+ * A source with no rects yields a copy with rects == NULL and
+ * num_rects == 0.
+ *
+ * @param dst destination subtitle, written only on success
+ * @param src subtitle to duplicate; it is not modified
+ * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
+ */
+static int copy_av_subtitle(AVSubtitle *dst, const AVSubtitle *src)
+{
+    int ret = AVERROR_BUG;
+    AVSubtitle tmp = {
+        .format             = src->format,
+        .start_display_time = src->start_display_time,
+        .end_display_time   = src->end_display_time,
+        .num_rects          = 0,
+        .rects              = NULL,
+        .pts                = src->pts
+    };
+
+    if (!src->num_rects)
+        goto success;
+
+    if (!(tmp.rects = av_calloc(src->num_rects, sizeof(*tmp.rects))))
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < src->num_rects; i++) {
+        const AVSubtitleRect *src_rect = src->rects[i];
+        AVSubtitleRect *dst_rect;
+
+        if (!(dst_rect = tmp.rects[i] = av_mallocz(sizeof(*tmp.rects[0])))) {
+            ret = AVERROR(ENOMEM);
+            goto cleanup;
+        }
+
+        // Count the rect as soon as it exists, so that the cleanup path
+        // also releases a rect whose buffers were only partially copied.
+        tmp.num_rects++;
+
+        dst_rect->type      = src_rect->type;
+        dst_rect->flags     = src_rect->flags;
+
+        dst_rect->x         = src_rect->x;
+        dst_rect->y         = src_rect->y;
+        dst_rect->w         = src_rect->w;
+        dst_rect->h         = src_rect->h;
+        dst_rect->nb_colors = src_rect->nb_colors;
+
+        if (src_rect->text) {
+            if (!(dst_rect->text = av_strdup(src_rect->text))) {
+                ret = AVERROR(ENOMEM);
+                goto cleanup;
+            }
+        }
+
+        if (src_rect->ass) {
+            if (!(dst_rect->ass = av_strdup(src_rect->ass))) {
+                ret = AVERROR(ENOMEM);
+                goto cleanup;
+            }
+        }
+
+        for (int j = 0; j < 4; j++) {
+            size_t buf_size;
+
+            if (!src_rect->data[j])
+                continue;
+
+            // SUBTITLE_BITMAP images are special in the sense that they
+            // are like PAL8 images. first pointer to data, second to
+            // palette. This makes the size calculation match this.
+            if (src_rect->type == SUBTITLE_BITMAP && j == 1)
+                buf_size = AVPALETTE_SIZE;
+            else
+                // multiply in size_t: an int * int product could overflow
+                buf_size = (size_t)src_rect->h * src_rect->linesize[j];
+
+            if (!(dst_rect->data[j] = av_memdup(src_rect->data[j], buf_size))) {
+                ret = AVERROR(ENOMEM);
+                goto cleanup;
+            }
+            dst_rect->linesize[j] = src_rect->linesize[j];
+        }
+    }
+
+success:
+    *dst = tmp;
+
+    return 0;
+
+cleanup:
+    avsubtitle_free(&tmp);
+
+    return ret;
+}
+
+/*
+ * Re-submit a copy of the subtitle buffered in ist->prev_sub, re-stamped
+ * with the heartbeat time signal_pts, through process_subtitle().
+ *
+ * Nothing is done, and 0 is returned, unless all of the following hold:
+ *  - fix_sub_duration is set on the input stream,
+ *  - the buffered subtitle has at least one rect,
+ *  - signal_pts is strictly later than the buffered subtitle's pts.
+ *
+ * Returns a negative error code if copying the subtitle fails, otherwise
+ * the return value of process_subtitle().
+ */
+static int fix_sub_duration_heartbeat(InputStream *ist, int64_t signal_pts)
+{
+    AVSubtitle *buffered = &ist->prev_sub.subtitle;
+    AVSubtitle split;
+    int got_output = 1;
+    int err;
+
+    if (!ist->fix_sub_duration)
+        return 0;
+
+    if (!buffered->num_rects)
+        return 0;
+
+    if (signal_pts <= buffered->pts)
+        return 0;
+
+    err = copy_av_subtitle(&split, buffered);
+    if (err < 0)
+        return err;
+
+    split.pts = signal_pts;
+
+    // NOTE(review): the copy is not freed here; presumably
+    // process_subtitle() takes ownership of it - confirm.
+    return process_subtitle(ist, &split, &got_output);
+}
+
+/*
+ * Heartbeat hook for an output packet.
+ *
+ * If ost is configured as a heartbeat stream and pkt is flagged as a key
+ * packet, run fix_sub_duration_heartbeat() on every input stream that is
+ * being decoded and whose decoder is of subtitle type, using the packet
+ * pts rescaled to AV_TIME_BASE_Q as the heartbeat time.
+ *
+ * Returns 0 on success or when the packet is not a heartbeat, otherwise
+ * the first negative error code reported for an input stream.
+ */
+static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt)
+{
+    // NOTE(review): pkt->pts is interpreted in ost->mux_timebase here;
+    // verify that every caller passes a packet in that time base.
+    const int64_t signal_pts = av_rescale_q(pkt->pts, ost->mux_timebase,
+                                            AV_TIME_BASE_Q);
+    const int is_heartbeat = ost->fix_sub_duration_heartbeat &&
+                             (pkt->flags & AV_PKT_FLAG_KEY);
+
+    // we are only interested in heartbeats on streams configured, and
+    // only on random access points.
+    if (!is_heartbeat)
+        return 0;
+
+    for (int i = 0; i < nb_input_streams; i++) {
+        InputStream *ist = input_streams[i];
+
+        // dec_ctx is only looked at for streams that are being decoded
+        if (ist->decoding_needed &&
+            ist->dec_ctx->codec_type == AVMEDIA_TYPE_SUBTITLE) {
+            int err = fix_sub_duration_heartbeat(ist, signal_pts);
+
+            if (err < 0)
+                return err;
+        }
+    }
+
+    return 0;
+}
+
static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
int *decode_failed)
{
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 8460391afa..91ac821cc2 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -219,6 +219,8 @@ typedef struct OptionsContext {
int nb_reinit_filters;
SpecifierOpt *fix_sub_duration;
int nb_fix_sub_duration;
+ SpecifierOpt *fix_sub_duration_heartbeat;
+ int nb_fix_sub_duration_heartbeat;
SpecifierOpt *canvas_sizes;
int nb_canvas_sizes;
SpecifierOpt *pass;
@@ -610,6 +612,12 @@ typedef struct OutputStream {
int sq_idx_encode;
int sq_idx_mux;
+
+ /*
+ * bool on whether this stream should be utilized for splitting
+ * subtitles utilizing fix_sub_duration at random access points.
+ */
+ unsigned int fix_sub_duration_heartbeat;
} OutputStream;
typedef struct Muxer Muxer;
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 8f57b699f1..023706c2d7 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -98,6 +98,7 @@ static const char *const opt_name_filters[] = {"filter", "af",
static const char *const opt_name_filter_scripts[] = {"filter_script", NULL};
static const char *const opt_name_reinit_filters[] = {"reinit_filter", NULL};
static const char *const opt_name_fix_sub_duration[] = {"fix_sub_duration", NULL};
+static const char *const opt_name_fix_sub_duration_heartbeat[] = {"fix_sub_duration_heartbeat", NULL};
static const char *const opt_name_canvas_sizes[] = {"canvas_size", NULL};
static const char *const opt_name_pass[] = {"pass", NULL};
static const char *const opt_name_passlogfiles[] = {"passlogfile", NULL};
@@ -1718,6 +1719,9 @@ static OutputStream *new_output_stream(OptionsContext *o, AVFormatContext *oc, e
MATCH_PER_STREAM_OPT(bits_per_raw_sample, i, ost->bits_per_raw_sample,
oc, st);
+ MATCH_PER_STREAM_OPT(fix_sub_duration_heartbeat, i, ost->fix_sub_duration_heartbeat,
+ oc, st);
+
if (oc->oformat->flags & AVFMT_GLOBALHEADER && ost->enc_ctx)
ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
@@ -4098,6 +4102,11 @@ const OptionDef options[] = {
{ "autoscale", HAS_ARG | OPT_BOOL | OPT_SPEC |
OPT_EXPERT | OPT_OUTPUT, { .off = OFFSET(autoscale) },
"automatically insert a scale filter at the end of the filter graph" },
+ { "fix_sub_duration_heartbeat", OPT_VIDEO | OPT_BOOL | OPT_EXPERT |
+ OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(fix_sub_duration_heartbeat) },
+ "set this video output stream to be a heartbeat stream for "
+ "fix_sub_duration, according to which subtitles should be split at "
+ "random access points" },
/* audio options */
{ "aframes", OPT_AUDIO | HAS_ARG | OPT_PERFILE | OPT_OUTPUT, { .func_arg = opt_audio_frames },
diff --git a/tests/fate/ffmpeg.mak b/tests/fate/ffmpeg.mak
index d87639c596..a983ace3bd 100644
--- a/tests/fate/ffmpeg.mak
+++ b/tests/fate/ffmpeg.mak
@@ -117,6 +117,20 @@ fate-ffmpeg-fix_sub_duration: CMD = fmtstdout srt -fix_sub_duration \
-real_time 1 -f lavfi \
-i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
+# Basic test for fix_sub_duration_heartbeat, which causes a buffered subtitle
+# to be pushed out when a video keyframe is received from an encoder.
+FATE_SAMPLES_FFMPEG-$(call FILTERDEMDECENCMUX, MOVIE, MPEGVIDEO, \
+ MPEG2VIDEO, SUBRIP, SRT, LAVFI_INDEV \
+ MPEGVIDEO_PARSER CCAPTION_DECODER \
+ MPEG2VIDEO_ENCODER NULL_MUXER PIPE_PROTOCOL) \
+ += fate-ffmpeg-fix_sub_duration_heartbeat
+fate-ffmpeg-fix_sub_duration_heartbeat: CMD = fmtstdout srt -fix_sub_duration \
+ -real_time 1 -f lavfi \
+ -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]" \
+ -map 0:v -fix_sub_duration_heartbeat:v:0 \
+ -c mpeg2video -b:v 2M -g 30 -sc_threshold 1000000000 \
+ -f null -
+
FATE_STREAMCOPY-$(call REMUX, MP4 MOV, EAC3_DEMUXER) += fate-copy-trac3074
fate-copy-trac3074: CMD = transcode eac3 $(TARGET_SAMPLES)/eac3/csi_miami_stereo_128_spx.eac3\
mp4 "-codec copy -map 0" "-codec copy"
diff --git a/tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat b/tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
new file mode 100644
index 0000000000..957a410921
--- /dev/null
+++ b/tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
@@ -0,0 +1,48 @@
+1
+00:00:00,968 --> 00:00:01,001
+<font face="Monospace">{\an7}(</font>
+
+2
+00:00:01,001 --> 00:00:01,168
+<font face="Monospace">{\an7}(</font>
+
+3
+00:00:01,168 --> 00:00:01,368
+<font face="Monospace">{\an7}(<i> inaudibl</i></font>
+
+4
+00:00:01,368 --> 00:00:01,568
+<font face="Monospace">{\an7}(<i> inaudible radio chat</i></font>
+
+5
+00:00:01,568 --> 00:00:02,002
+<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
+
+6
+00:00:02,002 --> 00:00:03,003
+<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
+
+7
+00:00:03,003 --> 00:00:03,103
+<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
+
+8
+00:00:03,103 --> 00:00:03,303
+<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
+>></font>
+
+9
+00:00:03,303 --> 00:00:03,503
+<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
+>> Safety rema</font>
+
+10
+00:00:03,504 --> 00:00:03,704
+<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
+>> Safety remains our numb</font>
+
+11
+00:00:03,704 --> 00:00:04,004
+<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
+>> Safety remains our number one</font>
+
--
2.37.3
More information about the ffmpeg-devel
mailing list