[FFmpeg-devel] [PATCH] libavutil/timestamp.h: Fix loss of precision in timestamps for silencedetect on long files

Allan Cady allancady at yahoo.com
Wed Feb 21 09:39:15 EET 2024


When the silencedetect audio filter is run against long files, the
output timestamps gradually lose precision as the scan proceeds further
into the file. This is because the output format specifier ("%.6g" in
libavutil/timestamp.h) limits the output to six significant digits in
total. As the offset into the file increases, digits drop off the end,
until eventually, for offsets greater than 100000 seconds (about 28
hours), fractions of a second disappear altogether, and the timestamps
are logged as whole seconds.

This patch changes the format to "%.6f" for silencedetect, which will
give microsecond precision for all timestamps regardless of offset.

libavutil/timestamp.h exposes a macro, av_ts2timestr, as the public
interface. This macro was used by silencedetect.c, as well as other
source files. In order to fix the issue for silencedetect without
affecting other files and tests, I have added a new macro,
av_ts2timestr_fixed_precision, which uses the new format specifier.
The original macro, av_ts2timestr, remains, with its original
behavior.

timestamp.h also exposes a function, av_ts_make_time_string, which
was called only from av_ts2timestr. After this patch, both of the
macros now use a new function, av_ts_make_time_string_format, which
takes a format specifier as an argument, so the function
av_ts_make_time_string is no longer used. I've left it in place, but
flagged it for deprecation with FF_API_AV_MAKETIMESTRING.

The test reference file filter-metadata-silencedetect has been updated
to match the new functionality.

Signed-off-by: Allan Cady <allancady at yahoo.com>
---
 libavfilter/af_silencedetect.c               | 12 +++----
 libavutil/timestamp.h                        | 34 +++++++++++++++++---
 libavutil/version.h                          |  1 +
 tests/ref/fate/filter-metadata-silencedetect |  2 +-
 4 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/libavfilter/af_silencedetect.c b/libavfilter/af_silencedetect.c
index 845c65bfed..f1a8096540 100644
--- a/libavfilter/af_silencedetect.c
+++ b/libavfilter/af_silencedetect.c
@@ -86,11 +86,11 @@ static av_always_inline void update(SilenceDetectContext *s, AVFrame *insamples,
                 s->start[channel] = insamples->pts + av_rescale_q(current_sample / s->channels + 1 - nb_samples_notify * s->independent_channels / s->channels,
                         (AVRational){ 1, s->last_sample_rate }, time_base);
                 set_meta(insamples, s->mono ? channel + 1 : 0, "silence_start",
-                        av_ts2timestr(s->start[channel], &time_base));
+                        av_ts2timestr_fixed_precision(s->start[channel], &time_base));
                 if (s->mono)
                     av_log(s, AV_LOG_INFO, "channel: %d | ", channel);
                 av_log(s, AV_LOG_INFO, "silence_start: %s\n",
-                        av_ts2timestr(s->start[channel], &time_base));
+                        av_ts2timestr_fixed_precision(s->start[channel], &time_base));
             }
         }
     } else {
@@ -101,15 +101,15 @@ static av_always_inline void update(SilenceDetectContext *s, AVFrame *insamples,
             int64_t duration_ts = end_pts - s->start[channel];
             if (insamples) {
                 set_meta(insamples, s->mono ? channel + 1 : 0, "silence_end",
-                        av_ts2timestr(end_pts, &time_base));
+                        av_ts2timestr_fixed_precision(end_pts, &time_base));
                 set_meta(insamples, s->mono ? channel + 1 : 0, "silence_duration",
-                        av_ts2timestr(duration_ts, &time_base));
+                        av_ts2timestr_fixed_precision(duration_ts, &time_base));
             }
             if (s->mono)
                 av_log(s, AV_LOG_INFO, "channel: %d | ", channel);
             av_log(s, AV_LOG_INFO, "silence_end: %s | silence_duration: %s\n",
-                    av_ts2timestr(end_pts, &time_base),
-                    av_ts2timestr(duration_ts, &time_base));
+                    av_ts2timestr_fixed_precision(end_pts, &time_base),
+                    av_ts2timestr_fixed_precision(duration_ts, &time_base));
         }
         s->nb_null_samples[channel] = 0;
         s->start[channel] = INT64_MIN;
diff --git a/libavutil/timestamp.h b/libavutil/timestamp.h
index 9ae64da8a1..b483b5e12d 100644
--- a/libavutil/timestamp.h
+++ b/libavutil/timestamp.h
@@ -31,6 +31,8 @@
 #endif
 
 #define AV_TS_MAX_STRING_SIZE 32
+#define AV_TS_FMT_DEFAULT "%.6g"
+#define AV_TS_FMT_FIXED_PRECISION_6 "%.6f"
 
 /**
  * Fill the provided buffer with a string containing a timestamp
@@ -53,9 +55,14 @@ static inline char *av_ts_make_string(char *buf, int64_t ts)
  */
 #define av_ts2str(ts) av_ts_make_string((char[AV_TS_MAX_STRING_SIZE]){0}, ts)
 
+#if FF_API_AV_MAKETIMESTRING
 /**
+ * This function is probably deprecated. It was originally called by the
+ * av_ts2timestr macro defined below, which now uses av_ts_make_time_string_format
+ * instead. Nothing external references this function directly.
+ *
  * Fill the provided buffer with a string containing a timestamp time
- * representation.
+ * representation, using the default format AV_TS_FMT_DEFAULT.
  *
  * @param buf a buffer with size in bytes of at least AV_TS_MAX_STRING_SIZE
  * @param ts the timestamp to represent
@@ -65,14 +72,33 @@ static inline char *av_ts_make_string(char *buf, int64_t ts)
 static inline char *av_ts_make_time_string(char *buf, int64_t ts, AVRational *tb)
 {
     if (ts == AV_NOPTS_VALUE) snprintf(buf, AV_TS_MAX_STRING_SIZE, "NOPTS");
-    else                      snprintf(buf, AV_TS_MAX_STRING_SIZE, "%.6g", av_q2d(*tb) * ts);
+    else                      snprintf(buf, AV_TS_MAX_STRING_SIZE, AV_TS_FMT_DEFAULT, av_q2d(*tb) * ts);
     return buf;
 }
+#endif
 
 /**
- * Convenience macro, the return value should be used only directly in
+ * Fill the provided buffer with a string containing a timestamp time
+ * representation.
+ *
+ * @param buf a buffer with size in bytes of at least AV_TS_MAX_STRING_SIZE
+ * @param ts the timestamp to represent
+ * @param tb the timebase of the timestamp
+ * @param format format string for timestamp output, e.g. AV_TS_FMT_DEFAULT.
+ * @return the buffer in input
+ */
+static inline char *av_ts_make_time_string_format(char *buf, int64_t ts, AVRational *tb, const char *format)
+{
+    if (ts == AV_NOPTS_VALUE) snprintf(buf, AV_TS_MAX_STRING_SIZE, "NOPTS");
+    else                      snprintf(buf, AV_TS_MAX_STRING_SIZE, format, av_q2d(*tb) * ts);
+    return buf;
+}
+
+/**
+ * Convenience macros, the return values should be used only directly in
  * function arguments but never stand-alone.
  */
-#define av_ts2timestr(ts, tb) av_ts_make_time_string((char[AV_TS_MAX_STRING_SIZE]){0}, ts, tb)
+#define av_ts2timestr(ts, tb) av_ts_make_time_string_format((char[AV_TS_MAX_STRING_SIZE]){0}, ts, tb, AV_TS_FMT_DEFAULT)
+#define av_ts2timestr_fixed_precision(ts, tb) av_ts_make_time_string_format((char[AV_TS_MAX_STRING_SIZE]){0}, ts, tb, AV_TS_FMT_FIXED_PRECISION_6)
 
 #endif /* AVUTIL_TIMESTAMP_H */
diff --git a/libavutil/version.h b/libavutil/version.h
index 9f45af93df..a2daeda772 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -119,6 +119,7 @@
 #define FF_API_FRAME_KEY                (LIBAVUTIL_VERSION_MAJOR < 59)
 #define FF_API_PALETTE_HAS_CHANGED      (LIBAVUTIL_VERSION_MAJOR < 59)
 #define FF_API_VULKAN_CONTIGUOUS_MEMORY (LIBAVUTIL_VERSION_MAJOR < 59)
+#define FF_API_AV_MAKETIMESTRING        (LIBAVUTIL_VERSION_MAJOR < 59)
 
 /**
  * @}
diff --git a/tests/ref/fate/filter-metadata-silencedetect b/tests/ref/fate/filter-metadata-silencedetect
index bc53fea047..e66ffe5fdd 100644
--- a/tests/ref/fate/filter-metadata-silencedetect
+++ b/tests/ref/fate/filter-metadata-silencedetect
@@ -1,5 +1,5 @@
 pts=0|tag:lavfi.silence_duration=0.523107|tag:lavfi.silence_end=0.690023|tag:lavfi.silence_start=0.736417
-pts=46080|tag:lavfi.silence_start=1.27626|tag:lavfi.silence_end=1.80751|tag:lavfi.silence_duration=0.531247
+pts=46080|tag:lavfi.silence_start=1.276259|tag:lavfi.silence_end=1.807506|tag:lavfi.silence_duration=0.531247
 pts=92160
 pts=138240
 pts=184320
-- 
2.25.1



More information about the ffmpeg-devel mailing list