[FFmpeg-devel] [PATCH] avfilter/select: evaluate silencedetect metadata

Fri Jun 18 07:19:16 EEST 2021

On 2021-06-18 01:53, Timo Rothenpieler wrote:
> ---
>   doc/filters.texi       |  6 ++++++
>   libavfilter/f_select.c | 18 ++++++++++++++++++
>   libavfilter/version.h  |  2 +-
>   3 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index da8f7d7726..db6ecd7c2a 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -5404,6 +5404,7 @@ Set sidechain gain. Default is 1. Range is from 0.015625 to 64.
>   
>   This filter supports the all above options as @ref{commands}.
>   
> + at anchor{silencedetect}
>   @section silencedetect
>   
>   Detect silence in an audio stream.
> @@ -25564,6 +25565,11 @@ missing.
>   That basically means that an input frame is selected if its pts is within the
>   interval set by the concat demuxer.
>   
> + at item silence_detected
> +Evaluates the metadata added to frames by the @ref{silencedetect} filter.
> +The @var{silence_detected} variable is -1 if silence was detected for the current
> +frame, 0 otherwise.
> +

Instead of a specific option for silencedetect, it would be future-proof 
if it was an option called, say, metadata with a constant for 
silencedetect to start with.
There are multiple per-frame analysis filters like blackframe, 
blackdetect, freezedetect..etc and it will be easier to just extend 
'metadata' with new constants
than to add a new option for each filter.

>   @end table
>   
>   The default value of the select expression is "1".
> diff --git a/libavfilter/f_select.c b/libavfilter/f_select.c
> index f0468078e8..1e655290f4 100644
> --- a/libavfilter/f_select.c
> +++ b/libavfilter/f_select.c
> @@ -86,6 +86,8 @@ static const char *const var_names[] = {
>   
>       "concatdec_select",  ///< frame is within the interval set by the concat demuxer
>   
> +    "silence_detected",  ///< silencedetect detected silence for this frame
> +
>       NULL
>   };
>   
> @@ -138,6 +140,8 @@ enum var_name {
>   
>       VAR_CONCATDEC_SELECT,
>   
> +    VAR_SILENCE_DETECTED,
> +
>       VAR_VARS_NB
>   };
>   
> @@ -157,6 +161,7 @@ typedef struct SelectContext {
>       double select;
>       int select_out;                 ///< mark the selected output pad index
>       int nb_outputs;
> +    int silence_detected;
>   } SelectContext;
>   
>   #define OFFSET(x) offsetof(SelectContext, x)
> @@ -325,6 +330,18 @@ static double get_concatdec_select(AVFrame *frame, int64_t pts)
>       return NAN;
>   }
>   
> +static double get_silence_detected(SelectContext *select, AVFrame *frame)
> +{
> +    AVDictionary *metadata = frame->metadata;
> +    AVDictionaryEntry *start = av_dict_get(metadata, "lavfi.silence_start", NULL, 0);
> +    AVDictionaryEntry *end = av_dict_get(metadata, "lavfi.silence_end", NULL, 0);
> +    if (start)
> +        select->silence_detected = -1;
> +    if (end)
> +        select->silence_detected = 0;
> +    return select->silence_detected;
> +}
> +
>   static void select_frame(AVFilterContext *ctx, AVFrame *frame)
>   {
>       SelectContext *select = ctx->priv;
> @@ -342,6 +359,7 @@ static void select_frame(AVFilterContext *ctx, AVFrame *frame)
>       select->var_values[VAR_POS] = frame->pkt_pos == -1 ? NAN : frame->pkt_pos;
>       select->var_values[VAR_KEY] = frame->key_frame;
>       select->var_values[VAR_CONCATDEC_SELECT] = get_concatdec_select(frame, av_rescale_q(frame->pts, inlink->time_base, AV_TIME_BASE_Q));
> +    select->var_values[VAR_SILENCE_DETECTED] = get_silence_detected(select, frame);
>   
>       switch (inlink->type) {
>       case AVMEDIA_TYPE_AUDIO:
> diff --git a/libavfilter/version.h b/libavfilter/version.h
> index 5052681653..fbb81ef31c 100644
> --- a/libavfilter/version.h
> +++ b/libavfilter/version.h
> @@ -31,7 +31,7 @@
>   
>   #define LIBAVFILTER_VERSION_MAJOR   8
>   #define LIBAVFILTER_VERSION_MINOR   0
> -#define LIBAVFILTER_VERSION_MICRO 102
> +#define LIBAVFILTER_VERSION_MICRO 103
>   
>   
>   #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \