[FFmpeg-devel] [PATCH v2] avcodec/noise_bsf: add expr support

Thu Jul 29 21:30:42 EEST 2021

Gyan Doshi:
> ---
>  doc/bitstream_filters.texi |  64 ++++++++++++---
>  libavcodec/noise_bsf.c     | 161 +++++++++++++++++++++++++++++++++----
>  tests/fate/matroska.mak    |   2 +-
>  3 files changed, 199 insertions(+), 28 deletions(-)
> 
> diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
> index d10842ae47..46e4869f80 100644
> --- a/doc/bitstream_filters.texi
> +++ b/doc/bitstream_filters.texi
> @@ -534,20 +534,62 @@ container. Can be used for fuzzing or testing error resilience/concealment.
>  Parameters:
>  @table @option
>  @item amount
> -A numeral string, whose value is related to how often output bytes will
> -be modified. Therefore, values below or equal to 0 are forbidden, and
> -the lower the more frequent bytes will be modified, with 1 meaning
> -every byte is modified.
> -@item dropamount
> -A numeral string, whose value is related to how often packets will be dropped.
> -Therefore, values below or equal to 0 are forbidden, and the lower the more
> -frequent packets will be dropped, with 1 meaning every packet is dropped.
> +Accepts an expression whose evaluation per-packet determines how often bytes in that
> +packet will be modified. A value below 0 will result in a variable frequency.
> +Default is 0 which results in no modification. However, if neither amount nor drop is specified,
> +amount will be set to @var{-1}. See below for accepted variables.
> +@item drop, dropamount
> +Accepts an expression evaluated per-packet whose value determines whether that packet is dropped.
> +Evaluation to a positive value results in the packet being dropped. Evaluation to a negative
> +value results in a variable chance of it being dropped, roughly inverse in proportion to the magnitude
> +of the value. Default is 0 which results in no drops. See below for accepted variables.

Negating the dropamount scale broke all prior usages of it.

>  @end table
>  
> -The following example applies the modification to every byte but does not drop
> -any packets.
> +Both @code{amount} and @code{drop} accept expressions containing the following variables:
> +
> +@table @samp
> +@item n
> +The index of the packet, starting from zero.
> +@item tb
> +The timebase for packet timestamps.
> +@item pts
> +Packet presentation timestamp.
> +@item dts
> +Packet decoding timestamp.
> +@item nopts
> +Constant representing AV_NOPTS_VALUE.
> +@item startpts
> +First non-AV_NOPTS_VALUE PTS seen in the stream.
> +@item startdts
> +First non-AV_NOPTS_VALUE DTS seen in the stream.
> +@item duration
> +@itemx d
> +Packet duration, in timebase units.
> +@item pos
> +Packet position in input; may be -1 when unknown or not set.
> +@item size
> +Packet size, in bytes.
> +@item key
> +Whether packet is marked as a keyframe.
> +@item state
> +A pseudo random integer, primarily derived from the content of packet payload.
> +@end table
> +
> +@subsection Examples
> +Apply modification to every byte but don't drop any packets.
> +@example
> +ffmpeg -i INPUT -c copy -bsf noise=1 output.mkv
> +@end example
> +
> +Drop every video packet not marked as a keyframe after timestamp 30s but do not
> +modify any of the remaining packets.
> +@example
> +ffmpeg -i INPUT -c copy -bsf:v noise=drop='gt(t\,30)*not(key)' output.mkv
> +@end example
> +
> +Drop one second of audio every 10 seconds and add some random noise to the rest.
>  @example
> -ffmpeg -i INPUT -c copy -bsf noise[=1] output.mkv
> +ffmpeg -i INPUT -c copy -bsf:a noise=amount=-1:drop='between(mod(t\,10)\,9\,10)' output.mkv
>  @end example
>  
>  @section null
> diff --git a/libavcodec/noise_bsf.c b/libavcodec/noise_bsf.c
> index 6ebd369633..9d7ef93001 100644
> --- a/libavcodec/noise_bsf.c
> +++ b/libavcodec/noise_bsf.c
> @@ -23,55 +23,182 @@
>  #include "bsf.h"
>  #include "bsf_internal.h"
>  
> +#include "libavutil/avstring.h"
>  #include "libavutil/log.h"
>  #include "libavutil/opt.h"
> +#include "libavutil/eval.h"
> +
> +static const char *const var_names[] = {
> +    "n",                           /// packet index, starting from zero
> +    "tb",                          /// timebase
> +    "pts",                         /// packet presentation timestamp
> +    "dts",                         /// packet decoding timestamp
> +    "nopts",                       /// AV_NOPTS_VALUE
> +    "startpts",                    /// first seen non-AV_NOPTS_VALUE packet timestamp
> +    "startdts",                    /// first seen non-AV_NOPTS_VALUE packet timestamp
> +    "duration", "d",               /// packet duration
> +    "pos",                         /// original position of packet in its source
> +    "size",                        /// packet size
> +    "key" ,                        /// packet keyframe flag
> +    "state",                       /// random-ish state
> +    NULL
> +};
> +
> +enum var_name {
> +    VAR_N,
> +    VAR_TB,
> +    VAR_PTS,
> +    VAR_DTS,
> +    VAR_NOPTS,
> +    VAR_STARTPTS,
> +    VAR_STARTDTS,
> +    VAR_DURATION, VAR_D,
> +    VAR_POS,
> +    VAR_SIZE,
> +    VAR_KEY,
> +    VAR_STATE,
> +    VAR_VARS_NB
> +};
>  
>  typedef struct NoiseContext {
>      const AVClass *class;
> -    int amount;
> -    int dropamount;
> +
> +    char *amount_str;
> +    char *drop_str;
> +
> +    AVExpr *amount_pexpr;
> +    AVExpr *drop_pexpr;
> +
> +    double var_values[VAR_VARS_NB];
> +
>      unsigned int state;
> +    unsigned int pkt_idx;
>  } NoiseContext;
>  
> -static int noise(AVBSFContext *ctx, AVPacket *pkt)
> +static int noise_init(AVBSFContext *ctx)
>  {
>      NoiseContext *s = ctx->priv_data;
> -    int amount = s->amount > 0 ? s->amount : (s->state % 10001 + 1);
> -    int i, ret;
> +    int ret;
>  
> -    if (amount <= 0)
> -        return AVERROR(EINVAL);
> +    if (!s->amount_str) {
> +        s->amount_str = !s->drop_str ? av_strdup("-1") : av_strdup("0");
> +        if (!s->amount_str)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    ret = av_expr_parse(&s->amount_pexpr, s->amount_str,
> +                        var_names, NULL, NULL, NULL, NULL, 0, ctx);
> +    if (ret < 0) {
> +        av_log(ctx, AV_LOG_ERROR, "Error in parsing expr for amount: %s\n", s->amount_str);
> +        return ret;
> +    }
> +
> +    if (s->drop_str) {
> +        ret = av_expr_parse(&s->drop_pexpr, s->drop_str,
> +                            var_names, NULL, NULL, NULL, NULL, 0, ctx);
> +        if (ret < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "Error in parsing expr for drop: %s\n", s->drop_str);
> +            return ret;
> +        }
> +    }
> +
> +    s->var_values[VAR_TB]       = ctx->time_base_out.den ? av_q2d(ctx->time_base_out) : 0;
> +    s->var_values[VAR_NOPTS]    = AV_NOPTS_VALUE;
> +    s->var_values[VAR_STARTPTS] = AV_NOPTS_VALUE;
> +    s->var_values[VAR_STARTDTS] = AV_NOPTS_VALUE;
> +    s->var_values[VAR_STATE] = 0;
> +
> +    return 0;
> +}
> +
> +static int noise(AVBSFContext *ctx, AVPacket *pkt)
> +{
> +    NoiseContext *s = ctx->priv_data;
> +    int i, ret, amount, drop;
> +    double res;
>  
>      ret = ff_bsf_get_packet_ref(ctx, pkt);
>      if (ret < 0)
>          return ret;
>  
> -    if (s->dropamount > 0 && s->state % s->dropamount == 0) {
> -        s->state++;
> +    s->var_values[VAR_N]           = s->pkt_idx++;
> +    s->var_values[VAR_PTS]         = pkt->pts;
> +    s->var_values[VAR_DTS]         = pkt->dts;
> +    s->var_values[VAR_DURATION]    =
> +    s->var_values[VAR_D]           = pkt->duration;
> +    s->var_values[VAR_SIZE]        = pkt->size;
> +    s->var_values[VAR_KEY]         = !!(pkt->flags & AV_PKT_FLAG_KEY);
> +    s->var_values[VAR_POS]         = pkt->pos;
> +
> +    if (s->var_values[VAR_STARTPTS] == AV_NOPTS_VALUE)
> +        s->var_values[VAR_STARTPTS] = pkt->pts;
> +
> +    if (s->var_values[VAR_STARTDTS] == AV_NOPTS_VALUE)
> +        s->var_values[VAR_STARTDTS] = pkt->dts;
> +
> +    res = av_expr_eval(s->amount_pexpr, s->var_values, NULL);
> +
> +    if (isnan(res))
> +        amount = 0;
> +    else if (res < 0)
> +        amount = (s->state % 10001 + 1);
> +    else
> +        amount = (int)res;
> +
> +    if (s->drop_str) {
> +        res = av_expr_eval(s->drop_pexpr, s->var_values, NULL);
> +
> +        if (isnan(res))
> +            drop = 0;
> +        else if (res < 0)
> +            drop = !(s->state % FFABS((int)res));

(int)res is UB if the integral part of res is outside the range of int;
(int)res can be 0 even when res < 0 (e.g. res = -0.5), in which case
s->state % FFABS((int)res) is a modulo by zero.

> +        else
> +            drop = !!res;
> +    }
> +
> +    av_log(ctx, AV_LOG_VERBOSE, "Stream #%d packet %d pts %"PRId64" - amount %d drop %d\n",
> +           pkt->stream_index, (unsigned int)s->var_values[VAR_N], pkt->pts, amount, drop);
> +
> +    if (s->drop_str && drop) {
> +        s->var_values[VAR_STATE] = ++s->state;
>          av_packet_unref(pkt);
>          return AVERROR(EAGAIN);
>      }
>  
> -    ret = av_packet_make_writable(pkt);
> -    if (ret < 0) {
> -        av_packet_unref(pkt);
> -        return ret;
> +    if (amount) {
> +        ret = av_packet_make_writable(pkt);
> +        if (ret < 0) {
> +            av_packet_unref(pkt);
> +            return ret;
> +        }
>      }
>  
>      for (i = 0; i < pkt->size; i++) {
>          s->state += pkt->data[i] + 1;
> -        if (s->state % amount == 0)
> +        if (amount && s->state % amount == 0)
>              pkt->data[i] = s->state;
>      }
>  
> +    s->var_values[VAR_STATE] = s->state;
> +
>      return 0;
>  }
>  
> +static void noise_close(AVBSFContext *bsf)
> +{
> +    NoiseContext *s = bsf->priv_data;
> +
> +    av_expr_free(s->amount_pexpr);
> +    av_expr_free(s->drop_pexpr);
> +    s->amount_pexpr = s->drop_pexpr = NULL;
> +}
> +
>  #define OFFSET(x) offsetof(NoiseContext, x)
>  #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_BSF_PARAM)
>  static const AVOption options[] = {
> -    { "amount", NULL, OFFSET(amount), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
> -    { "dropamount", NULL, OFFSET(dropamount), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
> +    { "amount",     NULL, OFFSET(amount_str),     AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
> +    { "drop",       NULL, OFFSET(drop_str),       AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
> +    { "dropamount", NULL, OFFSET(drop_str),       AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
>      { NULL },
>  };
>  
> @@ -86,5 +213,7 @@ const AVBitStreamFilter ff_noise_bsf = {
>      .name           = "noise",
>      .priv_data_size = sizeof(NoiseContext),
>      .priv_class     = &noise_class,
> +    .init           = noise_init,
> +    .close          = noise_close,
>      .filter         = noise,
>  };
> diff --git a/tests/fate/matroska.mak b/tests/fate/matroska.mak
> index ca7193a055..b57765280a 100644
> --- a/tests/fate/matroska.mak
> +++ b/tests/fate/matroska.mak
> @@ -88,7 +88,7 @@ FATE_MATROSKA_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL MXF_DEMUXER        \
>                                              MATROSKA_MUXER MATROSKA_DEMUXER  \
>                                              FRAMECRC_MUXER PIPE_PROTOCOL)    \
>                                 += fate-matroska-mastering-display-metadata
> -fate-matroska-mastering-display-metadata: CMD = transcode mxf $(TARGET_SAMPLES)/mxf/Meridian-Apple_ProResProxy-HDR10.mxf matroska "-map 0 -map 0:0 -c:v:0 copy -c:v:1 ffv1 -c:a:0 copy -bsf:a:0 noise=amount=3 -filter:a:1 aresample -c:a:1 pcm_s16be -bsf:a:1 noise=dropamount=4" "-map 0 -c copy" "" "-show_entries stream_side_data_list:stream=index,codec_name"
> +fate-matroska-mastering-display-metadata: CMD = transcode mxf $(TARGET_SAMPLES)/mxf/Meridian-Apple_ProResProxy-HDR10.mxf matroska "-map 0 -map 0:0 -c:v:0 copy -c:v:1 ffv1 -c:a:0 copy -bsf:a:0 noise=amount=3 -filter:a:1 aresample -c:a:1 pcm_s16be -bsf:a:1 noise=amount=-1:dropamount=-4" "-map 0 -c copy" "" "-show_entries stream_side_data_list:stream=index,codec_name"
>  
>  # This test tests remuxing annex B H.264 into Matroska. It also tests writing
>  # the correct interlaced flags and overriding the sample aspect ratio, leading
>