[FFmpeg-devel] [PATCH] lavfi/af_ebur128: update filter to use new ebur128 API

Tue Nov 29 06:59:04 EET 2016

On Mon, 28 Nov 2016, Kyle Swanson wrote:

> On Thu, Nov 17, 2016 at 11:04 AM, Kyle Swanson <k at ylo.ph> wrote:
>> Hi,
>>
>> Here's a couple of patches which update the ebur128 filter to use the
>> recently added ebur128 API. This updated filter allows fine-tuned
>> control over which EBU R128 parameters are measured, and provides
>> modest speed increases over the previous ebur128 filter. Also
>> noteworthy: this removes the video output option of the ebur128
>> filter. This is extraneous for an ebur128 measurement filter IMHO, but
>> if we wanted to keep similar functionality in FFmpeg, we'd be better
>> served by a new video source filter where custom meters could be
>> created via exported frame metadata.
>>
>> The first patch adds true peak functionality to the ebur128 API using
>> swresample (this was already discussed a little bit:
>> http://ffmpeg.org/pipermail/ffmpeg-devel/2016-November/202583.html)
>> The second patch is an update to the ebur128 filter.
>>
>> Kyle
>
> Does anyone have any problems with the first patch?

> From 6912ed3a03cd19f46e96f1f4b9eb3aa69b7ce4df Mon Sep 17 00:00:00 2001
> From: Kyle Swanson <k at ylo.ph>
> Date: Thu, 17 Nov 2016 10:32:45 -0600
> Subject: [PATCH 1/2] lavfi/ebur128: add ebur128_check_true_peak()
> 
> Signed-off-by: Kyle Swanson <k at ylo.ph>
> ---
>  libavfilter/ebur128.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  libavfilter/ebur128.h |  17 ++++++
>  2 files changed, 177 insertions(+), 2 deletions(-)
> 
> diff --git a/libavfilter/ebur128.c b/libavfilter/ebur128.c
> index a46692e..dc16647 100644
> --- a/libavfilter/ebur128.c
> +++ b/libavfilter/ebur128.c
> @@ -50,6 +50,9 @@
>  #include "libavutil/common.h"
>  #include "libavutil/mem.h"
>  #include "libavutil/thread.h"
> +#include "libavutil/channel_layout.h"
> +#include "libswresample/swresample.h"

Doesn't this include need an ifdef as well?

> +#include "libavutil/opt.h"
>
>  #define CHECK_ERROR(condition, errorcode, goto_point)                          \
>      if ((condition)) {                                                         \
> @@ -91,6 +94,16 @@ struct FFEBUR128StateInternal {
>      size_t short_term_frame_counter;
>      /** Maximum sample peak, one per channel */
>      double *sample_peak;
> +    /** Maximum true peak, one per channel */
> +    double* true_peak;
> +#if CONFIG_SWRESAMPLE
> +    SwrContext *resampler;
> +    size_t oversample_factor;
> +    float* resampler_buffer_input;
> +    size_t resampler_buffer_input_frames;
> +    float* resampler_buffer_output;
> +    size_t resampler_buffer_output_frames;
> +#endif
>      /** The maximum window duration in ms. */
>      unsigned long window;
>      /** Data pointer array for interleaved data */
> @@ -214,12 +227,78 @@ static inline void init_histogram(void)
>      }
>  }
> 
> +#if CONFIG_SWRESAMPLE
> +static int ebur128_init_resampler(FFEBUR128State* st) {
> +    int64_t channel_layout;
> +    int errcode;
> +
> +    if (st->samplerate < 96000) {
> +        st->d->oversample_factor = 4;
> +    } else if (st->samplerate < 192000) {
> +        st->d->oversample_factor = 2;
> +    } else {
> +        st->d->oversample_factor = 1;
> +        st->d->resampler_buffer_input = NULL;
> +        st->d->resampler_buffer_output = NULL;
> +        st->d->resampler = NULL;
> +    }
> +
> +    st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4;
> +    st->d->resampler_buffer_input = av_malloc(st->d->resampler_buffer_input_frames *
> +                                              st->channels *
> +                                              sizeof(float));

av_malloc_array

> +    CHECK_ERROR(!st->d->resampler_buffer_input, 0, exit)
> +
> +    st->d->resampler_buffer_output_frames =
> +    st->d->resampler_buffer_input_frames *
> +    st->d->oversample_factor;
> +    st->d->resampler_buffer_output = av_malloc(st->d->resampler_buffer_output_frames *
> +                                               st->channels *
> +                                               sizeof(float));

av_malloc_array

> +    CHECK_ERROR(!st->d->resampler_buffer_output, 0, free_input)
> +
> +    st->d->resampler = swr_alloc();
> +    CHECK_ERROR(!st->d->resampler, 0, free_output)
> +
> +    channel_layout = av_get_default_channel_layout(st->channels);
> +
> +    av_opt_set_int(st->d->resampler, "in_channel_layout", channel_layout, 0);
> +    av_opt_set_int(st->d->resampler, "in_sample_rate", st->samplerate, 0);
> +    av_opt_set_sample_fmt(st->d->resampler, "in_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
> +    av_opt_set_int(st->d->resampler, "out_channel_layout", channel_layout, 0);
> +    av_opt_set_int(st->d->resampler, "out_sample_rate", st->samplerate * st->d->oversample_factor, 0);
> +    av_opt_set_sample_fmt(st->d->resampler, "out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
> +
> +    swr_init(st->d->resampler);
> +    return 0;
> +
> +free_output:
> +    av_free(st->d->resampler_buffer_output);
> +    st->d->resampler_buffer_output = NULL;

av_freep

> +free_input:
> +    av_free(st->d->resampler_buffer_input);
> +    st->d->resampler_buffer_input = NULL;

av_freep

> +exit:
> +    return AVERROR(ENOMEM);
> +}
> +
> +static void ebur128_destroy_resampler(FFEBUR128State* st) {
> +    av_free(st->d->resampler_buffer_input);
> +    st->d->resampler_buffer_input = NULL;

av_freep

> +    av_free(st->d->resampler_buffer_output);
> +    st->d->resampler_buffer_output = NULL;

av_freep

> +    swr_free(&st->d->resampler);
> +    st->d->resampler = NULL;

swr_free already sets resampler to NULL.

> +}
> +#endif
> +
>  FFEBUR128State *ff_ebur128_init(unsigned int channels,
>                                  unsigned long samplerate,
>                                  unsigned long window, int mode)
>  {
>      int errcode;
>      FFEBUR128State *st;
> +    unsigned int i;
>
>      st = (FFEBUR128State *) av_malloc(sizeof(FFEBUR128State));
>      CHECK_ERROR(!st, 0, exit)
> @@ -233,6 +312,14 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
>      st->d->sample_peak =
>          (double *) av_mallocz_array(channels, sizeof(double));
>      CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map)
> +    st->d->true_peak =
> +        (double*) malloc(channels * sizeof(double));

av_mallocz_array

> +    CHECK_ERROR(!st->d->true_peak, 0, free_sample_peak)
> +
> +    for (i = 0; i < channels; ++i) {
> +        st->d->sample_peak[i] = 0.0;
> +        st->d->true_peak[i] = 0.0;
> +    }

Technically not portable (AFAIK), but we assume in a lot of places that a 
mallocz-ed double is 0.0, so this initialization is unneeded.

>
>      st->samplerate = samplerate;
>      st->d->samples_in_100ms = (st->samplerate + 5) / 10;
> @@ -242,7 +329,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
>      } else if ((mode & FF_EBUR128_MODE_M) == FF_EBUR128_MODE_M) {
>          st->d->window = FFMAX(window, 400);
>      } else {
> -        goto free_sample_peak;
> +        goto free_true_peak;
>      }
>      st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
>      if (st->d->audio_data_frames % st->d->samples_in_100ms) {
> @@ -254,7 +341,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
>      st->d->audio_data =
>          (double *) av_mallocz_array(st->d->audio_data_frames,
>                                      st->channels * sizeof(double));
> -    CHECK_ERROR(!st->d->audio_data, 0, free_sample_peak)
> +    CHECK_ERROR(!st->d->audio_data, 0, free_true_peak)
>
>      ebur128_init_filter(st);
> 
> @@ -267,6 +354,11 @@ FFEBUR128State *ff_ebur128_init(unsigned int channels,
>                  free_block_energy_histogram)
>      st->d->short_term_frame_counter = 0;
> 
> +#if CONFIG_SWRESAMPLE
> +    unsigned int result = ebur128_init_resampler(st);

Why unsigned?

> +    CHECK_ERROR(result, 0, free_short_term_block_energy_histogram)
> +#endif
> +
>      /* the first block needs 400ms of audio data */
>      st->d->needed_frames = st->d->samples_in_100ms * 4;
>      /* start at the beginning of the buffer */
> @@ -287,6 +379,8 @@ free_block_energy_histogram:
>      av_free(st->d->block_energy_histogram);
>  free_audio_data:
>      av_free(st->d->audio_data);
> +free_true_peak:
> +    av_free(st->d->true_peak);
>  free_sample_peak:
>      av_free(st->d->sample_peak);
>  free_channel_map:
> @@ -306,12 +400,53 @@ void ff_ebur128_destroy(FFEBUR128State ** st)
>      av_free((*st)->d->audio_data);
>      av_free((*st)->d->channel_map);
>      av_free((*st)->d->sample_peak);
> +    av_free((*st)->d->true_peak);
>      av_free((*st)->d->data_ptrs);
> +#if CONFIG_SWRESAMPLE
> +  ebur128_destroy_resampler(*st);
> +#endif
>      av_free((*st)->d);
>      av_free(*st);
>      *st = NULL;
>  }
> 
> +static int ebur128_use_swresample(FFEBUR128State* st) {
> +#if CONFIG_SWRESAMPLE
> +    return ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) == FF_EBUR128_MODE_TRUE_PEAK);
> +#else
> +    (void) st;
> +    return 0;
> +#endif
> +}
> +
> +static void ebur128_check_true_peak(FFEBUR128State* st, size_t frames) {
> +#if CONFIG_SWRESAMPLE
> +    size_t c, i;
> +
> +    const int in_len  = frames;
> +    const int out_len = st->d->resampler_buffer_output_frames;
> +    swr_convert(st->d->resampler, (uint8_t **)&st->d->resampler_buffer_output, out_len,
> +                (const uint8_t **)&st->d->resampler_buffer_input, in_len);
> +
> +    for (c = 0; c < st->channels; ++c) {
> +        for (i = 0; i < out_len; ++i) {
> +            if (st->d->resampler_buffer_output[i * st->channels + c] >
> +                                                               st->d->true_peak[c]) {
> +              st->d->true_peak[c] =
> +                  st->d->resampler_buffer_output[i * st->channels + c];
> +            } else if (-st->d->resampler_buffer_output[i * st->channels + c] >
> +                                                               st->d->true_peak[c]) {
> +              st->d->true_peak[c] =
> +                 -st->d->resampler_buffer_output[i * st->channels + c];
> +            }
> +        }
> +    }
> +#else
> +    (void) st; (void) frames;
> +#endif
> +}
> +
> +
>  #define EBUR128_FILTER(type, scaling_factor)                                       \
>  static void ebur128_filter_##type(FFEBUR128State* st, const type** srcs,           \
>                                    size_t src_index, size_t frames,                 \
> @@ -334,6 +469,15 @@ static void ebur128_filter_##type(FFEBUR128State* st, const type** srcs,
>              if (max > st->d->sample_peak[c]) st->d->sample_peak[c] = max;          \
>          }                                                                          \
>      }                                                                              \
> +    if (ebur128_use_swresample(st)) {                                              \
> +        for (c = 0; c < st->channels; ++c) {                                       \
> +            for (i = 0; i < frames; ++i) {                                         \
> +                st->d->resampler_buffer_input[i * st->channels + c] =              \
> +                    (float) (srcs[c][src_index + i * stride] / scaling_factor);    \
> +            }                                                                      \
> +        }                                                                          \
> +        ebur128_check_true_peak(st, frames);                                       \
> +    }                                                                              \
>      for (c = 0; c < st->channels; ++c) {                                           \
>          int ci = st->d->channel_map[c] - 1;                                        \
>          if (ci < 0) continue;                                                      \
> @@ -781,3 +925,17 @@ int ff_ebur128_sample_peak(FFEBUR128State * st,
>      *out = st->d->sample_peak[channel_number];
>      return 0;

Hmm, okay, I have a bit of a problem with this performance-wise. The way I
see it, first we convert everything to float, then we resample, then we
find the maximum in an interleaved output. I'd say that performance-wise it
would be a lot better if we could resample the input data directly to
planar float and then measure the maximum there, so there are no
intermediate conversions.

This can be a second step, if you are not interested in this now.

>  }
> +
> +int ff_ebur128_true_peak(FFEBUR128State * st,
> +                         unsigned int channel_number,
> +                         double* out) {
> +  if ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) != FF_EBUR128_MODE_TRUE_PEAK) {
> +      return AVERROR(EINVAL);
> +  } else if (channel_number >= st->channels) {
> +      return AVERROR(EINVAL);
> +  }
> +  *out = st->d->true_peak[channel_number] > st->d->sample_peak[channel_number]
> +       ? st->d->true_peak[channel_number]
> +       : st->d->sample_peak[channel_number];
> +  return 0;
> +}
> diff --git a/libavfilter/ebur128.h b/libavfilter/ebur128.h
> index b94cd24..ca6dd62 100644
> --- a/libavfilter/ebur128.h
> +++ b/libavfilter/ebur128.h
> @@ -91,6 +91,9 @@ enum mode {
>      FF_EBUR128_MODE_LRA = (1 << 3) | FF_EBUR128_MODE_S,
>    /** can call ff_ebur128_sample_peak */
>      FF_EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | FF_EBUR128_MODE_M,
> +  /** can call ff_ebur128_true_peak */
> +    FF_EBUR128_MODE_TRUE_PEAK   = (1 << 5) | FF_EBUR128_MODE_M
> +                                           | FF_EBUR128_MODE_SAMPLE_PEAK

I'd rather not implicitly set MODE_M, because I don't want to force
loudness measurement on a user who only wants true peak.

>  };
>
>  /** forward declaration of FFEBUR128StateInternal */
> @@ -283,6 +286,20 @@ int ff_ebur128_loudness_range_multiple(FFEBUR128State ** sts,
>  int ff_ebur128_sample_peak(FFEBUR128State * st,
>                             unsigned int channel_number, double *out);
> 
> +/** \brief Get maximum true peak from all frames that have been processed.
> + *
> + *  @param st library state
> + *  @param channel_number channel to analyse
> + *  @param out maximum true peak in float format (1.0 is 0 dBTP)
> + *  @return
> + *    - 0 on success.
> + *    - AVERROR(EINVAL) if mode "FF_EBUR128_MODE_TRUE_PEAK" has not
> + *      been set.
> + *    - AVERROR(EINVAL) if invalid channel index.
> + */
> +int ff_ebur128_true_peak(FFEBUR128State* st,
> +                      unsigned int channel_number, double* out);
> +
>  /** \brief Get relative threshold in LUFS.
>   *
>   *  @param st library state
> -- 
> 2.10.1
>

Regards,
Marton