[FFmpeg-devel] One pass volume normalization (ebur128)
Paul B Mahol
onemda at gmail.com
Sat Jul 13 21:28:05 CEST 2013
On 7/13/13, Jan Ehrhardt <phpdev at ehrhardt.nl> wrote:
> I am once again proposing a patch for one-pass volume normalization based
> on ebur128, as I see this still did not make it into FFmpeg 2.0. My
> patch is heavily based on Clement Boesch's proposal in
> http://permalink.gmane.org/gmane.comp.video.ffmpeg.devel/159978
>
> We have been using this patch now for more than 4 months and 1800+
> videos of approximately 1 hour have been transcoded with it.
>
> Part of our FFmpeg command line reads as
> -filter_complex \
>
> "[0:v]setpts=PTS-STARTPTS[v0];[0:a]asetpts=PTS-STARTPTS,ebur128=metadata=1,volume=metadata=lavfi.r128.I,ebur128[a0]"
> \
> -map [v0] -map [a0]
>
> It uses the already present ebur128 metadata injection to adjust the
> volume on the fly. What would be the objection to moving this into the
> FFmpeg core, so I do not have to patch my FFmpeg every time I compile
> a new one? I applied the patch below to FFmpeg Release/v.2.0.
>
> Jan
>
>
> diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c
> index a2ac1e2..6372bb2 100644
> --- a/libavfilter/af_volume.c
> +++ b/libavfilter/af_volume.c
> @@ -51,18 +51,24 @@ static const AVOption volume_options[] = {
> { "fixed", "select 8-bit fixed-point", 0, AV_OPT_TYPE_CONST, {
> .i64 = PRECISION_FIXED }, INT_MIN, INT_MAX, A|F, "precision" },
> { "float", "select 32-bit floating-point", 0, AV_OPT_TYPE_CONST, {
> .i64 = PRECISION_FLOAT }, INT_MIN, INT_MAX, A|F, "precision" },
> { "double", "select 64-bit floating-point", 0, AV_OPT_TYPE_CONST, {
> .i64 = PRECISION_DOUBLE }, INT_MIN, INT_MAX, A|F, "precision" },
> + { "metadata", "set the metadata key for loudness normalization",
> OFFSET(metadata), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = A|F },
> { NULL },
> };
>
> AVFILTER_DEFINE_CLASS(volume);
>
> +static void set_fixed_volume(VolumeContext *vol, double volume)
> +{
> + vol->volume_i = (int)(volume * 256 + 0.5);
> + vol->volume = vol->volume_i / 256.0;
> +}
> +
> static av_cold int init(AVFilterContext *ctx)
> {
> VolumeContext *vol = ctx->priv;
>
> if (vol->precision == PRECISION_FIXED) {
> - vol->volume_i = (int)(vol->volume * 256 + 0.5);
> - vol->volume = vol->volume_i / 256.0;
> + set_fixed_volume(vol, vol->volume);
> av_log(ctx, AV_LOG_VERBOSE, "volume:(%d/256)(%f)(%1.2fdB)
> precision:fixed\n",
> vol->volume_i, vol->volume, 20.0*log(vol->volume)/M_LN10);
> } else {
> @@ -171,13 +177,13 @@ static av_cold void volume_init(VolumeContext *vol)
>
> switch (av_get_packed_sample_fmt(vol->sample_fmt)) {
> case AV_SAMPLE_FMT_U8:
> - if (vol->volume_i < 0x1000000)
> + if (vol->volume_i < 0x1000000 && !vol->metadata)
> vol->scale_samples = scale_samples_u8_small;
> else
> vol->scale_samples = scale_samples_u8;
> break;
> case AV_SAMPLE_FMT_S16:
> - if (vol->volume_i < 0x10000)
> + if (vol->volume_i < 0x10000 && !vol->metadata)
> vol->scale_samples = scale_samples_s16_small;
> else
> vol->scale_samples = scale_samples_s16;
> @@ -216,11 +222,30 @@ static int config_output(AVFilterLink *outlink)
>
> static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
> {
> - VolumeContext *vol = inlink->dst->priv;
> - AVFilterLink *outlink = inlink->dst->outputs[0];
> + AVFilterContext *ctx = inlink->dst;
> + VolumeContext *vol = ctx->priv;
> + AVFilterLink *outlink = ctx->outputs[0];
> int nb_samples = buf->nb_samples;
> AVFrame *out_buf;
>
> + if (vol->metadata) {
> + double loudness, new_volume, timestamp, mx;
> + AVDictionaryEntry *e;
> + mx = 20;
> + timestamp = (float)(1.0 * buf->pts / outlink->sample_rate);
> + mx = fmin(mx, timestamp);
> + e = av_dict_get(buf->metadata, vol->metadata, NULL, 0);
> + if (e) {
> + loudness = av_strtod(e->value, NULL);
> + if (loudness > -69) {
> + new_volume = fmax(-mx,fmin(mx,(-23 - loudness)));
> + av_log(NULL, AV_LOG_VERBOSE, "loudness=%f => %f =>
> volume=%f\n",
> + loudness, new_volume, pow(10, new_volume / 20));
> + set_fixed_volume(vol, pow(10, new_volume / 20));
> + }
> + }
> + }
> +
> if (vol->volume == 1.0 || vol->volume_i == 256)
> return ff_filter_frame(outlink, buf);
>
> @@ -269,6 +294,12 @@ static int filter_frame(AVFilterLink *inlink, AVFrame
> *buf)
> return ff_filter_frame(outlink, out_buf);
> }
>
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> + VolumeContext *vol = ctx->priv;
> + av_opt_free(vol);
> +}
> +
> static const AVFilterPad avfilter_af_volume_inputs[] = {
> {
> .name = "default",
> @@ -294,6 +325,7 @@ AVFilter avfilter_af_volume = {
> .priv_size = sizeof(VolumeContext),
> .priv_class = &volume_class,
> .init = init,
> + .uninit = uninit,
> .inputs = avfilter_af_volume_inputs,
> .outputs = avfilter_af_volume_outputs,
> .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
> diff --git a/libavfilter/af_volume.h b/libavfilter/af_volume.h
> index bd7932e..4deca9c 100644
> --- a/libavfilter/af_volume.h
> +++ b/libavfilter/af_volume.h
> @@ -48,6 +48,7 @@ typedef struct VolumeContext {
> void (*scale_samples)(uint8_t *dst, const uint8_t *src, int
> nb_samples,
> int volume);
> int samples_align;
> + char *metadata;
> } VolumeContext;
>
> void ff_volume_init_x86(VolumeContext *vol);
> diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
> index 88d37e8..f4ce6d9 100644
> --- a/libavfilter/f_ebur128.c
> +++ b/libavfilter/f_ebur128.c
> @@ -410,7 +410,7 @@ static av_cold int init(AVFilterContext *ctx)
>
> if (ebur128->loglevel != AV_LOG_INFO &&
> ebur128->loglevel != AV_LOG_VERBOSE) {
> - if (ebur128->do_video || ebur128->metadata)
> + if (ebur128->do_video)
> ebur128->loglevel = AV_LOG_VERBOSE;
> else
> ebur128->loglevel = AV_LOG_INFO;
> @@ -689,7 +689,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame
> *insamples)
> SET_META("LRA.high", ebur128->lra_high);
> }
>
> - av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT "\n",
> + av_log(ctx, ebur128->metadata || !ebur128->do_video ?
> AV_LOG_VERBOSE : ebur128->loglevel, "t: %-10s " LOG_FMT "\n",
> av_ts2timestr(pts, &outlink->time_base),
> loudness_400, loudness_3000,
> ebur128->integrated_loudness, ebur128->loudness_range);
> diff --git a/libavfilter/x86/af_volume_init.c
> b/libavfilter/x86/af_volume_init.c
> index 81d605f..fab5a03 100644
> --- a/libavfilter/x86/af_volume_init.c
> +++ b/libavfilter/x86/af_volume_init.c
> @@ -39,7 +39,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol)
> enum AVSampleFormat sample_fmt =
> av_get_packed_sample_fmt(vol->sample_fmt);
>
> if (sample_fmt == AV_SAMPLE_FMT_S16) {
> - if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768) {
> + if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768 &&
Why? This is suboptimal.
> !vol->metadata) {
> vol->scale_samples = ff_scale_samples_s16_sse2;
> vol->samples_align = 8;
> }
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel
mailing list