[FFmpeg-devel] [PATCH 2/3] lavfi/ebur128: add metadata injection. - volnorm.patch (1/1)

Thu May 2 08:17:07 CEST 2013

Carl Eugen Hoyos in gmane.comp.video.ffmpeg.devel (Wed,
1 May 2013 23:00:19 +0000 (UTC)):
>Jan Ehrhardt <phpdev <at> ehrhardt.nl> writes:
>
>> begin 644 volnorm.patch
>> M9&EF9B`M=2`M<B!A+VQI8F%V9FEL=&5R+V%F7W9O;'5M92YC(&(O;&EB879F
>
>If you cannot use git send-email, please use git format-patch 
>to produce a patch file and attach that to an email, if 
>possible as "plain text".

I did not even use git. Below is the patch as plain text.
Most of the changes come from Clément's earlier proposals; only a
few are mine.

Jan


diff -u -r a/libavfilter/af_volume.c b/libavfilter/af_volume.c
+++ b/libavfilter/af_volume.c	2013-04-30 02:12:47.000000000 +0200

--- a/libavfilter/af_volume.c	2013-05-01 06:12:17.453187577 +0200
@@ -51,18 +51,24 @@
         { "fixed",  "select 8-bit fixed-point",     0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FIXED  }, INT_MIN, INT_MAX, A|F, "precision" },
         { "float",  "select 32-bit floating-point", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FLOAT  }, INT_MIN, INT_MAX, A|F, "precision" },
         { "double", "select 64-bit floating-point", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_DOUBLE }, INT_MIN, INT_MAX, A|F, "precision" },
+    { "metadata", "set the metadata key for loudness normalization", OFFSET(metadata), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = A|F },
     { NULL },
 };
 
 AVFILTER_DEFINE_CLASS(volume);
 
+static void set_fixed_volume(VolumeContext *vol, double volume)
+{
+    vol->volume_i = (int)(volume * 256 + 0.5);
+    vol->volume   = vol->volume_i / 256.0;
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     VolumeContext *vol = ctx->priv;
 
     if (vol->precision == PRECISION_FIXED) {
-        vol->volume_i = (int)(vol->volume * 256 + 0.5);
-        vol->volume   = vol->volume_i / 256.0;
+        set_fixed_volume(vol, vol->volume);
         av_log(ctx, AV_LOG_VERBOSE, "volume:(%d/256)(%f)(%1.2fdB) precision:fixed\n",
                vol->volume_i, vol->volume, 20.0*log(vol->volume)/M_LN10);
     } else {
@@ -171,13 +177,13 @@
 
     switch (av_get_packed_sample_fmt(vol->sample_fmt)) {
     case AV_SAMPLE_FMT_U8:
-        if (vol->volume_i < 0x1000000)
+        if (vol->volume_i < 0x1000000 && !vol->metadata)
             vol->scale_samples = scale_samples_u8_small;
         else
             vol->scale_samples = scale_samples_u8;
         break;
     case AV_SAMPLE_FMT_S16:
-        if (vol->volume_i < 0x10000)
+        if (vol->volume_i < 0x10000 && !vol->metadata)
             vol->scale_samples = scale_samples_s16_small;
         else
             vol->scale_samples = scale_samples_s16;
@@ -216,11 +222,33 @@
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
 {
-    VolumeContext *vol    = inlink->dst->priv;
-    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFilterContext *ctx  = inlink->dst;
+    VolumeContext *vol    = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     int nb_samples        = buf->nb_samples;
     AVFrame *out_buf;
 
+    if (vol->metadata) {
+        double loudness, new_volume, pts, timestamp, mx;
+        AVDictionaryEntry *t, *e;
+        t = av_dict_get(buf->metadata, "lavfi.r128.pts", NULL, 0);
+        mx = 20; 
+        if (t) {
+            pts = av_strtod(t->value, NULL);
+            timestamp = pts / 48000; /* assume 48kHz */
+            mx = fmin(mx, timestamp);
+            av_log(NULL, AV_LOG_VERBOSE, "timestamp=%f, mx=%f\n",timestamp,mx);
+        }
+        e = av_dict_get(buf->metadata, vol->metadata, NULL, 0);
+        if (e) {
+            loudness = av_strtod(e->value, NULL);
+            new_volume = fmax(-mx,fmin(mx,(-23 - loudness)));
+            av_log(NULL, AV_LOG_VERBOSE, "loudness=%f => %f => volume=%f\n",
+                loudness, new_volume, pow(10, new_volume / 20));
+            set_fixed_volume(vol, pow(10, new_volume / 20));
+        }
+    }
+
     if (vol->volume == 1.0 || vol->volume_i == 256)
         return ff_filter_frame(outlink, buf);
 
@@ -269,6 +297,12 @@
     return ff_filter_frame(outlink, out_buf);
 }
 
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VolumeContext *vol = ctx->priv;
+    av_opt_free(vol);
+}
+
 static const AVFilterPad avfilter_af_volume_inputs[] = {
     {
         .name           = "default",
@@ -294,6 +328,7 @@
     .priv_size      = sizeof(VolumeContext),
     .priv_class     = &volume_class,
     .init           = init,
+    .uninit         = uninit,
     .inputs         = avfilter_af_volume_inputs,
     .outputs        = avfilter_af_volume_outputs,
     .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE,
diff -u -r a/libavfilter/af_volume.h b/libavfilter/af_volume.h
+++ b/libavfilter/af_volume.h	2013-04-30 02:12:47.000000000 +0200
--- a/libavfilter/af_volume.h	2013-04-30 18:15:32.971376902 +0200
@@ -48,6 +48,7 @@
     void (*scale_samples)(uint8_t *dst, const uint8_t *src, int nb_samples,
                           int volume);
     int samples_align;
+    char *metadata;
 } VolumeContext;
 
 void ff_volume_init_x86(VolumeContext *vol);
diff -u -r a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c	2013-04-30 02:12:47.000000000 +0200
--- a/libavfilter/f_ebur128.c	2013-05-01 05:57:35.000291781 +0200
@@ -1,3 +1,4 @@
+
 /*
  * Copyright (c) 2012 Clément Bœsch
  *
@@ -410,7 +411,7 @@
 
     if (ebur128->loglevel != AV_LOG_INFO &&
         ebur128->loglevel != AV_LOG_VERBOSE) {
-        if (ebur128->do_video || ebur128->metadata)
+        if (ebur128->do_video)
             ebur128->loglevel = AV_LOG_VERBOSE;
         else
             ebur128->loglevel = AV_LOG_INFO;
@@ -687,9 +688,10 @@
                 SET_META("LRA",      ebur128->loudness_range);
                 SET_META("LRA.low",  ebur128->lra_low);
                 SET_META("LRA.high", ebur128->lra_high);
+                SET_META("pts",      (double)pts);
             }
 
-            av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT "\n",
+            av_log(ctx, AV_LOG_VERBOSE, "t: %-10s " LOG_FMT "\n",
                    av_ts2timestr(pts, &outlink->time_base),
                    loudness_400, loudness_3000,
                    ebur128->integrated_loudness, ebur128->loudness_range);
diff -u -r a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c
+++ b/libavfilter/x86/af_volume_init.c	2013-04-30 02:12:47.000000000 +0200
--- a/libavfilter/x86/af_volume_init.c	2013-04-30 18:15:42.987250663 +0200
@@ -38,7 +38,7 @@
     enum AVSampleFormat sample_fmt = av_get_packed_sample_fmt(vol->sample_fmt);
 
     if (sample_fmt == AV_SAMPLE_FMT_S16) {
-        if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768) {
+        if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768 && !vol->metadata) {
             vol->scale_samples = ff_scale_samples_s16_sse2;
             vol->samples_align = 8;
         }