[FFmpeg-devel] [PATCH 2/2] lavfi/volume: support volume normalization through metadata.

Fri Mar 1 18:07:26 CET 2013

---
 doc/filters.texi                 | 27 +++++++++++++++++++++++++++
 libavfilter/af_volume.c          | 33 ++++++++++++++++++++++++++-------
 libavfilter/af_volume.h          |  1 +
 libavfilter/x86/af_volume_init.c |  2 +-
 4 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 0d7856a..aa77a47 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1224,6 +1224,7 @@ out
 Convert the audio sample format, sample rate and channel layout. This filter is
 not meant to be used directly.
 
+@anchor{volume}
 @section volume
 
 Adjust the input audio volume.
@@ -1263,6 +1264,11 @@ precision of the volume scaling.
 @item double
 64-bit floating-point; limits input sample format to DBL.
 @end table
+
+@item metadata
+Set the frame metadata key to read volume adjustment value from. The main
+purpose of this option is to be used along with filters injecting volume
+metadata information such as @ref{ebur128}. See examples for more details.
 @end table
 
 @subsection Examples
@@ -1287,6 +1293,24 @@ Increase input audio power by 6 decibels using fixed-point precision:
 @example
 volume=volume=6dB:precision=fixed
 @end example
+
+@item
+Normalize in real-time an audio stream with @command{ffmpeg} and the help of
+the @ref{ebur128} filter:
+@example
+ffmpeg -i input.mp3 -af ebur128=volnorm=I,volume=metadata=lavfi.r128.volume output.wav
+@end example
+
+@item
+Normalize the audio using @ref{ebur128} and observe its effect using
+@command{ffplay}:
+@example
+ffplay -f lavfi -i '
+                    amovie=input.mp3,ebur128=video=1:volnorm=I         [r128-0][a];
+  [a]               volume=metadata=lavfi.r128.volume,ebur128=video=1  [r128-1][out1];
+  [r128-0]          pad=iw*2                                           [padded];
+  [padded][r128-1]  overlay=w'
+@end example
 @end itemize
 
 @section volumedetect
@@ -6242,6 +6266,7 @@ setpts='(RTCTIME - RTCSTART) / (TB * 1000000)'
 @end example
 @end itemize
 
+@anchor{ebur128}
 @section ebur128
 
 EBU R128 scanner filter. This filter takes an audio stream as input and outputs
@@ -6294,6 +6319,8 @@ output frames, each of them containing a volume adjustment metadata
 @var{lavfi.r128.volume}. Note: all the frames might not contain that a
 metadata.
 
+The main purpose of this option is to be used along with the @ref{volume} audio
+filter (refer to the filter documentation and examples for details).
 @end table
 
 Example of real-time graph using @command{ffplay}, with a EBU scale meter +18:
diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c
index 5ffa1fe..717497c 100644
--- a/libavfilter/af_volume.c
+++ b/libavfilter/af_volume.c
@@ -51,11 +51,18 @@ static const AVOption volume_options[] = {
         { "fixed",  "select 8-bit fixed-point",     0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FIXED  }, INT_MIN, INT_MAX, A|F, "precision" },
         { "float",  "select 32-bit floating-point", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_FLOAT  }, INT_MIN, INT_MAX, A|F, "precision" },
         { "double", "select 64-bit floating-point", 0, AV_OPT_TYPE_CONST, { .i64 = PRECISION_DOUBLE }, INT_MIN, INT_MAX, A|F, "precision" },
+    { "metadata", "set the metadata key for volume adjustment", OFFSET(metadata), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = A|F },
     { NULL },
 };
 
 AVFILTER_DEFINE_CLASS(volume);
 
+static void set_fixed_volume(VolumeContext *vol, double volume)
+{
+    vol->volume_i = (int)(volume * 256 + 0.5);
+    vol->volume   = vol->volume_i / 256.0;
+}
+
 static av_cold int init(AVFilterContext *ctx, const char *args)
 {
     VolumeContext *vol = ctx->priv;
@@ -69,8 +76,7 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
         return ret;
 
     if (vol->precision == PRECISION_FIXED) {
-        vol->volume_i = (int)(vol->volume * 256 + 0.5);
-        vol->volume   = vol->volume_i / 256.0;
+        set_fixed_volume(vol, vol->volume);
         av_log(ctx, AV_LOG_VERBOSE, "volume:(%d/256)(%f)(%1.2fdB) precision:fixed\n",
                vol->volume_i, vol->volume, 20.0*log(vol->volume)/M_LN10);
     } else {
@@ -79,7 +85,6 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
                precision_str[vol->precision]);
     }
 
-    av_opt_free(vol);
     return ret;
 }
 
@@ -183,13 +188,13 @@ static void volume_init(VolumeContext *vol)
 
     switch (av_get_packed_sample_fmt(vol->sample_fmt)) {
     case AV_SAMPLE_FMT_U8:
-        if (vol->volume_i < 0x1000000)
+        if (vol->volume_i < 0x1000000 && !vol->metadata)
             vol->scale_samples = scale_samples_u8_small;
         else
             vol->scale_samples = scale_samples_u8;
         break;
     case AV_SAMPLE_FMT_S16:
-        if (vol->volume_i < 0x10000)
+        if (vol->volume_i < 0x10000 && !vol->metadata)
             vol->scale_samples = scale_samples_s16_small;
         else
             vol->scale_samples = scale_samples_s16;
@@ -228,11 +233,18 @@ static int config_output(AVFilterLink *outlink)
 
 static int filter_frame(AVFilterLink *inlink, AVFilterBufferRef *buf)
 {
-    VolumeContext *vol    = inlink->dst->priv;
-    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFilterContext *ctx  = inlink->dst;
+    VolumeContext *vol    = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     int nb_samples        = buf->audio->nb_samples;
     AVFilterBufferRef *out_buf;
 
+    if (vol->metadata) {
+        AVDictionaryEntry *e = av_dict_get(buf->metadata, vol->metadata, NULL, 0);
+        if (e)
+            set_fixed_volume(vol, av_strtod(e->value, NULL));
+    }
+
     if (vol->volume == 1.0 || vol->volume_i == 256)
         return ff_filter_frame(outlink, buf);
 
@@ -281,6 +293,12 @@ static int filter_frame(AVFilterLink *inlink, AVFilterBufferRef *buf)
     return ff_filter_frame(outlink, out_buf);
 }
 
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VolumeContext *vol = ctx->priv;
+    av_opt_free(vol);
+}
+
 static const AVFilterPad avfilter_af_volume_inputs[] = {
     {
         .name           = "default",
@@ -305,6 +323,7 @@ AVFilter avfilter_af_volume = {
     .query_formats  = query_formats,
     .priv_size      = sizeof(VolumeContext),
     .init           = init,
+    .uninit         = uninit,
     .inputs         = avfilter_af_volume_inputs,
     .outputs        = avfilter_af_volume_outputs,
     .priv_class     = &volume_class,
diff --git a/libavfilter/af_volume.h b/libavfilter/af_volume.h
index bd7932e..4deca9c 100644
--- a/libavfilter/af_volume.h
+++ b/libavfilter/af_volume.h
@@ -48,6 +48,7 @@ typedef struct VolumeContext {
     void (*scale_samples)(uint8_t *dst, const uint8_t *src, int nb_samples,
                           int volume);
     int samples_align;
+    char *metadata;
 } VolumeContext;
 
 void ff_volume_init_x86(VolumeContext *vol);
diff --git a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c
index beee8ca..c018ce5 100644
--- a/libavfilter/x86/af_volume_init.c
+++ b/libavfilter/x86/af_volume_init.c
@@ -38,7 +38,7 @@ void ff_volume_init_x86(VolumeContext *vol)
     enum AVSampleFormat sample_fmt = av_get_packed_sample_fmt(vol->sample_fmt);
 
     if (sample_fmt == AV_SAMPLE_FMT_S16) {
-        if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768) {
+        if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768 && !vol->metadata) {
             vol->scale_samples = ff_scale_samples_s16_sse2;
             vol->samples_align = 8;
         }
-- 
1.8.1.4