[FFmpeg-cvslog] avfilter: add loudnorm
Kyle Swanson
git at videolan.org
Wed May 18 22:18:43 CEST 2016
ffmpeg | branch: master | Kyle Swanson <k at ylo.ph> | Wed May 11 13:30:14 2016 -0500| [c0c378009b4ba5dea2ac1f93c972a6c84b2dff0d] | committer: Paul B Mahol
avfilter: add loudnorm
Signed-off-by: Kyle Swanson <k at ylo.ph>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c0c378009b4ba5dea2ac1f93c972a6c84b2dff0d
---
Changelog | 1 +
MAINTAINERS | 1 +
configure | 5 +
doc/filters.texi | 55 +++
libavfilter/Makefile | 1 +
libavfilter/af_loudnorm.c | 907 +++++++++++++++++++++++++++++++++++++++++++++
libavfilter/allfilters.c | 1 +
libavfilter/version.h | 2 +-
8 files changed, 972 insertions(+), 1 deletion(-)
diff --git a/Changelog b/Changelog
index 402594d..003b69c 100644
--- a/Changelog
+++ b/Changelog
@@ -35,6 +35,7 @@ version <next>:
- Generic OpenMAX IL encoder with support for Raspberry Pi
- IFF ANIM demuxer & decoder
- Direct Stream Transfer (DST) decoder
+- loudnorm filter
version 3.0:
- Common Encryption (CENC) MP4 encoding and decoding support
diff --git a/MAINTAINERS b/MAINTAINERS
index 14bf377..52c30ed 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -358,6 +358,7 @@ Filters:
af_compand.c Paul B Mahol
af_firequalizer.c Muhammad Faiz
af_ladspa.c Paul B Mahol
+ af_loudnorm.c Kyle Swanson
af_pan.c Nicolas George
af_sidechaincompress.c Paul B Mahol
af_silenceremove.c Paul B Mahol
diff --git a/configure b/configure
index 2dede36..cc2c9e7 100755
--- a/configure
+++ b/configure
@@ -226,6 +226,8 @@ External library support:
--enable-libcdio enable audio CD grabbing with libcdio [no]
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
and libraw1394 [no]
+ --enable-libebur128 enable libebur128 for EBU R128 measurement,
+ needed for loudnorm filter [no]
--enable-libfaac enable AAC encoding via libfaac [no]
--enable-libfdk-aac enable AAC de/encoding via libfdk-aac [no]
--enable-libflite enable flite (voice synthesis) support via libflite [no]
@@ -1472,6 +1474,7 @@ EXTERNAL_LIBRARY_LIST="
libcdio
libcelt
libdc1394
+ libebur128
libfaac
libfdk_aac
libflite
@@ -2987,6 +2990,7 @@ hqdn3d_filter_deps="gpl"
interlace_filter_deps="gpl"
kerndeint_filter_deps="gpl"
ladspa_filter_deps="ladspa dlopen"
+loudnorm_filter_deps="libebur128"
mcdeint_filter_deps="avcodec gpl"
movie_filter_deps="avcodec avformat"
mpdecimate_filter_deps="gpl"
@@ -5593,6 +5597,7 @@ enabled libcelt && require libcelt celt/celt.h celt_decode -lcelt0 &&
{ check_lib celt/celt.h celt_decoder_create_custom -lcelt0 ||
die "ERROR: libcelt must be installed and version must be >= 0.11.0."; }
enabled libcaca && require_pkg_config caca caca.h caca_create_canvas
+enabled libebur128 && require ebur128 ebur128.h ebur128_relative_threshold -lebur128
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
enabled libfdk_aac && { use_pkg_config fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
{ require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
diff --git a/doc/filters.texi b/doc/filters.texi
index a7c480e..27584e9 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -2711,6 +2711,61 @@ Modify the @var{N}-th control value.
If the specified value is not valid, it is ignored and prior one is kept.
@end table
+@section loudnorm
+
+EBU R128 loudness normalization. Includes both dynamic and linear normalization modes.
+Support for both single pass (livestreams, files) and double pass (files) modes.
+This algorithm can target IL, LRA, and maximum true peak.
+
+To enable compilation of this filter you need to configure FFmpeg with
+@code{--enable-libebur128}.
+
+The filter accepts the following options:
+
+@table @option
+@item I, i
+Set integrated loudness target.
+Range is -70.0 - -5.0. Default value is -24.0.
+
+@item LRA, lra
+Set loudness range target.
+Range is 1.0 - 20.0. Default value is 7.0.
+
+@item TP, tp
+Set maximum true peak.
+Range is -9.0 - +0.0. Default value is -2.0.
+
+@item measured_I, measured_i
+Measured IL of input file.
+Range is -99.0 - +0.0.
+
+@item measured_LRA, measured_lra
+Measured LRA of input file.
+Range is 0.0 - 99.0.
+
+@item measured_TP, measured_tp
+Measured true peak of input file.
+Range is -99.0 - +99.0.
+
+@item measured_thresh
+Measured threshold of input file.
+Range is -99.0 - +0.0.
+
+@item offset
+Set offset gain. Gain is applied before the true-peak limiter.
+Range is -99.0 - +99.0. Default is +0.0.
+
+@item linear
+Normalize linearly if possible.
+measured_I, measured_LRA, measured_TP, and measured_thresh must also
+be specified in order to use this mode.
+Options are true or false. Default is true.
+
+@item print_format
+Set print format for stats. Options are summary, json, or none.
+Default value is none.
+@end table
+
@section lowpass
Apply a low-pass filter with 3dB point frequency.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index aac2f57..65a831e 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -89,6 +89,7 @@ OBJS-$(CONFIG_FLANGER_FILTER) += af_flanger.o generate_wave_table
OBJS-$(CONFIG_HIGHPASS_FILTER) += af_biquads.o
OBJS-$(CONFIG_JOIN_FILTER) += af_join.o
OBJS-$(CONFIG_LADSPA_FILTER) += af_ladspa.o
+OBJS-$(CONFIG_LOUDNORM_FILTER) += af_loudnorm.o
OBJS-$(CONFIG_LOWPASS_FILTER) += af_biquads.o
OBJS-$(CONFIG_PAN_FILTER) += af_pan.o
OBJS-$(CONFIG_REPLAYGAIN_FILTER) += af_replaygain.o
diff --git a/libavfilter/af_loudnorm.c b/libavfilter/af_loudnorm.c
new file mode 100644
index 0000000..cb210d4
--- /dev/null
+++ b/libavfilter/af_loudnorm.c
@@ -0,0 +1,907 @@
+/*
+ * Copyright (c) 2016 Kyle Swanson <k at ylo.ph>.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* http://k.ylo.ph/2016/04/04/loudnorm.html */
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+#include <ebur128.h>
+
+/* Processing stage of the filter; filter_frame() switches on this value. */
+enum FrameType {
+ FIRST_FRAME, /* initial state set by config_input() */
+ INNER_FRAME, /* entered by filter_frame() once the first frame has been handled */
+ FINAL_FRAME, /* set by request_frame() on EOF to flush the buffered samples */
+ LINEAR_MODE, /* every sample is scaled by the single constant gain s->offset */
+ FRAME_NB
+};
+
+/* State of the gain envelope driven by true_peak_limiter(). */
+enum LimiterState {
+ OUT,
+ ATTACK,
+ SUSTAIN,
+ RELEASE,
+ STATE_NB
+};
+
+/* Values of the "print_format" option; uninit() switches on this to pick its log output. */
+enum PrintFormat {
+ NONE,
+ JSON,
+ SUMMARY,
+ PF_NB
+};
+
+/* Private state of one loudnorm filter instance. */
+typedef struct LoudNormContext {
+ const AVClass *class;
+ /* user options, written through loudnorm_options */
+ double target_i;
+ double target_lra;
+ double target_tp; /* converted from dB to a linear factor at the end of config_input() */
+ double measured_i;
+ double measured_lra;
+ double measured_tp;
+ double measured_thresh;
+ double offset; /* converted from dB to a linear factor at the end of config_input() */
+ int linear;
+ enum PrintFormat print_format;
+
+ /* ring buffer of interleaved input samples; buf_size is counted in doubles */
+ double *buf;
+ int buf_size;
+ int buf_index;
+ int prev_buf_index;
+
+ /* gain history (one entry per processed frame) and the kernel gaussian_filter() applies to it */
+ double delta[30];
+ double weights[21];
+ double prev_delta;
+ int index;
+
+ /* limiter state shared by true_peak_limiter() and detect_peak() */
+ double gain_reduction[2];
+ double *limiter_buf;
+ double *prev_smp; /* one entry per channel, allocated in config_input() */
+ int limiter_buf_index;
+ int limiter_buf_size;
+ enum LimiterState limiter_state;
+ int peak_index;
+ int env_index;
+ int env_cnt;
+ int attack_length;
+ int release_length;
+
+ /* frame bookkeeping */
+ int64_t pts; /* running sample count used as the pts of the next output frame */
+ enum FrameType frame_type;
+ int above_threshold;
+ int prev_nb_samples;
+ int channels;
+
+ /* libebur128 measurement states for the input and the output signal */
+ ebur128_state *r128_in;
+ ebur128_state *r128_out;
+} LoudNormContext;
+
+/* OFFSET() yields the byte offset of a LoudNormContext field; FLAGS is the flag set shared by every option. */
+#define OFFSET(x) offsetof(LoudNormContext, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+/*
+ * Option table of the filter. The target and measured_* loudness options are
+ * each listed twice, under an upper-case and a lower-case name that write the
+ * same LoudNormContext field. The trailing "none"/"json"/"summary" entries are
+ * the named constants accepted by "print_format".
+ */
+static const AVOption loudnorm_options[] = {
+ { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
+ { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
+ { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
+ { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
+ { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
+ { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
+ { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
+ { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
+ { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
+ { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
+ { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
+ { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
+ { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
+ { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
+ { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
+ { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
+ { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
+ { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
+ { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(loudnorm);
+
+/**
+ * Convert a duration in milliseconds to a sample count at the given rate.
+ *
+ * The product is rounded to the nearest integer; an odd result then has
+ * its own remainder added, so the returned count is always even.
+ */
+static inline int frame_size(int sample_rate, int frame_len_msec)
+{
+    const double seconds = frame_len_msec / 1000.0;
+    const int nb_samples = round((double)sample_rate * seconds);
+
+    if (nb_samples % 2)
+        return nb_samples + (nb_samples % 2);
+    return nb_samples;
+}
+
+/**
+ * Fill s->weights with a 21-tap Gaussian kernel (sigma = 3.5) centred on
+ * tap 10, then rescale the taps so that they sum to 1.
+ */
+static void init_gaussian_filter(LoudNormContext *s)
+{
+    const double sigma  = 3.5;
+    const int    center = 21 / 2;
+    const double norm   = 1.0 / (sigma * sqrt(2.0 * M_PI));
+    const double denom  = 2.0 * pow(sigma, 2.0);
+    double sum = 0.0;
+    double scale;
+    int tap;
+
+    for (tap = 0; tap < 21; tap++) {
+        const int dist = tap - center;
+
+        s->weights[tap] = norm * exp(-(pow(dist, 2.0) / denom));
+        sum += s->weights[tap];
+    }
+
+    /* normalise so that filtering a constant gain history returns that gain */
+    scale = 1.0 / sum;
+    for (tap = 0; tap < 21; tap++)
+        s->weights[tap] *= scale;
+}
+
+/**
+ * Weighted sum of 21 consecutive entries of the circular gain history
+ * s->delta (30 entries), using s->weights as the kernel.
+ *
+ * The window starts 10 entries before @p index, wrapping around the
+ * 30-entry history.
+ */
+static double gaussian_filter(LoudNormContext *s, int index)
+{
+    const int start = (index - 10 > 0) ? index - 10 : index + 20;
+    double acc = 0.;
+    int tap;
+
+    for (tap = 0; tap < 21; tap++) {
+        int pos = start + tap;
+
+        if (pos >= 30)
+            pos -= 30;
+        acc += s->delta[pos] * s->weights[tap];
+    }
+
+    return acc;
+}
+
+/*
+ * Search s->limiter_buf for the next peak above the ceiling s->target_tp.
+ *
+ * The scan starts at limiter_buf_index + (offset + 1920) sample frames
+ * (wrapped once around the ring) and covers at most nb_samples frames.
+ * A sample counts as a peak when its absolute value is not below the
+ * previous and the next sample of the same channel, exceeds the ceiling,
+ * is not at scan position 0, and is not exceeded by any of the samples
+ * 2..11 frames further on.
+ *
+ * On success *peak_delta is the frame distance from the scan start,
+ * s->peak_index the buffer index of that frame and *peak_value the largest
+ * absolute value over all channels of that frame. If no peak is found
+ * *peak_delta is -1 and *peak_value is left untouched.
+ */
+static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
+{
+ int n, c, i, index;
+ double ceiling;
+ double *buf;
+
+ *peak_delta = -1;
+ buf = s->limiter_buf;
+ ceiling = s->target_tp;
+
+ index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
+ if (index >= s->limiter_buf_size)
+ index -= s->limiter_buf_size;
+
+ /* on the very first frame seed the "previous sample" memory from the frame just before the scan start */
+ if (s->frame_type == FIRST_FRAME) {
+ for (c = 0; c < channels; c++)
+ s->prev_smp[c] = fabs(buf[index + c - channels]);
+ }
+
+ for (n = 0; n < nb_samples; n++) {
+ for (c = 0; c < channels; c++) {
+ double this, next, max_peak;
+
+ this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
+ next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
+
+ if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
+ int detected;
+
+ /* look further ahead: reject the candidate if a larger sample follows shortly */
+ detected = 1;
+ for (i = 2; i < 12; i++) {
+ next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
+ if (next > this) {
+ detected = 0;
+ break;
+ }
+ }
+
+ /* note: this continue also skips the prev_smp[c] update at the bottom of the channel loop */
+ if (!detected)
+ continue;
+
+ /* c is reused here; safe because the function returns right after this loop */
+ for (c = 0; c < channels; c++) {
+ if (c == 0 || fabs(buf[index + c]) > max_peak)
+ max_peak = fabs(buf[index + c]);
+
+ s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
+ }
+
+ *peak_delta = n;
+ s->peak_index = index;
+ *peak_value = max_peak;
+ return;
+ }
+
+ s->prev_smp[c] = this;
+ }
+
+ index += channels;
+ if (index >= s->limiter_buf_size)
+ index -= s->limiter_buf_size;
+ }
+}
+
+/*
+ * Run the limiter over s->limiter_buf and emit nb_samples sample frames.
+ *
+ * The gain envelope is applied to s->limiter_buf in place by a state machine
+ * (OUT -> ATTACK -> SUSTAIN -> RELEASE -> OUT) that keeps its state in
+ * LoudNormContext between calls. Afterwards nb_samples frames starting at
+ * the limiter_buf_index seen on entry are copied to out, with every sample
+ * clamped to +/- s->target_tp.
+ */
+static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
+{
+ int n, c, index, peak_delta, smp_cnt;
+ double ceiling, peak_value;
+ double *buf;
+
+ buf = s->limiter_buf;
+ ceiling = s->target_tp;
+ index = s->limiter_buf_index;
+ smp_cnt = 0;
+
+ /* first frame: if the first 1920 sample frames exceed the ceiling, scale them by a constant gain and start in SUSTAIN */
+ if (s->frame_type == FIRST_FRAME) {
+ double max;
+
+ max = 0.;
+ for (n = 0; n < 1920; n++) {
+ for (c = 0; c < channels; c++) {
+ max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
+ }
+ buf += channels;
+ }
+
+ if (max > ceiling) {
+ s->gain_reduction[1] = ceiling / max;
+ s->limiter_state = SUSTAIN;
+ buf = s->limiter_buf;
+
+ for (n = 0; n < 1920; n++) {
+ for (c = 0; c < channels; c++) {
+ double env;
+ env = s->gain_reduction[1];
+ buf[c] *= env;
+ }
+ buf += channels;
+ }
+ }
+
+ buf = s->limiter_buf;
+ }
+
+ /* smp_cnt counts the sample frames accounted for so far in this call */
+ do {
+
+ switch(s->limiter_state) {
+ /* OUT: no gain reduction active; look for the next peak */
+ case OUT:
+ detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
+ if (peak_delta != -1) {
+ s->env_cnt = 0;
+ smp_cnt += (peak_delta - s->attack_length);
+ s->gain_reduction[0] = 1.;
+ s->gain_reduction[1] = ceiling / peak_value;
+ s->limiter_state = ATTACK;
+
+ /* the envelope starts attack_length frames before the detected peak */
+ s->env_index = s->peak_index - (s->attack_length * channels);
+ if (s->env_index < 0)
+ s->env_index += s->limiter_buf_size;
+
+ /* NOTE(review): this wrap test uses '>' while the other wraps in this function use '>='; env_cnt was just set to 0, so the addition is a no-op here - confirm intent */
+ s->env_index += (s->env_cnt * channels);
+ if (s->env_index > s->limiter_buf_size)
+ s->env_index -= s->limiter_buf_size;
+
+ } else {
+ smp_cnt = nb_samples;
+ }
+ break;
+
+ /* ATTACK: ramp the gain linearly from gain_reduction[0] to gain_reduction[1] over attack_length frames */
+ case ATTACK:
+ for (; s->env_cnt < s->attack_length; s->env_cnt++) {
+ for (c = 0; c < channels; c++) {
+ double env;
+ env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
+ buf[s->env_index + c] *= env;
+ }
+
+ s->env_index += channels;
+ if (s->env_index >= s->limiter_buf_size)
+ s->env_index -= s->limiter_buf_size;
+
+ /* out of samples for this call: remember the position so the ramp resumes next time */
+ smp_cnt++;
+ if (smp_cnt >= nb_samples) {
+ s->env_cnt++;
+ break;
+ }
+ }
+
+ if (smp_cnt < nb_samples) {
+ s->env_cnt = 0;
+ s->attack_length = 1920;
+ s->limiter_state = SUSTAIN;
+ }
+ break;
+
+ /* SUSTAIN: hold gain_reduction[1] until no further peak is found, or attack again for a louder peak */
+ case SUSTAIN:
+ /* NOTE(review): unlike the OUT state this passes nb_samples, not nb_samples - smp_cnt - confirm intent */
+ detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
+ if (peak_delta == -1) {
+ s->limiter_state = RELEASE;
+ s->gain_reduction[0] = s->gain_reduction[1];
+ s->gain_reduction[1] = 1.;
+ s->env_cnt = 0;
+ break;
+ } else {
+ double gain_reduction;
+ gain_reduction = ceiling / peak_value;
+
+ /* the new peak needs more reduction than currently applied: ramp down to it */
+ if (gain_reduction < s->gain_reduction[1]) {
+ s->limiter_state = ATTACK;
+
+ s->attack_length = peak_delta;
+ if (s->attack_length <= 1)
+ s->attack_length = 2;
+
+ s->gain_reduction[0] = s->gain_reduction[1];
+ s->gain_reduction[1] = gain_reduction;
+ s->env_cnt = 0;
+ break;
+ }
+
+ /* otherwise keep applying the current gain up to the new peak */
+ for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
+ for (c = 0; c < channels; c++) {
+ double env;
+ env = s->gain_reduction[1];
+ buf[s->env_index + c] *= env;
+ }
+
+ s->env_index += channels;
+ if (s->env_index >= s->limiter_buf_size)
+ s->env_index -= s->limiter_buf_size;
+
+ smp_cnt++;
+ if (smp_cnt >= nb_samples) {
+ s->env_cnt++;
+ break;
+ }
+ }
+ }
+ break;
+
+ /* RELEASE: ramp the gain linearly from gain_reduction[0] to gain_reduction[1] over release_length frames */
+ case RELEASE:
+ for (; s->env_cnt < s->release_length; s->env_cnt++) {
+ for (c = 0; c < channels; c++) {
+ double env;
+ env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
+ buf[s->env_index + c] *= env;
+ }
+
+ s->env_index += channels;
+ if (s->env_index >= s->limiter_buf_size)
+ s->env_index -= s->limiter_buf_size;
+
+ smp_cnt++;
+ if (smp_cnt >= nb_samples) {
+ s->env_cnt++;
+ break;
+ }
+ }
+
+ if (smp_cnt < nb_samples) {
+ s->env_cnt = 0;
+ s->limiter_state = OUT;
+ }
+
+ break;
+ }
+
+ } while (smp_cnt < nb_samples);
+
+ /* copy the processed frames to the output, hard-clipping anything still above the ceiling */
+ for (n = 0; n < nb_samples; n++) {
+ for (c = 0; c < channels; c++) {
+ out[c] = buf[index + c];
+ if (fabs(out[c]) > ceiling) {
+ out[c] = ceiling * (out[c] < 0 ? -1 : 1);
+ }
+ }
+ out += channels;
+ index += channels;
+ if (index >= s->limiter_buf_size)
+ index -= s->limiter_buf_size;
+ }
+}
+
+/*
+ * Input pad callback: process one input frame and pass the result downstream.
+ *
+ * The input samples are always fed to the input measurement state r128_in,
+ * and the produced samples to r128_out. What happens in between depends on
+ * s->frame_type:
+ *   FIRST_FRAME - fill the sample buffer and the limiter buffer, derive the
+ *                 initial gain from the short-term loudness, emit one
+ *                 100 ms sub-frame and switch the input link to 100 ms frames;
+ *   INNER_FRAME - apply the Gaussian-smoothed gain, run the limiter and
+ *                 append a new entry to the gain history;
+ *   FINAL_FRAME - flush the samples handed over by request_frame();
+ *   LINEAR_MODE - multiply every sample by s->offset.
+ *
+ * Returns AVERROR(ENOMEM) if an output buffer cannot be allocated, otherwise
+ * the return value of ff_filter_frame().
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+ AVFilterContext *ctx = inlink->dst;
+ LoudNormContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+ AVFrame *out;
+ const double *src;
+ double *dst;
+ double *buf;
+ double *limiter_buf;
+ int i, n, c, subframe_length, src_index;
+ double gain, gain_next, env_global, env_shortterm,
+ global, shortterm, lra, relative_threshold;
+
+ /* work in place when the input frame is writable, otherwise allocate a separate output frame */
+ if (av_frame_is_writable(in)) {
+ out = in;
+ } else {
+ out = ff_get_audio_buffer(inlink, in->nb_samples);
+ if (!out) {
+ av_frame_free(&in);
+ return AVERROR(ENOMEM);
+ }
+ av_frame_copy_props(out, in);
+ }
+
+ out->pts = s->pts;
+ src = (const double *)in->data[0];
+ dst = (double *)out->data[0];
+ buf = s->buf;
+ limiter_buf = s->limiter_buf;
+
+ ebur128_add_frames_double(s->r128_in, src, in->nb_samples);
+
+ /* the first frame is shorter than 3000 ms: fall back to linear mode with a gain derived from this frame alone */
+ if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
+ double offset, offset_tp, true_peak;
+
+ ebur128_loudness_global(s->r128_in, &global);
+ for (c = 0; c < inlink->channels; c++) {
+ double tmp;
+ ebur128_sample_peak(s->r128_in, c, &tmp);
+ if (c == 0 || tmp > true_peak)
+ true_peak = tmp;
+ }
+
+ offset = s->target_i - global;
+ offset_tp = true_peak + offset;
+ s->offset = offset_tp < s->target_tp ? offset : s->target_tp - true_peak;
+ s->offset = pow(10., s->offset / 20.);
+ s->frame_type = LINEAR_MODE;
+ }
+
+ switch (s->frame_type) {
+ case FIRST_FRAME:
+ /* store the whole input frame in the sample buffer */
+ for (n = 0; n < in->nb_samples; n++) {
+ for (c = 0; c < inlink->channels; c++) {
+ buf[s->buf_index + c] = src[c];
+ }
+ src += inlink->channels;
+ s->buf_index += inlink->channels;
+ }
+
+ ebur128_loudness_shortterm(s->r128_in, &shortterm);
+
+ if (shortterm < s->measured_thresh) {
+ s->above_threshold = 0;
+ env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
+ } else {
+ s->above_threshold = 1;
+ env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
+ }
+
+ /* initialise the whole gain history with the same starting gain */
+ for (n = 0; n < 30; n++)
+ s->delta[n] = pow(10., env_shortterm / 20.);
+ s->prev_delta = s->delta[s->index];
+
+ s->buf_index =
+ s->limiter_buf_index = 0;
+
+ /* fill the limiter buffer from the start of the sample buffer, with the gain applied */
+ for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
+ for (c = 0; c < inlink->channels; c++) {
+ limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
+ }
+ s->limiter_buf_index += inlink->channels;
+ if (s->limiter_buf_index >= s->limiter_buf_size)
+ s->limiter_buf_index -= s->limiter_buf_size;
+
+ s->buf_index += inlink->channels;
+ }
+
+ subframe_length = frame_size(inlink->sample_rate, 100);
+ true_peak_limiter(s, dst, subframe_length, inlink->channels);
+ ebur128_add_frames_double(s->r128_out, dst, subframe_length);
+
+ /* only one sub-frame is emitted; from now on the input link is asked for sub-frame sized frames */
+ s->pts +=
+ out->nb_samples =
+ inlink->min_samples =
+ inlink->max_samples =
+ inlink->partial_buf_size = subframe_length;
+
+ s->frame_type = INNER_FRAME;
+ break;
+
+ case INNER_FRAME:
+ /* smoothed gain for this frame and the next; the loop below interpolates between them */
+ gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
+ gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
+
+ for (n = 0; n < in->nb_samples; n++) {
+ for (c = 0; c < inlink->channels; c++) {
+ buf[s->prev_buf_index + c] = src[c];
+ limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
+ }
+ src += inlink->channels;
+
+ s->limiter_buf_index += inlink->channels;
+ if (s->limiter_buf_index >= s->limiter_buf_size)
+ s->limiter_buf_index -= s->limiter_buf_size;
+
+ s->prev_buf_index += inlink->channels;
+ if (s->prev_buf_index >= s->buf_size)
+ s->prev_buf_index -= s->buf_size;
+
+ s->buf_index += inlink->channels;
+ if (s->buf_index >= s->buf_size)
+ s->buf_index -= s->buf_size;
+ }
+
+ /* advance the limiter position by the samples missing from a full 100 ms sub-frame (0 for a full frame) */
+ subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
+ s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
+
+ true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
+ ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
+
+ ebur128_loudness_range(s->r128_in, &lra);
+ ebur128_loudness_global(s->r128_in, &global);
+ ebur128_loudness_shortterm(s->r128_in, &shortterm);
+ ebur128_relative_threshold(s->r128_in, &relative_threshold);
+
+ /* still below threshold: raise the held gain while the input is above measured_thresh, until the output reaches the target */
+ if (s->above_threshold == 0) {
+ double shortterm_out;
+
+ if (shortterm > s->measured_thresh)
+ s->prev_delta *= 1.0058;
+
+ ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
+ if (shortterm_out >= s->target_i)
+ s->above_threshold = 1;
+ }
+
+ /* new gain history entry: hold the previous gain for quiet input, otherwise derive it from the measurements */
+ if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
+ s->delta[s->index] = s->prev_delta;
+ } else {
+ env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
+ env_shortterm = s->target_i - shortterm;
+ s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
+ }
+
+ s->prev_delta = s->delta[s->index];
+ s->index++;
+ if (s->index >= 30)
+ s->index -= 30;
+ s->prev_nb_samples = in->nb_samples;
+ s->pts += in->nb_samples;
+ break;
+
+ case FINAL_FRAME:
+ /* a single smoothed gain is used for the whole flush frame */
+ gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
+ s->limiter_buf_index = 0;
+ src_index = 0;
+
+ /* refill the limiter buffer from the start of the flush frame */
+ for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
+ for (c = 0; c < inlink->channels; c++) {
+ s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
+ }
+ src_index += inlink->channels;
+
+ s->limiter_buf_index += inlink->channels;
+ if (s->limiter_buf_index >= s->limiter_buf_size)
+ s->limiter_buf_index -= s->limiter_buf_size;
+ }
+
+ /* limit one sub-frame at a time, topping the limiter buffer up with the remaining input and then with zeros */
+ subframe_length = frame_size(inlink->sample_rate, 100);
+ for (i = 0; i < in->nb_samples / subframe_length; i++) {
+ true_peak_limiter(s, dst, subframe_length, inlink->channels);
+
+ for (n = 0; n < subframe_length; n++) {
+ for (c = 0; c < inlink->channels; c++) {
+ if (src_index < (in->nb_samples * inlink->channels)) {
+ limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
+ } else {
+ limiter_buf[s->limiter_buf_index + c] = 0.;
+ }
+ }
+
+ if (src_index < (in->nb_samples * inlink->channels))
+ src_index += inlink->channels;
+
+ s->limiter_buf_index += inlink->channels;
+ if (s->limiter_buf_index >= s->limiter_buf_size)
+ s->limiter_buf_index -= s->limiter_buf_size;
+ }
+
+ dst += (subframe_length * inlink->channels);
+ }
+
+ dst = (double *)out->data[0];
+ ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
+ break;
+
+ case LINEAR_MODE:
+ /* constant gain, no limiter */
+ for (n = 0; n < in->nb_samples; n++) {
+ for (c = 0; c < inlink->channels; c++) {
+ dst[c] = src[c] * s->offset;
+ }
+ src += inlink->channels;
+ dst += inlink->channels;
+ }
+
+ dst = (double *)out->data[0];
+ ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
+ s->pts += in->nb_samples;
+ break;
+ }
+
+ if (in != out)
+ av_frame_free(&in);
+
+ return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Output pad callback: forward the request upstream and, once the input
+ * reports EOF while the filter is in INNER_FRAME state, build one last
+ * frame from the samples still held in s->buf and push it through
+ * filter_frame() as FINAL_FRAME.
+ *
+ * @return the result of ff_request_frame(), AVERROR(ENOMEM) if the flush
+ *         frame cannot be allocated, or the result of filter_frame()
+ */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    LoudNormContext *s   = ctx->priv;
+    const int ret        = ff_request_frame(inlink);
+    AVFrame *flush;
+    double *dst;
+    int channels, subframe, remaining, back, i, ch;
+
+    /* nothing to flush unless the input just ended in dynamic mode */
+    if (ret != AVERROR_EOF || s->frame_type != INNER_FRAME)
+        return ret;
+
+    channels = inlink->channels;
+    subframe = frame_size(inlink->sample_rate, 100);
+
+    /* number of sample frames to hand to the FINAL_FRAME pass */
+    remaining  = (s->buf_size / channels) - s->prev_nb_samples;
+    remaining -= (subframe - s->prev_nb_samples);
+
+    flush = ff_get_audio_buffer(outlink, remaining);
+    if (!flush)
+        return AVERROR(ENOMEM);
+    flush->nb_samples = remaining;
+
+    /* step the read position of the sample ring buffer backwards */
+    back  = ((s->limiter_buf_size / channels) - s->prev_nb_samples) * channels;
+    back -= (subframe - s->prev_nb_samples) * channels;
+    if (s->buf_index - back < 0)
+        s->buf_index = s->buf_index - back + s->buf_size;
+    else
+        s->buf_index = s->buf_index - back;
+
+    /* copy the buffered samples into the flush frame, wrapping as needed */
+    dst = (double *)flush->data[0];
+    for (i = 0; i < remaining; i++) {
+        for (ch = 0; ch < channels; ch++)
+            dst[ch] = s->buf[s->buf_index + ch];
+
+        dst += channels;
+        s->buf_index += channels;
+        if (s->buf_index >= s->buf_size)
+            s->buf_index -= s->buf_size;
+    }
+
+    s->frame_type = FINAL_FRAME;
+    return filter_frame(inlink, flush);
+}
+
+/**
+ * Declare the formats the filter supports: every channel count,
+ * AV_SAMPLE_FMT_DBL as the only sample format, and 192000 as the only
+ * sample rate on both the input and the output link.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if a list cannot be allocated, or
+ *         the negative error code of the failing helper
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    static const int input_srate[] = {192000, -1};
+    AVFilterLink *inlink  = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFilterChannelLayouts *layouts;
+    AVFilterFormats *fmts;
+    int err;
+
+    /* channel layouts: anything goes */
+    if (!(layouts = ff_all_channel_counts()))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, layouts)) < 0)
+        return err;
+
+    /* sample format */
+    if (!(fmts = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats(ctx, fmts)) < 0)
+        return err;
+
+    /* sample rate: one list referenced from both links */
+    if (!(fmts = ff_make_format_list(input_srate)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_formats_ref(fmts, &inlink->out_samplerates)) < 0)
+        return err;
+    err = ff_formats_ref(fmts, &outlink->in_samplerates);
+
+    return err < 0 ? err : 0;
+}
+
+/*
+ * Input link configuration: create the two libebur128 measurement states,
+ * allocate the sample, limiter and per-channel buffers, build the Gaussian
+ * kernel and choose between linear and dynamic processing.
+ *
+ * Returns 0 on success or AVERROR(ENOMEM) if any allocation fails; whatever
+ * was allocated before a failure is left in the context.
+ */
+static int config_input(AVFilterLink *inlink)
+{
+ AVFilterContext *ctx = inlink->dst;
+ LoudNormContext *s = ctx->priv;
+
+ s->r128_in = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
+ if (!s->r128_in)
+ return AVERROR(ENOMEM);
+
+ s->r128_out = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
+ if (!s->r128_out)
+ return AVERROR(ENOMEM);
+
+ /* sample buffer: 3000 ms of interleaved samples */
+ s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
+ s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
+ if (!s->buf)
+ return AVERROR(ENOMEM);
+
+ /* limiter buffer: indexed modulo limiter_buf_size (210 ms of samples) */
+ /* NOTE(review): allocated with buf_size elements rather than limiter_buf_size - looks like over-allocation, confirm before changing */
+ s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
+ s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
+ if (!s->limiter_buf)
+ return AVERROR(ENOMEM);
+
+ s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp));
+ if (!s->prev_smp)
+ return AVERROR(ENOMEM);
+
+ init_gaussian_filter(s);
+
+ s->frame_type = FIRST_FRAME;
+
+ /* linear mode needs all four measured_* options changed from their defaults, and the plain gain must satisfy both the peak and the LRA target */
+ if (s->linear) {
+ double offset, offset_tp;
+ offset = s->target_i - s->measured_i;
+ offset_tp = s->measured_tp + offset;
+
+ if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
+ if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
+ s->frame_type = LINEAR_MODE;
+ s->offset = offset;
+ }
+ }
+ }
+
+ /* dynamic mode: ask for a fixed 3000 ms first frame */
+ if (s->frame_type != LINEAR_MODE) {
+ inlink->min_samples =
+ inlink->max_samples =
+ inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
+ }
+
+ s->pts =
+ s->buf_index =
+ s->prev_buf_index =
+ s->limiter_buf_index = 0;
+ s->channels = inlink->channels;
+ s->index = 1;
+ s->limiter_state = OUT;
+ /* from here on offset and target_tp hold linear factors instead of dB values */
+ s->offset = pow(10., s->offset / 20.);
+ s->target_tp = pow(10., s->target_tp / 20.);
+ s->attack_length = frame_size(inlink->sample_rate, 10);
+ s->release_length = frame_size(inlink->sample_rate, 100);
+
+ return 0;
+}
+
+/**
+ * Log the measured input/output statistics in the format selected by the
+ * print_format option, then release everything config_input() allocated.
+ *
+ * config_input() may never have run, or may have failed before both
+ * measurement states were created. In that case there is nothing to
+ * measure, so the statistics are skipped and only the resources that do
+ * exist are released.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    LoudNormContext *s = ctx->priv;
+    double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out;
+    /* initialised so the log arguments are defined even with zero channels */
+    double tp_in = 0., tp_out = 0.;
+    int c;
+
+    /* querying a missing ebur128 state would dereference NULL */
+    if (!s->r128_in || !s->r128_out)
+        goto end;
+
+    ebur128_loudness_range(s->r128_in, &lra_in);
+    ebur128_loudness_global(s->r128_in, &i_in);
+    ebur128_relative_threshold(s->r128_in, &thresh_in);
+    for (c = 0; c < s->channels; c++) {
+        double tmp;
+        ebur128_sample_peak(s->r128_in, c, &tmp);
+        if ((c == 0) || (tmp > tp_in))
+            tp_in = tmp;
+    }
+
+    ebur128_loudness_range(s->r128_out, &lra_out);
+    ebur128_loudness_global(s->r128_out, &i_out);
+    ebur128_relative_threshold(s->r128_out, &thresh_out);
+    for (c = 0; c < s->channels; c++) {
+        double tmp;
+        ebur128_sample_peak(s->r128_out, c, &tmp);
+        if ((c == 0) || (tmp > tp_out))
+            tp_out = tmp;
+    }
+
+    /* peaks are linear sample values; they are logged in dB */
+    switch(s->print_format) {
+    case NONE:
+        break;
+
+    case JSON:
+        av_log(ctx, AV_LOG_INFO,
+            "\n{\n"
+            "\t\"input_i\" : \"%.2f\",\n"
+            "\t\"input_tp\" : \"%.2f\",\n"
+            "\t\"input_lra\" : \"%.2f\",\n"
+            "\t\"input_thresh\" : \"%.2f\",\n"
+            "\t\"output_i\" : \"%.2f\",\n"
+            "\t\"output_tp\" : \"%+.2f\",\n"
+            "\t\"output_lra\" : \"%.2f\",\n"
+            "\t\"output_thresh\" : \"%.2f\",\n"
+            "\t\"normalization_type\" : \"%s\",\n"
+            "\t\"target_offset\" : \"%.2f\"\n"
+            "}\n",
+            i_in,
+            20. * log10(tp_in),
+            lra_in,
+            thresh_in,
+            i_out,
+            20. * log10(tp_out),
+            lra_out,
+            thresh_out,
+            s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
+            s->target_i - i_out
+        );
+        break;
+
+    case SUMMARY:
+        av_log(ctx, AV_LOG_INFO,
+            "\n"
+            "Input Integrated: %+6.1f LUFS\n"
+            "Input True Peak: %+6.1f dBTP\n"
+            "Input LRA: %6.1f LU\n"
+            "Input Threshold: %+6.1f LUFS\n"
+            "\n"
+            "Output Integrated: %+6.1f LUFS\n"
+            "Output True Peak: %+6.1f dBTP\n"
+            "Output LRA: %6.1f LU\n"
+            "Output Threshold: %+6.1f LUFS\n"
+            "\n"
+            "Normalization Type: %s\n"
+            "Target Offset: %+6.1f LU\n",
+            i_in,
+            20. * log10(tp_in),
+            lra_in,
+            thresh_in,
+            i_out,
+            20. * log10(tp_out),
+            lra_out,
+            thresh_out,
+            s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
+            s->target_i - i_out
+        );
+        break;
+    }
+
+end:
+    /* destroy only the states that exist; av_freep() accepts NULL pointers */
+    if (s->r128_in)
+        ebur128_destroy(&s->r128_in);
+    if (s->r128_out)
+        ebur128_destroy(&s->r128_out);
+    av_freep(&s->limiter_buf);
+    av_freep(&s->prev_smp);
+    av_freep(&s->buf);
+}
+
+/* The single audio input pad: config_input() configures the link, filter_frame() receives the frames. */
+static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .config_props = config_input,
+ .filter_frame = filter_frame,
+ },
+ { NULL }
+};
+
+/* The single audio output pad; request_frame() also flushes the buffered samples at EOF. */
+static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
+ {
+ .name = "default",
+ .request_frame = request_frame,
+ .type = AVMEDIA_TYPE_AUDIO,
+ },
+ { NULL }
+};
+
+/* Filter definition for "loudnorm", tying together the option class, the callbacks and the pads defined above. */
+AVFilter ff_af_loudnorm = {
+ .name = "loudnorm",
+ .description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
+ .priv_size = sizeof(LoudNormContext),
+ .priv_class = &loudnorm_class,
+ .query_formats = query_formats,
+ .uninit = uninit,
+ .inputs = avfilter_af_loudnorm_inputs,
+ .outputs = avfilter_af_loudnorm_outputs,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index a972576..d0d491e 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -108,6 +108,7 @@ void avfilter_register_all(void)
REGISTER_FILTER(HIGHPASS, highpass, af);
REGISTER_FILTER(JOIN, join, af);
REGISTER_FILTER(LADSPA, ladspa, af);
+ REGISTER_FILTER(LOUDNORM, loudnorm, af);
REGISTER_FILTER(LOWPASS, lowpass, af);
REGISTER_FILTER(PAN, pan, af);
REGISTER_FILTER(REPLAYGAIN, replaygain, af);
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 1a9c4ac..d693d6d 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
#include "libavutil/version.h"
#define LIBAVFILTER_VERSION_MAJOR 6
-#define LIBAVFILTER_VERSION_MINOR 45
+#define LIBAVFILTER_VERSION_MINOR 46
#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
More information about the ffmpeg-cvslog
mailing list