[FFmpeg-devel] [PATCH] avfilter/af_volumedetect.c: Add 32bit float audio support

Sat Mar 23 17:35:45 EET 2024

Fixes #9613

---
 libavfilter/af_volumedetect.c | 234 +++++++++++++++++++++++++---------
 1 file changed, 172 insertions(+), 62 deletions(-)

diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
index 8b001d1cf2..d13d043f88 100644
--- a/libavfilter/af_volumedetect.c
+++ b/libavfilter/af_volumedetect.c
@@ -24,94 +24,193 @@
 #include "avfilter.h"
 #include "internal.h"
 
+#define NOISE_FLOOR_DB_FLT -758 /* lowest dB value of the float histogram range; presumably ~20*log10(FLT_MIN) -- TODO confirm */
+#define MAX_DB_FLT 770 /* highest dB value of the float histogram range; presumably ~20*log10(FLT_MAX) -- TODO confirm */
+#define MAX_DB 91 /* value logdb() returns for a zero integer-path input; also the last index of histdb[] */
+#define HISTOGRAM_SIZE 0x10000 /* number of distinct 16-bit PCM values */
+
 typedef struct VolDetectContext {
-    /**
-     * Number of samples at each PCM value.
-     * histogram[0x8000 + i] is the number of samples at value i.
-     * The extra element is there for symmetry.
-     */
-    uint64_t histogram[0x10001];
+    uint64_t* histogram; ///< integer input: number of samples at each PCM value; float input: number of samples at each integer dB value
+    uint64_t nb_samples; ///< number of samples counted so far, over all channels
+    double sum2;         ///< sum of the squares of the samples (updated for float input only)
+    double max;          ///< maximum absolute sample value (updated for float input only)
+    int is_float;        ///< true if the input is in floating point
 } VolDetectContext;
 
+static inline double logdb(double v, enum AVSampleFormat sample_fmt)
+{
+    /*
+    * Convert v to dB. For AV_SAMPLE_FMT_FLT, v is an amplitude, not a power, so the level is 20.0 * log10(v).
+    */
+    if (sample_fmt == AV_SAMPLE_FMT_FLT) {
+        if (!v)
+            return MAX_DB_FLT; /* NOTE(review): silence maps to +770 although every other quiet value is negative on this path -- confirm intent */
+        return 20.0 * log10(v);
+    } else {
+        double d = v / (double)(0x8000 * 0x8000); /* callers pass a squared 16-bit value; d is its fraction of full scale */
+        if (!v)
+            return MAX_DB;
+        return -log10(d) * 10; /* level below full scale as a positive number; callers negate it when printing */
+    }
+}
+
+static void update_float_stats(VolDetectContext *vd, float *audio_data)
+{ /* Fold one float sample into the running maximum, the sum of squares, the dB histogram and the sample count. */
+    double max_sample; /* absolute value of the current sample */
+    max_sample = fabsf(*audio_data);
+    if (max_sample > vd->max)
+        vd->max = max_sample;
+    vd->sum2 += *audio_data * *audio_data; /* the product has type float; it is widened only when added to sum2 */
+    vd->histogram[(int)logdb(max_sample, AV_SAMPLE_FMT_FLT) + MAX_DB_FLT]++; /* NOTE(review): index is not range-checked. A zero sample makes logdb() return MAX_DB_FLT, giving index 2 * MAX_DB_FLT = 1540, past the MAX_DB_FLT - NOISE_FLOOR_DB_FLT + 1 = 1529 entries allocated in config_output(); levels below -770 dB give a negative index */
+    vd->nb_samples++;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
 {
     AVFilterContext *ctx = inlink->dst;
     VolDetectContext *vd = ctx->priv;
-    int nb_samples  = samples->nb_samples;
     int nb_channels = samples->ch_layout.nb_channels;
     int nb_planes   = nb_channels;
+    int planar      = 0; /* non-zero when the sample format is planar */
     int plane, i;
-    int16_t *pcm;
 
-    if (!av_sample_fmt_is_planar(samples->format)) {
-        nb_samples *= nb_channels;
+    planar = av_sample_fmt_is_planar(samples->format);
+    if (!planar)
         nb_planes = 1;
+    if (vd->is_float) { /* float path: per-sample statistics plus the dB histogram */
+        float *audio_data;
+        for (plane = 0; plane < nb_planes; plane++) {
+            audio_data = (float *)samples->extended_data[plane];
+            for (i = 0; i < samples->nb_samples; i++) {
+                /*
+                 * If the input is planar, each plane is read separately and holds nb_samples values.
+                 * If the input is not planar, the samples are interleaved in plane 0,
+                 * so every channel of frame i is visited here.
+                 */
+                if (planar) {
+                    update_float_stats(vd, &audio_data[i]);
+                } else {
+                    for (int j = 0; j < nb_channels; j++)
+                        update_float_stats(vd, &audio_data[i * nb_channels + j]);
+                }
+            }
+        }
+    } else { /* 16-bit integer path: histogram indexed by PCM value + 0x8000 */
+        int16_t *pcm;
+        for (plane = 0; plane < nb_planes; plane++) {
+            pcm = (int16_t *)samples->extended_data[plane];
+            for (i = 0; i < samples->nb_samples; i++) {
+                if (planar) {
+                    vd->histogram[pcm[i] + 0x8000]++;
+                    vd->nb_samples++;
+                } else {
+                    for (int j = 0; j < nb_channels; j++) {
+                        vd->histogram[pcm[i * nb_channels + j] + 0x8000]++;
+                        vd->nb_samples++;
+                    }
+                }
+            }
+        }
     }
-    for (plane = 0; plane < nb_planes; plane++) {
-        pcm = (int16_t *)samples->extended_data[plane];
-        for (i = 0; i < nb_samples; i++)
-            vd->histogram[pcm[i] + 0x8000]++;
-    }
-
     return ff_filter_frame(inlink->dst->outputs[0], samples);
 }
 
-#define MAX_DB 91
-
-static inline double logdb(uint64_t v)
+static void print_stats(AVFilterContext *ctx)
 {
-    double d = v / (double)(0x8000 * 0x8000);
-    if (!v)
-        return MAX_DB;
-    return -log10(d) * 10;
+    VolDetectContext *vd = ctx->priv;
+
+    if (!vd->nb_samples) /* nothing was counted, so there is nothing to report */
+        return;
+    if (vd->is_float) {
+        double rms;
+        int i, sum = 0; /* NOTE(review): sum is an int but accumulates uint64_t bin counts and is compared with nb_samples / 1000 */
+        av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", vd->nb_samples);
+        rms = sqrt(vd->sum2 / vd->nb_samples); /* root mean square of all float samples */
+        av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", logdb(rms, AV_SAMPLE_FMT_FLT));
+        av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", logdb(vd->max, AV_SAMPLE_FMT_FLT));
+        for (i = MAX_DB_FLT - NOISE_FLOOR_DB_FLT; i >= 0 && !vd->histogram[i]; i--); /* skip empty bins, starting from the highest index */
+        for (; i >= 0 && sum < vd->nb_samples / 1000; i--) { /* print non-empty bins until they hold nb_samples / 1000 samples */
+            if (!vd->histogram[i])
+                continue;
+            av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]); /* bins are filled at index dB + MAX_DB_FLT, so the label is the negated dB value */
+            sum += vd->histogram[i];
+        }
+    } else {
+        int i, max_volume, shift;
+        uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
+        uint64_t histdb[MAX_DB + 1] = {0};
+        for (i = 0; i < 0x10000; i++)
+            nb_samples += vd->histogram[i];
+        av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", nb_samples);
+        /*
+            * If nb_samples > 1<<34, there is a risk of overflow in the
+            * multiplication or the sum: shift all histogram values to avoid that.
+            * The total number of samples must be recomputed to avoid rounding
+            * errors.
+        */
+        shift = av_log2(nb_samples >> 33);
+        for (i = 0; i < 0x10000; i++) {
+            nb_samples_shift += vd->histogram[i] >> shift;
+            power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
+        }
+        if (!nb_samples_shift)
+            return;
+        power = (power + nb_samples_shift / 2) / nb_samples_shift; /* rounded mean of the squared sample values */
+        av_assert0(power <= 0x8000 * 0x8000);
+        av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb((double)power, AV_SAMPLE_FMT_S16));
+        max_volume = 0x8000;
+        while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
+                !vd->histogram[0x8000 - max_volume])
+            max_volume--;
+        av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16));
+        for (i = 0; i < 0x10000; i++)
+            histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), AV_SAMPLE_FMT_S16)] += vd->histogram[i]; /* regroup the per-value counts by truncated dB below full scale */
+        for (i = 0; i <= MAX_DB && !histdb[i]; i++);
+        for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
+            av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", i, histdb[i]);
+            sum += histdb[i];
+        }
+    }
 }
 
-static void print_stats(AVFilterContext *ctx)
+static int config_output(AVFilterLink *outlink)
 {
+    AVFilterContext *ctx = outlink->src;
     VolDetectContext *vd = ctx->priv;
-    int i, max_volume, shift;
-    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
-    uint64_t histdb[MAX_DB + 1] = { 0 };
-
-    for (i = 0; i < 0x10000; i++)
-        nb_samples += vd->histogram[i];
-    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
-    if (!nb_samples)
-        return;
 
-    /* If nb_samples > 1<<34, there is a risk of overflow in the
-       multiplication or the sum: shift all histogram values to avoid that.
-       The total number of samples must be recomputed to avoid rounding
-       errors. */
-    shift = av_log2(nb_samples >> 33);
-    for (i = 0; i < 0x10000; i++) {
-        nb_samples_shift += vd->histogram[i] >> shift;
-        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
-    }
-    if (!nb_samples_shift)
-        return;
-    power = (power + nb_samples_shift / 2) / nb_samples_shift;
-    av_assert0(power <= 0x8000 * 0x8000);
-    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
-
-    max_volume = 0x8000;
-    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
-                             !vd->histogram[0x8000 - max_volume])
-        max_volume--;
-    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
-
-    for (i = 0; i < 0x10000; i++)
-        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
-    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
-    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
-        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
-        sum += histdb[i];
+    vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT ||
+                   outlink->format == AV_SAMPLE_FMT_FLTP;
+
+    if (!vd->is_float) {
+        /*
+        * Number of samples at each PCM value.
+        * Only used for integer formats.
+        * For 16-bit signed PCM there are 65536 distinct values.
+        * histogram[0x8000 + i] is the number of samples at value i.
+        * The extra element is there for symmetry.
+        */
+        vd->histogram = av_calloc(HISTOGRAM_SIZE + 1, sizeof(uint64_t));
+        if (!vd->histogram)
+            return AVERROR(ENOMEM);
+    } else {
+        /*
+        * For float input the histogram stores the number of samples at each integer dB value
+        * instead of the number of samples at each PCM value; the intended dB range is -758 to 770.
+        * NOTE(review): update_float_stats() indexes with dB + MAX_DB_FLT, which spans 12..1540 for that range, not 0..1528.
+        */
+        vd->histogram = av_calloc(MAX_DB_FLT - NOISE_FLOOR_DB_FLT + 1, sizeof(uint64_t));
+        if (!vd->histogram)
+            return AVERROR(ENOMEM);
     }
+    return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
+    VolDetectContext *vd = ctx->priv; /* needed to release the histogram after the statistics are printed */
     print_stats(ctx);
+    if (vd->histogram) /* histogram is allocated in config_output(), so it may still be unset here */
+        av_freep(&vd->histogram);
 }
 
 static const AVFilterPad volumedetect_inputs[] = {
@@ -122,6 +221,14 @@ static const AVFilterPad volumedetect_inputs[] = {
     },
 };
 
+static const AVFilterPad volumedetect_outputs[] = { /* output pad; replaces ff_audio_default_filterpad so that config_output() is used */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output, /* sets is_float from the link format and allocates the histogram */
+    },
+};
+
 const AVFilter ff_af_volumedetect = {
     .name          = "volumedetect",
     .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
@@ -129,6 +236,9 @@ const AVFilter ff_af_volumedetect = {
     .uninit        = uninit,
     .flags         = AVFILTER_FLAG_METADATA_ONLY,
     FILTER_INPUTS(volumedetect_inputs),
-    FILTER_OUTPUTS(ff_audio_default_filterpad),
-    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P),
+    FILTER_OUTPUTS(volumedetect_outputs),
+    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16,
+                      AV_SAMPLE_FMT_S16P,
+                      AV_SAMPLE_FMT_FLT,
+                      AV_SAMPLE_FMT_FLTP),
 };
-- 
2.44.0


> On Mar 23, 2024, at 6:21 PM, Paul B Mahol <onemda at gmail.com> wrote:
> 
> On Sat, Mar 23, 2024 at 3:28 PM Yiğithan Yiğit <yigithanyigit35 at gmail.com <mailto:yigithanyigit35 at gmail.com>>
> wrote:
> 
>> Hi,
>> 
>> Following your advice, I made some changes to my last patch. I feel
>> this one is much better. I removed the trivial calculations, but I want to
>> say I am not proud of how I handled the histogram for float compared to the
>> 16-bit integer histogram. I am storing dB values instead of storing samples. I
>> feel this is more convenient. Still, I am open to advice.
>> 
>> 
> I see no patch at all.
> 
> volumedetect typically displays the histogram in 1 dB steps, so build the
> histogram with a 1 dB range for each bin.
> For float, only use normal values: no +inf/subnormals/NaNs etc.
> I bet there are fewer than the current 2^16 entries in the histogram table of the filter
> context to fill.
> There is no need to convert each input sample to dB scale. Just
> calculate the range of each 1 dB entry in linear space, and
> add every sample that falls in that range to the corresponding histogram
> bin.
> Or, if you calculate in dB scale anyway, then just round (ceilf/floorf/lrintf)
> the dB value (removing the fractional part) and add it to the histogram table. Do
> not forget to count values beyond +/-1.0 too (the ones above 0 dB). You can
> use normal mean/max/peak calculations (do not use the histogram to calculate
> them for float/double).
> 
> 
> 
>> Thank you.
>> Yigithan
>> 
>> 
>> 
>> 
>>> On Mar 21, 2024, at 11:30 PM, Paul B Mahol <onemda at gmail.com <mailto:onemda at gmail.com>> wrote:
>>> 
>>> On Wed, Mar 20, 2024 at 11:55 PM Yiğithan Yiğit <
>> yigithanyigit35 at gmail.com <mailto:yigithanyigit35 at gmail.com> <mailto:yigithanyigit35 at gmail.com>>
>>> wrote:
>>> 
>>>> 
>>>>> On Mar 21, 2024, at 12:10 AM, Paul B Mahol <onemda at gmail.com <mailto:onemda at gmail.com>> wrote:
>>>>> 
>>>>> Why? This is pointless.
>>>>> 
>>>>> volumedetect have histogram output, float patch does not have it at
>> all.
>>>>> Use astats filter.
>>>>> 
>>>>> On Wed, Mar 20, 2024 at 9:47 PM Yiğithan Yiğit <
>>>> yigithanyigit35 at gmail.com>
>>>>> wrote:
>>>>> 
>>>>>> _______________________________________________
>>>>>> ffmpeg-devel mailing list
>>>>>> ffmpeg-devel at ffmpeg.org
>>>>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>>> 
>>>>>> To unsubscribe, visit link above, or email
>>>>>> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>>>>>> 
>>>>> _______________________________________________
>>>>> ffmpeg-devel mailing list
>>>>> ffmpeg-devel at ffmpeg.org
>>>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>> 
>>>>> To unsubscribe, visit link above, or email
>>>>> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>>>> 
>>>> I am a beginner/student and also new to open source, but I love FFmpeg and
>>>> use it in my daily life. From my perspective, volumedetect is way more user
>>>> friendly. I believe adding this patch would be useful to people such as those in
>>>> #9613. The lack of histogram output for float was mostly due to my
>>>> indecision about the range of the histogram. I am open to suggestions, and
>>>> after that I can make a new patch.
>>>> 
>>> 
>>> It is trivial (to some people) to add histogram per dB for float/double
>>> inputs.
>>> But this patch just does some extremely trivial math calculations so that
>>> float input have completely different output from integer ones.
>>> That is very odd and unfriendly from my perspective.
>>> 
>>> Besides, if you are only interested in a discrete-sample audio peak finder for
>>> audio input, use astats with the measure_overall=Peak_level option.
>>> Yes, they are not on by default, because more statistics are more important
>>> than a single number.
>>> 
>>> I'm not against adding proper, useful and correct float/double support
>>> to volumedetect, but it needs to have the same or a similar output structure as
>>> for integer sample format input audio; otherwise it just looks lazy and is likely
>>> to leave users wondering what is going on when they use different sample
>> formats
>>> in their graphs.
>>> 
>>> 
>>>> 
>>>> Best Regards
>>>> Yigithan
>>>> 
>>>> 
>>>> _______________________________________________
>>>> ffmpeg-devel mailing list
>>>> ffmpeg-devel at ffmpeg.org <mailto:ffmpeg-devel at ffmpeg.org> <mailto:ffmpeg-devel at ffmpeg.org>
>>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>> 
>>>> To unsubscribe, visit link above, or email
>>>> ffmpeg-devel-request at ffmpeg.org <mailto:ffmpeg-devel-request at ffmpeg.org> <mailto:ffmpeg-devel-request at ffmpeg.org>
>> with subject "unsubscribe".
>>>> 
>>> _______________________________________________
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel at ffmpeg.org <mailto:ffmpeg-devel at ffmpeg.org> <mailto:ffmpeg-devel at ffmpeg.org>
>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>> 
>>> To unsubscribe, visit link above, or email
>>> ffmpeg-devel-request at ffmpeg.org <mailto:ffmpeg-devel-request at ffmpeg.org> <mailto:ffmpeg-devel-request at ffmpeg.org>
>> with subject "unsubscribe".
>> 
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel at ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>> 
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".