[FFmpeg-devel] [PATCH v3 2/5] avfilter/af_volumedetect.c: Add 32bit float audio support

Tue Jul 2 14:46:45 EEST 2024

> On Jul 2, 2024, at 8:51 AM, Rémi Denis-Courmont <remi at remlab.net> wrote:
> 
> 
> 
> Le 2 juillet 2024 04:33:51 GMT+03:00, Yigithan Yigit <yigithanyigitdevel at gmail.com <mailto:yigithanyigitdevel at gmail.com>> a écrit :
>> ---
>> libavfilter/af_volumedetect.c | 139 ++++++++++++++++++++++++++--------
>> 1 file changed, 107 insertions(+), 32 deletions(-)
>> 
>> diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
>> index 327801a7f9..edd2d56f7a 100644
>> --- a/libavfilter/af_volumedetect.c
>> +++ b/libavfilter/af_volumedetect.c
>> @@ -1,5 +1,6 @@
>> /*
>> * Copyright (c) 2012 Nicolas George
>> + * Copyright (c) 2024 Yigithan Yigit - 32 Bit Float Audio Support
>> *
>> * This file is part of FFmpeg.
>> *
>> @@ -20,48 +21,62 @@
>> 
>> #include "libavutil/channel_layout.h"
>> #include "libavutil/avassert.h"
>> +#include "libavutil/mem.h"
>> #include "audio.h"
>> #include "avfilter.h"
>> #include "internal.h"
>> 
>> +#define MAX_DB_FLT 1024
>> #define MAX_DB 91
>> +#define HISTOGRAM_SIZE 0x10000
>> +#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2)
>> +
>> +typedef struct VolDetectContext VolDetectContext;
>> 
>> typedef struct VolDetectContext {
>> -    /**
>> -     * Number of samples at each PCM value.
>> -     * histogram[0x8000 + i] is the number of samples at value i.
>> -     * The extra element is there for symmetry.
>> -     */
>> -    uint64_t histogram[0x10001];
>> +    uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB
>> +    uint64_t nb_samples; ///< number of samples
>> +    double sum2;         ///< sum of the squares of the samples
>> +    double max;          ///< maximum sample value
>> +    int is_float;        ///< true if the input is in floating point
>> +    void (*process_samples)(VolDetectContext *vd, AVFrame *samples);
>> } VolDetectContext;
>> 
>> -static inline double logdb(uint64_t v)
>> +static inline double logdb(double v, enum AVSampleFormat sample_fmt)
>> {
>> -    double d = v / (double)(0x8000 * 0x8000);
>> -    if (!v)
>> -        return MAX_DB;
>> -    return -log10(d) * 10;
>> +    if (sample_fmt == AV_SAMPLE_FMT_FLT) {
> 
> There's no point in doing this. You've already up-converted to double precision and do all the calculations in double precision. Maybe that's fine or maybe not, but either way, this doesn't look sensible.
> 
>> +        if (!v)
>> +            return MAX_DB_FLT;
>> +        return -log10(v) * 10;
>> +    } else {
>> +        double d = v / (double)(0x8000 * 0x8000);
>> +        if (!v)
>> +            return MAX_DB;
>> +        return -log10(d) * 10;
>> +    }
>> +}
>> +

If I understand your concerns correctly, we should have a function like this:

> static inline double logdb(double v, enum AVSampleFormat sample_fmt)
> {
>     if (!v)
>         return sample_fmt == AV_SAMPLE_FMT_FLT ? MAX_DB_FLT : MAX_DB;
> 
>     if (sample_fmt == AV_SAMPLE_FMT_S16)
>       v = ldexp(v, -30);
> 
>     return -log10(v) * 10;
> }

What do you think about that?

Thanks for the feedback
Yigithan