[FFmpeg-devel] [PATCH 3/6] avcodec/vorbisenc: Add clipping avoidance

Tue Aug 22 04:23:04 EEST 2017

Clipping is avoided by taking the maximum absolute sample value of each
frame before window application and, if it exceeds a block-type-dependent
threshold, scaling down the entire frame by a scalar factor.

Signed-off-by: Tyler Jones <tdjones879 at gmail.com>
---
 libavcodec/vorbisenc.c |  8 ++++----
 libavcodec/vorbispsy.c | 17 +++++++++++++++++
 libavcodec/vorbispsy.h | 10 ++++++++++
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index c968956794..73182c6356 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -1037,10 +1037,10 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc,
  * See Vorbis I spec Fig. 2, 3 for examples.
  */
 static void apply_window(vorbis_enc_context *venc, const int *blockflags,
-                         float *out, float* in)
+                         float *out, float* in, const float clip_factor)
 {
     int prev_size, curr_size, next_size, bound;
-    float scale = 1.0f / (float) (1 << (venc->log2_blocksize[blockflags[1]] - 2));
+    float scale = clip_factor / (float) (1 << (venc->log2_blocksize[blockflags[1]] - 2));
     const float *prev_win, *next_win;
     AVFloatDSPContext *fdsp = venc->fdsp;
 
@@ -1098,9 +1098,9 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, int next_type)
     for (channel = 0; channel < venc->channels; channel++) {
         float *out = venc->scratch;
         float *in  = venc->samples + channel * 2 * long_len + transient_offset;
+        float clip_factor = ff_psy_vorbis_avoid_clip(in, curr_len, curr_type);
 
-        apply_window(venc, blockflags, out, in);
-
+        apply_window(venc, blockflags, out, in, clip_factor);
         venc->mdct[curr_type].mdct_calc(&venc->mdct[curr_type],
                                         venc->coeffs + channel * curr_len, out);
     }
diff --git a/libavcodec/vorbispsy.c b/libavcodec/vorbispsy.c
index ab2d41f62f..56e23dea5e 100644
--- a/libavcodec/vorbispsy.c
+++ b/libavcodec/vorbispsy.c
@@ -140,6 +140,23 @@ int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio,
     return block_flag;
 }
 
+float ff_psy_vorbis_avoid_clip(float *audio, int window_len, int blockflag)
+{
+    int i;
+    float max = 0, clip = 1.0f;
+    /* Peak threshold: because of how the encoder scales the MDCT, short blocks
+     * clip more readily, so a zero blockflag gets the lower limit (0.75). */
+    const float avoidance_factor = blockflag ? 0.95f : 0.75f;
+
+    for (i = 0; i < window_len; i++)
+        max = FFMAX(max, fabsf(audio[i]));
+
+    if (max > avoidance_factor)
+        clip = avoidance_factor / max;
+
+    return clip;
+}
+
 av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx)
 {
     av_freep(&vpctx->filter_delay);
diff --git a/libavcodec/vorbispsy.h b/libavcodec/vorbispsy.h
index 93a03fd8ca..e632e8ad1d 100644
--- a/libavcodec/vorbispsy.h
+++ b/libavcodec/vorbispsy.h
@@ -75,6 +75,16 @@ av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate,
  */
 int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio,
                               int ch, int frame_size, int block_size);
+
+/**
+ * Provide a scalar coefficient to avoid clipping.
+ * @param audio      Raw audio sample input for one channel
+ * @param window_len Chosen window length for the given frame
+ * @param blockflag  Block type of the frame; selects the peak threshold
+ * @return Scale factor (<= 1.0) to be applied alongside the window function
+ */
+float ff_psy_vorbis_avoid_clip(float *audio, int window_len, int blockflag);
+
 /**
  * Closes and frees the memory used by the psychoacoustic model
  */
-- 
2.14.1