[FFmpeg-devel] [PATCH] QCELP decoder

Sun Nov 30 16:50:29 CET 2008

On Sat, Nov 29, 2008 at 10:39:58AM -0800, Kenan Gillet wrote:
> Hi all,
> 
> sorry for the delay. I was waiting to resolve the parsing/decoding of
> the bitrate/packet type issue.
> Reynaldo and I agreed on IRC to leave the parsing/decoding as it is for now.
> 
> So here is round 13 of the qcelp decoder.
> - QCELPContext was split so that it can be moved into qcelpdec.c and
> only keep the unpacked data structure (QCELPFrame) in qcelpdata.h
> - add doxy comments on QCELPFrame field
> - simplify decode_gain_and_index for RATE_OCTAVE and IFQ
> - rename qcelp_bits_per_rate into qcelp_unpacking_bitmaps_lengths
> - use double in qcelp_lsp.c
> 
> have a great day
> 
> Kenan

> Index: libavcodec/qcelp.h
> ===================================================================
> --- libavcodec/qcelp.h	(revision 0)
> +++ libavcodec/qcelp.h	(revision 0)
> @@ -0,0 +1,48 @@
> +/*
> + * QCELP decoder
> + * Copyright (c) 2007 Reynaldo H. Verdejo Pinochet
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_QCELP_H
> +#define AVCODEC_QCELP_H
> +
> +/**
> + * @file qcelp.h
> + * QCELP decoder
> + * @author Reynaldo H. Verdejo Pinochet
> + */
> +

> +typedef enum
> +{
> +    I_F_Q = -1,           /*!< insufficient frame quality */
> +    SILENCE,
> +    RATE_OCTAVE,
> +    RATE_QUARTER,
> +    RATE_HALF,
> +    RATE_FULL
> +} qcelp_packet_rate;

I think this could be in qcelpdec.c?

> +
> +/**
> + * Reconstructs LPC coefficients from the line spectral pair frequencies.
> + *
> + * TIA/EIA/IS-733 2.4.3.3.5
> + */
> +void qcelp_lspf2lpc(const float *lspf, float *lpc);
> +
> +#endif /* AVCODEC_QCELP_H */

> Index: libavcodec/qcelpdata.h
> ===================================================================
> --- libavcodec/qcelpdata.h	(revision 15955)
> +++ libavcodec/qcelpdata.h	(working copy)
> @@ -34,6 +34,37 @@
>  #include "libavutil/common.h"
>  
>  /**
> + * QCELP unpacked data frame
> + */
> +typedef struct {
> +/// @defgroup qcelp_codebook_parameters QCELP excitation codebook parameters
> +/// @{
> +    uint8_t cbsign[16]; ///!< sign of the codebook gain for each codebook subframe
> +    uint8_t cbgain[16]; ///!< unsigned codebook gain for each codebook subframe
> +    uint8_t cindex[16]; ///!< codebook index for each codebook subframe
> +/// @}
> +
> +/// @defgroup qcelp_pitch_parameters QCELP pitch prediction parameters
> +/// @{
> +    uint8_t plag[4];    ///!< pitch lag for each pitch subframe
> +    uint8_t pfrac[4];   ///!< fractional pitch lag for each pitch subframe
> +    uint8_t pgain[4];   ///!< pitch gain for each pitch subframe
> +/// @}
> +
> +    /**
> +     * line spectral pair frequencies (LSP) for RATE_OCTAVE,
> +     * line spectral pair frequencies grouped into five vectors
> +     * of dimension two (LSPV) for other rates
> +     */
> +    uint8_t lspv[10];
> +

ok

> +    /**
> +     * reserved bits on all bitrate but bitrate 1/2 packets

This is unclear: is it a field that is present in all packets except bitrate 1/2
ones, or a field that always exists but is reserved in all packets except bitrate 1/2 ones?

> +     */
> +    uint8_t reserved;
> +} QCELPFrame;
> +
> +/**
>   * pre-calculated table for hammsinc function
>   * Only half of the table is needed because of symmetry.
>   *
> @@ -47,10 +78,8 @@
>      uint8_t bitlen; /*!< number of bits to read */
>  } QCELPBitmap;
>  
> -#define QCELP_OF(variable, bit, len) {offsetof(QCELPContext, variable), bit, len}
> +#define QCELP_OF(variable, bit, len) {offsetof(QCELPFrame, variable), bit, len}

ok

>  
> -/* Disable the below code for now to allow 'make checkheaders' to pass. */
> -#if 0
>  /**
>   * bitmap unpacking tables for RATE_FULL
>   *

> @@ -243,14 +272,13 @@
>      qcelp_rate_full_bitmap,
>  };
>  
> -static const uint16_t qcelp_bits_per_rate[5] = {
> +static const uint16_t qcelp_unpacking_bitmaps_lengths[5] = {
>      0, ///!< for SILENCE rate
>      FF_ARRAY_ELEMS(qcelp_rate_octave_bitmap),
>      FF_ARRAY_ELEMS(qcelp_rate_quarter_bitmap),
>      FF_ARRAY_ELEMS(qcelp_rate_half_bitmap),
>      FF_ARRAY_ELEMS(qcelp_rate_full_bitmap),
>  };

ok

> -#endif
>  
>  typedef uint16_t qcelp_vector[2];
>  

> Index: libavcodec/qcelpdec.c
> ===================================================================
> --- libavcodec/qcelpdec.c	(revision 15955)
> +++ libavcodec/qcelpdec.c	(working copy)
> @@ -40,6 +40,27 @@
>  #undef NDEBUG
>  #include <assert.h>
>  
> +typedef struct {
> +    GetBitContext     gb;
> +    qcelp_packet_rate bitrate;
> +    QCELPFrame        frame;                  /*!< unpacked data frame */
> +    uint8_t           erasure_count;
> +    uint8_t           octave_count;           /*!< count the consecutive RATE_OCTAVE frames */
> +    float             prev_lspf[10];
> +    float             predictor_lspf[10];     /*!< LSP predictor,

ok

> +                                                  only use for RATE_OCTAVE and I_F_Q */
> +    float             pitch_synthesis_filter_mem[303];
> +    float             pitch_pre_filter_mem[303];

> +    float             rnd_fir_filter_mem[180];

Is it correct to use the random values from the last frame? I faintly remember
seeing that this wasn't done in previous patches ... just asking to make sure
no bug slipped in ...

> +    float             formant_mem[170];
> +    float             last_codebook_gain;
> +    int               prev_g1[2];
> +    int               prev_bitrate;
> +    float             prev_pitch_gain[4];
> +    uint8_t           prev_pitch_lag[4];
> +    uint16_t          first16bits;
> +} QCELPContext;
> +
>  static void weighted_vector_sumf(float *out, const float *in_a,
>                                   const float *in_b, float weight_coeff_a,
>                                   float weight_coeff_b, int length)
> @@ -99,8 +120,8 @@
>              for(i=0; i<10; i++)
>              {
>                  q->predictor_lspf[i] =
> -                             lspf[i] = (q->lspv[i] ?  QCELP_LSP_SPREAD_FACTOR
> -                                                   : -QCELP_LSP_SPREAD_FACTOR)
> +                             lspf[i] = (q->frame.lspv[i] ?  QCELP_LSP_SPREAD_FACTOR
> +                                                         : -QCELP_LSP_SPREAD_FACTOR)
>                                       + predictors[i] * QCELP_LSP_OCTAVE_PREDICTOR
>                                       + (i + 1) * ((1 - QCELP_LSP_OCTAVE_PREDICTOR)/11);
>              }
> @@ -141,8 +162,8 @@
>          tmp_lspf = 0.;
>          for(i=0; i<5 ; i++)
>          {
> -            lspf[2*i+0] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][0] * 0.0001;
> -            lspf[2*i+1] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][1] * 0.0001;
> +            lspf[2*i+0] = tmp_lspf += qcelp_lspvq[i][q->frame.lspv[i]][0] * 0.0001;
> +            lspf[2*i+1] = tmp_lspf += qcelp_lspvq[i][q->frame.lspv[i]][1] * 0.0001;
>          }
>  
>          // Check for badly received packets.
> @@ -166,6 +187,83 @@
>  }
>  
>  /**
> + * Converts codebook transmission codes to GAIN and INDEX.
> + *
> + * @param q the context
> + * @param gain array holding the decoded gain
> + *
> + * TIA/EIA/IS-733 2.4.6.2
> + */
> +static void decode_gain_and_index(QCELPContext  *q,
> +                                  float *gain) {
> +    int   i, subframes_count, g1[16];
> +    float slope;
> +
> +    if (q->bitrate >= RATE_QUARTER) {
> +        switch (q->bitrate) {
> +            case RATE_FULL: subframes_count = 16; break;
> +            case RATE_HALF: subframes_count = 4;  break;
> +            default:        subframes_count = 5;
> +        }
> +        for (i = 0; i < subframes_count; i++) {
> +            g1[i] = 4 * q->frame.cbgain[i];
> +            if (q->bitrate == RATE_FULL && !((i+1) & 3)) {
> +                g1[i] += av_clip((g1[i-1] + g1[i-2] + g1[i-3]) / 3 - 6, 0, 32);
> +            }
> +
> +            gain[i] = qcelp_g12ga[g1[i]];
> +
> +            if (q->frame.cbsign[i]) {
> +                gain[i] = -gain[i];
> +                q->frame.cindex[i] = (q->frame.cindex[i]-89) & 127;
> +            }
> +        }
> +
> +        q->prev_g1[0] = g1[i-2];
> +        q->prev_g1[1] = g1[i-1];
> +        q->last_codebook_gain = qcelp_g12ga[g1[i-1]];
> +
> +        if (q->bitrate == RATE_QUARTER) {
> +            // Provide smoothing of the unvoiced excitation energy.
> +            gain[7] =     gain[4];
> +            gain[6] = 0.4*gain[3] + 0.6*gain[4];
> +            gain[5] =     gain[3];
> +            gain[4] = 0.8*gain[2] + 0.2*gain[3];
> +            gain[3] = 0.2*gain[1] + 0.8*gain[2];
> +            gain[2] =     gain[1];
> +            gain[1] = 0.6*gain[0] + 0.4*gain[1];
> +        }
> +    } else {
> +        if (q->bitrate == RATE_OCTAVE) {
> +            g1[0] = 2 * q->frame.cbgain[0]
> +                  + av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 5, 0, 54);
> +            subframes_count = 8;
> +        } else {
> +            assert(q->bitrate == I_F_Q);
> +
> +            g1[0] = q->prev_g1[1];
> +            switch (q->erasure_count) {
> +            case 1 : break;
> +            case 2 : g1[0] -= 1; break;
> +            case 3 : g1[0] -= 2; break;
> +            default: g1[0] -= 6;
> +            }
> +            if (g1[0] < 0)
> +                g1[0] = 0;
> +            subframes_count = 4;
> +        }
> +        // This interpolation is done to produce smoother background noise.
> +        slope = 0.5*(qcelp_g12ga[g1[0]] - q->last_codebook_gain) / subframes_count;
> +        for (i = 1; i <= subframes_count; i++)
> +            gain[i-1] = q->last_codebook_gain + slope * i;
> +        q->last_codebook_gain = gain[i-2];
> +
> +        q->prev_g1[0] = q->prev_g1[1];
> +        q->prev_g1[1] = g1[0];
> +    }
> +}
> +
> +/**
>   * If the received packet is Rate 1/4 a further sanity check is made of the
>   * codebook gain.
>   *
> @@ -224,7 +322,7 @@
>              for(i=0; i<16; i++)
>              {
>                  tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
> -                cindex = -q->cindex[i];
> +                cindex = -q->frame.cindex[i];
>                  for(j=0; j<10; j++)
>                      *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cindex++ & 127];
>              }
> @@ -233,17 +331,17 @@
>              for(i=0; i<4; i++)
>              {
>                  tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
> -                cindex = -q->cindex[i];
> +                cindex = -q->frame.cindex[i];
>                  for (j = 0; j < 40; j++)
>                  *cdn_vector++ = tmp_gain * qcelp_rate_half_codebook[cindex++ & 127];
>              }
>          break;
>          case RATE_QUARTER:
> -            cbseed = (0x0003 & q->lspv[4])<<14 |
> -                     (0x003F & q->lspv[3])<< 8 |
> -                     (0x0060 & q->lspv[2])<< 1 |
> -                     (0x0007 & q->lspv[1])<< 3 |
> -                     (0x0038 & q->lspv[0])>> 3 ;
> +            cbseed = (0x0003 & q->frame.lspv[4])<<14 |
> +                     (0x003F & q->frame.lspv[3])<< 8 |
> +                     (0x0060 & q->frame.lspv[2])<< 1 |
> +                     (0x0007 & q->frame.lspv[1])<< 3 |
> +                     (0x0038 & q->frame.lspv[0])>> 3 ;
>              rnd = q->rnd_fir_filter_mem + 20;
>              for(i=0; i<8; i++)
>              {

ok

> @@ -381,6 +479,64 @@
>  }
>  
>  /**
> + * Apply pitch synthesis filter and pitch prefilter to the scaled codebook vector.
> + * TIA/EIA/IS-733 2.4.5.2
> + *
> + * @param q the context
> + * @param cdn_vector the scaled codebook vector
> + */
> +static void apply_pitch_filters(QCELPContext *q,
> +                                float *cdn_vector) {
> +    int         i;
> +    float       gain[4];
> +    const float *v_synthesis_filtered, *v_pre_filtered;
> +
> +    if (q->bitrate >= RATE_HALF ||
> +       (q->bitrate == I_F_Q && (q->prev_bitrate >= RATE_HALF))) {
> +
> +        if (q->bitrate >= RATE_HALF) {
> +
> +            // Compute gain & lag for the whole frame.
> +            for (i = 0; i < 4; i++) {
> +                gain[i] = q->frame.plag[i] ? (q->frame.pgain[i] + 1) * 0.25 : 0.0;
> +
> +                q->frame.plag[i] += 16;
> +            }
> +            memcpy(q->prev_pitch_lag, q->frame.plag, sizeof(q->frame.plag));
> +        } else {
> +            gain[3] = q->erasure_count < 3 ? 0.9 - 0.3 * (q->erasure_count - 1)
> +                                           : 0.0;
> +            for (i = 0; i < 4; i++)
> +                gain[i] = FFMIN(q->prev_pitch_gain[i], gain[3]);
> +
> +            memset(q->frame.pfrac, 0, sizeof(q->frame.pfrac));
> +            memcpy(q->frame.plag, q->prev_pitch_lag, sizeof(q->frame.plag));
> +        }
> +
> +        // pitch synthesis filter
> +        v_synthesis_filtered = do_pitchfilter(q->pitch_synthesis_filter_mem, cdn_vector,
> +                                              gain, q->frame.plag, q->frame.pfrac);
> +
> +        // pitch prefilter update
> +        for (i = 0; i < 4; i++)
> +            gain[i] = 0.5 * FFMIN(gain[i], 1.0);
> +
> +        v_pre_filtered = do_pitchfilter(q->pitch_pre_filter_mem, v_synthesis_filtered,
> +                                        gain, q->frame.plag, q->frame.pfrac);
> +
> +        apply_gain_ctrl(cdn_vector, v_synthesis_filtered, v_pre_filtered);
> +
> +        memcpy(q->prev_pitch_gain, gain, sizeof(q->prev_pitch_gain));
> +
> +    } else {
> +        memcpy(q->pitch_synthesis_filter_mem, cdn_vector + 17, 143 * sizeof(float));
> +        memcpy(q->pitch_pre_filter_mem,       cdn_vector + 17, 143 * sizeof(float));
> +        memset(q->prev_pitch_gain, 0, sizeof(q->prev_pitch_gain));
> +        memset(q->prev_pitch_lag,  0, sizeof(q->prev_pitch_lag));
> +    }
> +}
> +
> +/**
>   * Interpolates LSP frequencies and computes LPC coefficients
>   * for a given bitrate & pitch subframe.
>   *

> @@ -432,6 +588,46 @@
>      return -1;
>  }
>  

> +/*
> + * Determine the bitrate from the frame size and/or the first byte of the frame.

This comment should open with /** so that it is picked up as a Doxygen comment.

> + *
> + * @param avctx the AV codec context
> + * @param buf_size length of the buffer
> + * @param buf the buffer
> + *
> + * @return the bitrate on success,
> + *         I_F_Q  if the bitrate cannot be satisfactorily determined
> + *
> + * TIA/EIA/IS-733 2.4.8.7.1
> + */
> +static int determine_bitrate(AVCodecContext *avctx,
> +                               const int buf_size,
> +                               uint8_t **buf) {
> +    qcelp_packet_rate bitrate;
> +
> +    if ((bitrate = buf_size2bitrate(buf_size)) >= 0) {
> +        if (bitrate > **buf) {
> +            av_log(avctx, AV_LOG_WARNING, "Claimed bitrate and buffer size mismatch.\n");
> +            bitrate = **buf;
> +        } else if (bitrate < **buf) {

> +            av_log(avctx, AV_LOG_WARNING, "Buffer is too small for the claimed bitrate.\n");

I think this should be AV_LOG_ERROR

> +            return I_F_Q;
> +        }
> +        (*buf)++;
> +    } else if ((bitrate = buf_size2bitrate(buf_size + 1)) >= 0) {
> +        av_log(avctx, AV_LOG_WARNING,
> +               "Bitrate byte is missing, guessing the bitrate from packet size.\n");
> +    } else
> +        return I_F_Q;
> +
> +    if (bitrate == SILENCE) {
> +        // FIXME: the decoder should not handle SILENCE frames as I_F_Q frames
> +        av_log_missing_feature(avctx, "Blank frame", 1);
> +        bitrate = I_F_Q;
> +    }
> +    return bitrate;
> +}
> +
>  static void warn_insufficient_frame_quality(AVCodecContext *avctx,
>                                              const char *message)
>  {

> @@ -439,6 +635,107 @@
>             message);
>  }
>  
> +static int qcelp_decode_frame(AVCodecContext *avctx,
> +                              void *data,
> +                              int *data_size,
> +                              uint8_t *buf,
> +                              const int buf_size) {
> +    QCELPContext      *q = avctx->priv_data;
> +    float             *outbuffer = data;
> +    int               i;
> +    float             quantized_lspf[10], lpc[10];
> +    float             gain[16];
> +    float             *formant_mem;
> +
> +    if ((q->bitrate = determine_bitrate(avctx, buf_size, &buf)) == I_F_Q) {
> +        warn_insufficient_frame_quality(avctx, "bitrate cannot be determined.");
> +        goto erasure;
> +    }
> +
> +    if (q->bitrate == RATE_OCTAVE &&
> +       (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
> +        warn_insufficient_frame_quality(avctx, "Bitrate is 1/8 and first 16 bits are on.");
> +        goto erasure;
> +    }
> +
> +    if (q->bitrate > SILENCE) {
> +        const QCELPBitmap *bitmaps     = qcelp_unpacking_bitmaps_per_rate[q->bitrate];
> +        const QCELPBitmap *bitmaps_end = qcelp_unpacking_bitmaps_per_rate[q->bitrate]
> +                                       + qcelp_unpacking_bitmaps_lengths[q->bitrate];
> +        uint8_t           *unpacked_data = (uint8_t *)&q->frame;
> +
> +        init_get_bits(&q->gb, buf, 8*buf_size);
> +
> +        memset(&q->frame, 0, sizeof(QCELPFrame));
> +
> +        for (; bitmaps < bitmaps_end; bitmaps++)
> +            unpacked_data[bitmaps->index] |= get_bits(&q->gb, bitmaps->bitlen) << bitmaps->bitpos;
> +
> +        // Check for erasures/blanks on rates 1, 1/4 and 1/8.
> +        if (q->frame.reserved) {
> +            warn_insufficient_frame_quality(avctx, "Wrong data in reserved frame area.");
> +            goto erasure;
> +        }
> +        if (q->bitrate == RATE_QUARTER && codebook_sanity_check_for_rate_quarter(q->frame.cbgain)) {
> +            warn_insufficient_frame_quality(avctx, "Codebook gain sanity check failed.");
> +            goto erasure;
> +        }
> +
> +        if (q->bitrate >= RATE_HALF) {
> +            for (i = 0; i < 4; i++) {
> +                if (q->frame.pfrac[i] && q->frame.plag[i] >= 124) {
> +                    warn_insufficient_frame_quality(avctx, "Cannot initialize pitch filter.");
> +                    goto erasure;
> +                }
> +            }
> +        }
> +    }
> +
> +    decode_gain_and_index(q, gain);
> +    compute_svector(q, gain, outbuffer);
> +
> +    if (decode_lspf(q, quantized_lspf) < 0) {
> +        warn_insufficient_frame_quality(avctx, "Badly received packets in frame.");
> +        goto erasure;
> +    }
> +
> +
> +    apply_pitch_filters(q, outbuffer);
> +
> +    if (q->bitrate == I_F_Q) {
> +erasure:
> +        q->bitrate = I_F_Q;
> +        q->erasure_count++;
> +        decode_gain_and_index(q, gain);
> +        compute_svector(q, gain, outbuffer);
> +        decode_lspf(q, quantized_lspf);
> +        apply_pitch_filters(q, outbuffer);
> +    } else
> +        q->erasure_count = 0;
> +
> +    formant_mem = q->formant_mem + 10;
> +    for (i = 0; i < 4; i++) {
> +        interpolate_lpc(q, quantized_lspf, lpc, i);
> +        ff_celp_lp_synthesis_filterf(formant_mem, lpc, outbuffer + i * 40, 40, 10);
> +        formant_mem += 40;
> +    }
> +    memcpy(q->formant_mem, q->formant_mem + 160, 10 * sizeof(float));
> +
> +    // FIXME: postfilter and final gain control should be here.
> +    // TIA/EIA/IS-733 2.4.8.6
> +
> +    formant_mem = q->formant_mem + 10;
> +    for (i = 0; i < 160; i++)
> +        *outbuffer++ = av_clipf(*formant_mem++, QCELP_CLIP_LOWER_BOUND, QCELP_CLIP_UPPER_BOUND);
> +
> +    memcpy(q->prev_lspf, quantized_lspf, sizeof(q->prev_lspf));
> +    q->prev_bitrate = q->bitrate;
> +
> +    *data_size = 160 * sizeof(*outbuffer);
> +
> +    return *data_size;
> +}
> +
>  AVCodec qcelp_decoder =
>  {
>      .name   = "qcelp",

ok

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In fact, the RIAA has been known to suggest that students drop out
of college or go to community college in order to be able to afford
settlements. -- The RIAA
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081130/2a554d10/attachment.pgp>