[FFmpeg-devel] [PATCH] QCELP decoder

Sun Nov 9 00:57:59 CET 2008

On Fri, Nov 07, 2008 at 09:24:25AM -0800, Kenan Gillet wrote:
> Hi everybody,
>
> sorry for the delay,
>
> And here is another updated set of patches which makes it round 8.
> and a small summary:

> - optimizes qcelp_lspf2lpc which is now 60% faster.
> 	should it still be in it own file or merging qcelp_lsp.c and qcelpdec.c is 
> ok?

i need to look at this a little more but i think the final code should be
in some file separate from the qcelp decoder

> - simplifies interpolate_lpc and determine_framerate after Michael comment.
> - removes useless wrapper function.
> - added minor version bump in libavcodec/avcodec.h
> - it fixes the FIR filter to behave like in the reference code
> - some cosmetics
>

[...]
> +typedef struct {
> +    GetBitContext     gb;
> +    qcelp_packet_rate framerate;
> +

> +// beginning of unpacked data
> +    uint8_t           cbsign[16];
> +    uint8_t           cbgain[16];
> +    uint8_t           cindex[16];
> +    uint8_t           plag[4];
> +    uint8_t           pfrac[4];
> +    uint8_t           pgain[4];
> +    uint8_t           lspv[10];               /*!< LSP for RATE_OCTAVE, LSPV for other rates */
> +    uint8_t           reserved;               /*!< on all but rate 1/2 packets */
> +// end of unpacked data

doxygen has some tags to mark groups of things
something like ///@{ or so

> +
> +    uint8_t           erasure_count;
> +    uint8_t           octave_count;           /*!< count the consecutive RATE_OCTAVE frames */
> +    float             prev_lspf[10];
> +    float             predictor_lspf[10];     /*!< LSP predictor,
> +                                                  only use for RATE_OCTAVE and I_F_Q */
> +    float             pitch_synthesis_filter_mem[303];
> +    float             pitch_pre_filter_mem[303];
> +    float             formant_mem[170];
> +    float             fir_filter_mem[180];
> +    float             last_codebook_gain;
> +    int               prev_g1[2];
> +    int               prev_framerate;
> +    float             prev_pitch_gain[4];
> +    uint8_t           prev_pitch_lag[4];
> +    uint16_t          first16bits;
> +    float             bandwith_expansion_coeff[10];
> +#define QCELP_BANDWITH_EXPANSION_COEFF 0.9883
> +

> +    float             g12ga[61];              /*!< unpacked qcelp_g12ga table */

hmm, constant tables must not be in the context as that wastes memory.
depending on how often it is accessed, qcelp_g12ga should either be
changed back to float or the int16->float be done as it is accessed.
(int16->float is preferred due to smaller memory if there is no speed
difference, otherwise the faster one should be used)

[...]
> +/**
> + * circular codebook for rate 1 frames in x*100 form
> + *
> + * TIA/EIA/IS-733 2.4.6.1-2
> + */
> +static const int16_t qcelp_rate_full_codebook[128] = {
> +     10,  -65,  -59,   12,  110,   34, -134,  157,
> +    104,  -84,  -34, -115,   23, -101,    3,   45,
> +   -101,  -16,  -59,   28,  -45,  134,  -67,   22,
> +     61,  -29,  226,  -26,  -55, -179,  157,  -51,
> +   -220,  -93,  -37,   60,  118,   74,  -48,  -95,
> +   -181,  111,   36,  -52, -215,   78, -112,   39,
> +    -17,  -47, -223,   19,   12,  -98, -142,  130,
> +     54, -127,   21,  -12,   39,  -48,   12,  128,
> +      6, -167,   82, -102,  -79,   55,  -44,   48,
> +    -20,  -53,    8,  -61,   11,  -70, -157, -168,
> +     20,  -56,  -74,   78,   33,  -63, -173,   -2,
> +    -75,  -53, -146,   77,   66,  -29,    9,  -75,
> +     65,  119,  -43,   76,  233,   98,  125, -156,
> +    -27,   78,   -9,  170,  176,  143, -148,   -7,
> +     27, -136,    5,   27,   18,  139,  204,    7,
> +   -184, -197,   52,   -3,   78, -189,    8,  -65
> +};
> +#define QCELP_RATE_FULL_CODEBOOK_RATIO .01
> +
> +/**
> + * circular codebook for rate 1/2 frames in x*2 form
> + *
> + * TIA/EIA/IS-733 2.4.6.1-1
> + */
> +static const int8_t qcelp_rate_half_codebook[128] = {
> +     0, -4,  0, -3,  0,  0,  0,  0,
> +     0,  0,  0,  0,  0,  0,  0,  0,
> +     0, -3, -2,  0,  0,  0,  0,  0,
> +     0,  0,  0,  0,  0,  0,  0,  5,
> +     0,  0,  0,  0,  0,  0,  4,  0,
> +     0,  3,  2,  0,  3,  4,  0,  0,
> +     0,  0,  0,  0,  0,  0,  0,  0,
> +     0,  0,  0,  0,  0,  3,  0,  0,
> +    -3,  3,  0,  0, -2,  0,  3,  0,
> +     0,  0,  0,  0,  0,  0, -5,  0,
> +     0,  0,  0,  3,  0,  0,  0,  3,
> +     0,  0,  0,  0,  0,  0,  0,  4,
> +     0,  0,  0,  0,  0,  0,  0,  0,
> +     0,  3,  6, -3, -4,  0, -3, -3,
> +     3, -3,  0,  0,  0,  0,  0,  0,
> +     0,  0,  0,  0,  0,  0,  0,  0
> +};
> +#define QCELP_RATE_HALF_CODEBOOK_RATIO 0.5

ok

[...]
> Index: libavcodec/qcelpdec.c
> ===================================================================
> --- libavcodec/qcelpdec.c	(revision 15785)
> +++ libavcodec/qcelpdec.c	(working copy)
> @@ -38,7 +38,370 @@
>  #undef NDEBUG
>  #include <assert.h>
>  
> +static void weighted_vector_sumf(float *out,
> +                                 const float *in_a,
> +                                 const float *in_b,
> +                                 float weight_coeff_a,
> +                                 float weight_coeff_b,
> +                                 int length) {
> +    int   i;
> +
> +    for (i = 0; i < length; i++)
> +        out[i] = weight_coeff_a * in_a[i]
> +               + weight_coeff_b * in_b[i];
> +}
> +

ok

[...]
> +/**
> + * Computes the scaled codebook vector Cdn From INDEX and GAIN
> + * for all rates.
> + *
> + * The specification lacks some information here.
> + *
> + * TIA/EIA/IS-733 has an omission on the codebook index determination
> + * formula for RATE_FULL and RATE_HALF frames at section 2.4.8.1.1. It says
> + * you have to subtract the decoded index parameter from the given scaled
> + * codebook vector index 'n' to get the desired circular codebook index, but
> + * it does not mention that you have to clamp 'n' to [0-9] in order to get
> + * RI-compliant results.
> + *
> + * The reason for this mistake seems to be the fact they forgot to mention you
> + * have to do these calculations per codebook subframe and adjust given
> + * equation values accordingly.
> + *
> + * @param q the context
> + * @param gain array holding the 4 pitch subframe gain values
> + * @param cdn_vector array for the generated scaled codebook vector
> + */
> +static void compute_svector(const QCELPContext *q,
> +                            float *gain,
> +                            float *cdn_vector) {
> +    int      i, j, k;
> +    uint16_t cbseed, cindex;
> +    float    *rnd;
> +
> +    switch (q->framerate) {
> +    case RATE_FULL:
> +        for (i = 0; i < 16; i++) {

> +            gain[i] *= QCELP_RATE_FULL_CODEBOOK_RATIO;

it is very hackish to misuse gain as a temporary variable
gain should be const

> +            cindex = -q->cindex[i];
> +            for (j = 0; j < 10; j++)
> +                *cdn_vector++ = gain[i] * qcelp_rate_full_codebook[cindex++ & 127];
> +        }
> +        break;
> +    case RATE_HALF:
> +        for (i = 0; i < 4; i++) {
> +            gain[i] *= QCELP_RATE_HALF_CODEBOOK_RATIO;
> +            cindex = -q->cindex[i];
> +            for (j = 0; j < 40; j++)
> +                *cdn_vector++ = gain[i] * qcelp_rate_half_codebook[cindex++ & 127];
> +        }
> +        break;
> +    case RATE_QUARTER:
> +        cbseed = (0x0003 & q->lspv[4])<<14 |
> +                 (0x003F & q->lspv[3])<< 8 |
> +                 (0x0060 & q->lspv[2])<< 1 |
> +                 (0x0007 & q->lspv[1])<< 3 |
> +                 (0x0038 & q->lspv[0])>> 3 ;
> +        rnd = q->fir_filter_mem + 20;
> +        for (i = 0; i < 8; i++) {
> +            gain[i] *= QCELP_SQRT1887 / 32768.0;
> +            for (k = 0; k < 20; k++) {
> +                cbseed = 521 * cbseed + 259;
> +                *rnd = (int16_t)cbseed;
> +

> +                // FIR filter
> +                *cdn_vector = 0.0;
> +                for (j = 0; j < 10; j++)
> +                    *cdn_vector += qcelp_rnd_fir_coefs[j ] * (rnd[-j ] + rnd[-20+j]);
> +                *cdn_vector     += qcelp_rnd_fir_coefs[10] *  rnd[-10];
> +
> +                *cdn_vector++ *= gain[i];

i think this would be clearer with a temporary variable instead of using
*cdn_vector, also it should help gcc in terms of optimization simplicity
as gcc cannot exclude that cdn_vector is pointing to gain or another
array.

[...]
> +/**
> + * Apply generic gain control.
> + *
> + * @param v_out output vector
> + * @param v_in vector to control gain of
> + * @param v_gain gain-controlled vector
> + *
> + * TIA/EIA/IS-733 2.4.8.3-2/3/4/5, 2.4.8.6
> + */
> +static void apply_gain_ctrl(float *v_out,
> +                            const float *v_in,
> +                            const float *v_gain) {
> +    int   i, j, len;
> +    float scalefactor;
> +
> +    for (i = 0, j = 0; i < 4; i++) {
> +        scalefactor = ff_dot_productf(v_gain + j, v_gain + j, 40);
> +        if (scalefactor) {
> +            scalefactor = sqrt(ff_dot_productf(v_in + j, v_in + j, 40) / scalefactor);
> +            for (len = j + 40; j < len; j++)
> +                v_out[j] = scalefactor * v_gain[j];

somehow the variable naming seems exchanged
one would have expected vin->vout and controlled by a vref

> +        } else {
> +            memset(v_out + j, 0,  40 * sizeof(float));
> +            j += 40;
> +        }
> +    }
> +}
> +
> +/**
>   * Apply filter in pitch-subframe steps.
>   *
>   * @param memory buffer for the previous state of the filter
> @@ -90,7 +453,260 @@
>      return memory + 143;
>  }
>  
> +/**
> + * Apply pitch synthesis filter and pitch prefilter to the scaled codebook vector.
> + * TIA/EIA/IS-733 2.4.5.2
> + *
> + * @param q the context
> + * @param cdn_vector the scaled codebook vector
> + *
> + * @return 0 on success, -1 if the lag is out of range
> + */
> +static int apply_pitch_filters(QCELPContext *q,
> +                               float *cdn_vector) {
> +    int         i;
> +    float       gain[4];
> +    const float *v_synthesis_filtered, *v_pre_filtered;
> +
> +    if (q->framerate >= RATE_HALF ||
> +       (q->framerate == I_F_Q && (q->prev_framerate >= RATE_HALF))) {
> +
> +        if (q->framerate >= RATE_HALF) {
> +
> +            // Compute gain & lag for the whole frame.
> +            for (i = 0; i < 4; i++) {
> +                gain[i] = q->plag[i] ? (q->pgain[i] + 1) / 4.0 : 0.0;
> +
> +                q->plag[i] += 16;
> +
> +                if (q->pfrac[i] && q->plag[i] >= 140)
> +                    return -1;
> +            }
> +            memcpy(q->prev_pitch_lag, q->plag, 4 * sizeof(*q->plag));
> +        } else {
> +            gain[3] = q->erasure_count < 3 ? 0.9 - 0.3 * (q->erasure_count - 1)
> +                                           : 0.0;
> +            for (i = 0; i < 4; i++)
> +                gain[i] = FFMIN(q->prev_pitch_gain[i], gain[3]);
> +

> +            memset(q->pfrac, 0, 4 *sizeof(*q->pfrac));
> +            memcpy(q->plag, q->prev_pitch_lag, 4 * sizeof(*q->plag));

s/4*sizeof(*)/ sizeof()/

> +        }
> +
> +        // pitch synthesis filter
> +        v_synthesis_filtered = do_pitchfilter(q->pitch_synthesis_filter_mem, cdn_vector,
> +                                              gain, q->plag, q->pfrac);
> +
> +        // pitch prefilter update
> +        for (i = 0; i < 4; i++)
> +            gain[i] = 0.5 * FFMIN(gain[i], 1.0);
> +
> +        v_pre_filtered = do_pitchfilter(q->pitch_pre_filter_mem, v_synthesis_filtered,
> +                                        gain, q->plag, q->pfrac);
> +
> +        apply_gain_ctrl(cdn_vector, v_synthesis_filtered, v_pre_filtered);
> +
> +        memcpy(q->prev_pitch_gain, gain, sizeof(q->prev_pitch_gain));
> +
> +    } else {
> +        memcpy(q->pitch_synthesis_filter_mem, cdn_vector + 17, 143 * sizeof(float));
> +        memcpy(q->pitch_pre_filter_mem,       cdn_vector + 17, 143 * sizeof(float));
> +        memset(q->prev_pitch_gain, 0, sizeof(q->prev_pitch_gain));

> +        memset(q->prev_pitch_lag,  0, 4 * sizeof(*q->plag));

same

> +    }
> +    return 0;
> +}
> +

> +/**
> + * Interpolates LSP frequencies and computes LPC coefficients
> + * for a given framerate & pitch subframe.
> + *
> + * TIA/EIA/IS-733 2.4.3.3.4
> + *
> + * @param q the context
> + * @param curr_lspf LSP frequencies vector of the current frame
> + * @param lpc float vector for the resulting LPC
> + * @param subframe_num frame number in decoded stream
> + */
> +void interpolate_lpc(QCELPContext *q,
> +                     const float *curr_lspf,
> +                     float *lpc,
> +                     const int subframe_num) {
> +    float interpolated_lspf[10];
> +    float weight;
> +
> +    if (q->framerate >= RATE_QUARTER) {
> +        weight = 0.25 * (subframe_num + 1);
> +    } else if (q->framerate == RATE_OCTAVE && !subframe_num) {
> +        weight = 0.625;
> +    } else {
> +        weight = 1.0;
> +    }
> +
> +    if (weight != 1.0) {
> +        weighted_vector_sumf(interpolated_lspf, curr_lspf, q->prev_lspf, weight, 1.0 - weight, 10);

> +        if (q->framerate >= RATE_QUARTER || !subframe_num)
> +            lspf2lpc(q, interpolated_lspf, lpc);

this if is unneeded

> +    } else if (q->framerate >= RATE_QUARTER || (q->framerate == I_F_Q && !subframe_num))
> +        lspf2lpc(q, curr_lspf, lpc);
> +}
> +

> +static int buf_size2framerate(const int buf_size) {
> +    switch (buf_size) {
> +    case 35:
> +        return RATE_FULL;
> +    case 17:
> +        return RATE_HALF;
> +    case  8:
> +        return RATE_QUARTER;
> +    case  4:
> +        return RATE_OCTAVE;
> +    case  1:
> +        return SILENCE;
> +    }
> +    return -1;
> +}

ok

> +/*
> + * Determine the framerate from the frame size and/or the first byte of the frame.
> + *
> + * @param avctx the AV codec context
> + * @param q the QCELP context
> + * @param buf_size length of the buffer
> + * @param buf the bufffer
> + *
> + * @return 0 on success, negative error number otherwise.
> + */
> +static int determine_framerate(AVCodecContext *avctx,
> +                               QCELPContext *q,
> +                               const int buf_size,
> +                               uint8_t **buf) {
> +    if ((q->framerate = buf_size2framerate(buf_size)) >= 0) {
> +        if (q->framerate != **buf) {
> +            av_log(avctx, AV_LOG_WARNING, "Claimed framerate and buffer size mismatch.\n");
> +            q->framerate = **buf;
> +        }
> +        (*buf)++;
> +    } else if ((q->framerate = buf_size2framerate(buf_size + 1)) >= 0)
> +        av_log(avctx, AV_LOG_WARNING,
> +               "Framerate byte is missing, guessing the framerate from packet size.\n");
> +    else
> +        return -1;
> +
> +    if (q->framerate == SILENCE) {
> +        // FIXME: the decoder should not handle SILENCE frames as I_F_Q frames
> +        av_log_missing_feature(avctx, "Blank frame", 1);
> +        q->framerate = I_F_Q;
> +    }
> +    return 0;
> +}

IMHO this should return the "frame rate" instead of setting q->framerate
behind the curtain.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you really think that XML is the answer, then you definitely misunderstood
the question -- Attila Kinali
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081109/d0b5c0c8/attachment.pgp>