[FFmpeg-devel] [PATCH] QCELP decoder

Fri Nov 14 11:14:39 CET 2008

On Thu, Nov 13, 2008 at 11:46:40AM -0800, Kenan Gillet wrote:
> Thanks for your reviews,
>
> here is round 10 of the QCELP decoder patch:
> - it simplifies and optimizes qcelp_lspf2lpc and lps2poly
> - it removes bandwith_expansion_coeff from QCELPContext, and replaces it
>   by a systematic recalculation as the benchmarks show it is faster
> - simplifies apply_gain_control,
> - adds FIXME comment and av_log_missing_features in apply_gain_control
>   if the vector to control gain of is a zero vector.
> - various cosmetics
>
> Kenan
>

[...]

> @@ -461,6 +517,15 @@
>  #define QCELP_RATE_HALF_CODEBOOK_RATIO 0.5
>  
>  /**
> + * sqrt(1.887) is the maximum of the pseudorandom
> + * white sequence used to generate the scaled codebook
> + * vector for framerate 1/4.
> + *
> + * TIA/EIA/IS-733 2.4.8.1.2
> + */
> +#define QCELP_SQRT1887 1.373681186
> +
> +/**
>   * table for impulse response of BPF used to filter
>   * the white excitation for framerate 1/4 synthesis
>   *

ok

[...]

> Index: libavcodec/qcelpdec.c
> ===================================================================
> --- libavcodec/qcelpdec.c	(revision 15816)
> +++ libavcodec/qcelpdec.c	(working copy)

> @@ -52,6 +52,327 @@
>  }
>  
>  /**
> + * Initialize the speech codec according to the specification.
> + *
> + * TIA/EIA/IS-733 2.4.9
> + */
> +static av_cold int qcelp_decode_init(AVCodecContext *avctx) {
> +    QCELPContext *q = avctx->priv_data;
> +    int i;
> +
> +    avctx->sample_fmt = SAMPLE_FMT_FLT;
> +
> +    for (i = 0; i < 10; i++)
> +        q->prev_lspf[i] = (i + 1) / 11.;
> +
> +    return 0;
> +}

ok

> +
> +/**
> + * Decodes the 10 quantized LSP frequencies from the LSPV/LSP
> + * transmission codes of any framerate and checks for badly received packets.
> + *
> + * @param q the context
> + * @param lspf line spectral pair frequencies
> + *
> + * @return 0 on success, -1 if the packet is badly received
> + *
> + * TIA/EIA/IS-733 2.4.3.2.6.2-2, 2.4.8.7.3
> + */
> +static int decode_lspf(QCELPContext *q,
> +                       float *lspf) {
> +    int i;
> +    float tmp_lspf;
> +
> +    if (q->framerate == RATE_OCTAVE ||
> +        q->framerate == I_F_Q) {
> +        float smooth;

> +        const float *predictors = (q->prev_framerate != RATE_OCTAVE ||
> +                                   q->prev_framerate != I_F_Q ? q->prev_lspf
> +                                                              : q->predictor_lspf);

3rd try.
This is a constant (the condition is always true); if you disagree, tell us
for which value of prev_framerate q->predictor_lspf is selected.

[...]

> +/**
> + * Converts codebook transmission codes to GAIN and INDEX.
> + *
> + * @param q the context
> + * @param gain array holding the decoded gain
> + *
> + * @return 0 on success, -1 if the gain is out of range for RATE_QUARTER
> + *
> + * TIA/EIA/IS-733 2.4.6.2
> + */
> +static int decode_gain_and_index(QCELPContext  *q,
> +                                 float *gain) {
> +    int   i, subframes_count, g1[16];
> +    float ga[16], gain_memory, smooth_coef;
> +
> +    if (q->framerate >= RATE_QUARTER) {
> +        subframes_count = q->framerate == RATE_FULL ? 16
> +                                                    : q->framerate == RATE_HALF ? 4
> +                                                                                : 5;
> +        for (i = 0; i < subframes_count; i++) {
> +            g1[i] = 4 * q->cbgain[i];
> +            if (q->framerate == RATE_FULL && !((i+1) & 3)) {

> +                g1[i] += av_clip((g1[i-1] + g1[i-2] + g1[i-3]) / 3, 6, 38) - 6;

g1[i] += av_clip((g1[i-1] + g1[i-2] + g1[i-3]) / 3 - 6, 0, 32);

> +                if (g1[i] > 60)
> +                    g1[i] = 60;

I don't see how this can be true.

> +            } else if (q->framerate == RATE_QUARTER) {

> +                if (i > 0  && FFABS(g1[i] -     g1[i-1]) > 40)
> +                    return -1;
> +                if (i >= 2 && FFABS(g1[i] - 2 * g1[i-1] + g1[i-2]) > 48)
> +                    return -1;

I am not sure, but maybe this check should be separate and closer
to where the bits are read.

> +            }
> +
> +            gain[i] = qcelp_g12ga[g1[i]];
> +
> +            if (q->cbsign[i]) {
> +                gain[i] = -gain[i];
> +                q->cindex[i] = (q->cindex[i]-89) & 127;
> +            }
> +        }
> +
> +        q->prev_g1[0] = g1[i-2];
> +        q->prev_g1[1] = g1[i-1];

> +        q->last_codebook_gain = gain[i-1];

q->last_codebook_gain = qcelp_g12ga[g1[i-1]];
would avoid the FFABS() later

> +
> +        if (q->framerate == RATE_QUARTER) {
> +            // Provide smoothing of the unvoiced excitation energy.
> +            gain[7] =     gain[4];
> +            gain[6] = 0.4*gain[3] + 0.6*gain[4];
> +            gain[5] =     gain[3];
> +            gain[4] = 0.8*gain[2] + 0.2*gain[3];
> +            gain[3] = 0.2*gain[1] + 0.8*gain[2];
> +            gain[2] =     gain[1];
> +            gain[1] = 0.6*gain[0] + 0.4*gain[1];
> +        }
> +    } else {
> +        if (q->framerate == RATE_OCTAVE) {

> +            g1[0] = -4 + 2 * q->cbgain[0]
> +                  + av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 1, 4, 58);

g1[0] = 2 * q->cbgain[0] + av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 5, 0, 54);

> +            smooth_coef = 0.125;
> +            i = 7;
> +        } else {
> +            assert(q->framerate == I_F_Q);
> +
> +            g1[0] = q->prev_g1[1];
> +            switch (q->erasure_count) {
> +            case 1 : break;
> +            case 2 : g1[0] -= 1; break;
> +            case 3 : g1[0] -= 2; break;
> +            default: g1[0] -= 6;
> +            }
> +            if (g1[0] < 0)
> +                g1[0] = 0;
> +            smooth_coef = 0.25;
> +            i = 3;
> +        }
> +        ga[0] = qcelp_g12ga[g1[0]];

> +        gain_memory = FFABS(q->last_codebook_gain);

This is almost qcelp_g12ga[q->prev_g1[1]].
I assume the "almost" is not just a bug and it was intended to be always?

> +
> +        q->last_codebook_gain =
> +                      gain[i] =
> +                        ga[0] = 0.5 * (gain_memory + ga[0]);
> +
> +        smooth_coef *= (ga[0] - gain_memory);

storing in ga[0] is redundant

> +        // This interpolation is done to produce smoother background noise.
> +        for (; i > 0; i--)
> +            gain[i-1] = gain_memory + smooth_coef * i;
> +
> +        q->prev_g1[0] = q->prev_g1[1];
> +        q->prev_g1[1] = g1[0];
> +    }
> +    return 0;
> +}
> +

> +/**
> + * Computes the scaled codebook vector Cdn From INDEX and GAIN
> + * for all rates.
> + *
> + * The specification lacks some information here.
> + *
> + * TIA/EIA/IS-733 has an omission on the codebook index determination
> + * formula for RATE_FULL and RATE_HALF frames at section 2.4.8.1.1. It says
> + * you have to subtract the decoded index parameter from the given scaled
> + * codebook vector index 'n' to get the desired circular codebook index, but
> + * it does not mention that you have to clamp 'n' to [0-9] in order to get
> + * RI-compliant results.
> + *
> + * The reason for this mistake seems to be the fact they forgot to mention you
> + * have to do these calculations per codebook subframe and adjust given
> + * equation values accordingly.
> + *
> + * @param q the context
> + * @param gain array holding the 4 pitch subframe gain values
> + * @param cdn_vector array for the generated scaled codebook vector
> + */
> +static void compute_svector(const QCELPContext *q,
> +                            const float *gain,
> +                            float *cdn_vector) {
> +    int      i, j, k;
> +    uint16_t cbseed, cindex;
> +    float    *rnd, tmp_gain, fir_filter_value;
> +
> +    switch (q->framerate) {
> +    case RATE_FULL:
> +        for (i = 0; i < 16; i++) {
> +            tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
> +            cindex = -q->cindex[i];
> +            for (j = 0; j < 10; j++)
> +                *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cindex++ & 127];
> +        }
> +        break;
> +    case RATE_HALF:
> +        for (i = 0; i < 4; i++) {
> +            tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
> +            cindex = -q->cindex[i];
> +            for (j = 0; j < 40; j++)
> +                *cdn_vector++ = tmp_gain * qcelp_rate_half_codebook[cindex++ & 127];
> +        }
> +        break;
> +    case RATE_QUARTER:
> +        cbseed = (0x0003 & q->lspv[4])<<14 |
> +                 (0x003F & q->lspv[3])<< 8 |
> +                 (0x0060 & q->lspv[2])<< 1 |
> +                 (0x0007 & q->lspv[1])<< 3 |
> +                 (0x0038 & q->lspv[0])>> 3 ;
> +        rnd = q->rnd_fir_filter_mem + 20;
> +        for (i = 0; i < 8; i++) {
> +            tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
> +            for (k = 0; k < 20; k++) {
> +                cbseed = 521 * cbseed + 259;
> +                *rnd = (int16_t)cbseed;
> +
> +                // FIR filter
> +                fir_filter_value = 0.0;
> +                for (j = 0; j < 10; j++)
> +                    fir_filter_value += qcelp_rnd_fir_coefs[j ] * (rnd[-j ] + rnd[-20+j]);
> +                fir_filter_value     += qcelp_rnd_fir_coefs[10] *  rnd[-10];
> +
> +                *cdn_vector++ = tmp_gain * fir_filter_value;
> +                rnd++;
> +            }
> +        }
> +        memcpy(q->rnd_fir_filter_mem, q->rnd_fir_filter_mem + 160, 20 * sizeof(float));
> +        break;
> +    case RATE_OCTAVE:
> +        cbseed = q->first16bits;
> +        for (i = 0; i < 8; i++) {
> +            tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
> +            for (j = 0; j < 20; j++) {
> +                cbseed = 521 * cbseed + 259;
> +                *cdn_vector++ = tmp_gain * (int16_t)cbseed;
> +            }
> +        }
> +        break;
> +    case I_F_Q:
> +        cbseed = -44; // random codebook index
> +        for (i = 0; i < 4; i++) {
> +            tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
> +            for (j = 0; j < 40; j++)
> +                *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cbseed++ & 127];
> +        }
> +        break;
> +    }
> +}
> +
> +/**
> + * Apply generic gain control.
> + *
> + * @param v_out output vector
> + * @param v_in gain-controlled vector
> + * @param v_ref vector to control gain of
> + *
> + * FIXME: If v_ref is a zero vector, its energy is zero
> + *        and the behavior of the gain control is 
> + *        undefined in the specs.
> + *
> + * TIA/EIA/IS-733 2.4.8.3-2/3/4/5, 2.4.8.6
> + */
> +static void apply_gain_ctrl(float *v_out,
> +                            const float *v_ref,
> +                            const float *v_in) {
> +    int   i, j, len;
> +    float scalefactor;
> +
> +    for (i = 0, j = 0; i < 4; i++) {
> +        scalefactor = ff_dot_productf(v_in + j, v_in + j, 40);
> +        if (scalefactor)
> +            scalefactor = sqrt(ff_dot_productf(v_ref + j, v_ref + j, 40) / scalefactor);
> +        else
> +            av_log_missing_feature(NULL, "Zero energy for gain control", 1);
> +        for (len = j + 40; j < len; j++)
> +            v_out[j] = scalefactor * v_in[j];
> +    }
> +}

ok

> +
> +/**
>   * Apply filter in pitch-subframe steps.
>   *
>   * @param memory buffer for the previous state of the filter
> @@ -104,6 +425,70 @@
>  }
>  

>  /**
> + * Apply pitch synthesis filter and pitch prefilter to the scaled codebook vector.
> + * TIA/EIA/IS-733 2.4.5.2
> + *
> + * @param q the context
> + * @param cdn_vector the scaled codebook vector
> + *
> + * @return 0 on success, -1 if the lag is out of range
> + */
> +static int apply_pitch_filters(QCELPContext *q,
> +                               float *cdn_vector) {
> +    int         i;
> +    float       gain[4];
> +    const float *v_synthesis_filtered, *v_pre_filtered;
> +
> +    if (q->framerate >= RATE_HALF ||
> +       (q->framerate == I_F_Q && (q->prev_framerate >= RATE_HALF))) {
> +
> +        if (q->framerate >= RATE_HALF) {
> +
> +            // Compute gain & lag for the whole frame.
> +            for (i = 0; i < 4; i++) {
> +                gain[i] = q->plag[i] ? (q->pgain[i] + 1) / 4.0 : 0.0;
> +
> +                q->plag[i] += 16;
> +

> +                if (q->pfrac[i] && q->plag[i] >= 140)
> +                    return -1;

I am thinking that such bitstream checks should be closer to the bitstream
decoding.
This would also allow this function to have no return value.

[...]

> @@ -131,9 +516,9 @@
>  
>      if (weight != 1.0) {
>          weighted_vector_sumf(interpolated_lspf, curr_lspf, q->prev_lspf, weight, 1.0 - weight, 10);
> -        lspf2lpc(q, interpolated_lspf, lpc);
> +        qcelp_lspf2lpc(interpolated_lspf, lpc);
>      } else if (q->framerate >= RATE_QUARTER || (q->framerate == I_F_Q && !subframe_num))
> -        lspf2lpc(q, curr_lspf, lpc);
> +        qcelp_lspf2lpc(curr_lspf, lpc);
>  }
>  
>  static int buf_size2framerate(const int buf_size) {

ok

> @@ -152,11 +537,140 @@
>      return -1;
>  }
>  
> +/*
> + * Determine the framerate from the frame size and/or the first byte of the frame.
> + *
> + * @param avctx the AV codec context
> + * @param buf_size length of the buffer
> + * @param buf the buffer
> + *
> + * @return the framerate on success, RATE_UNKNOWN otherwise.
> + */
> +static int determine_framerate(AVCodecContext *avctx,
> +                               const int buf_size,
> +                               uint8_t **buf) {
> +    qcelp_packet_rate framerate;
> +
> +    if ((framerate = buf_size2framerate(buf_size)) >= 0) {
> +        if (framerate != **buf) {

I am not sure, but didn't you at some point reorder the enum?
If so, how can this code be correct both before and afterwards?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If a bugfix only changes things apparently unrelated to the bug with no
further explanation, that is a good sign that the bugfix is wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081114/2a5a56c2/attachment.pgp>