[FFmpeg-devel] [PATCH] QCELP decoder

Tue Nov 11 19:00:08 CET 2008

On Sun, Nov 09, 2008 at 09:49:20PM -0800, Kenan Gillet wrote:
> Hi,
> On Nov 8, 2008, at 3:57 PM, Michael Niedermayer wrote:
[...]
> New patch attached which makes it round 9:
> - above changes
> - the qcelp_g12ga changed back to float because it seems
>          see other email for benchmark
[...]

> Index: libavcodec/qcelpdata.h
> ===================================================================
> --- libavcodec/qcelpdata.h	(revision 15797)
> +++ libavcodec/qcelpdata.h	(working copy)
[...]
> @@ -378,7 +426,50 @@
>      qcelp_lspvq5
>  };
>  
> +
>  /**
> + * the final gain scalefactor before clipping into a usable output float
> + */
> +#define QCELP_SCALE 8192.

ok

> +
> +/**
> + * the upper boundary of the clipping, depends on QCELP_SCALE
> + */
> +#define QCELP_CLIP_UPPER_BOUND (8191.75/8192.)
> +
> +/**
> + * the lower boundary of the clipping, depends on QCELP_SCALE
> + */
> +#define QCELP_CLIP_LOWER_BOUND -1.
> +

> +/**
> + * table for computing Ga (decoded linear codebook gain magnitude)
> + *
> + * @note The table could fit in int16_t in x*8 form, but it seems
> + *       to be slower on x86
> + *
> + * TIA/EIA/IS-733 2.4.6.2.1-3
> + */
> +
> +static const float qcelp_g12ga[61] = {
> +    1.000/QCELP_SCALE,   1.125/QCELP_SCALE,   1.250/QCELP_SCALE,   1.375/QCELP_SCALE,
> +    1.625/QCELP_SCALE,   1.750/QCELP_SCALE,   2.000/QCELP_SCALE,   2.250/QCELP_SCALE,
> +    2.500/QCELP_SCALE,   2.875/QCELP_SCALE,   3.125/QCELP_SCALE,   3.500/QCELP_SCALE,
> +    4.000/QCELP_SCALE,   4.500/QCELP_SCALE,   5.000/QCELP_SCALE,   5.625/QCELP_SCALE,
> +    6.250/QCELP_SCALE,   7.125/QCELP_SCALE,   8.000/QCELP_SCALE,   8.875/QCELP_SCALE,
> +   10.000/QCELP_SCALE,  11.250/QCELP_SCALE,  12.625/QCELP_SCALE,  14.125/QCELP_SCALE,
> +   15.875/QCELP_SCALE,  17.750/QCELP_SCALE,  20.000/QCELP_SCALE,  22.375/QCELP_SCALE,
> +   25.125/QCELP_SCALE,  28.125/QCELP_SCALE,  31.625/QCELP_SCALE,  35.500/QCELP_SCALE,
> +   39.750/QCELP_SCALE,  44.625/QCELP_SCALE,  50.125/QCELP_SCALE,  56.250/QCELP_SCALE,
> +   63.125/QCELP_SCALE,  70.750/QCELP_SCALE,  79.375/QCELP_SCALE,  89.125/QCELP_SCALE,
> +  100.000/QCELP_SCALE, 112.250/QCELP_SCALE, 125.875/QCELP_SCALE, 141.250/QCELP_SCALE,
> +  158.500/QCELP_SCALE, 177.875/QCELP_SCALE, 199.500/QCELP_SCALE, 223.875/QCELP_SCALE,
> +  251.250/QCELP_SCALE, 281.875/QCELP_SCALE, 316.250/QCELP_SCALE, 354.875/QCELP_SCALE,
> +  398.125/QCELP_SCALE, 446.625/QCELP_SCALE, 501.125/QCELP_SCALE, 563.375/QCELP_SCALE,
> +  631.000/QCELP_SCALE, 708.000/QCELP_SCALE, 794.375/QCELP_SCALE, 891.250/QCELP_SCALE,
> + 1000.000/QCELP_SCALE};
> +
> +/**
>   * circular codebook for rate 1 frames in x*100 form
>   *
>   * TIA/EIA/IS-733 2.4.6.1-2

ok

> @@ -428,4 +519,29 @@
>  };
>  #define QCELP_RATE_HALF_CODEBOOK_RATIO 0.5
>  
> +#define QCELP_SQRT1887 1.373681186

this should get a comment explaining what the value is

> +
> +/**
> + * table for impulse response of BPF used to filter
> + * the white excitation for framerate 1/4 synthesis
> + *
> + * Only half the tables are needed because of symetry.
> + *
> + * TIA/EIA/IS-733 2.4.8.1.2-1.1
> + */
> +static const double qcelp_rnd_fir_coefs[11] = {
> +  -1.344519e-1, 1.735384e-2, -6.905826e-2, 2.434368e-2,
> +  -8.210701e-2, 3.041388e-2, -9.251384e-2, 3.501983e-2,
> +  -9.918777e-2, 3.749518e-2,  8.985137e-1
> +};

ok

> +
> +#define QCELP_LSP_SPREAD_FACTOR 0.02
> +

> +/**
> + * predictor coefficient for the conversion of LSP codes to LSP frequencies
> + * for RATE_OCTAVE and I_F_Q
> + * TIA/EIA/IS-733 2.4.3.2.7-2
> + */
> +#define QCELP_LSP_OCTAVE_PREDICTOR 0.90625

29.0/32

> +
>  #endif /* AVCODEC_QCELPDATA_H */

> Index: libavcodec/qcelpdec.c
> ===================================================================
> --- libavcodec/qcelpdec.c	(revision 15797)
> +++ libavcodec/qcelpdec.c	(working copy)
> @@ -28,6 +28,7 @@
>  
>  #include "avcodec.h"
>  #include "bitstream.h"
> +#include "libavutil/common.h"
>  
>  #include "qcelp.h"
>  #include "qcelpdata.h"
> @@ -52,6 +53,353 @@
>  }
>  
>  /**
> + * Initialize the speech codec according to the specification.
> + *
> + * TIA/EIA/IS-733 2.4.9
> + */
> +static av_cold int qcelp_decode_init(AVCodecContext *avctx) {
> +    QCELPContext *q = avctx->priv_data;
> +    int i;
> +
> +    avctx->sample_fmt = SAMPLE_FMT_FLT;
> +
> +    for (i = 0; i < 10; i++)
> +        q->prev_lspf[i] = (i + 1) / 11.;
> +

> +    q->bandwith_expansion_coeff[0] = -QCELP_BANDWITH_EXPANSION_COEFF;
> +    for (i = 1; i < 10; i++) {
> +        q->bandwith_expansion_coeff[i] = q->bandwith_expansion_coeff[i-1]
> +                                       * QCELP_BANDWITH_EXPANSION_COEFF;

this seem to be 10 constants, thus they do not need to be in each context

> +    }
> +    return 0;
> +}
> +
> +/**
> + * Decodes the 10 quantized LSP frequencies from the LSPV/LSP
> + * transmission codes of any framerate and checks for badly received packets.
> + *
> + * @param q the context
> + * @param lspf line spectral pair frequencies
> + *
> + * @return 0 on success, -1 if the packet is badly received
> + *
> + * TIA/EIA/IS-733 2.4.3.2.6.2-2, 2.4.8.7.3
> + */
> +static int decode_lspf(QCELPContext *q,
> +                       float *lspf) {
> +    int i;
> +    float tmp_lspf;
> +
> +    if (q->framerate == RATE_OCTAVE ||
> +        q->framerate == I_F_Q) {
> +        float smooth;

> +        const float *predictors = (q->prev_framerate != RATE_OCTAVE ||
> +                                   q->prev_framerate != I_F_Q ? q->prev_lspf
> +                                                              : q->predictor_lspf);

hmmmmmmm
hmm

Which value is not unequal to either RATE_OCTAVE or I_F_Q ?

Are you testing the code in each patch iteration?

[...]
> +//START_TIMER;

i think this should be removed

[...]
> +/**
> + * Computes the scaled codebook vector Cdn From INDEX and GAIN
> + * for all rates.
> + *
> + * The specification lacks some information here.
> + *
> + * TIA/EIA/IS-733 has an omission on the codebook index determination
> + * formula for RATE_FULL and RATE_HALF frames at section 2.4.8.1.1. It says
> + * you have to subtract the decoded index parameter from the given scaled
> + * codebook vector index 'n' to get the desired circular codebook index, but
> + * it does not mention that you have to clamp 'n' to [0-9] in order to get
> + * RI-compliant results.
> + *
> + * The reason for this mistake seems to be the fact they forgot to mention you
> + * have to do these calculations per codebook subframe and adjust given
> + * equation values accordingly.
> + *
> + * @param q the context
> + * @param gain array holding the 4 pitch subframe gain values

> + * @param cdn_vector array for the generated scaled codebook vector

what does cdn stand for?

> + */

> +static void compute_svector(const QCELPContext *q,
> +                            const float *gain,
> +                            float *cdn_vector) {
> +    int      i, j, k;
> +    uint16_t cbseed, cindex;
> +    float    *rnd, tmp_gain, fir_filter_value;
> +
> +    switch (q->framerate) {
> +    case RATE_FULL:
> +        for (i = 0; i < 16; i++) {
> +            tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
> +            cindex = -q->cindex[i];
> +            for (j = 0; j < 10; j++)
> +                *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cindex++ & 127];
> +        }
> +        break;
> +    case RATE_HALF:
> +        for (i = 0; i < 4; i++) {
> +            tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
> +            cindex = -q->cindex[i];
> +            for (j = 0; j < 40; j++)
> +                *cdn_vector++ = tmp_gain * qcelp_rate_half_codebook[cindex++ & 127];
> +        }
> +        break;
> +    case RATE_QUARTER:
> +        cbseed = (0x0003 & q->lspv[4])<<14 |
> +                 (0x003F & q->lspv[3])<< 8 |
> +                 (0x0060 & q->lspv[2])<< 1 |
> +                 (0x0007 & q->lspv[1])<< 3 |
> +                 (0x0038 & q->lspv[0])>> 3 ;
> +        rnd = q->fir_filter_mem + 20;
> +        for (i = 0; i < 8; i++) {
> +            tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
> +            for (k = 0; k < 20; k++) {
> +                cbseed = 521 * cbseed + 259;
> +                *rnd = (int16_t)cbseed;
> +
> +                // FIR filter
> +                fir_filter_value = 0.0;
> +                for (j = 0; j < 10; j++)
> +                    fir_filter_value += qcelp_rnd_fir_coefs[j ] * (rnd[-j ] + rnd[-20+j]);
> +                fir_filter_value     += qcelp_rnd_fir_coefs[10] *  rnd[-10];
> +
> +                *cdn_vector++ = tmp_gain * fir_filter_value;
> +                rnd++;
> +            }
> +        }

> +        memcpy(q->fir_filter_mem, q->fir_filter_mem + 160, 20 * sizeof(float));

rnd_fir_filter_mem

> +        break;
> +    case RATE_OCTAVE:
> +        cbseed = q->first16bits;
> +        for (i = 0; i < 8; i++) {
> +            tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
> +            for (j = 0; j < 20; j++) {
> +                cbseed = 521 * cbseed + 259;
> +                *cdn_vector++ = tmp_gain * (int16_t)cbseed;
> +            }
> +        }
> +        break;
> +    case I_F_Q:
> +        cbseed = -44; // random codebook index
> +        for (i = 0; i < 4; i++) {
> +            tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
> +            for (j = 0; j < 40; j++)
> +                *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cbseed++ & 127];
> +        }
> +        break;
> +    }
> +}
> +

> +/**
> + * Apply generic gain control.
> + *
> + * @param v_out output vector
> + * @param v_in gain-controlled vector
> + * @param v_ref vector to control gain of
> + *
> + * TIA/EIA/IS-733 2.4.8.3-2/3/4/5, 2.4.8.6
> + */
> +static void apply_gain_ctrl(float *v_out,
> +                            const float *v_ref,
> +                            const float *v_in) {
> +    int   i, j, len;
> +    float scalefactor;
> +
> +    for (i = 0, j = 0; i < 4; i++) {
> +        scalefactor = ff_dot_productf(v_in + j, v_in + j, 40);

> +        if (scalefactor) {
> +            scalefactor = sqrt(ff_dot_productf(v_ref + j, v_ref + j, 40) / scalefactor);
> +            for (len = j + 40; j < len; j++)
> +                v_out[j] = scalefactor * v_in[j];
> +        } else {
> +            memset(v_out + j, 0,  40 * sizeof(float));
> +            j += 40;
> +        }

assuming this is correct

if (scalefactor)
    scalefactor = sqrt(ff_dot_productf(v_ref + j, v_ref + j, 40) / scalefactor);
for (len = j + 40; j < len; j++)
    v_out[j] = scalefactor * v_in[j];

> +    }
> +}
> +
> +/**
>   * Apply filter in pitch-subframe steps.
>   *
>   * @param memory buffer for the previous state of the filter

> @@ -103,6 +451,103 @@
>      return memory + 143;
>  }
>  
> +/**
> + * Apply pitch synthesis filter and pitch prefilter to the scaled codebook vector.
> + * TIA/EIA/IS-733 2.4.5.2
> + *
> + * @param q the context
> + * @param cdn_vector the scaled codebook vector
> + *
> + * @return 0 on success, -1 if the lag is out of range
> + */
> +static int apply_pitch_filters(QCELPContext *q,
> +                               float *cdn_vector) {
> +    int         i;
> +    float       gain[4];
> +    const float *v_synthesis_filtered, *v_pre_filtered;
> +
> +    if (q->framerate >= RATE_HALF ||
> +       (q->framerate == I_F_Q && (q->prev_framerate >= RATE_HALF))) {
> +
> +        if (q->framerate >= RATE_HALF) {
> +
> +            // Compute gain & lag for the whole frame.
> +            for (i = 0; i < 4; i++) {
> +                gain[i] = q->plag[i] ? (q->pgain[i] + 1) / 4.0 : 0.0;
> +
> +                q->plag[i] += 16;
> +
> +                if (q->pfrac[i] && q->plag[i] >= 140)
> +                    return -1;
> +            }
> +            memcpy(q->prev_pitch_lag, q->plag, sizeof(q->plag));
> +        } else {
> +            gain[3] = q->erasure_count < 3 ? 0.9 - 0.3 * (q->erasure_count - 1)
> +                                           : 0.0;
> +            for (i = 0; i < 4; i++)
> +                gain[i] = FFMIN(q->prev_pitch_gain[i], gain[3]);
> +
> +            memset(q->pfrac, 0, sizeof(q->pfrac));
> +            memcpy(q->plag, q->prev_pitch_lag, sizeof(q->plag));
> +        }
> +
> +        // pitch synthesis filter
> +        v_synthesis_filtered = do_pitchfilter(q->pitch_synthesis_filter_mem, cdn_vector,
> +                                              gain, q->plag, q->pfrac);
> +
> +        // pitch prefilter update
> +        for (i = 0; i < 4; i++)
> +            gain[i] = 0.5 * FFMIN(gain[i], 1.0);
> +
> +        v_pre_filtered = do_pitchfilter(q->pitch_pre_filter_mem, v_synthesis_filtered,
> +                                        gain, q->plag, q->pfrac);
> +
> +        apply_gain_ctrl(cdn_vector, v_synthesis_filtered, v_pre_filtered);
> +
> +        memcpy(q->prev_pitch_gain, gain, sizeof(q->prev_pitch_gain));
> +
> +    } else {
> +        memcpy(q->pitch_synthesis_filter_mem, cdn_vector + 17, 143 * sizeof(float));
> +        memcpy(q->pitch_pre_filter_mem,       cdn_vector + 17, 143 * sizeof(float));
> +        memset(q->prev_pitch_gain, 0, sizeof(q->prev_pitch_gain));
> +        memset(q->prev_pitch_lag,  0, sizeof(q->prev_pitch_lag));
> +    }
> +    return 0;
> +}
> +

> +/**
> + * Interpolates LSP frequencies and computes LPC coefficients
> + * for a given framerate & pitch subframe.
> + *
> + * TIA/EIA/IS-733 2.4.3.3.4
> + *
> + * @param q the context
> + * @param curr_lspf LSP frequencies vector of the current frame
> + * @param lpc float vector for the resulting LPC
> + * @param subframe_num frame number in decoded stream
> + */
> +void interpolate_lpc(QCELPContext *q,
> +                     const float *curr_lspf,
> +                     float *lpc,
> +                     const int subframe_num) {
> +    float interpolated_lspf[10];
> +    float weight;
> +
> +    if (q->framerate >= RATE_QUARTER) {
> +        weight = 0.25 * (subframe_num + 1);
> +    } else if (q->framerate == RATE_OCTAVE && !subframe_num) {
> +        weight = 0.625;
> +    } else {
> +        weight = 1.0;
> +    }
> +
> +    if (weight != 1.0) {
> +        weighted_vector_sumf(interpolated_lspf, curr_lspf, q->prev_lspf, weight, 1.0 - weight, 10);
> +        lspf2lpc(q, interpolated_lspf, lpc);
> +    } else if (q->framerate >= RATE_QUARTER || (q->framerate == I_F_Q && !subframe_num))
> +        lspf2lpc(q, curr_lspf, lpc);
> +}
> +
>  static int buf_size2framerate(const int buf_size) {
>      switch (buf_size) {
>      case 35:

ok

> @@ -118,8 +563,147 @@
>      }
>      return -1;
>  }
> +/*
> + * Determine the framerate from the frame size and/or the first byte of the frame.
> + *
> + * @param avctx the AV codec context
> + * @param buf_size length of the buffer
> + * @param buf the bufffer
> + *
> + * @return the framerate on success, RATE_UNKNOWN otherwise.
> + */
> +static int determine_framerate(AVCodecContext *avctx,
> +                               const int buf_size,
> +                               uint8_t **buf) {
> +    qcelp_packet_rate framerate;
>  
> +    if ((framerate = buf_size2framerate(buf_size)) >= 0) {
> +        if (framerate != **buf) {
> +            av_log(avctx, AV_LOG_WARNING, "Claimed framerate and buffer size mismatch.\n");
> +            framerate = **buf;
> +        }
> +        (*buf)++;

> +    } else if ((framerate = buf_size2framerate(buf_size + 1)) >= 0)
> +        av_log(avctx, AV_LOG_WARNING,
> +               "Framerate byte is missing, guessing the framerate from packet size.\n");
> +    else

{} could be added here, this simplifies future patches adding lines while it
does not waste a extra line for }

> +        return RATE_UNKNOWN;
> +
> +    if (framerate == SILENCE) {
> +        // FIXME: the decoder should not handle SILENCE frames as I_F_Q frames
> +        av_log_missing_feature(avctx, "Blank frame", 1);
> +        framerate = I_F_Q;
> +    }
> +    return framerate;
> +}
> +
>  static void warn_insufficient_frame_quality(AVCodecContext *avctx,
>                                              const char *message) {
>      av_log(avctx, AV_LOG_WARNING, "Frame #%d, IFQ: %s\n", avctx->frame_number, message);
>  }
> +
> +static int qcelp_decode_frame(AVCodecContext *avctx,
> +                              void *data,
> +                              int *data_size,
> +                              uint8_t *buf,
> +                              const int buf_size) {
> +    QCELPContext      *q = avctx->priv_data;
> +    float             *outbuffer = data;
> +    int               i;

> +    float             qtzd_lspf[10], lpc[10];

qtzd means what? quantized? please dont abbreviate it.

> +    float             gain[16];
> +    float             *formant_mem;
> +
> +    if ((q->framerate = determine_framerate(avctx, buf_size, &buf)) == RATE_UNKNOWN) {
> +        av_log(avctx, AV_LOG_ERROR, "Frame #%d: Unknown framerate, unsupported size: %d.\n",
> +               avctx->frame_number, buf_size);
> +        return -1;
> +    }
> +
> +    if (q->framerate == RATE_OCTAVE &&
> +       (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
> +        warn_insufficient_frame_quality(avctx, "Framerate is 1/8 and first 16 bits are on.");
> +        goto erasure;
> +    }
> +

> +    if (q->framerate > 0) {

maybe 0 should be a named value from the enum

[...]
> +AVCodec qcelp_decoder =
> +{
> +    .name   = "qcelp",
> +    .type   = CODEC_TYPE_AUDIO,
> +    .id     = CODEC_ID_QCELP,
> +    .init   = qcelp_decode_init,
> +    .decode = qcelp_decode_frame,
> +    .priv_data_size = sizeof(QCELPContext),
> +    .long_name = NULL_IF_CONFIG_SMALL("QCELP / PureVoice"),
> +};

ok

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081111/e6f20b1e/attachment.pgp>