[FFmpeg-devel] [PATCH] QCELP decoder

Wed Nov 12 18:05:02 CET 2008

On Nov 11, 2008, at 10:00 AM, Michael Niedermayer wrote:

> On Sun, Nov 09, 2008 at 09:49:20PM -0800, Kenan Gillet wrote:
>> Hi,
>> On Nov 8, 2008, at 3:57 PM, Michael Niedermayer wrote:
> [...]
>> New patch attached which makes it round 9:
>> - above changes
>> - the qcelp_g12ga changed back to float because it seems
>>         see other email for benchmark
> [...]
>
>> Index: libavcodec/qcelpdata.h
>> ===================================================================
>> --- libavcodec/qcelpdata.h	(revision 15797)
>> +++ libavcodec/qcelpdata.h	(working copy)

[...]

>> +
>> +/**
>> + * the upper boundary of the clipping, depends on QCELP_SCALE
>> + */
>> +#define QCELP_CLIP_UPPER_BOUND (8191.75/8192.)
>> +
>> +/**
>> + * the lower boundary of the clipping, depends on QCELP_SCALE
>> + */
>> +#define QCELP_CLIP_LOWER_BOUND -1.
>> +
>

[...]

>> @@ -428,4 +519,29 @@
>> };
>> #define QCELP_RATE_HALF_CODEBOOK_RATIO 0.5
>>
>> +#define QCELP_SQRT1887 1.373681186
>
> this should get a comment explaining what the value is

done

[...]

>> +
>> +#define QCELP_LSP_SPREAD_FACTOR 0.02
>> +
>
>> +/**
>> + * predictor coefficient for the conversion of LSP codes to LSP  
>> frequencies
>> + * for RATE_OCTAVE and I_F_Q
>> + * TIA/EIA/IS-733 2.4.3.2.7-2
>> + */
>> +#define QCELP_LSP_OCTAVE_PREDICTOR 0.90625
>
> 29.0/32

done

>
>> +
>> #endif /* AVCODEC_QCELPDATA_H */
>
>
>> Index: libavcodec/qcelpdec.c
>> ===================================================================
>> --- libavcodec/qcelpdec.c	(revision 15797)
>> +++ libavcodec/qcelpdec.c	(working copy)
>> @@ -28,6 +28,7 @@
>>
>> #include "avcodec.h"
>> #include "bitstream.h"
>> +#include "libavutil/common.h"
>>
>> #include "qcelp.h"
>> #include "qcelpdata.h"
>> @@ -52,6 +53,353 @@
>> }
>>
>> /**
>> + * Initialize the speech codec according to the specification.
>> + *
>> + * TIA/EIA/IS-733 2.4.9
>> + */
>> +static av_cold int qcelp_decode_init(AVCodecContext *avctx) {
>> +    QCELPContext *q = avctx->priv_data;
>> +    int i;
>> +
>> +    avctx->sample_fmt = SAMPLE_FMT_FLT;
>> +
>> +    for (i = 0; i < 10; i++)
>> +        q->prev_lspf[i] = (i + 1) / 11.;
>> +
>
>> +    q->bandwith_expansion_coeff[0] = - 
>> QCELP_BANDWITH_EXPANSION_COEFF;
>> +    for (i = 1; i < 10; i++) {
>> +        q->bandwith_expansion_coeff[i] = q- 
>> >bandwith_expansion_coeff[i-1]
>> +                                       *  
>> QCELP_BANDWITH_EXPANSION_COEFF;
>
> this seem to be 10 constants, thus they do not need to be in each  
> context

yes, will move it back to qcelp_lsp and will benchmark it.

>> +    }
>> +    return 0;
>> +}
>> +
>> +/**
>> + * Decodes the 10 quantized LSP frequencies from the LSPV/LSP
>> + * transmission codes of any framerate and checks for badly  
>> received packets.
>> + *
>> + * @param q the context
>> + * @param lspf line spectral pair frequencies
>> + *
>> + * @return 0 on success, -1 if the packet is badly received
>> + *
>> + * TIA/EIA/IS-733 2.4.3.2.6.2-2, 2.4.8.7.3
>> + */
>> +static int decode_lspf(QCELPContext *q,
>> +                       float *lspf) {
>> +    int i;
>> +    float tmp_lspf;
>> +
>> +    if (q->framerate == RATE_OCTAVE ||
>> +        q->framerate == I_F_Q) {
>> +        float smooth;
>
>> +        const float *predictors = (q->prev_framerate !=  
>> RATE_OCTAVE ||
>> +                                   q->prev_framerate != I_F_Q ? q- 
>> >prev_lspf
>> +                                                              : q- 
>> >predictor_lspf);
>
> hmmmmmmm
> hmm
>
> Which value is not unequal to either RATE_OCTAVE or I_F_Q ?

q->prev_framerate : framerate of the previous frame
q->framerate: framerate of the current frame
:)

The predictor changes based on the rate of the previous frame.

>
>
> Are you testing the code in each patch iteration?

I check the WAV output of all the test files I have for every  
commit in my svn.

>
>
> [...]
>> +//START_TIMER;
>
> i think this should be removed

done.
I was interested to know whether the benchmark was placed  
correctly :)

>
>
>
> [...]
>> +/**
>> + * Computes the scaled codebook vector Cdn From INDEX and GAIN
>> + * for all rates.
>> + *
>> + * The specification lacks some information here.
>> + *
>> + * TIA/EIA/IS-733 has an omission on the codebook index  
>> determination
>> + * formula for RATE_FULL and RATE_HALF frames at section  
>> 2.4.8.1.1. It says
>> + * you have to subtract the decoded index parameter from the given  
>> scaled
>> + * codebook vector index 'n' to get the desired circular codebook  
>> index, but
>> + * it does not mention that you have to clamp 'n' to [0-9] in  
>> order to get
>> + * RI-compliant results.
>> + *
>> + * The reason for this mistake seems to be the fact they forgot to  
>> mention you
>> + * have to do these calculations per codebook subframe and adjust  
>> given
>> + * equation values accordingly.
>> + *
>> + * @param q the context
>> + * @param gain array holding the 4 pitch subframe gain values
>
>> + * @param cdn_vector array for the generated scaled codebook vector
>
> what does cdn stand for?

it is the abbreviation cd(n) for the scaled codebook vector in the  
specs.
Should we change it to something like scaled_codebook_vector ?
what do you think Reynaldo?

>> + */
>
>> +static void compute_svector(const QCELPContext *q,
>> +                            const float *gain,
>> +                            float *cdn_vector) {
>> +    int      i, j, k;
>> +    uint16_t cbseed, cindex;
>> +    float    *rnd, tmp_gain, fir_filter_value;
>> +
>> +    switch (q->framerate) {
>> +    case RATE_FULL:
>> +        for (i = 0; i < 16; i++) {
>> +            tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
>> +            cindex = -q->cindex[i];
>> +            for (j = 0; j < 10; j++)
>> +                *cdn_vector++ = tmp_gain *  
>> qcelp_rate_full_codebook[cindex++ & 127];
>> +        }
>> +        break;
>> +    case RATE_HALF:
>> +        for (i = 0; i < 4; i++) {
>> +            tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
>> +            cindex = -q->cindex[i];
>> +            for (j = 0; j < 40; j++)
>> +                *cdn_vector++ = tmp_gain *  
>> qcelp_rate_half_codebook[cindex++ & 127];
>> +        }
>> +        break;
>> +    case RATE_QUARTER:
>> +        cbseed = (0x0003 & q->lspv[4])<<14 |
>> +                 (0x003F & q->lspv[3])<< 8 |
>> +                 (0x0060 & q->lspv[2])<< 1 |
>> +                 (0x0007 & q->lspv[1])<< 3 |
>> +                 (0x0038 & q->lspv[0])>> 3 ;
>> +        rnd = q->fir_filter_mem + 20;
>> +        for (i = 0; i < 8; i++) {
>> +            tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
>> +            for (k = 0; k < 20; k++) {
>> +                cbseed = 521 * cbseed + 259;
>> +                *rnd = (int16_t)cbseed;
>> +
>> +                // FIR filter
>> +                fir_filter_value = 0.0;
>> +                for (j = 0; j < 10; j++)
>> +                    fir_filter_value += qcelp_rnd_fir_coefs[j ] *  
>> (rnd[-j ] + rnd[-20+j]);
>> +                fir_filter_value     += qcelp_rnd_fir_coefs[10] *   
>> rnd[-10];
>> +
>> +                *cdn_vector++ = tmp_gain * fir_filter_value;
>> +                rnd++;
>> +            }
>> +        }
>
>> +        memcpy(q->fir_filter_mem, q->fir_filter_mem + 160, 20 *  
>> sizeof(float));
>
> rnd_fir_filter_mem

done

>> +        break;
>> +    case RATE_OCTAVE:
>> +        cbseed = q->first16bits;
>> +        for (i = 0; i < 8; i++) {
>> +            tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
>> +            for (j = 0; j < 20; j++) {
>> +                cbseed = 521 * cbseed + 259;
>> +                *cdn_vector++ = tmp_gain * (int16_t)cbseed;
>> +            }
>> +        }
>> +        break;
>> +    case I_F_Q:
>> +        cbseed = -44; // random codebook index
>> +        for (i = 0; i < 4; i++) {
>> +            tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
>> +            for (j = 0; j < 40; j++)
>> +                *cdn_vector++ = tmp_gain *  
>> qcelp_rate_full_codebook[cbseed++ & 127];
>> +        }
>> +        break;
>> +    }
>> +}
>> +
>
>> +/**
>> + * Apply generic gain control.
>> + *
>> + * @param v_out output vector
>> + * @param v_in gain-controlled vector
>> + * @param v_ref vector to control gain of
>> + *
>> + * TIA/EIA/IS-733 2.4.8.3-2/3/4/5, 2.4.8.6
>> + */
>> +static void apply_gain_ctrl(float *v_out,
>> +                            const float *v_ref,
>> +                            const float *v_in) {
>> +    int   i, j, len;
>> +    float scalefactor;
>> +
>> +    for (i = 0, j = 0; i < 4; i++) {
>> +        scalefactor = ff_dot_productf(v_in + j, v_in + j, 40);
>
>> +        if (scalefactor) {
>> +            scalefactor = sqrt(ff_dot_productf(v_ref + j, v_ref +  
>> j, 40) / scalefactor);
>> +            for (len = j + 40; j < len; j++)
>> +                v_out[j] = scalefactor * v_in[j];
>> +        } else {
>> +            memset(v_out + j, 0,  40 * sizeof(float));
>> +            j += 40;
>> +        }
>
> assuming this is correct

it is undefined in the specs, and the reference code sets  
scalefactor
  only if both dot products are nonzero, but still assigns
v_out[j] = v_in[j] * scalefactor
  with the unassigned scalefactor :(

>
> if (scalefactor)
>    scalefactor = sqrt(ff_dot_productf(v_ref + j, v_ref + j, 40) /  
> scalefactor);
> for (len = j + 40; j < len; j++)
>    v_out[j] = scalefactor * v_in[j];
>

done

>> +    }
>> +}
>> +
>> +/**
>>  * Apply filter in pitch-subframe steps.
>>  *
>>  * @param memory buffer for the previous state of the filter
>
>> @@ -103,6 +451,103 @@
>>     return memory + 143;
>> }
>>
>> +/**
>> + * Apply pitch synthesis filter and pitch prefilter to the scaled  
>> codebook vector.
>> + * TIA/EIA/IS-733 2.4.5.2
>> + *
>> + * @param q the context
>> + * @param cdn_vector the scaled codebook vector
>> + *
>> + * @return 0 on success, -1 if the lag is out of range
>> + */
>> +static int apply_pitch_filters(QCELPContext *q,
>> +                               float *cdn_vector) {
>> +    int         i;
>> +    float       gain[4];
>> +    const float *v_synthesis_filtered, *v_pre_filtered;
>> +
>> +    if (q->framerate >= RATE_HALF ||
>> +       (q->framerate == I_F_Q && (q->prev_framerate >=  
>> RATE_HALF))) {
>> +
>> +        if (q->framerate >= RATE_HALF) {
>> +
>> +            // Compute gain & lag for the whole frame.
>> +            for (i = 0; i < 4; i++) {
>> +                gain[i] = q->plag[i] ? (q->pgain[i] + 1) / 4.0 :  
>> 0.0;
>> +
>> +                q->plag[i] += 16;
>> +
>> +                if (q->pfrac[i] && q->plag[i] >= 140)
>> +                    return -1;
>> +            }
>> +            memcpy(q->prev_pitch_lag, q->plag, sizeof(q->plag));
>> +        } else {
>> +            gain[3] = q->erasure_count < 3 ? 0.9 - 0.3 * (q- 
>> >erasure_count - 1)
>> +                                           : 0.0;
>> +            for (i = 0; i < 4; i++)
>> +                gain[i] = FFMIN(q->prev_pitch_gain[i], gain[3]);
>> +
>> +            memset(q->pfrac, 0, sizeof(q->pfrac));
>> +            memcpy(q->plag, q->prev_pitch_lag, sizeof(q->plag));
>> +        }
>> +
>> +        // pitch synthesis filter
>> +        v_synthesis_filtered = do_pitchfilter(q- 
>> >pitch_synthesis_filter_mem, cdn_vector,
>> +                                              gain, q->plag, q- 
>> >pfrac);
>> +
>> +        // pitch prefilter update
>> +        for (i = 0; i < 4; i++)
>> +            gain[i] = 0.5 * FFMIN(gain[i], 1.0);
>> +
>> +        v_pre_filtered = do_pitchfilter(q->pitch_pre_filter_mem,  
>> v_synthesis_filtered,
>> +                                        gain, q->plag, q->pfrac);
>> +
>> +        apply_gain_ctrl(cdn_vector, v_synthesis_filtered,  
>> v_pre_filtered);
>> +
>> +        memcpy(q->prev_pitch_gain, gain, sizeof(q- 
>> >prev_pitch_gain));
>> +
>> +    } else {
>> +        memcpy(q->pitch_synthesis_filter_mem, cdn_vector + 17, 143  
>> * sizeof(float));
>> +        memcpy(q->pitch_pre_filter_mem,       cdn_vector + 17, 143  
>> * sizeof(float));
>> +        memset(q->prev_pitch_gain, 0, sizeof(q->prev_pitch_gain));
>> +        memset(q->prev_pitch_lag,  0, sizeof(q->prev_pitch_lag));
>> +    }
>> +    return 0;
>> +}
>> +
>

[...]

>> @@ -118,8 +563,147 @@
>>     }
>>     return -1;
>> }
>> +/*
>> + * Determine the framerate from the frame size and/or the first  
>> byte of the frame.
>> + *
>> + * @param avctx the AV codec context
>> + * @param buf_size length of the buffer
>> + * @param buf the bufffer
>> + *
>> + * @return the framerate on success, RATE_UNKNOWN otherwise.
>> + */
>> +static int determine_framerate(AVCodecContext *avctx,
>> +                               const int buf_size,
>> +                               uint8_t **buf) {
>> +    qcelp_packet_rate framerate;
>>
>> +    if ((framerate = buf_size2framerate(buf_size)) >= 0) {
>> +        if (framerate != **buf) {
>> +            av_log(avctx, AV_LOG_WARNING, "Claimed framerate and  
>> buffer size mismatch.\n");
>> +            framerate = **buf;
>> +        }
>> +        (*buf)++;
>
>> +    } else if ((framerate = buf_size2framerate(buf_size + 1)) >= 0)
>> +        av_log(avctx, AV_LOG_WARNING,
>> +               "Framerate byte is missing, guessing the framerate  
>> from packet size.\n");
>> +    else
>
> {} could be added here, this simplifies future patches adding lines  
> while it
> does not waste a extra line for }

done

>
>> +        return RATE_UNKNOWN;
>> +
>> +    if (framerate == SILENCE) {
>> +        // FIXME: the decoder should not handle SILENCE frames as  
>> I_F_Q frames
>> +        av_log_missing_feature(avctx, "Blank frame", 1);
>> +        framerate = I_F_Q;
>> +    }
>> +    return framerate;
>> +}
>> +
>> static void warn_insufficient_frame_quality(AVCodecContext *avctx,
>>                                             const char *message) {
>>     av_log(avctx, AV_LOG_WARNING, "Frame #%d, IFQ: %s\n", avctx- 
>> >frame_number, message);
>> }
>> +
>> +static int qcelp_decode_frame(AVCodecContext *avctx,
>> +                              void *data,
>> +                              int *data_size,
>> +                              uint8_t *buf,
>> +                              const int buf_size) {
>> +    QCELPContext      *q = avctx->priv_data;
>> +    float             *outbuffer = data;
>> +    int               i;
>
>> +    float             qtzd_lspf[10], lpc[10];
>
> qtzd means what? quantized? please dont abbreviate it.

I don't know; I suppose it could be changed to just lspf?
Reynaldo, do you have a preference?

>> +    float             gain[16];
>> +    float             *formant_mem;
>> +
>> +    if ((q->framerate = determine_framerate(avctx, buf_size,  
>> &buf)) == RATE_UNKNOWN) {
>> +        av_log(avctx, AV_LOG_ERROR, "Frame #%d: Unknown framerate,  
>> unsupported size: %d.\n",
>> +               avctx->frame_number, buf_size);
>> +        return -1;
>> +    }
>> +
>> +    if (q->framerate == RATE_OCTAVE &&
>> +       (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
>> +        warn_insufficient_frame_quality(avctx, "Framerate is 1/8  
>> and first 16 bits are on.");
>> +        goto erasure;
>> +    }
>> +
>
>> +    if (q->framerate > 0) {
>
> maybe 0 should be a named value from the enum

done