[FFmpeg-devel] [PATCH] QCELP decoder

Fri Nov 21 02:08:33 CET 2008

On Thu, Nov 20, 2008 at 10:46:49AM -0800, Kenan Gillet wrote:
> On Sat, Nov 15, 2008 at 3:10 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> > On Fri, Nov 14, 2008 at 03:32:51PM -0800, Kenan Gillet wrote:
> >> Hi,
> >> On Fri, Nov 14, 2008 at 2:27 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> >> > On Fri, Nov 14, 2008 at 12:17:50PM -0800, Kenan Gillet wrote:
> >> >>
> >> >> On Nov 14, 2008, at 2:14 AM, Michael Niedermayer wrote:
[...]
> >> +
> >> +/// @defgroup qcelp_unpacked_data_frame QCELP unpacked data frame
> >> +/// @{
> >> +    uint8_t           cbsign[16];
> >
> >> +    uint8_t           cbgain[16];
> >> +    uint8_t           cindex[16];
> >> +    uint8_t           plag[4];
> >> +    uint8_t           pfrac[4];
> >> +    uint8_t           pgain[4];
> >> +    uint8_t           lspv[10];               /*!< LSP for RATE_OCTAVE, LSPV for other rates */
> >> +    uint8_t           reserved;               /*!< on all but rate 1/2 packets */
> >> +/// @}
> >> +
> >> +    uint8_t           erasure_count;
> >> +    uint8_t           octave_count;           /*!< count the consecutive RATE_OCTAVE frames */
> >> +    float             prev_lspf[10];
> >> +    float             predictor_lspf[10];     /*!< LSP predictor,
> >> +                                                  only use for RATE_OCTAVE and I_F_Q */
> >> +    float             pitch_synthesis_filter_mem[303];
> >> +    float             pitch_pre_filter_mem[303];
> >> +    float             rnd_fir_filter_mem[180];
> >> +    float             formant_mem[170];
> >> +    float             last_codebook_gain;
> >> +    int               prev_g1[2];
> >> +    int               prev_framerate;
> >> +    float             prev_pitch_gain[4];
> >> +    uint8_t           prev_pitch_lag[4];
> >> +    uint16_t          first16bits;
> >> +} QCELPContext;
> >
> > I somehow think this struct does not belong in qcelpdata.h
> > but rather in qcelpdec.c
> >
> 
> I agree, but it is needed by the unpacking table.
> should I just put the struct in qcelpdec.c and include qcelpdata.h after ?

ok (maybe diego will want it to be renamed to .c though ...)

[...]
> >
> > [...]
> >
> >
> >>  static void warn_insufficient_frame_quality(AVCodecContext *avctx,
> >>                                              const char *message) {
> >>      av_log(avctx, AV_LOG_WARNING, "Frame #%d, IFQ: %s\n", avctx->frame_number, message);
> >>  }
> >>
> >> +static int qcelp_decode_frame(AVCodecContext *avctx,
> >> +                              void *data,
> >> +                              int *data_size,
> >> +                              uint8_t *buf,
> >> +                              const int buf_size) {
> >> +    QCELPContext      *q = avctx->priv_data;
> >> +    float             *outbuffer = data;
> >> +    int               i;
> >> +    float             quantized_lspf[10], lpc[10];
> >> +    float             gain[16];
> >> +    float             *formant_mem;
> >> +
> >> +    if ((q->framerate = determine_framerate(avctx, buf_size, &buf)) == I_F_Q) {
> >> +        warn_insufficient_frame_quality(avctx, "Framerate cannot be determined.");
> >> +        goto erasure;
> >> +    }
> >> +
> >> +    if (q->framerate == RATE_OCTAVE &&
> >> +       (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
> >> +        warn_insufficient_frame_quality(avctx, "Framerate is 1/8 and first 16 bits are on.");
> >> +        goto erasure;
> >> +    }
> >> +
> >> +    if (q->framerate > SILENCE) {
> >> +        const QCELPBitmap *bitmaps     = qcelp_unpacking_bitmaps_per_rate[q->framerate];
> >> +        const QCELPBitmap *bitmaps_end = qcelp_unpacking_bitmaps_per_rate[q->framerate]
> >> +                                       + qcelp_bits_per_rate[q->framerate];
> >> +        uint8_t           *unpacked_data = (uint8_t *)q;
> >> +
> >
> >> +        init_get_bits(&q->gb, buf, qcelp_bits_per_rate[q->framerate]);
> >
> > qcelp_bits_per_rate does not seem correct here nor does its name seem
> > to match what it contains
> 
> 
> yes changed back to buf_size.
> 
> what about changing qcelp_bits_per_rate  to qcelp_unpacking_bitmaps_per_rate_len
> because it really is the len of the unpacking bitmaps, or do you have
> a better suggestion ?

the suggested name is too long IMHO

[...]
> >
> > [...]
> >> +/**
> >> + * Computes the Pa or Qa coefficients needed for LSP to LPC conversion.
> >> + * We only need to calculate the 6 first elements of the polynomial.
> >> + *
> >> + * @param lspf line spectral pair frequencies
> >> + * @param v_poly polynomial input/output as a vector
> >> + *
> >> + * TIA/EIA/IS-733 2.4.3.3.5-1/2
> >> + */
> >> +static void lsp2poly(const float *lspf,
> >> +                     float *v_poly) {
> >> +    float val, *v;
> >> +    int   i;
> >> +
> >> +    // optimization to simplify calculation in loop
> >> +    v_poly++;
> >> +
> >> +    for (i = 0; i < 10; i += 2) {
> >> +        val = -2 * cos(M_PI * *lspf);
> >> +        lspf += 2;
> >> +        v = v_poly + FFMIN(4, i);
> >> +
> >> +        if (i < 4) {
> >> +            v[2] = v[0];
> >> +            v[1] = v[0] * val + v[-1];
> >> +        }
> >> +        for ( ; v > v_poly; v--)
> >> +            v[0] = v[0]
> >> +                 + v[-1] * val
> >> +                 + v[-2];
> >> +        v[0] += v[-1] * val;
> >> +    }
> >> +}
> >> +
> >> +/**
> >> + * Reconstructs LPC coefficients from the line spectral pair frequencies
> >> + * and performs bandwidth expansion.
> >> + *
> >> + * @param lspf line spectral pair frequencies
> >> + * @param lpc linear predictive coding coefficients
> >> + *
> >> + * @note: bandwith_expansion_coeff could be precalculated into a table
> >> + *        but it seems to be slower on x86
> >> + *
> >> + * TIA/EIA/IS-733 2.4.3.3.5
> >> + */
> >> +void qcelp_lspf2lpc(const float *lspf,
> >> +                    float *lpc) {
> >> +    float pa[6], qa[6];
> >> +    int   i;
> >> +    float bandwith_expansion_coeff = -QCELP_BANDWITH_EXPANSION_COEFF;
> >> +
> >
> >> +    pa[0] = 0.5;
> >> +    pa[1] = 0.5;
> >> +    lsp2poly(lspf, pa);
> >> +
> >> +    qa[0] = 0.5;
> >> +    qa[1] = -0.5;
> >> +    lsp2poly(lspf + 1, qa);
> >
> > it should be faster to deal with 0.5 + 0.5x / 0.5 - 0.5x after building
> > the polynomials
> 
> done
> 
> 
> >
> > anyway, see ff_acelp_lsp2lpc
> 
> done, it is globally ~10% faster.
> 
> but it gives some significant difference in the WAV output.
> I double-checked, and it seems to come from the float rounding :(
> here is the list of result of 'tiny_psnr old.wav new.wav'.

what happens with double instead of float?

[...]

> @@ -391,6 +428,16 @@
>  #define QCELP_SCALE 8192.
>  
>  /**
> + * the upper boundary of the clipping, depends on QCELP_SCALE
> + */
> +#define QCELP_CLIP_UPPER_BOUND (8191.75/8192.)
> +
> +/**
> + * the lower boundary of the clipping, depends on QCELP_SCALE
> + */
> +#define QCELP_CLIP_LOWER_BOUND -1.
> +
> +/**
>   * table for computing Ga (decoded linear codebook gain magnitude)
>   *
>   * @note The table could fit in int16_t in x*8 form, but it seems

ok

[...]
> @@ -470,7 +517,7 @@
>  /**
>   * sqrt(1.887) is the maximum of the pseudorandom
>   * white sequence used to generate the scaled codebook
> - * vector for framerate 1/4.
> + * vector for bitrate 1/4.
>   *
>   * TIA/EIA/IS-733 2.4.8.1.2
>   */
> @@ -478,7 +525,7 @@
>  
>  /**
>   * table for impulse response of BPF used to filter
> - * the white excitation for framerate 1/4 synthesis
> + * the white excitation for bitrate 1/4 synthesis
>   *
>   * Only half the tables are needed because of symetry.
>   *

ok, also all other such renamings are ok

> @@ -490,4 +537,20 @@
>    -9.918777e-2, 3.749518e-2,  8.985137e-1
>  };
>  
> +/**
> + * This spread factor is used, for bitrate 1/8 and I_F_Q,
> + * to force the LSP frequencies to be at least 80 Hz apart.
> + *
> + * TIA/EIA/IS-733 2.4.3.3.2
> + */
> +#define QCELP_LSP_SPREAD_FACTOR 0.02
> +
> +/**
> + * predictor coefficient for the conversion of LSP codes
> + * to LSP frequencies for 1/8 and I_F_Q
> + *
> + * TIA/EIA/IS-733 2.4.3.2.7-2
> + */
> +#define QCELP_LSP_OCTAVE_PREDICTOR 29.0/32
> +
>  #endif /* AVCODEC_QCELPDATA_H */
> Index: libavcodec/qcelpdec.c
> ===================================================================
> --- libavcodec/qcelpdec.c	(revision 15885)
> +++ libavcodec/qcelpdec.c	(working copy)
> @@ -69,6 +69,203 @@
>  }
>  
>  /**
> + * Decodes the 10 quantized LSP frequencies from the LSPV/LSP
> + * transmission codes of any bitrate and checks for badly received packets.
> + *
> + * @param q the context
> + * @param lspf line spectral pair frequencies
> + *
> + * @return 0 on success, -1 if the packet is badly received
> + *
> + * TIA/EIA/IS-733 2.4.3.2.6.2-2, 2.4.8.7.3
> + */
> +static int decode_lspf(QCELPContext *q,
> +                       float *lspf) {
> +    int i;
> +    float tmp_lspf;
> +
> +    if (q->bitrate == RATE_OCTAVE ||
> +        q->bitrate == I_F_Q) {
> +        float smooth;
> +        const float *predictors = (q->prev_bitrate != RATE_OCTAVE &&
> +                                   q->prev_bitrate != I_F_Q ? q->prev_lspf
> +                                                            : q->predictor_lspf);
> +
> +        if (q->bitrate == RATE_OCTAVE) {
> +            q->octave_count++;
> +
> +            for (i = 0; i < 10; i++) {
> +                q->predictor_lspf[i] =
> +                             lspf[i] = (q->lspv[i] ?  QCELP_LSP_SPREAD_FACTOR
> +                                                   : -QCELP_LSP_SPREAD_FACTOR)
> +                                     + predictors[i] * QCELP_LSP_OCTAVE_PREDICTOR
> +                                     + (i + 1) * ((1 - QCELP_LSP_OCTAVE_PREDICTOR)/11);
> +            }
> +            smooth = (q->octave_count < 10 ? .875 : 0.1);
> +        } else {
> +            float erasure_coeff = QCELP_LSP_OCTAVE_PREDICTOR;
> +
> +            assert(q->bitrate == I_F_Q);
> +
> +            if (q->erasure_count > 1)
> +                erasure_coeff *= (q->erasure_count < 4 ? 0.9 : 0.7);
> +
> +            for (i = 0; i < 10; i++) {
> +                q->predictor_lspf[i] =
> +                             lspf[i] = (i + 1) * ( 1 - erasure_coeff)/11
> +                                     + erasure_coeff * predictors[i];
> +            }
> +            smooth = 0.125;
> +        }
> +
> +        // Check the stability of the LSP frequencies.
> +        lspf[0] = FFMAX(lspf[0], QCELP_LSP_SPREAD_FACTOR);
> +        for (i = 1; i < 10; i++)
> +            lspf[i] = FFMAX(lspf[i], (lspf[i-1] + QCELP_LSP_SPREAD_FACTOR));
> +
> +        lspf[9] = FFMIN(lspf[9], (1.0 - QCELP_LSP_SPREAD_FACTOR));
> +        for (i = 9; i > 0; i--)
> +            lspf[i-1] = FFMIN(lspf[i-1], (lspf[i] - QCELP_LSP_SPREAD_FACTOR));
> +
> +        // Low-pass filter the LSP frequencies.
> +        weighted_vector_sumf(lspf, lspf, q->prev_lspf, smooth, 1.0 - smooth, 10);
> +    } else {
> +        q->octave_count = 0;
> +
> +        tmp_lspf = 0.;
> +        for (i = 0; i < 5 ; i++) {
> +            lspf[2*i+0] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][0] * 0.0001;
> +            lspf[2*i+1] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][1] * 0.0001;
> +        }
> +
> +        // Check for badly received packets.
> +        if (q->bitrate == RATE_QUARTER) {
> +            if (lspf[9] <= .70 || lspf[9] >=  .97)
> +                return -1;
> +            for (i = 3; i < 10; i++)
> +                if (fabs(lspf[i] - lspf[i-2]) < .08)
> +                    return -1;
> +        } else {
> +            if (lspf[9] <= .66 || lspf[9] >= .985)
> +                return -1;
> +            for (i = 4; i < 10; i++)
> +                if (fabs(lspf[i] - lspf[i-4]) < .0931)
> +                    return -1;
> +        }
> +    }
> +    return 0;
> +}

ok

> +
> +/**
> + * Converts codebook transmission codes to GAIN and INDEX.
> + *
> + * @param q the context
> + * @param gain array holding the decoded gain
> + *
> + * TIA/EIA/IS-733 2.4.6.2
> + */
> +static void decode_gain_and_index(QCELPContext  *q,
> +                                  float *gain) {
> +    int   i, subframes_count, g1[16];
> +    float gain_memory, smooth_coef;
> +
> +    if (q->bitrate >= RATE_QUARTER) {

> +        switch (q->bitrate) {
> +            case RATE_FULL:
> +                subframes_count = 16;
> +                break;
> +            case RATE_HALF:
> +                subframes_count = 4;
> +                break;
> +            default:
> +                subframes_count = 5;
> +        }

switch (q->bitrate) {
    case RATE_FULL: subframes_count = 16; break;
    case RATE_HALF: subframes_count = 4 ; break;
    default       : subframes_count = 5 ;
}

> +        for (i = 0; i < subframes_count; i++) {
> +            g1[i] = 4 * q->cbgain[i];
> +            if (q->bitrate == RATE_FULL && !((i+1) & 3)) {
> +                g1[i] += av_clip((g1[i-1] + g1[i-2] + g1[i-3]) / 3 - 6, 0, 32);
> +            }
> +
> +            gain[i] = qcelp_g12ga[g1[i]];
> +
> +            if (q->cbsign[i]) {
> +                gain[i] = -gain[i];
> +                q->cindex[i] = (q->cindex[i]-89) & 127;
> +            }
> +        }
> +
> +        q->prev_g1[0] = g1[i-2];
> +        q->prev_g1[1] = g1[i-1];
> +        q->last_codebook_gain = qcelp_g12ga[g1[i-1]];
> +
> +        if (q->bitrate == RATE_QUARTER) {
> +            // Provide smoothing of the unvoiced excitation energy.
> +            gain[7] =     gain[4];
> +            gain[6] = 0.4*gain[3] + 0.6*gain[4];
> +            gain[5] =     gain[3];
> +            gain[4] = 0.8*gain[2] + 0.2*gain[3];
> +            gain[3] = 0.2*gain[1] + 0.8*gain[2];
> +            gain[2] =     gain[1];
> +            gain[1] = 0.6*gain[0] + 0.4*gain[1];
> +        }
> +    } else {
> +        if (q->bitrate == RATE_OCTAVE) {
> +            g1[0] = 2 * q->cbgain[0] + av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 5, 0, 54);
> +            smooth_coef = 0.125;
> +            i = 7;
> +        } else {
> +            assert(q->bitrate == I_F_Q);
> +
> +            g1[0] = q->prev_g1[1];
> +            switch (q->erasure_count) {
> +            case 1 : break;
> +            case 2 : g1[0] -= 1; break;
> +            case 3 : g1[0] -= 2; break;
> +            default: g1[0] -= 6;
> +            }
> +            if (g1[0] < 0)
> +                g1[0] = 0;
> +            smooth_coef = 0.25;
> +            i = 3;
> +        }

> +        gain_memory = q->last_codebook_gain;
> +
> +        q->last_codebook_gain =
> +                      gain[i] = 0.5 * (gain_memory + qcelp_g12ga[g1[0]]);
> +
> +        smooth_coef *= (gain[i] - gain_memory);
> +        // This interpolation is done to produce smoother background noise.
> +        for (; i > 0; i--)
> +            gain[i-1] = gain_memory + smooth_coef * i;

something like:

N= 8 or 4
slope= 0.5*(qcelp_g12ga[g1[0]] - q->last_codebook_gain)/N;
for (i=1; i<= N; i++)
    gain[i-1] = q->last_codebook_gain + i*slope;
q->last_codebook_gain = gain[i-1];

appears simpler

> +
> +        q->prev_g1[0] = q->prev_g1[1];
> +        q->prev_g1[1] = g1[0];
> +    }
> +}
> +

> +/**
> + * If the received packet is Rate 1/4 a further sanity check is made of the codebook gain.
> + *
> + * @param cbgain the unpacked cbgain array
> + * @return -1 if the sanity check fails, 0 otherwise
> + *
> + * TIA/EIA/IS-733 2.4.8.7.3
> + */
> +static int codebook_sanity_check_for_rate_quarter(const uint8_t *cbgain) {
> +   int i, prev_diff=0;
> +
> +   for (i = 1; i < 5; i++) {
> +       int diff = cbgain[i] - cbgain[i-1];
> +       if (FFABS(diff) > 10)
> +           return -1;
> +       else if (FFABS(diff - prev_diff) > 12)
> +           return -1;
> +       prev_diff = diff;
> +   }
> +   return 0;
> +}
> +
> +/**
>   * Computes the scaled codebook vector Cdn From INDEX and GAIN
>   * for all rates.
>   *

ok

> @@ -96,7 +293,7 @@

[...]
> @@ -298,6 +553,46 @@
>      return -1;
>  }
>  
> +/*
> + * Determine the bitrate from the frame size and/or the first byte of the frame.
> + *
> + * @param avctx the AV codec context
> + * @param buf_size length of the buffer
> + * @param buf the buffer
> + *
> + * @return the bitrate on success,
> + *         I_F_Q  if the bitrate cannot be satisfactorily determined
> + *
> + * TIA/EIA/IS-733 2.4.8.7.1
> + */
> +static int determine_bitrate(AVCodecContext *avctx,
> +                               const int buf_size,
> +                               uint8_t **buf) {
> +    qcelp_packet_rate bitrate;
> +
> +    if ((bitrate = buf_size2bitrate(buf_size)) >= 0) {

> +        if (bitrate > **buf && **buf >= 0) {

**buf is unsigned, so I don't think the >= 0 check makes sense

[...]
> @@ -305,6 +600,107 @@
>             message);
>  }
>  
> +static int qcelp_decode_frame(AVCodecContext *avctx,
> +                              void *data,
> +                              int *data_size,
> +                              uint8_t *buf,
> +                              const int buf_size) {
> +    QCELPContext      *q = avctx->priv_data;
> +    float             *outbuffer = data;
> +    int               i;
> +    float             quantized_lspf[10], lpc[10];
> +    float             gain[16];
> +    float             *formant_mem;
> +
> +    if ((q->bitrate = determine_bitrate(avctx, buf_size, &buf)) == I_F_Q) {
> +        warn_insufficient_frame_quality(avctx, "bitrate cannot be determined.");
> +        goto erasure;
> +    }
> +
> +    if (q->bitrate == RATE_OCTAVE &&
> +       (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
> +        warn_insufficient_frame_quality(avctx, "Bitrate is 1/8 and first 16 bits are on.");
> +        goto erasure;
> +    }
> +
> +    if (q->bitrate > SILENCE) {
> +        const QCELPBitmap *bitmaps     = qcelp_unpacking_bitmaps_per_rate[q->bitrate];
> +        const QCELPBitmap *bitmaps_end = qcelp_unpacking_bitmaps_per_rate[q->bitrate]
> +                                       + qcelp_bits_per_rate[q->bitrate];
> +        uint8_t           *unpacked_data = (uint8_t *)q;
> +

> +        init_get_bits(&q->gb, buf, buf_size);

I think this is mixing size in bits and size in bytes: init_get_bits()
takes its size argument in bits, whereas buf_size is a byte count.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are best at talking, realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081121/a9eb2bc4/attachment.pgp>