[FFmpeg-devel] [PATCH] QCELP decoder
Michael Niedermayer
michaelni
Sun Nov 30 16:50:29 CET 2008
On Sat, Nov 29, 2008 at 10:39:58AM -0800, Kenan Gillet wrote:
> Hi all,
>
> sorry for the delay. I was waiting to resolve the parsing/decoding of
> the bitrate/packet type issue.
> Reynaldo and I agreed on IRC to leave the parsing/decoding as it is for now.
>
> So here is round 13 of the qcelp decoder.
> - QCELPContext was split so that it can be moved into qcelpdec.c and
> only keep the unpacked data structure (QCELPFrame) in qcelpdata.h
> - add doxy comments on QCELPFrame field
> - simplify decode_gain_and_index for RATE_OCTAVE and IFQ
> - rename qcelp_bits_per_rate into qcelp_unpacking_bitmaps_lengths
> - use double in qcelp_lsp.c
>
> have a great day
>
> Kenan
> Index: libavcodec/qcelp.h
> ===================================================================
> --- libavcodec/qcelp.h (revision 0)
> +++ libavcodec/qcelp.h (revision 0)
> @@ -0,0 +1,48 @@
> +/*
> + * QCELP decoder
> + * Copyright (c) 2007 Reynaldo H. Verdejo Pinochet
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_QCELP_H
> +#define AVCODEC_QCELP_H
> +
> +/**
> + * @file qcelp.h
> + * QCELP decoder
> + * @author Reynaldo H. Verdejo Pinochet
> + */
> +
> +typedef enum
> +{
> + I_F_Q = -1, /*!< insufficient frame quality */
> + SILENCE,
> + RATE_OCTAVE,
> + RATE_QUARTER,
> + RATE_HALF,
> + RATE_FULL
> +} qcelp_packet_rate;
I think this could be in qcelpdec.c?
> +
> +/**
> + * Reconstructs LPC coefficients from the line spectral pair frequencies.
> + *
> + * TIA/EIA/IS-733 2.4.3.3.5
> + */
> +void qcelp_lspf2lpc(const float *lspf, float *lpc);
> +
> +#endif /* AVCODEC_QCELP_H */
> Index: libavcodec/qcelpdata.h
> ===================================================================
> --- libavcodec/qcelpdata.h (revision 15955)
> +++ libavcodec/qcelpdata.h (working copy)
> @@ -34,6 +34,37 @@
> #include "libavutil/common.h"
>
> /**
> + * QCELP unpacked data frame
> + */
> +typedef struct {
> +/// @defgroup qcelp_codebook_parameters QCELP excitation codebook parameters
> +/// @{
> + uint8_t cbsign[16]; ///!< sign of the codebook gain for each codebook subframe
> + uint8_t cbgain[16]; ///!< unsigned codebook gain for each codebook subframe
> + uint8_t cindex[16]; ///!< codebook index for each codebook subframe
> +/// @}
> +
> +/// @defgroup qcelp_pitch_parameters QCELP pitch prediction parameters
> +/// @{
> + uint8_t plag[4]; ///!< pitch lag for each pitch subframe
> + uint8_t pfrac[4]; ///!< fractional pitch lag for each pitch subframe
> + uint8_t pgain[4]; ///!< pitch gain for each pitch subframe
> +/// @}
> +
> + /**
> + * line spectral pair frequencies (LSP) for RATE_OCTAVE,
> + * line spectral pair frequencies grouped into five vectors
> + * of dimension two (LSPV) for other rates
> + */
> + uint8_t lspv[10];
> +
ok
> + /**
> + * reserved bits on all bitrate but bitrate 1/2 packets
This is unclear: is it a field that is present on all packets but rate 1/2 ones,
or a field that always exists but is reserved on all packets but rate 1/2 ones?
> + */
> + uint8_t reserved;
> +} QCELPFrame;
> +
> +/**
> * pre-calculated table for hammsinc function
> * Only half of the table is needed because of symmetry.
> *
> @@ -47,10 +78,8 @@
> uint8_t bitlen; /*!< number of bits to read */
> } QCELPBitmap;
>
> -#define QCELP_OF(variable, bit, len) {offsetof(QCELPContext, variable), bit, len}
> +#define QCELP_OF(variable, bit, len) {offsetof(QCELPFrame, variable), bit, len}
ok
>
> -/* Disable the below code for now to allow 'make checkheaders' to pass. */
> -#if 0
> /**
> * bitmap unpacking tables for RATE_FULL
> *
> @@ -243,14 +272,13 @@
> qcelp_rate_full_bitmap,
> };
>
> -static const uint16_t qcelp_bits_per_rate[5] = {
> +static const uint16_t qcelp_unpacking_bitmaps_lengths[5] = {
> 0, ///!< for SILENCE rate
> FF_ARRAY_ELEMS(qcelp_rate_octave_bitmap),
> FF_ARRAY_ELEMS(qcelp_rate_quarter_bitmap),
> FF_ARRAY_ELEMS(qcelp_rate_half_bitmap),
> FF_ARRAY_ELEMS(qcelp_rate_full_bitmap),
> };
ok
> -#endif
>
> typedef uint16_t qcelp_vector[2];
>
> Index: libavcodec/qcelpdec.c
> ===================================================================
> --- libavcodec/qcelpdec.c (revision 15955)
> +++ libavcodec/qcelpdec.c (working copy)
> @@ -40,6 +40,27 @@
> #undef NDEBUG
> #include <assert.h>
>
> +typedef struct {
> + GetBitContext gb;
> + qcelp_packet_rate bitrate;
> + QCELPFrame frame; /*!< unpacked data frame */
> + uint8_t erasure_count;
> + uint8_t octave_count; /*!< count the consecutive RATE_OCTAVE frames */
> + float prev_lspf[10];
> + float predictor_lspf[10]; /*!< LSP predictor,
ok
> + only used for RATE_OCTAVE and I_F_Q */
> + float pitch_synthesis_filter_mem[303];
> + float pitch_pre_filter_mem[303];
> + float rnd_fir_filter_mem[180];
Is it correct to use the random values from the last frame? I faintly remember
seeing that this wasn't done in previous patches ... just asking to make sure
no bug slipped in ...
> + float formant_mem[170];
> + float last_codebook_gain;
> + int prev_g1[2];
> + int prev_bitrate;
> + float prev_pitch_gain[4];
> + uint8_t prev_pitch_lag[4];
> + uint16_t first16bits;
> +} QCELPContext;
> +
> static void weighted_vector_sumf(float *out, const float *in_a,
> const float *in_b, float weight_coeff_a,
> float weight_coeff_b, int length)
> @@ -99,8 +120,8 @@
> for(i=0; i<10; i++)
> {
> q->predictor_lspf[i] =
> - lspf[i] = (q->lspv[i] ? QCELP_LSP_SPREAD_FACTOR
> - : -QCELP_LSP_SPREAD_FACTOR)
> + lspf[i] = (q->frame.lspv[i] ? QCELP_LSP_SPREAD_FACTOR
> + : -QCELP_LSP_SPREAD_FACTOR)
> + predictors[i] * QCELP_LSP_OCTAVE_PREDICTOR
> + (i + 1) * ((1 - QCELP_LSP_OCTAVE_PREDICTOR)/11);
> }
> @@ -141,8 +162,8 @@
> tmp_lspf = 0.;
> for(i=0; i<5 ; i++)
> {
> - lspf[2*i+0] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][0] * 0.0001;
> - lspf[2*i+1] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][1] * 0.0001;
> + lspf[2*i+0] = tmp_lspf += qcelp_lspvq[i][q->frame.lspv[i]][0] * 0.0001;
> + lspf[2*i+1] = tmp_lspf += qcelp_lspvq[i][q->frame.lspv[i]][1] * 0.0001;
> }
>
> // Check for badly received packets.
> @@ -166,6 +187,83 @@
> }
>
> /**
> + * Converts codebook transmission codes to GAIN and INDEX.
> + *
> + * @param q the context
> + * @param gain array holding the decoded gain
> + *
> + * TIA/EIA/IS-733 2.4.6.2
> + */
> +static void decode_gain_and_index(QCELPContext *q,
> + float *gain) {
> + int i, subframes_count, g1[16];
> + float slope;
> +
> + if (q->bitrate >= RATE_QUARTER) {
> + switch (q->bitrate) {
> + case RATE_FULL: subframes_count = 16; break;
> + case RATE_HALF: subframes_count = 4; break;
> + default: subframes_count = 5;
> + }
> + for (i = 0; i < subframes_count; i++) {
> + g1[i] = 4 * q->frame.cbgain[i];
> + if (q->bitrate == RATE_FULL && !((i+1) & 3)) {
> + g1[i] += av_clip((g1[i-1] + g1[i-2] + g1[i-3]) / 3 - 6, 0, 32);
> + }
> +
> + gain[i] = qcelp_g12ga[g1[i]];
> +
> + if (q->frame.cbsign[i]) {
> + gain[i] = -gain[i];
> + q->frame.cindex[i] = (q->frame.cindex[i]-89) & 127;
> + }
> + }
> +
> + q->prev_g1[0] = g1[i-2];
> + q->prev_g1[1] = g1[i-1];
> + q->last_codebook_gain = qcelp_g12ga[g1[i-1]];
> +
> + if (q->bitrate == RATE_QUARTER) {
> + // Provide smoothing of the unvoiced excitation energy.
> + gain[7] = gain[4];
> + gain[6] = 0.4*gain[3] + 0.6*gain[4];
> + gain[5] = gain[3];
> + gain[4] = 0.8*gain[2] + 0.2*gain[3];
> + gain[3] = 0.2*gain[1] + 0.8*gain[2];
> + gain[2] = gain[1];
> + gain[1] = 0.6*gain[0] + 0.4*gain[1];
> + }
> + } else {
> + if (q->bitrate == RATE_OCTAVE) {
> + g1[0] = 2 * q->frame.cbgain[0]
> + + av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 5, 0, 54);
> + subframes_count = 8;
> + } else {
> + assert(q->bitrate == I_F_Q);
> +
> + g1[0] = q->prev_g1[1];
> + switch (q->erasure_count) {
> + case 1 : break;
> + case 2 : g1[0] -= 1; break;
> + case 3 : g1[0] -= 2; break;
> + default: g1[0] -= 6;
> + }
> + if (g1[0] < 0)
> + g1[0] = 0;
> + subframes_count = 4;
> + }
> + // This interpolation is done to produce smoother background noise.
> + slope = 0.5*(qcelp_g12ga[g1[0]] - q->last_codebook_gain) / subframes_count;
> + for (i = 1; i <= subframes_count; i++)
> + gain[i-1] = q->last_codebook_gain + slope * i;
> + q->last_codebook_gain = gain[i-2];
> +
> + q->prev_g1[0] = q->prev_g1[1];
> + q->prev_g1[1] = g1[0];
> + }
> +}
> +
> +/**
> * If the received packet is Rate 1/4 a further sanity check is made of the
> * codebook gain.
> *
> @@ -224,7 +322,7 @@
> for(i=0; i<16; i++)
> {
> tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
> - cindex = -q->cindex[i];
> + cindex = -q->frame.cindex[i];
> for(j=0; j<10; j++)
> *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cindex++ & 127];
> }
> @@ -233,17 +331,17 @@
> for(i=0; i<4; i++)
> {
> tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
> - cindex = -q->cindex[i];
> + cindex = -q->frame.cindex[i];
> for (j = 0; j < 40; j++)
> *cdn_vector++ = tmp_gain * qcelp_rate_half_codebook[cindex++ & 127];
> }
> break;
> case RATE_QUARTER:
> - cbseed = (0x0003 & q->lspv[4])<<14 |
> - (0x003F & q->lspv[3])<< 8 |
> - (0x0060 & q->lspv[2])<< 1 |
> - (0x0007 & q->lspv[1])<< 3 |
> - (0x0038 & q->lspv[0])>> 3 ;
> + cbseed = (0x0003 & q->frame.lspv[4])<<14 |
> + (0x003F & q->frame.lspv[3])<< 8 |
> + (0x0060 & q->frame.lspv[2])<< 1 |
> + (0x0007 & q->frame.lspv[1])<< 3 |
> + (0x0038 & q->frame.lspv[0])>> 3 ;
> rnd = q->rnd_fir_filter_mem + 20;
> for(i=0; i<8; i++)
> {
ok
> @@ -381,6 +479,64 @@
> }
>
> /**
> + * Apply pitch synthesis filter and pitch prefilter to the scaled codebook vector.
> + * TIA/EIA/IS-733 2.4.5.2
> + *
> + * @param q the context
> + * @param cdn_vector the scaled codebook vector
> + */
> +static void apply_pitch_filters(QCELPContext *q,
> + float *cdn_vector) {
> + int i;
> + float gain[4];
> + const float *v_synthesis_filtered, *v_pre_filtered;
> +
> + if (q->bitrate >= RATE_HALF ||
> + (q->bitrate == I_F_Q && (q->prev_bitrate >= RATE_HALF))) {
> +
> + if (q->bitrate >= RATE_HALF) {
> +
> + // Compute gain & lag for the whole frame.
> + for (i = 0; i < 4; i++) {
> + gain[i] = q->frame.plag[i] ? (q->frame.pgain[i] + 1) * 0.25 : 0.0;
> +
> + q->frame.plag[i] += 16;
> + }
> + memcpy(q->prev_pitch_lag, q->frame.plag, sizeof(q->frame.plag));
> + } else {
> + gain[3] = q->erasure_count < 3 ? 0.9 - 0.3 * (q->erasure_count - 1)
> + : 0.0;
> + for (i = 0; i < 4; i++)
> + gain[i] = FFMIN(q->prev_pitch_gain[i], gain[3]);
> +
> + memset(q->frame.pfrac, 0, sizeof(q->frame.pfrac));
> + memcpy(q->frame.plag, q->prev_pitch_lag, sizeof(q->frame.plag));
> + }
> +
> + // pitch synthesis filter
> + v_synthesis_filtered = do_pitchfilter(q->pitch_synthesis_filter_mem, cdn_vector,
> + gain, q->frame.plag, q->frame.pfrac);
> +
> + // pitch prefilter update
> + for (i = 0; i < 4; i++)
> + gain[i] = 0.5 * FFMIN(gain[i], 1.0);
> +
> + v_pre_filtered = do_pitchfilter(q->pitch_pre_filter_mem, v_synthesis_filtered,
> + gain, q->frame.plag, q->frame.pfrac);
> +
> + apply_gain_ctrl(cdn_vector, v_synthesis_filtered, v_pre_filtered);
> +
> + memcpy(q->prev_pitch_gain, gain, sizeof(q->prev_pitch_gain));
> +
> + } else {
> + memcpy(q->pitch_synthesis_filter_mem, cdn_vector + 17, 143 * sizeof(float));
> + memcpy(q->pitch_pre_filter_mem, cdn_vector + 17, 143 * sizeof(float));
> + memset(q->prev_pitch_gain, 0, sizeof(q->prev_pitch_gain));
> + memset(q->prev_pitch_lag, 0, sizeof(q->prev_pitch_lag));
> + }
> +}
> +
> +/**
> * Interpolates LSP frequencies and computes LPC coefficients
> * for a given bitrate & pitch subframe.
> *
> @@ -432,6 +588,46 @@
> return -1;
> }
>
> +/*
> + * Determine the bitrate from the frame size and/or the first byte of the frame.
The comment above should start with /** (doxygen), not /*
> + *
> + * @param avctx the AV codec context
> + * @param buf_size length of the buffer
> + * @param buf the buffer
> + *
> + * @return the bitrate on success,
> + * I_F_Q if the bitrate cannot be satisfactorily determined
> + *
> + * TIA/EIA/IS-733 2.4.8.7.1
> + */
> +static int determine_bitrate(AVCodecContext *avctx,
> + const int buf_size,
> + uint8_t **buf) {
> + qcelp_packet_rate bitrate;
> +
> + if ((bitrate = buf_size2bitrate(buf_size)) >= 0) {
> + if (bitrate > **buf) {
> + av_log(avctx, AV_LOG_WARNING, "Claimed bitrate and buffer size mismatch.\n");
> + bitrate = **buf;
> + } else if (bitrate < **buf) {
> + av_log(avctx, AV_LOG_WARNING, "Buffer is too small for the claimed bitrate.\n");
I think this should be AV_LOG_ERROR
> + return I_F_Q;
> + }
> + (*buf)++;
> + } else if ((bitrate = buf_size2bitrate(buf_size + 1)) >= 0) {
> + av_log(avctx, AV_LOG_WARNING,
> + "Bitrate byte is missing, guessing the bitrate from packet size.\n");
> + } else
> + return I_F_Q;
> +
> + if (bitrate == SILENCE) {
> + // FIXME: the decoder should not handle SILENCE frames as I_F_Q frames
> + av_log_missing_feature(avctx, "Blank frame", 1);
> + bitrate = I_F_Q;
> + }
> + return bitrate;
> +}
> +
> static void warn_insufficient_frame_quality(AVCodecContext *avctx,
> const char *message)
> {
> @@ -439,6 +635,107 @@
> message);
> }
>
> +static int qcelp_decode_frame(AVCodecContext *avctx,
> + void *data,
> + int *data_size,
> + uint8_t *buf,
> + const int buf_size) {
> + QCELPContext *q = avctx->priv_data;
> + float *outbuffer = data;
> + int i;
> + float quantized_lspf[10], lpc[10];
> + float gain[16];
> + float *formant_mem;
> +
> + if ((q->bitrate = determine_bitrate(avctx, buf_size, &buf)) == I_F_Q) {
> + warn_insufficient_frame_quality(avctx, "bitrate cannot be determined.");
> + goto erasure;
> + }
> +
> + if (q->bitrate == RATE_OCTAVE &&
> + (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
> + warn_insufficient_frame_quality(avctx, "Bitrate is 1/8 and first 16 bits are on.");
> + goto erasure;
> + }
> +
> + if (q->bitrate > SILENCE) {
> + const QCELPBitmap *bitmaps = qcelp_unpacking_bitmaps_per_rate[q->bitrate];
> + const QCELPBitmap *bitmaps_end = qcelp_unpacking_bitmaps_per_rate[q->bitrate]
> + + qcelp_unpacking_bitmaps_lengths[q->bitrate];
> + uint8_t *unpacked_data = (uint8_t *)&q->frame;
> +
> + init_get_bits(&q->gb, buf, 8*buf_size);
> +
> + memset(&q->frame, 0, sizeof(QCELPFrame));
> +
> + for (; bitmaps < bitmaps_end; bitmaps++)
> + unpacked_data[bitmaps->index] |= get_bits(&q->gb, bitmaps->bitlen) << bitmaps->bitpos;
> +
> + // Check for erasures/blanks on rates 1, 1/4 and 1/8.
> + if (q->frame.reserved) {
> + warn_insufficient_frame_quality(avctx, "Wrong data in reserved frame area.");
> + goto erasure;
> + }
> + if (q->bitrate == RATE_QUARTER && codebook_sanity_check_for_rate_quarter(q->frame.cbgain)) {
> + warn_insufficient_frame_quality(avctx, "Codebook gain sanity check failed.");
> + goto erasure;
> + }
> +
> + if (q->bitrate >= RATE_HALF) {
> + for (i = 0; i < 4; i++) {
> + if (q->frame.pfrac[i] && q->frame.plag[i] >= 124) {
> + warn_insufficient_frame_quality(avctx, "Cannot initialize pitch filter.");
> + goto erasure;
> + }
> + }
> + }
> + }
> +
> + decode_gain_and_index(q, gain);
> + compute_svector(q, gain, outbuffer);
> +
> + if (decode_lspf(q, quantized_lspf) < 0) {
> + warn_insufficient_frame_quality(avctx, "Badly received packets in frame.");
> + goto erasure;
> + }
> +
> +
> + apply_pitch_filters(q, outbuffer);
> +
> + if (q->bitrate == I_F_Q) {
> +erasure:
> + q->bitrate = I_F_Q;
> + q->erasure_count++;
> + decode_gain_and_index(q, gain);
> + compute_svector(q, gain, outbuffer);
> + decode_lspf(q, quantized_lspf);
> + apply_pitch_filters(q, outbuffer);
> + } else
> + q->erasure_count = 0;
> +
> + formant_mem = q->formant_mem + 10;
> + for (i = 0; i < 4; i++) {
> + interpolate_lpc(q, quantized_lspf, lpc, i);
> + ff_celp_lp_synthesis_filterf(formant_mem, lpc, outbuffer + i * 40, 40, 10);
> + formant_mem += 40;
> + }
> + memcpy(q->formant_mem, q->formant_mem + 160, 10 * sizeof(float));
> +
> + // FIXME: postfilter and final gain control should be here.
> + // TIA/EIA/IS-733 2.4.8.6
> +
> + formant_mem = q->formant_mem + 10;
> + for (i = 0; i < 160; i++)
> + *outbuffer++ = av_clipf(*formant_mem++, QCELP_CLIP_LOWER_BOUND, QCELP_CLIP_UPPER_BOUND);
> +
> + memcpy(q->prev_lspf, quantized_lspf, sizeof(q->prev_lspf));
> + q->prev_bitrate = q->bitrate;
> +
> + *data_size = 160 * sizeof(*outbuffer);
> +
> + return *data_size;
> +}
> +
> AVCodec qcelp_decoder =
> {
> .name = "qcelp",
ok
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
In fact, the RIAA has been known to suggest that students drop out
of college or go to community college in order to be able to afford
settlements. -- The RIAA
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081130/2a554d10/attachment.pgp>
More information about the ffmpeg-devel
mailing list