[FFmpeg-devel] [PATCH] QCELP decoder
Michael Niedermayer
michaelni
Fri Nov 21 02:08:33 CET 2008
On Thu, Nov 20, 2008 at 10:46:49AM -0800, Kenan Gillet wrote:
> On Sat, Nov 15, 2008 at 3:10 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> > On Fri, Nov 14, 2008 at 03:32:51PM -0800, Kenan Gillet wrote:
> >> Hi,
> >> On Fri, Nov 14, 2008 at 2:27 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> >> > On Fri, Nov 14, 2008 at 12:17:50PM -0800, Kenan Gillet wrote:
> >> >>
> >> >> On Nov 14, 2008, at 2:14 AM, Michael Niedermayer wrote:
[...]
> >> +
> >> +/// @defgroup qcelp_unpacked_data_frame QCELP unpacked data frame
> >> +/// @{
> >> + uint8_t cbsign[16];
> >
> >> + uint8_t cbgain[16];
> >> + uint8_t cindex[16];
> >> + uint8_t plag[4];
> >> + uint8_t pfrac[4];
> >> + uint8_t pgain[4];
> >> + uint8_t lspv[10]; /*!< LSP for RATE_OCTAVE, LSPV for other rates */
> >> + uint8_t reserved; /*!< on all but rate 1/2 packets */
> >> +/// @}
> >> +
> >> + uint8_t erasure_count;
> >> + uint8_t octave_count; /*!< count the consecutive RATE_OCTAVE frames */
> >> + float prev_lspf[10];
> >> + float predictor_lspf[10]; /*!< LSP predictor,
> >> + only use for RATE_OCTAVE and I_F_Q */
> >> + float pitch_synthesis_filter_mem[303];
> >> + float pitch_pre_filter_mem[303];
> >> + float rnd_fir_filter_mem[180];
> >> + float formant_mem[170];
> >> + float last_codebook_gain;
> >> + int prev_g1[2];
> >> + int prev_framerate;
> >> + float prev_pitch_gain[4];
> >> + uint8_t prev_pitch_lag[4];
> >> + uint16_t first16bits;
> >> +} QCELPContext;
> >
> > i somehow think this struct does not belong in qcelpdata.h
> > but rather qcelpdec.c
> >
>
> I agree, but it is needed by the unpacking table.
> should I just put the struct in qcelpdec.c and include qcelpdata.h after ?
ok (maybe diego will want it to be renamed to .c though ...)
[...]
> >
> > [...]
> >
> >
> >> static void warn_insufficient_frame_quality(AVCodecContext *avctx,
> >> const char *message) {
> >> av_log(avctx, AV_LOG_WARNING, "Frame #%d, IFQ: %s\n", avctx->frame_number, message);
> >> }
> >>
> >> +static int qcelp_decode_frame(AVCodecContext *avctx,
> >> + void *data,
> >> + int *data_size,
> >> + uint8_t *buf,
> >> + const int buf_size) {
> >> + QCELPContext *q = avctx->priv_data;
> >> + float *outbuffer = data;
> >> + int i;
> >> + float quantized_lspf[10], lpc[10];
> >> + float gain[16];
> >> + float *formant_mem;
> >> +
> >> + if ((q->framerate = determine_framerate(avctx, buf_size, &buf)) == I_F_Q) {
> >> + warn_insufficient_frame_quality(avctx, "Framerate cannot be determined.");
> >> + goto erasure;
> >> + }
> >> +
> >> + if (q->framerate == RATE_OCTAVE &&
> >> + (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
> >> + warn_insufficient_frame_quality(avctx, "Framerate is 1/8 and first 16 bits are on.");
> >> + goto erasure;
> >> + }
> >> +
> >> + if (q->framerate > SILENCE) {
> >> + const QCELPBitmap *bitmaps = qcelp_unpacking_bitmaps_per_rate[q->framerate];
> >> + const QCELPBitmap *bitmaps_end = qcelp_unpacking_bitmaps_per_rate[q->framerate]
> >> + + qcelp_bits_per_rate[q->framerate];
> >> + uint8_t *unpacked_data = (uint8_t *)q;
> >> +
> >
> >> + init_get_bits(&q->gb, buf, qcelp_bits_per_rate[q->framerate]);
> >
> > qcelp_bits_per_rate does not seem correct here nor does its name seem
> > to match what it contains
>
>
> yes changed back to buf_size.
>
> what about changing qcelp_bits_per_rate to qcelp_unpacking_bitmaps_per_rate_len
> because it really is the len of the unpacking bitmaps, or do you have
> a better suggestion ?
the suggested name is too long IMHO
[...]
> >
> > [...]
> >> +/**
> >> + * Computes the Pa or Qa coefficients needed for LSP to LPC conversion.
> >> + * We only need to calculate the 6 first elements of the polynomial.
> >> + *
> >> + * @param lspf line spectral pair frequencies
> >> + * @param v_poly polynomial input/output as a vector
> >> + *
> >> + * TIA/EIA/IS-733 2.4.3.3.5-1/2
> >> + */
> >> +static void lsp2poly(const float *lspf,
> >> + float *v_poly) {
> >> + float val, *v;
> >> + int i;
> >> +
> >> + // optimization to simplify calculation in loop
> >> + v_poly++;
> >> +
> >> + for (i = 0; i < 10; i += 2) {
> >> + val = -2 * cos(M_PI * *lspf);
> >> + lspf += 2;
> >> + v = v_poly + FFMIN(4, i);
> >> +
> >> + if (i < 4) {
> >> + v[2] = v[0];
> >> + v[1] = v[0] * val + v[-1];
> >> + }
> >> + for ( ; v > v_poly; v--)
> >> + v[0] = v[0]
> >> + + v[-1] * val
> >> + + v[-2];
> >> + v[0] += v[-1] * val;
> >> + }
> >> +}
> >> +
> >> +/**
> >> + * Reconstructs LPC coefficients from the line spectral pair frequencies
> >> + * and performs bandwidth expansion.
> >> + *
> >> + * @param lspf line spectral pair frequencies
> >> + * @param lpc linear predictive coding coefficients
> >> + *
> >> + * @note: bandwith_expansion_coeff could be precalculated into a table
> >> + * but it seems to be slower on x86
> >> + *
> >> + * TIA/EIA/IS-733 2.4.3.3.5
> >> + */
> >> +void qcelp_lspf2lpc(const float *lspf,
> >> + float *lpc) {
> >> + float pa[6], qa[6];
> >> + int i;
> >> + float bandwith_expansion_coeff = -QCELP_BANDWITH_EXPANSION_COEFF;
> >> +
> >
> >> + pa[0] = 0.5;
> >> + pa[1] = 0.5;
> >> + lsp2poly(lspf, pa);
> >> +
> >> + qa[0] = 0.5;
> >> + qa[1] = -0.5;
> >> + lsp2poly(lspf + 1, qa);
> >
> > it should be faster to deal with 0.5 + 0.5x / 0.5 - 0.5x after building
> > the polynomials
>
> done
>
>
> >
> > anyway, see ff_acelp_lsp2lpc
>
> done, it is globally ~10% faster.
>
> but it gives some significant difference in the WAV output.
> I double-checked, and it seems to come from the float rounding :(
> here is the list of result of 'tiny_psnr old.wav new.wav'.
what happens with double instead of float?
[...]
> @@ -391,6 +428,16 @@
> #define QCELP_SCALE 8192.
>
> /**
> + * the upper boundary of the clipping, depends on QCELP_SCALE
> + */
> +#define QCELP_CLIP_UPPER_BOUND (8191.75/8192.)
> +
> +/**
> + * the lower boundary of the clipping, depends on QCELP_SCALE
> + */
> +#define QCELP_CLIP_LOWER_BOUND -1.
> +
> +/**
> * table for computing Ga (decoded linear codebook gain magnitude)
> *
> * @note The table could fit in int16_t in x*8 form, but it seems
ok
[...]
> @@ -470,7 +517,7 @@
> /**
> * sqrt(1.887) is the maximum of the pseudorandom
> * white sequence used to generate the scaled codebook
> - * vector for framerate 1/4.
> + * vector for bitrate 1/4.
> *
> * TIA/EIA/IS-733 2.4.8.1.2
> */
> @@ -478,7 +525,7 @@
>
> /**
> * table for impulse response of BPF used to filter
> - * the white excitation for framerate 1/4 synthesis
> + * the white excitation for bitrate 1/4 synthesis
> *
> * Only half the tables are needed because of symetry.
> *
ok, also all other such renamings are ok
> @@ -490,4 +537,20 @@
> -9.918777e-2, 3.749518e-2, 8.985137e-1
> };
>
> +/**
> + * This spread factor is used, for bitrate 1/8 and I_F_Q,
> + * to force the LSP frequencies to be at least 80 Hz apart.
> + *
> + * TIA/EIA/IS-733 2.4.3.3.2
> + */
> +#define QCELP_LSP_SPREAD_FACTOR 0.02
> +
> +/**
> + * predictor coefficient for the conversion of LSP codes
> + * to LSP frequencies for 1/8 and I_F_Q
> + *
> + * TIA/EIA/IS-733 2.4.3.2.7-2
> + */
> +#define QCELP_LSP_OCTAVE_PREDICTOR 29.0/32
> +
> #endif /* AVCODEC_QCELPDATA_H */
> Index: libavcodec/qcelpdec.c
> ===================================================================
> --- libavcodec/qcelpdec.c (revision 15885)
> +++ libavcodec/qcelpdec.c (working copy)
> @@ -69,6 +69,203 @@
> }
>
> /**
> + * Decodes the 10 quantized LSP frequencies from the LSPV/LSP
> + * transmission codes of any bitrate and checks for badly received packets.
> + *
> + * @param q the context
> + * @param lspf line spectral pair frequencies
> + *
> + * @return 0 on success, -1 if the packet is badly received
> + *
> + * TIA/EIA/IS-733 2.4.3.2.6.2-2, 2.4.8.7.3
> + */
> +static int decode_lspf(QCELPContext *q,
> + float *lspf) {
> + int i;
> + float tmp_lspf;
> +
> + if (q->bitrate == RATE_OCTAVE ||
> + q->bitrate == I_F_Q) {
> + float smooth;
> + const float *predictors = (q->prev_bitrate != RATE_OCTAVE &&
> + q->prev_bitrate != I_F_Q ? q->prev_lspf
> + : q->predictor_lspf);
> +
> + if (q->bitrate == RATE_OCTAVE) {
> + q->octave_count++;
> +
> + for (i = 0; i < 10; i++) {
> + q->predictor_lspf[i] =
> + lspf[i] = (q->lspv[i] ? QCELP_LSP_SPREAD_FACTOR
> + : -QCELP_LSP_SPREAD_FACTOR)
> + + predictors[i] * QCELP_LSP_OCTAVE_PREDICTOR
> + + (i + 1) * ((1 - QCELP_LSP_OCTAVE_PREDICTOR)/11);
> + }
> + smooth = (q->octave_count < 10 ? .875 : 0.1);
> + } else {
> + float erasure_coeff = QCELP_LSP_OCTAVE_PREDICTOR;
> +
> + assert(q->bitrate == I_F_Q);
> +
> + if (q->erasure_count > 1)
> + erasure_coeff *= (q->erasure_count < 4 ? 0.9 : 0.7);
> +
> + for (i = 0; i < 10; i++) {
> + q->predictor_lspf[i] =
> + lspf[i] = (i + 1) * ( 1 - erasure_coeff)/11
> + + erasure_coeff * predictors[i];
> + }
> + smooth = 0.125;
> + }
> +
> + // Check the stability of the LSP frequencies.
> + lspf[0] = FFMAX(lspf[0], QCELP_LSP_SPREAD_FACTOR);
> + for (i = 1; i < 10; i++)
> + lspf[i] = FFMAX(lspf[i], (lspf[i-1] + QCELP_LSP_SPREAD_FACTOR));
> +
> + lspf[9] = FFMIN(lspf[9], (1.0 - QCELP_LSP_SPREAD_FACTOR));
> + for (i = 9; i > 0; i--)
> + lspf[i-1] = FFMIN(lspf[i-1], (lspf[i] - QCELP_LSP_SPREAD_FACTOR));
> +
> + // Low-pass filter the LSP frequencies.
> + weighted_vector_sumf(lspf, lspf, q->prev_lspf, smooth, 1.0 - smooth, 10);
> + } else {
> + q->octave_count = 0;
> +
> + tmp_lspf = 0.;
> + for (i = 0; i < 5 ; i++) {
> + lspf[2*i+0] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][0] * 0.0001;
> + lspf[2*i+1] = tmp_lspf += qcelp_lspvq[i][q->lspv[i]][1] * 0.0001;
> + }
> +
> + // Check for badly received packets.
> + if (q->bitrate == RATE_QUARTER) {
> + if (lspf[9] <= .70 || lspf[9] >= .97)
> + return -1;
> + for (i = 3; i < 10; i++)
> + if (fabs(lspf[i] - lspf[i-2]) < .08)
> + return -1;
> + } else {
> + if (lspf[9] <= .66 || lspf[9] >= .985)
> + return -1;
> + for (i = 4; i < 10; i++)
> + if (fabs(lspf[i] - lspf[i-4]) < .0931)
> + return -1;
> + }
> + }
> + return 0;
> +}
ok
> +
> +/**
> + * Converts codebook transmission codes to GAIN and INDEX.
> + *
> + * @param q the context
> + * @param gain array holding the decoded gain
> + *
> + * TIA/EIA/IS-733 2.4.6.2
> + */
> +static void decode_gain_and_index(QCELPContext *q,
> + float *gain) {
> + int i, subframes_count, g1[16];
> + float gain_memory, smooth_coef;
> +
> + if (q->bitrate >= RATE_QUARTER) {
> + switch (q->bitrate) {
> + case RATE_FULL:
> + subframes_count = 16;
> + break;
> + case RATE_HALF:
> + subframes_count = 4;
> + break;
> + default:
> + subframes_count = 5;
> + }
switch (q->bitrate) {
case RATE_FULL: subframes_count = 16; break;
case RATE_HALF: subframes_count = 4 ; break;
default : subframes_count = 5 ;
}
> + for (i = 0; i < subframes_count; i++) {
> + g1[i] = 4 * q->cbgain[i];
> + if (q->bitrate == RATE_FULL && !((i+1) & 3)) {
> + g1[i] += av_clip((g1[i-1] + g1[i-2] + g1[i-3]) / 3 - 6, 0, 32);
> + }
> +
> + gain[i] = qcelp_g12ga[g1[i]];
> +
> + if (q->cbsign[i]) {
> + gain[i] = -gain[i];
> + q->cindex[i] = (q->cindex[i]-89) & 127;
> + }
> + }
> +
> + q->prev_g1[0] = g1[i-2];
> + q->prev_g1[1] = g1[i-1];
> + q->last_codebook_gain = qcelp_g12ga[g1[i-1]];
> +
> + if (q->bitrate == RATE_QUARTER) {
> + // Provide smoothing of the unvoiced excitation energy.
> + gain[7] = gain[4];
> + gain[6] = 0.4*gain[3] + 0.6*gain[4];
> + gain[5] = gain[3];
> + gain[4] = 0.8*gain[2] + 0.2*gain[3];
> + gain[3] = 0.2*gain[1] + 0.8*gain[2];
> + gain[2] = gain[1];
> + gain[1] = 0.6*gain[0] + 0.4*gain[1];
> + }
> + } else {
> + if (q->bitrate == RATE_OCTAVE) {
> + g1[0] = 2 * q->cbgain[0] + av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 5, 0, 54);
> + smooth_coef = 0.125;
> + i = 7;
> + } else {
> + assert(q->bitrate == I_F_Q);
> +
> + g1[0] = q->prev_g1[1];
> + switch (q->erasure_count) {
> + case 1 : break;
> + case 2 : g1[0] -= 1; break;
> + case 3 : g1[0] -= 2; break;
> + default: g1[0] -= 6;
> + }
> + if (g1[0] < 0)
> + g1[0] = 0;
> + smooth_coef = 0.25;
> + i = 3;
> + }
> + gain_memory = q->last_codebook_gain;
> +
> + q->last_codebook_gain =
> + gain[i] = 0.5 * (gain_memory + qcelp_g12ga[g1[0]]);
> +
> + smooth_coef *= (gain[i] - gain_memory);
> + // This interpolation is done to produce smoother background noise.
> + for (; i > 0; i--)
> + gain[i-1] = gain_memory + smooth_coef * i;
something like:
N= 8 or 4
slope= 0.5*(qcelp_g12ga[g1[0]] - q->last_codebook_gain)/N;
for (i=1; i<= N; i++)
gain[i-1] = q->last_codebook_gain + i*slope;
q->last_codebook_gain = gain[i-1];
appears simpler
> +
> + q->prev_g1[0] = q->prev_g1[1];
> + q->prev_g1[1] = g1[0];
> + }
> +}
> +
> +/**
> + * If the received packet is Rate 1/4 a further sanity check is made of the codebook gain.
> + *
> + * @param cbgain the unpacked cbgain array
> + * @return -1 if the sanity check fails, 0 otherwise
> + *
> + * TIA/EIA/IS-733 2.4.8.7.3
> + */
> +static int codebook_sanity_check_for_rate_quarter(const uint8_t *cbgain) {
> + int i, prev_diff=0;
> +
> + for (i = 1; i < 5; i++) {
> + int diff = cbgain[i] - cbgain[i-1];
> + if (FFABS(diff) > 10)
> + return -1;
> + else if (FFABS(diff - prev_diff) > 12)
> + return -1;
> + prev_diff = diff;
> + }
> + return 0;
> +}
> +
> +/**
> * Computes the scaled codebook vector Cdn From INDEX and GAIN
> * for all rates.
> *
ok
> @@ -96,7 +293,7 @@
[...]
> @@ -298,6 +553,46 @@
> return -1;
> }
>
> +/*
> + * Determine the bitrate from the frame size and/or the first byte of the frame.
> + *
> + * @param avctx the AV codec context
> + * @param buf_size length of the buffer
> + * @param buf the buffer
> + *
> + * @return the bitrate on success,
> + * I_F_Q if the bitrate cannot be satisfactorily determined
> + *
> + * TIA/EIA/IS-733 2.4.8.7.1
> + */
> +static int determine_bitrate(AVCodecContext *avctx,
> + const int buf_size,
> + uint8_t **buf) {
> + qcelp_packet_rate bitrate;
> +
> + if ((bitrate = buf_size2bitrate(buf_size)) >= 0) {
> + if (bitrate > **buf && **buf >= 0) {
**buf is unsigned, so I don't think the >= 0 check makes sense
[...]
> @@ -305,6 +600,107 @@
> message);
> }
>
> +static int qcelp_decode_frame(AVCodecContext *avctx,
> + void *data,
> + int *data_size,
> + uint8_t *buf,
> + const int buf_size) {
> + QCELPContext *q = avctx->priv_data;
> + float *outbuffer = data;
> + int i;
> + float quantized_lspf[10], lpc[10];
> + float gain[16];
> + float *formant_mem;
> +
> + if ((q->bitrate = determine_bitrate(avctx, buf_size, &buf)) == I_F_Q) {
> + warn_insufficient_frame_quality(avctx, "bitrate cannot be determined.");
> + goto erasure;
> + }
> +
> + if (q->bitrate == RATE_OCTAVE &&
> + (q->first16bits = AV_RB16(buf)) == 0xFFFF) {
> + warn_insufficient_frame_quality(avctx, "Bitrate is 1/8 and first 16 bits are on.");
> + goto erasure;
> + }
> +
> + if (q->bitrate > SILENCE) {
> + const QCELPBitmap *bitmaps = qcelp_unpacking_bitmaps_per_rate[q->bitrate];
> + const QCELPBitmap *bitmaps_end = qcelp_unpacking_bitmaps_per_rate[q->bitrate]
> + + qcelp_bits_per_rate[q->bitrate];
> + uint8_t *unpacked_data = (uint8_t *)q;
> +
> + init_get_bits(&q->gb, buf, buf_size);
I think this is mixing up a size in bits and a size in bytes: init_get_bits() takes the buffer size in bits, but buf_size is in bytes
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Those who are best at talking, realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081121/a9eb2bc4/attachment.pgp>
More information about the ffmpeg-devel
mailing list