[FFmpeg-devel] AMR-NB decoder

Thu Aug 6 17:28:43 CEST 2009

On Wed, Aug 05, 2009 at 05:51:36PM +0100, Colin McQuillan wrote:
> Attached is a patch for an AMR-NB decoder.
> 
> It is not bit-exact. This makes it tricky to verify, but I have been
> checking that internal parameters match the 3GPP decoder for the AMR
> test sequences. The PSNR between the input and output is 3.90 to 8.42
> which is about the same as the reference decoder. The PSNR between the
> two outputs is between 8.50 and 18.16, which seems quite good.

[...]
> +/**
> + * AMRNB SID frame parameters
> + */
> +typedef struct {
> +    uint16_t ref_vector; ///< index of reference vector
> +    uint16_t energy;     ///< index of logarithmic frame energy
> +} AMRNBSIDFrame;
> +
> +/**
> + * AMRNB unpacked data frame
> + */
> +typedef struct {
> +    uint16_t lsf[5];           ///< lsf parameters: 5 parameters for MODE_122, only 3 for other modes
> +    union {
> +        AMRNBSubframe subframe[4]; ///< unpacked data for each subframe
> +        AMRNBSIDFrame sid;
> +    } info;
> +} AMRNBFrame;

That's quite complex just to avoid having 4 more bytes in the struct.

> +
> +
> +// The following order* tables are used to convert AMR frame parameters to and
> +// from a bitstream. See 3GPP TS 26.101 for more information.
> +
> +#define AMR_BIT(field, bit)                  {offsetof(AMRNBFrame, field) >> 1, bit}

> +/** Specify an LSF parameter bit */
> +#define AMR_LSF(variable, bit)               AMR_BIT(lsf[variable], bit)
> +/** Specify a subframe-specific bit */
> +#define AMR_OF(frame_num, variable, bit)     AMR_BIT(info.subframe[frame_num].variable, bit)
> +/** Specify a pitch gain bit */
> +#define AMR_PGAIN(frame_num, bit)            AMR_OF(frame_num, p_gain, bit)
> +/** Specify a fixed gain bit */
> +#define AMR_FIXED_GAIN(frame_num, bit)       AMR_OF(frame_num, fixed_gain, bit)
> +/** Specify a pitch lag bit */
> +#define AMR_PLAG(frame_num, bit)             AMR_OF(frame_num, p_lag, bit)
> +/** Specify a pulse bit */
> +#define AMR_PULSES(frame_num, pulse_id, bit) AMR_OF(frame_num, pulses[pulse_id], bit)
> +/** Specify an SID reference vector bit */
> +#define AMR_SVECTOR(bit)                     AMR_BIT(info.sid.ref_vector, bit)
> +/** Specify an SID energy index bit */
> +#define AMR_SENERGY(bit)                     AMR_BIT(info.sid.energy, bit)

are these macros really useful?

[...]

> +// LSF tables

They are kind of big; I assume they can't be stored more efficiently?

[...]

> Index: libavcodec/celp_filters.c
> ===================================================================
> --- libavcodec/celp_filters.c	(revision 19598)
> +++ libavcodec/celp_filters.c	(working copy)
> @@ -47,6 +47,28 @@
>      }
>  }
>  
> +void ff_celp_convolve_circf(float* fc_out,
> +                            const float* fc_in,
> +                            const float* filter,
> +                            int len)
> +{
> +    int i, k;
> +
> +    memset(fc_out, 0, len * sizeof(float));
> +
> +    /* Since there are few pulses over an entire subframe (i.e. almost
> +       all fc_in[i] are zero) it is faster to loop over fc_in first. */
> +    for (i = 0; i < len; i++) {
> +        if (fc_in[i]) {
> +            for (k = 0; k < i; k++)
> +                fc_out[k] += fc_in[i] * filter[len + k - i];
> +
> +            for (k = i; k < len; k++)
> +                fc_out[k] += fc_in[i] * filter[      k - i];
> +        }
> +    }
> +}
> +
>  int ff_celp_lp_synthesis_filter(int16_t *out,
>                                  const int16_t* filter_coeffs,
>                                  const int16_t* in,
> Index: libavcodec/celp_filters.h
> ===================================================================
> --- libavcodec/celp_filters.h	(revision 19598)
> +++ libavcodec/celp_filters.h	(working copy)
> @@ -42,6 +42,22 @@
>                             int len);
>  
>  /**
> + * Circularly convolve fixed vector with a phase dispersion impulse
> + *        response filter (D.6.2 of G.729 and 6.1.5 of AMR).
> + * @param fc_out vector with filter applied
> + * @param fc_in source vector
> + * @param filter phase filter coefficients
> + *
> + *  fc_out[n] = sum(i,0,len-1){ fc_in[i] * filter[(len + n - i)%len] }
> + *
> + * \note fc_in and fc_out should not overlap!
> + */
> +void ff_celp_convolve_circf(float* fc_out,
> +                            const float* fc_in,
> +                            const float* filter,
> +                            int len);
> +
> +/**
>   * LP synthesis filter.
>   * @param out [out] pointer to output buffer
>   * @param filter_coeffs filter coefficients (-0x8000 <= (3.12) < 0x8000)

That should be a separate patch.

> Index: libavcodec/acelp_vectors.c
> ===================================================================
> --- libavcodec/acelp_vectors.c	(revision 19598)
> +++ libavcodec/acelp_vectors.c	(working copy)
> @@ -22,6 +22,7 @@
>  
>  #include <inttypes.h>
>  #include "avcodec.h"
> +#include "celp_math.h"
>  #include "acelp_vectors.h"
>  
>  const uint8_t ff_fc_2pulses_9bits_track1[16] =
> @@ -155,3 +156,25 @@
>          out[i] = weight_coeff_a * in_a[i]
>                 + weight_coeff_b * in_b[i];
>  }
> +
> +float ff_energyf(const float *v, int length)
> +{
> +    float sum = 0;
> +    int i;
> +
> +    for (i = 0; i < length; i++)
> +        sum += v[i] * v[i];
> +
> +    return sum;
> +}
> +
> +void ff_set_energyf(float *v_out, const float *v_in, float energy,
> +                    const int length)
> +{
> +    int i;
> +    float scalefactor = ff_energyf(v_in, length);
> +    if (scalefactor)
> +        scalefactor = sqrt(energy / scalefactor);
> +    for (i = 0; i < length; i++)
> +        v_out[i] = v_in[i] * scalefactor;
> +}
> Index: libavcodec/acelp_vectors.h
> ===================================================================
> --- libavcodec/acelp_vectors.h	(revision 19598)
> +++ libavcodec/acelp_vectors.h	(working copy)
> @@ -164,4 +164,31 @@
>  void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b,
>                               float weight_coeff_a, float weight_coeff_b, int length);
>  
> +/**
> + * returns the energy
> + * @param in input data array
> + * @param length number of elements
> + *
> + * @return energy = sum of squares
> + */
> +float ff_energyf(const float *in, int length);
> +
> +/**
> + * Set the energy of a vector by scaling
> + *
> + * @param v_out output vector
> + * @param v_in vector to set energy of
> + * @param energy new energy
> + * @param length vectors length
> + *
> + * @note If v is zero (or its energy underflows), the output is zero.
> + *       This is the behavior of AGC in the AMR reference decoder. The QCELP
> + *       reference decoder seems to have undefined behavior.
> + *
> + * TIA/EIA/IS-733 2.4.8.3-2/3/4/5, 2.4.8.6
> + * 3GPP TS 26.090 6.1 (6)
> + */
> +void ff_set_energyf(float *v_out, const float *v_in, float energy,
> +                    const int length);
> +
>  #endif /* AVCODEC_ACELP_VECTORS_H */

so should this

> Index: libavcodec/allcodecs.c
> ===================================================================
> --- libavcodec/allcodecs.c	(revision 19598)
> +++ libavcodec/allcodecs.c	(working copy)
> @@ -198,6 +198,7 @@
>      REGISTER_ENCDEC  (AAC, aac);
>      REGISTER_ENCDEC  (AC3, ac3);
>      REGISTER_ENCDEC  (ALAC, alac);
> +    REGISTER_DECODER (AMRNB, amrnb);
>      REGISTER_DECODER (APE, ape);
>      REGISTER_DECODER (ATRAC3, atrac3);
>      REGISTER_DECODER (COOK, cook);
> Index: libavcodec/acelp_filters.c
> ===================================================================
> --- libavcodec/acelp_filters.c	(revision 19598)
> +++ libavcodec/acelp_filters.c	(working copy)
> @@ -93,3 +93,17 @@
>          hpf_f[0] = tmp;
>      }
>  }
> +
> +void ff_acelp_high_pass_filterf(float *buf, float *mem, int length)
> +{
> +    int i;
> +    float tmp;
> +
> +    for (i = 0; i < length; i++) {
> +         tmp = buf[i] + 1.933105469 * mem[0] - 0.935913085 * mem[1];
> +         buf[i] = 0.939819335 * (tmp - 2 * mem[0] + mem[1]);
> +
> +         mem[1] = mem[0];
> +         mem[0] = tmp;
> +    }
> +}
> Index: libavcodec/acelp_filters.h
> ===================================================================
> --- libavcodec/acelp_filters.h	(revision 19598)
> +++ libavcodec/acelp_filters.h	(working copy)
> @@ -81,4 +81,12 @@
>  void ff_acelp_high_pass_filter(int16_t* out, int hpf_f[2],
>                                 const int16_t* in, int length);
>  
> +/**
> + * high-pass filtering (6.2.2 of 3GPP TS 26.090)
> + * @param samples [in/out]?
> + * @param mem intermediate values used by filter (should be 0 initially)
> + * @param length input data size
> + */
> +void ff_acelp_high_pass_filterf(float *samples, float mem[2], int length);
> +
>  #endif /* AVCODEC_ACELP_FILTERS_H */

and this

[...]
> +typedef struct AMRContext {
> +
> +    GetBitContext                        gb;
> +
> +    AMRNBFrame                        frame; ///< decoded AMR parameters (lsf coefficients, codebook indexes, etc)
> +    uint8_t             bad_frame_indicator; ///< bad frame ? 1 : 0

> +    enum Mode                cur_frame_mode; ///< current frame mode

The comment is redundant.

> +
> +    float       prev_lsf_r[LP_FILTER_ORDER]; ///< residual LSF vector from previous subframe
> +    float           lsp[4][LP_FILTER_ORDER]; ///< lsp vectors from current frame
> +    float    prev_lsp_sub4[LP_FILTER_ORDER]; ///< lsp vector for the 4th subframe of the previous frame
> +
> +    float         lsf_q[4][LP_FILTER_ORDER]; ///< Interpolated LSF vector for fixed gain smoothing
> +    float          lsf_avg[LP_FILTER_ORDER]; ///< vector of averaged lsf vector
> +
> +    float           lpc[4][LP_FILTER_ORDER]; ///< lpc coefficient vectors for 4 subframes
> +
> +    uint8_t                   pitch_lag_int; ///< integer part of pitch lag from current subframe
> +

> +    float excitation_buf[PITCH_LAG_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE]; ///< excitation buffer

Redundant comment.

[...]
> +/**
> + * Decode an RFC4867 speech frame into the AMR frame mode and parameters.
> + *
> + * The order of speech bits is specified by 3GPP TS 26.101.
> + *
> + * @param p the context
> + * @param buf               pointer to the input buffer
> + * @param buf_size          size of the input buffer
> + *
> + * @return the frame mode
> + */
> +static enum Mode decode_bitstream(AMRContext *p, const uint8_t *buf,
> +                                  int buf_size)

i think unpack_bitstream() is a better name

> +{
> +    enum Mode mode;
> +
> +    init_get_bits(&p->gb, buf, buf_size * 8);
> +
> +    // Decode the first octet.
> +    skip_bits(&p->gb, 1);                        // padding bit
> +    mode = get_bits(&p->gb, 4);                  // frame type
> +    p->bad_frame_indicator = !get_bits1(&p->gb); // quality bit
> +    skip_bits(&p->gb, 2);                        // two padding bits
> +
> +    if (mode <= MODE_DTX) {
> +        uint16_t *data = (uint16_t *)&p->frame;
> +        const AMROrder *order = amr_unpacking_bitmaps_per_mode[mode];
> +        int i;
> +
> +        memset(&p->frame, 0, sizeof(AMRNBFrame));

> +        for (i = 0; i < mode_bits[mode]; i++)
> +            data[order[i].index] += get_bits1(&p->gb) << order[i].bit;

It might reduce code size and improve speed if more than 1 bit were
read at a time where possible.

> +    }
> +
> +    return mode;
> +}
> +
> +
> +/// @defgroup amr_lpc_decoding AMR pitch LPC coefficient decoding functions
> +/// @{
> +

> +/**
> + * Convert an lsf vector into an lsp vector.
> + *
> + * @param lsf               input lsf vector
> + * @param lsp               output lsp vector
> + */
> +static void lsf2lsp(float *lsf, float *lsp)
> +{
> +    int i;
> +
> +    for (i = 0; i < LP_FILTER_ORDER; i++)
> +        lsp[i] = cos(lsf[i] * FREQ_LSP_FAC); // FREQ_LSP_FAC = 2*M_PI / 8000.0

considering that FREQ_LSP_FAC is used just once, maybe using its expression
directly would be simpler

[...]
> +/// @}
> +
> +
> +/// @defgroup amr_pitch_vector_decoding AMR pitch vector decoding functions
> +/// @{
> +
> +/**
> + * Decode the adaptive codebook index to the integer and fractional parts
> + * of the pitch lag for one subframe at 1/6 resolution for MODE_122,
> + * 1/3 for other modes.
> + *
> + * The choice of pitch lag is described in 3GPP TS 26.090 section 5.6.1.
> + *
> + * @param lag_int             integer part of pitch lag of the current subframe
> + * @param lag_frac            fractional part of pitch lag of the current subframe
> + * @param pitch_index         parsed adaptive codebook (pitch) index
> + * @param prev_lag_int        integer part of pitch lag for the previous subframe
> + * @param subframe            current subframe number
> + * @param mode                mode of the current frame
> + */
> +static void decode_pitch_lag(int *lag_int, int *lag_frac, int pitch_index,
> +                             const int prev_lag_int, const int subframe,
> +                             const enum Mode mode)
> +{
> +    /* Note n * 10923 >> 15 is floor(x/3) for 0 <= n <= 32767 */
> +    if (subframe == 0 ||
> +        (subframe == 2 && mode != MODE_475 && mode != MODE_515)) {
> +        if (mode == MODE_122) {
> +            if (pitch_index < 463) {

> +                *lag_int  = (pitch_index + 5) / 6 + 17;

* 10923 >> 16
or something like that for consistency

> +                *lag_frac = pitch_index - *lag_int * 6 + 105;
> +            } else {
> +                *lag_int  = pitch_index - 368;
> +                *lag_frac = 0;
> +            }
> +        } else if (pitch_index < 197) {

> +            *lag_int  = ((pitch_index + 2) * 10923 >> 15) + 19;

the +2 and +19 can maybe be merged

[...]
> +/**
> + * Apply pitch lag to the fixed vector (section 6.1.2)
> + *
> + * @param p the context
> + * @param subframe unpacked amr subframe
> + * @param mode mode of the current frame
> + * @param fixed_vector vector to be modified
> + */
> +static void pitch_sharpening(AMRContext *p, int subframe, enum Mode mode,
> +                             float *fixed_vector)
> +{
> +    int i;
> +
> +    // The spec suggests the current pitch gain is always used, but in other
> +    // modes the pitch and codebook gains are joinly quantized (sec 5.8.2)
> +    // so the codebook gain cannot depend on the quantized pitch gain.
> +    if (mode == MODE_122)
> +        p->beta = FFMIN(p->pitch_gain[4], 1.0);

> +
> +    // conduct pitch sharpening as appropriate (section 6.1.2)
> +    if (p->pitch_lag_int < AMR_SUBFRAME_SIZE)
> +        for (i = p->pitch_lag_int; i < AMR_SUBFRAME_SIZE; i++)
> +            fixed_vector[i] += p->beta * fixed_vector[i - p->pitch_lag_int];

This can be optimized if one considers that fixed_vector is sparse (and
stores it appropriately).
Other code could possibly be optimized similarly; I don't know how
sparse the various vectors are, so it may or may not make sense for the others.

> +
> +    // Save pitch sharpening factor for the next subframe
> +    // MODE_475 only updates on the 2nd and 4th subframes - this follows from
> +    // the fact that the gains for two subframes are jointly quantized.
> +    if (mode != MODE_475 || subframe & 1)
> +        p->beta = av_clipf(p->pitch_gain[4], 0.0, SHARP_MAX);
> +}
> +
> +/// @}
> +
> +
> +/// @defgroup amr_gain_decoding AMR gain decoding functions
> +/// @{
> +

> +/**
> + * fixed gain smoothing
> + * Note that where the spec specifies the "spectrum in the q domain"
> + * in section 6.1.4, in fact frequencies should be used.
> + *
> + * @param p the context
> + * @param lsf LSFs for the current subframe, in the range [0,1]
> + * @param lsf_avg averaged LSFs
> + * @param mode mode of the current frame
> + *
> + * @return fixed gain smoothed
> + */
> +static float fixed_gain_smooth(AMRContext *p , const float *lsf,
> +                               const float *lsf_avg, const enum Mode mode)
> +{
> +    float diff = 0.0;
> +    int i;
> +
> +    for (i = 0; i < LP_FILTER_ORDER; i++)
> +        diff += fabs(lsf_avg[i] - lsf[i]) / lsf_avg[i];
> +
> +    // If diff is large for ten subframes, disable smoothing for a 40-subframe
> +    // hangover period.
> +    p->diff_count = diff > 0.65 ? p->diff_count + 1 : 0;

I'd write

p->diff_count++;
if(diff <= 0.65)
    p->diff_count= 0;

It feels more readable, but that's really minor nitpicking.

Either way, can't diff_count overflow?

> +
> +    if (p->diff_count > 10)
> +        p->hang_count = 0;
> +
> +    if (p->hang_count < 40) {
> +        p->hang_count++;
> +    } else if (mode < MODE_74 || mode == MODE_102) {
> +        const float smoothing_factor = av_clipf(4.0 * diff - 1.6, 0.0, 1.0);
> +        const float fixed_gain_mean = (p->fixed_gain[0] + p->fixed_gain[1] +
> +                                       p->fixed_gain[2] + p->fixed_gain[3] +
> +                                       p->fixed_gain[4]) * 0.2;
> +        return smoothing_factor * p->fixed_gain[4] +
> +               (1.0 - smoothing_factor) * fixed_gain_mean;
> +    }
> +    return p->fixed_gain[4];
> +}
> +

> +/**
> + * Decode pitch gain and fixed gain factor (part of section 6.1.3).
> + *
> + * @param p the context
> + * @param amr_subframe unpacked amr subframe
> + * @param mode mode of the current frame
> + * @param subframe current subframe number
> + * @param fixed_gain_factor decoded gain correction factor
> + */
> +static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe,
> +                         const enum Mode mode, const int subframe,
> +                         float *fixed_gain_factor)
> +{
> +    if (mode == MODE_122 || mode == MODE_795) {

> +        p->pitch_gain[4]  = qua_gain_pit [amr_subframe->p_gain];
> +        *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain];

could be vertically aligned

> +    } else {
> +        const float *gains =
> +            mode >= MODE_67  ? gains_high[amr_subframe->p_gain] :
> +            mode >= MODE_515 ? gains_low [amr_subframe->p_gain] :
> +                // gain index is only coded in subframes 0,2 for MODE_475
> +                gains_MODE_475[(p->frame.info.subframe[subframe & 2].p_gain
> +                                                                       << 1) +
> +                               (subframe & 1)];
> +

> +        p->pitch_gain[4]  = gains[0];
> +        *fixed_gain_factor = gains[1];

these too

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In a rich man's house there is no place to spit but his face.
-- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090806/11d14cb5/attachment.pgp>