[FFmpeg-devel] AMR-NB decoder
Michael Niedermayer
michaelni
Thu Aug 6 17:28:43 CEST 2009
On Wed, Aug 05, 2009 at 05:51:36PM +0100, Colin McQuillan wrote:
> Attached is a patch for an AMR-NB decoder.
>
> It is not bit-exact. This makes it tricky to verify, but I have been
> checking that internal parameters match the 3GPP decoder for the AMR
> test sequences. The PSNR between the input and output is 3.90 to 8.42
> which is about the same as the reference decoder. The PSNR between the
> two outputs is between 8.50 and 18.16, which seems quite good.
[...]
> +/**
> + * AMRNB SID frame parameters
> + */
> +typedef struct {
> + uint16_t ref_vector; ///< index of reference vector
> + uint16_t energy; ///< index of logarithmic frame energy
> +} AMRNBSIDFrame;
> +
> +/**
> + * AMRNB unpacked data frame
> + */
> +typedef struct {
> + uint16_t lsf[5]; ///< lsf parameters: 5 parameters for MODE_122, only 3 for other modes
> + union {
> + AMRNBSubframe subframe[4]; ///< unpacked data for each subframe
> + AMRNBSIDFrame sid;
> + } info;
> +} AMRNBFrame;
that's quite complex just to avoid having 4 more bytes in the struct
> +
> +
> +// The following order* tables are used to convert AMR frame parameters to and
> +// from a bitstream. See 3GPP TS 26.101 for more information.
> +
> +#define AMR_BIT(field, bit) {offsetof(AMRNBFrame, field) >> 1, bit}
> +/** Specify an LSF parameter bit */
> +#define AMR_LSF(variable, bit) AMR_BIT(lsf[variable], bit)
> +/** Specify a subframe-specific bit */
> +#define AMR_OF(frame_num, variable, bit) AMR_BIT(info.subframe[frame_num].variable, bit)
> +/** Specify a pitch gain bit */
> +#define AMR_PGAIN(frame_num, bit) AMR_OF(frame_num, p_gain, bit)
> +/** Specify a fixed gain bit */
> +#define AMR_FIXED_GAIN(frame_num, bit) AMR_OF(frame_num, fixed_gain, bit)
> +/** Specify a pitch lag bit */
> +#define AMR_PLAG(frame_num, bit) AMR_OF(frame_num, p_lag, bit)
> +/** Specify a pulse bit */
> +#define AMR_PULSES(frame_num, pulse_id, bit) AMR_OF(frame_num, pulses[pulse_id], bit)
> +/** Specify an SID reference vector bit */
> +#define AMR_SVECTOR(bit) AMR_BIT(info.sid.ref_vector, bit)
> +/** Specify an SID energy index bit */
> +#define AMR_SENERGY(bit) AMR_BIT(info.sid.energy, bit)
are these macros really useful?
[...]
> +// LSF tables
they are kinda big; I assume they can't be stored more efficiently?
[...]
> Index: libavcodec/celp_filters.c
> ===================================================================
> --- libavcodec/celp_filters.c (revision 19598)
> +++ libavcodec/celp_filters.c (working copy)
> @@ -47,6 +47,28 @@
> }
> }
>
> +void ff_celp_convolve_circf(float* fc_out,
> + const float* fc_in,
> + const float* filter,
> + int len)
> +{
> + int i, k;
> +
> + memset(fc_out, 0, len * sizeof(float));
> +
> + /* Since there are few pulses over an entire subframe (i.e. almost
> + all fc_in[i] are zero) it is faster to loop over fc_in first. */
> + for (i = 0; i < len; i++) {
> + if (fc_in[i]) {
> + for (k = 0; k < i; k++)
> + fc_out[k] += fc_in[i] * filter[len + k - i];
> +
> + for (k = i; k < len; k++)
> + fc_out[k] += fc_in[i] * filter[ k - i];
> + }
> + }
> +}
> +
> int ff_celp_lp_synthesis_filter(int16_t *out,
> const int16_t* filter_coeffs,
> const int16_t* in,
> Index: libavcodec/celp_filters.h
> ===================================================================
> --- libavcodec/celp_filters.h (revision 19598)
> +++ libavcodec/celp_filters.h (working copy)
> @@ -42,6 +42,22 @@
> int len);
>
> /**
> + * Circularly convolve fixed vector with a phase dispersion impulse
> + * response filter (D.6.2 of G.729 and 6.1.5 of AMR).
> + * @param fc_out vector with filter applied
> + * @param fc_in source vector
> + * @param filter phase filter coefficients
> + *
> + * fc_out[n] = sum(i,0,len-1){ fc_in[i] * filter[(len + n - i)%len] }
> + *
> + * \note fc_in and fc_out should not overlap!
> + */
> +void ff_celp_convolve_circf(float* fc_out,
> + const float* fc_in,
> + const float* filter,
> + int len);
> +
> +/**
> * LP synthesis filter.
> * @param out [out] pointer to output buffer
> * @param filter_coeffs filter coefficients (-0x8000 <= (3.12) < 0x8000)
that should be a separate patch
> Index: libavcodec/acelp_vectors.c
> ===================================================================
> --- libavcodec/acelp_vectors.c (revision 19598)
> +++ libavcodec/acelp_vectors.c (working copy)
> @@ -22,6 +22,7 @@
>
> #include <inttypes.h>
> #include "avcodec.h"
> +#include "celp_math.h"
> #include "acelp_vectors.h"
>
> const uint8_t ff_fc_2pulses_9bits_track1[16] =
> @@ -155,3 +156,25 @@
> out[i] = weight_coeff_a * in_a[i]
> + weight_coeff_b * in_b[i];
> }
> +
> +float ff_energyf(const float *v, int length)
> +{
> + float sum = 0;
> + int i;
> +
> + for (i = 0; i < length; i++)
> + sum += v[i] * v[i];
> +
> + return sum;
> +}
> +
> +void ff_set_energyf(float *v_out, const float *v_in, float energy,
> + const int length)
> +{
> + int i;
> + float scalefactor = ff_energyf(v_in, length);
> + if (scalefactor)
> + scalefactor = sqrt(energy / scalefactor);
> + for (i = 0; i < length; i++)
> + v_out[i] = v_in[i] * scalefactor;
> +}
> Index: libavcodec/acelp_vectors.h
> ===================================================================
> --- libavcodec/acelp_vectors.h (revision 19598)
> +++ libavcodec/acelp_vectors.h (working copy)
> @@ -164,4 +164,31 @@
> void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b,
> float weight_coeff_a, float weight_coeff_b, int length);
>
> +/**
> + * returns the energy
> + * @param in input data array
> + * @param length number of elements
> + *
> + * @return energy = sum of squares
> + */
> +float ff_energyf(const float *in, int length);
> +
> +/**
> + * Set the energy of a vector by scaling
> + *
> + * @param v_out output vector
> + * @param v_in vector to set energy of
> + * @param energy new energy
> + * @param length vectors length
> + *
> + * @note If v is zero (or its energy underflows), the output is zero.
> + * This is the behavior of AGC in the AMR reference decoder. The QCELP
> + * reference decoder seems to have undefined behavior.
> + *
> + * TIA/EIA/IS-733 2.4.8.3-2/3/4/5, 2.4.8.6
> + * 3GPP TS 26.090 6.1 (6)
> + */
> +void ff_set_energyf(float *v_out, const float *v_in, float energy,
> + const int length);
> +
> #endif /* AVCODEC_ACELP_VECTORS_H */
this should also be a separate patch
> Index: libavcodec/allcodecs.c
> ===================================================================
> --- libavcodec/allcodecs.c (revision 19598)
> +++ libavcodec/allcodecs.c (working copy)
> @@ -198,6 +198,7 @@
> REGISTER_ENCDEC (AAC, aac);
> REGISTER_ENCDEC (AC3, ac3);
> REGISTER_ENCDEC (ALAC, alac);
> + REGISTER_DECODER (AMRNB, amrnb);
> REGISTER_DECODER (APE, ape);
> REGISTER_DECODER (ATRAC3, atrac3);
> REGISTER_DECODER (COOK, cook);
> Index: libavcodec/acelp_filters.c
> ===================================================================
> --- libavcodec/acelp_filters.c (revision 19598)
> +++ libavcodec/acelp_filters.c (working copy)
> @@ -93,3 +93,17 @@
> hpf_f[0] = tmp;
> }
> }
> +
> +void ff_acelp_high_pass_filterf(float *buf, float *mem, int length)
> +{
> + int i;
> + float tmp;
> +
> + for (i = 0; i < length; i++) {
> + tmp = buf[i] + 1.933105469 * mem[0] - 0.935913085 * mem[1];
> + buf[i] = 0.939819335 * (tmp - 2 * mem[0] + mem[1]);
> +
> + mem[1] = mem[0];
> + mem[0] = tmp;
> + }
> +}
> Index: libavcodec/acelp_filters.h
> ===================================================================
> --- libavcodec/acelp_filters.h (revision 19598)
> +++ libavcodec/acelp_filters.h (working copy)
> @@ -81,4 +81,12 @@
> void ff_acelp_high_pass_filter(int16_t* out, int hpf_f[2],
> const int16_t* in, int length);
>
> +/**
> + * high-pass filtering (6.2.2 of 3GPP TS 26.090)
> + * @param samples [in/out]?
> + * @param mem intermediate values used by filter (should be 0 initially)
> + * @param length input data size
> + */
> +void ff_acelp_high_pass_filterf(float *samples, float mem[2], int length);
> +
> #endif /* AVCODEC_ACELP_FILTERS_H */
and this should be a separate patch as well
[...]
> +typedef struct AMRContext {
> +
> + GetBitContext gb;
> +
> + AMRNBFrame frame; ///< decoded AMR parameters (lsf coefficients, codebook indexes, etc)
> + uint8_t bad_frame_indicator; ///< bad frame ? 1 : 0
> + enum Mode cur_frame_mode; ///< current frame mode
the comment is redundant
> +
> + float prev_lsf_r[LP_FILTER_ORDER]; ///< residual LSF vector from previous subframe
> + float lsp[4][LP_FILTER_ORDER]; ///< lsp vectors from current frame
> + float prev_lsp_sub4[LP_FILTER_ORDER]; ///< lsp vector for the 4th subframe of the previous frame
> +
> + float lsf_q[4][LP_FILTER_ORDER]; ///< Interpolated LSF vector for fixed gain smoothing
> + float lsf_avg[LP_FILTER_ORDER]; ///< vector of averaged lsf vector
> +
> + float lpc[4][LP_FILTER_ORDER]; ///< lpc coefficient vectors for 4 subframes
> +
> + uint8_t pitch_lag_int; ///< integer part of pitch lag from current subframe
> +
> + float excitation_buf[PITCH_LAG_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE]; ///< excitation buffer
redundant comment
[...]
> +/**
> + * Decode an RFC4867 speech frame into the AMR frame mode and parameters.
> + *
> + * The order of speech bits is specified by 3GPP TS 26.101.
> + *
> + * @param p the context
> + * @param buf pointer to the input buffer
> + * @param buf_size size of the input buffer
> + *
> + * @return the frame mode
> + */
> +static enum Mode decode_bitstream(AMRContext *p, const uint8_t *buf,
> + int buf_size)
i think unpack_bitstream() is a better name
> +{
> + enum Mode mode;
> +
> + init_get_bits(&p->gb, buf, buf_size * 8);
> +
> + // Decode the first octet.
> + skip_bits(&p->gb, 1); // padding bit
> + mode = get_bits(&p->gb, 4); // frame type
> + p->bad_frame_indicator = !get_bits1(&p->gb); // quality bit
> + skip_bits(&p->gb, 2); // two padding bits
> +
> + if (mode <= MODE_DTX) {
> + uint16_t *data = (uint16_t *)&p->frame;
> + const AMROrder *order = amr_unpacking_bitmaps_per_mode[mode];
> + int i;
> +
> + memset(&p->frame, 0, sizeof(AMRNBFrame));
> + for (i = 0; i < mode_bits[mode]; i++)
> + data[order[i].index] += get_bits1(&p->gb) << order[i].bit;
it might reduce code size and improve speed if more than 1 bit were
read at a time where possible
> + }
> +
> + return mode;
> +}
> +
> +
> +/// @defgroup amr_lpc_decoding AMR pitch LPC coefficient decoding functions
> +/// @{
> +
> +/**
> + * Convert an lsf vector into an lsp vector.
> + *
> + * @param lsf input lsf vector
> + * @param lsp output lsp vector
> + */
> +static void lsf2lsp(float *lsf, float *lsp)
> +{
> + int i;
> +
> + for (i = 0; i < LP_FILTER_ORDER; i++)
> + lsp[i] = cos(lsf[i] * FREQ_LSP_FAC); // FREQ_LSP_FAC = 2*M_PI / 8000.0
considering that FREQ_LSP_FAC is used just once, maybe using its expression
directly would be simpler
[...]
> +/// @}
> +
> +
> +/// @defgroup amr_pitch_vector_decoding AMR pitch vector decoding functions
> +/// @{
> +
> +/**
> + * Decode the adaptive codebook index to the integer and fractional parts
> + * of the pitch lag for one subframe at 1/6 resolution for MODE_122,
> + * 1/3 for other modes.
> + *
> + * The choice of pitch lag is described in 3GPP TS 26.090 section 5.6.1.
> + *
> + * @param lag_int integer part of pitch lag of the current subframe
> + * @param lag_frac fractional part of pitch lag of the current subframe
> + * @param pitch_index parsed adaptive codebook (pitch) index
> + * @param prev_lag_int integer part of pitch lag for the previous subframe
> + * @param subframe current subframe number
> + * @param mode mode of the current frame
> + */
> +static void decode_pitch_lag(int *lag_int, int *lag_frac, int pitch_index,
> + const int prev_lag_int, const int subframe,
> + const enum Mode mode)
> +{
> + /* Note n * 10923 >> 15 is floor(n/3) for 0 <= n <= 32767 */
> + if (subframe == 0 ||
> + (subframe == 2 && mode != MODE_475 && mode != MODE_515)) {
> + if (mode == MODE_122) {
> + if (pitch_index < 463) {
> + *lag_int = (pitch_index + 5) / 6 + 17;
* 10923 >> 16
or something like that for consistency
> + *lag_frac = pitch_index - *lag_int * 6 + 105;
> + } else {
> + *lag_int = pitch_index - 368;
> + *lag_frac = 0;
> + }
> + } else if (pitch_index < 197) {
> + *lag_int = ((pitch_index + 2) * 10923 >> 15) + 19;
the +2 and +19 can maybe be merged
[...]
> +/**
> + * Apply pitch lag to the fixed vector (section 6.1.2)
> + *
> + * @param p the context
> + * @param subframe unpacked amr subframe
> + * @param mode mode of the current frame
> + * @param fixed_vector vector to be modified
> + */
> +static void pitch_sharpening(AMRContext *p, int subframe, enum Mode mode,
> + float *fixed_vector)
> +{
> + int i;
> +
> + // The spec suggests the current pitch gain is always used, but in other
> + // modes the pitch and codebook gains are jointly quantized (sec 5.8.2)
> + // so the codebook gain cannot depend on the quantized pitch gain.
> + if (mode == MODE_122)
> + p->beta = FFMIN(p->pitch_gain[4], 1.0);
> +
> + // conduct pitch sharpening as appropriate (section 6.1.2)
> + if (p->pitch_lag_int < AMR_SUBFRAME_SIZE)
> + for (i = p->pitch_lag_int; i < AMR_SUBFRAME_SIZE; i++)
> + fixed_vector[i] += p->beta * fixed_vector[i - p->pitch_lag_int];
this can be optimized if one considers that fixed_vector is sparse (and
stores it appropriately).
Possibly other code could be optimized similarly; I don't know how
sparse the various vectors are, so it may or may not make sense for others
> +
> + // Save pitch sharpening factor for the next subframe
> + // MODE_475 only updates on the 2nd and 4th subframes - this follows from
> + // the fact that the gains for two subframes are jointly quantized.
> + if (mode != MODE_475 || subframe & 1)
> + p->beta = av_clipf(p->pitch_gain[4], 0.0, SHARP_MAX);
> +}
> +
> +/// @}
> +
> +
> +/// @defgroup amr_gain_decoding AMR gain decoding functions
> +/// @{
> +
> +/**
> + * fixed gain smoothing
> + * Note that where the spec specifies the "spectrum in the q domain"
> + * in section 6.1.4, in fact frequencies should be used.
> + *
> + * @param p the context
> + * @param lsf LSFs for the current subframe, in the range [0,1]
> + * @param lsf_avg averaged LSFs
> + * @param mode mode of the current frame
> + *
> + * @return fixed gain smoothed
> + */
> +static float fixed_gain_smooth(AMRContext *p , const float *lsf,
> + const float *lsf_avg, const enum Mode mode)
> +{
> + float diff = 0.0;
> + int i;
> +
> + for (i = 0; i < LP_FILTER_ORDER; i++)
> + diff += fabs(lsf_avg[i] - lsf[i]) / lsf_avg[i];
> +
> + // If diff is large for ten subframes, disable smoothing for a 40-subframe
> + // hangover period.
> + p->diff_count = diff > 0.65 ? p->diff_count + 1 : 0;
I'd write
p->diff_count++;
if(diff <= 0.65)
p->diff_count= 0;
it feels more readable, but that's really minor nitpicking.
Either way, can't diff_count overflow?
> +
> + if (p->diff_count > 10)
> + p->hang_count = 0;
> +
> + if (p->hang_count < 40) {
> + p->hang_count++;
> + } else if (mode < MODE_74 || mode == MODE_102) {
> + const float smoothing_factor = av_clipf(4.0 * diff - 1.6, 0.0, 1.0);
> + const float fixed_gain_mean = (p->fixed_gain[0] + p->fixed_gain[1] +
> + p->fixed_gain[2] + p->fixed_gain[3] +
> + p->fixed_gain[4]) * 0.2;
> + return smoothing_factor * p->fixed_gain[4] +
> + (1.0 - smoothing_factor) * fixed_gain_mean;
> + }
> + return p->fixed_gain[4];
> +}
> +
> +/**
> + * Decode pitch gain and fixed gain factor (part of section 6.1.3).
> + *
> + * @param p the context
> + * @param amr_subframe unpacked amr subframe
> + * @param mode mode of the current frame
> + * @param subframe current subframe number
> + * @param fixed_gain_factor decoded gain correction factor
> + */
> +static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe,
> + const enum Mode mode, const int subframe,
> + float *fixed_gain_factor)
> +{
> + if (mode == MODE_122 || mode == MODE_795) {
> + p->pitch_gain[4] = qua_gain_pit [amr_subframe->p_gain];
> + *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain];
could be vertically aligned
> + } else {
> + const float *gains =
> + mode >= MODE_67 ? gains_high[amr_subframe->p_gain] :
> + mode >= MODE_515 ? gains_low [amr_subframe->p_gain] :
> + // gain index is only coded in subframes 0,2 for MODE_475
> + gains_MODE_475[(p->frame.info.subframe[subframe & 2].p_gain
> + << 1) +
> + (subframe & 1)];
> +
> + p->pitch_gain[4] = gains[0];
> + *fixed_gain_factor = gains[1];
these too
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
In a rich man's house there is no place to spit but his face.
-- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090806/11d14cb5/attachment.pgp>
More information about the ffmpeg-devel
mailing list