[FFmpeg-devel] [PATCH] G.729A (now fixed-point) decoder

Michael Niedermayer michaelni
Thu Mar 20 03:29:52 CET 2008


On Sun, Mar 16, 2008 at 12:24:58AM +0600, Vladimir Voroshilov wrote:
> Hi, All
> 
> I'm back again and glad to show you reworked G.729A decoder.
> Now all code uses fixed-point arithmetic.
> 
> Here is results of comparing with reference fixed-point code results:
> ALGTHM.BIT    stddev: 44.15 PSNR:63.42 bytes:4096
> ERASURE.BIT  stddev: 12.79 PSNR:74.18 bytes:47104
> FIXED.BIT stddev:  3.82 PSNR:84.67 bytes:18432
> LSP.BIT  stddev: 11.96 PSNR:74.76 bytes:356352
> OVERFLOW.BIT stddev:681.71 PSNR:39.65 bytes:61440
> PARITY.BIT  stddev:  5.57 PSNR:81.40 bytes:47104
> PITCH.BIT  stddev: 16.43 PSNR:72.01 bytes:292864
> SPEECH.BIT  stddev:  9.63 PSNR:76.64 bytes:598016
> TAME.BIT  stddev: 24.67 PSNR:68.47 bytes:20480
> TEST.BIT stddev: 15.82 PSNR:72.33 bytes:26624
> 
> What was also done:
> 0. variables/parameters renaming is in progress as long as adding spaces.
> 1. Input buffers (as much as possible) were marked as "const"
> 2. Pitch delay code decoding code inlined into main loop. Several
> routines diappears.
>     Code becomes more readable. imho.
> 3. decode_frame now works only with one packet.
> 4. Doxygen comments were fixed in many places.
> 7. duplicated code in decode_frame routine replaced with a loop
> 8. a lot of errors/typos fixed in code
> 
> Hopefully this version will be better.
[...]
> +Per-vector results (PASS means no hearable differences, FAILED means hearable artefacts):
> +algthm  : PASS
> +erasure : PASS
> +fixed   : PASS
> +lsp     : PASS

> +overflow: FAIL

fix it!


[...]
> +#define PITCH_MIN 20
> +#define PITCH_MAX 143
> +#define INTERPOL_LEN 11
> +
> +#define L0_BITS 1             ///< Switched MA predictor of LSP quantizer (size in bits)
> +#define L1_BITS 7             ///< First stage vector of quantizer (size in bits)
> +#define L2_BITS 5             ///< First stage lowervector of quantizer (size in bits)
> +#define L3_BITS 5             ///< First stage hihjer vector of quantizer (size in bits)
> +#define P1_BITS 8             ///< Adaptive codebook index for first subframe (size in bits)
> +#define P2_BITS 5             ///< Adaptive codebook index for second subframe (size in bits)
> +#define P0_BITS 1             ///< Parity bit for pitch delay (size in bits)
> +#define GA_BITS 3             ///< GA codebook index (size in bits)
> +#define GB_BITS 4             ///< GB codebook index (size in bits)
> +#define FC_PULSE_COUNT 4      ///< Number of pulses in fixed-codebook vector

> +#define FC_BITS(ctx) (formats[ctx->format].fc_index_bits*FC_PULSE_COUNT+1) ///< Fixed codebook index (size in bits)

I do not like hiding code behind macros when this isnt obvious from the name
of the macro.


> +
> +typedef struct
> +{
> +    uint8_t ma_predictor;     ///< Switched MA predictor of LSP quantizer

> +    uint8_t quantizer_1st;    ///< First stage vector of quantizer
> +    uint8_t quantizer_2nd_lo; ///< First stage lowervector of quantizer (size in bits)
> +    uint8_t quantizer_2nd_hi; ///< First stage hihjer vector of quantizer (size in bits)

2nd != first
and hihjer is no word


[...]
> +    int16_t pred_energ_q[4];    ///< (Q10) past quantized energies

energY


[...]
> +    int16_t g;                  ///< gain coefficient (4.2.4)

s/g/gain_coeff/


[...]
> +/**
> + * ma_predicot_sum[i] := 1-sum{1}{4}{ma_predictor[k][i]}
              ^^^
?

[...]
> +/**
> + * Table used to compute 1/sqrt(x)
> + *
> + * tab_inv_sqrt[i] = 1/sqrt((16+i)/64)
> + */
> +static const uint16_t tab_inv_sqrt[49] =

> +{ /* Q!4 */

!1?


[...]
> +/**
> + * \brief multiplies 32-bit integer by abother 16-bit and divides result by 2^15
> + * \param var_q24 (Q24) 32-bit integer
> + * \param var_15 (Q15) 16-bit integer
> + * \return (Q24) result of mupliplication
> + *
> + * \note this code is bit-equal to reference's Mpy_32_16
> + */
> +static inline int mul_24_15(int var_q24, int16_t var_q15)
> +{
> +    int hi = var_q24 >> 15;
> +    int lo = var_q24 & 0x7fff;
> +    return var_q15 * hi + ((var_q15 * lo) >> 15);
> +}

if the following is faster (and equal), please use it:
((int64_t)var_q24 * var_q15) >> 15



> +
> +/**
> + * \brief shift to right with rounding
> + * \param var1 32-bit integer to shift
> + * \param var2 16-bit shift
> + */
> +static int l_shr_r(int var1, int16_t var2)
> +{
> +    if(var1 && (var1 & (1 << (var2 - 1))))
> +        return (var1 >> var2) + 1;
> +    else
> +        return (var1 >> var2);
> +}

var2 is certainly not a int16 it should either be uint8_t or unsigned int
and unsigned int is very much prefered.

also this should be
(var1 + (1 << (var2 - 1))) >> var2
but as its used just at 2 places:
> +        lp[i]   = l_shr_r(ff1 + ff2, 13);
> +        lp[9-i] = l_shr_r(ff1 - ff2, 13);

 these should be replaced by

ff1 += 1<<12;
(ff1 + ff2)>>13;
(ff1 - ff2)>>13;

all of course just if it doesnt change binary output ...


> +
> +/**
> + * \brief add two 32 bit intgers, bounding result into [min,max] range
> + * \param var1 first variable
> + * \param var2 second variable
> + * \param min lower bound for trimming
> + * \param max higher bound for trimming
> + *
> + * \return var1+var2 trimmed to [min,max] bounds
> + */
> +static int l_add_trim(int var1, int var2, int min, int max)
> +{
> +    if(var2 < 0)
> +        return FFMAX(var1, min - var2) + var2;
> +    else
> +        return FFMIN(var1, max - var2) + var2;
> +}

id replace this by
av_clip(var1+var2, min, max)


> +
> +/**
> + * \brief Calculates 2^x
> + * \param arg (Q15) power (>=0)
> + *
> + * \return (Q0) result of pow(2, power)
> + *
> + * \note If integer part of power is greater than 15, function
> + *       will return INT_MAX
> + */
> +static int l_pow2(int power)
> +{
> +    uint16_t frac_x0;
> +    uint16_t frac_dx;
> +    uint16_t power_int = power >> 15;
> +    int result;
> +
> +    assert(power>=0);
> +
> +    if(power_int > 30 )
> +        return INT_MAX; // overflow

as power_int is always 14 please remove it


> +
> +    /*
> +      power in Q15, thus
> +      b31-b15 - integer part
> +      b00-b14 - fractional part      
> +
> +      When fractional part is treated as Q10,
> +      bits 10-14 are integer part, 00-09 - fractional
> +      
> +    */
> +    frac_x0 = (power & 0x7c00) >> 10; // b10-b14 and Q10 -> Q0
> +    frac_dx = (power & 0x03ff) << 5;  // b00-b09 and Q10 -> Q15 
> +
> +    result = tab_pow2[frac_x0] << 15; // Q14 -> Q29;
> +    result += frac_dx * (tab_pow2[frac_x0+1] - tab_pow2[frac_x0]); // Q15*Q14;
> +


> +    // multiply by 2^power_int and Q29 -> Q1
> +    result >>= 28 - power_int;
> +    // rounding
> +    result++;
> +    
> +    return result >> 1; // Q1 -> Q0

return (result + 16384) >> 15;


> +}
> +
> +/**
> + * \brief Calculates log2(x)
> + * \param value function argument (>0)
> + *
> + * \return (Q15) result of log2(value)
> + */
> +static int l_log2(int value)
> +{
> +    int result;
> +    int power_int;
> +    uint16_t frac_x0;
> +    uint16_t frac_dx;
> +
> +    assert(value > 0);
> +

> +    // Stripping zeros from beginning ( ?? -> Q31)
> +    result=value;    
> +    for(power_int=31; power_int>=0 && !(result & 0x80000000); power_int--)
> +        result <<= 1;

av_log2


> +
> +    /*
> +      After normalization :
> +      b31 - integer part (always nonzero)
> +      b00-b30 - fractional part
> +
> +      When fractional part is treated as Q26,
> +      bits 26-31 are integer part, 16-25 - fractional
> +    */
> +    frac_x0 = (result & 0x7c000000) >> 26; // b26-b31 and [32..63] -> [0..31]  then Q26 -> Q0
> +    frac_dx = (result & 0x03fff800) >> 11; // b11-b25 and Q26 -> Q15, [0..1) in Q15
> +
> +    result = tab_log2[frac_x0] << 15; // Q15 -> Q30
> +    result += frac_dx * (tab_log2[frac_x0+1] - tab_log2[frac_x0]); // Q15*Q15;
> +

> +    result >>= 15; // Q30 -> Q15
> +
> +    result += power_int << 15; // Q0 -> Q15
> +
> +    return result;

return (power_int<<15) + (result>>15);


> +}
> +
> +/**
> + * \brief Computes 1/sqrt(x)
> + * \param arg (Q0) positive integer
> + *
> + * \return (Q29) 0 < 1/sqrt(arg) <= 1
> + */
> +static int l_inv_sqrt(int arg)
> +{
> +    uint32_t result;
> +    uint16_t frac_x0;
> +    uint16_t frac_dx;
> +    int8_t power_int;
> +
> +    assert(arg > 0);
> +

> +    result=arg;    
> +    for(power_int=16; power_int>=0 && !(result & 0xc0000000); power_int--)
> +        result <<= 2;

av_log2()>>1


[...]
> +/**
> + * \brief divide two positive fixed point numbers
> + * \param num numenator
> + * \param denom denumenator
> + * \param base base to scale result to
> + *
> + * \return result of division scaled to given base
> + *
> + * \remark numbers should in same base, result will be scaled to given base
> + *
> + * \todo Better implementation requred
> + */
> +int l_div(int num, int denom, int base)
> +{
> +    int diff = 0;
> +    int sig = 0;
> +
> +    assert(denom);
> +
> +    if(!num)
> +        return 0;
> +
> +    if(num < 0)
> +    {
> +        num = -num;
> +        sig = !sig;
> +    }
> +
> +    if(denom < 0)
> +    {
> +        denom = -denom;
> +        sig = !sig;
> +    }

"\brief divide two positive fixed point numbers"
remove the negative checks please if it divides 2 positive
but if you do add a assert(num >= 0) ! 
iam not sure if it really is always positive, iam quite sure denom is though


> +
> +    for(; num < 0x4000000; diff++)
> +        num <<= 1;
> +

> +    for(; denom < 0x4000000; diff--)
> +        denom <<= 1;

useless, remove this one


> +
> +    if(diff > base)
> +        num >>= diff-base;
> +    else
> +        denom >>= base-diff;
> +
> +    if(sig)
> +        return -num/denom;
> +    else
> +        return num/denom;
> +}
> +

> +/**
> + * \brief Calculates sum of array elements multiplications
> + * \param speech array with input data
> + * \param cycles number elements to proceed
> + * \param offset offset for calculation sum of s[i]*s[i+offset]
> + * \param shift right shift by this value will be done before multiplication
> + *
> + * \return sum of multiplications
> + *
> + * \note array must be at least length+offset long!
> + */
> +static int sum_of_squares(const int16_t* speech, int cycles, int offset, int shift)
> +{
> +    int n;
> +    int sum = 0;
> +
> +    if(offset < 0)
> +        return 0;

this can happen maximally in a single call of this function, it would be
cleaner to move this check there if it can happen at all


[...]
> +/**
> + * \brief pseudo random number generator
> + */
> +static inline uint16_t g729_random(G729A_Context* ctx)
> +{
> +    return ctx->rand_value = 31821 * ctx->rand_value + 13849;
> +}

static inline uint16_t g729_random(uint16_t value)
{
    return 31821*value + 13849;
}


[...]
> +            /*  R(x):=ac_v[-k+x] */
> +	    tmp = ac_v[n - pitch_delay_int - i    ] * interp_filter[i][    pitch_delay_frac];
> +            v = l_add_trim(v, tmp, INT_MIN >> 1, INT_MAX >> 1); //v += R(n-i)*interp_filter(t+3i)
> +	    tmp = ac_v[n - pitch_delay_int + i + 1] * interp_filter[i][3 - pitch_delay_frac];

if pitch_delay_frac=0 then
interp_filter[9][3 - 0] will be after the table


[...]
> +    energy += mul_24_15(26 << 15,              24660); // Q13
> +    energy += 30 << 13;

energy += constant;


> +
> +    // FIXME: Compensation. Makes result bit-equal with reference code
> +    energy -= 2;

hmmmmmmm


[...]
> +/**
> + * \brief Memory update (3.10)
> + * \param fc_v (Q13) fixed-codebook vector
> + * \param gp (Q14) quantized fixed-codebook gain (gain pitch)
> + * \param gc (Q1) quantized adaptive-codebook gain (gain code)
> + * \param exc [in/out] (Q0) last excitation signal buffer for current subframe
> + * \param subframe_size length of subframe
> + */
> +static void g729_mem_update(const int16_t *fc_v, int16_t gp, int16_t gc, int16_t* exc, int subframe_size)
> +{
> +    int i, sum;
> +
> +    for(i=0; i<subframe_size; i++)
> +    {
> +        sum = exc[i] * gp + fc_v[i] * gc;
> +        sum = FFMAX(FFMIN(sum, SHRT_MAX << 14), SHRT_MIN << 14);

av_clip()


[...]
> +/**
> + * \brief LP synthesis filter
> + * \param lp (Q12) filter coefficients
> + * \param in (Q0) input signal
> + * \param out [out] (Q0) output (filtered) signal
> + * \param filter_data [in/out] (Q0) filter data array (previous synthesis data)
> + * \param subframe_size length of subframe
> + * \param exit_on_overflow 1 - If overflow occured routine updates neither out nor
> + *                         filter data arrays, 0 - always update
> + *
> + * \return 1 if overflow occured, o - otherwise
> + *
> + * Routine applies 1/A(z) filter to given speech data
> + */
> +static int g729_lp_synthesis_filter(const int16_t* lp, const int16_t *in, int16_t *out, int16_t *filter_data, int subframe_size, int exit_on_overflow)
> +{
> +    int16_t tmp_buf[MAX_SUBFRAME_SIZE+10];
> +    int16_t* tmp=tmp_buf+10;
> +    int i,n;
> +    int sum;
> +
> +    memcpy(tmp_buf, filter_data, 10*sizeof(int16_t));
> +
> +    for(n=0; n<subframe_size; n++)
> +    {
> +        sum = in[n] << 12;
> +        for(i=0; i<10; i++)
> +            sum -= (lp[i] * tmp[n-i-1]);

superflous ()


> +	sum >>= 12;
> +	if(sum > SHRT_MAX || sum < SHRT_MIN)
> +	{
> +            if(exit_on_overflow)
> +                return 1;
> +            sum = FFMAX(FFMIN(sum, SHRT_MAX), SHRT_MIN);

av_clip()


> +	}
> +        tmp[n] = sum;
> +    }

tabs and indention


[...]
> +    //Downscaling corellaions to fit on 16-bit
> +    tmp = FFMAX(corr_0, FFMAX(corr_t0, corr_max));
> +    for(n=0; n<32 && tmp > SHRT_MAX; n++)
> +    {
> +        tmp      >>= 1;
> +        corr_t0  >>= 1;
> +        corr_0   >>= 1;
> +        corr_max >>= 1;
> +    }

something with av_log2(tmp)
and 
tmp     >>= X;
corr_t0 >>= X;
...


[...]
> +        sum = FFMAX(FFMIN(sum, SHRT_MAX << 12), SHRT_MIN << 12);

av_clip()


[...]
> +/**
> + * \brief high-pass filtering and upscaling (4.2.5)
> + * \param ctx private data structure
> + * \param speech [in/out] reconstructed speech signal for applying filter to
> + * \param length size of input data
> + *
> + * Filter has cut-off frequency 100Hz
> + */
> +static void g729_high_pass_filter(G729A_Context* ctx, int16_t* speech, int length)
> +{
> +    int16_t z_2=0;
> +    int f_0=0;
> +    int i;
> +
> +    for(i=0; i<length; i++)
> +    {
> +        z_2=ctx->hpf_z1;
> +        ctx->hpf_z1=ctx->hpf_z0;
> +        ctx->hpf_z0=speech[i];
> +
> +        f_0 = mul_24_15(ctx->hpf_f1, 15836)
> +            + mul_24_15(ctx->hpf_f2, -7667)

> +            +  7699 * ctx->hpf_z0
> +            - 15398 * ctx->hpf_z1
> +            +  7699 * z_2;

+ 7699*(ctx->hpf_z0 - 2*ctx->hpf_z1 + z_2)


> +	f_0 <<= 2; // Q13 -> Q15

tabs



> +
> +        speech[i] = FFMAX(FFMIN(f_0 >> 14, SHRT_MAX), SHRT_MIN); // 2*f_0 in 15

av_clip()


> +	
> +        ctx->hpf_f2=ctx->hpf_f1;
> +        ctx->hpf_f1=f_0;

all the hpf_* would be nicer in an array and memmoved()


[...]
> +        int16_t freq= (lsf[i] * 20861)>>15; //1.0/(2.0*PI) in Q17, result in Q16
> +        int16_t offset= freq & 0xff;

> +        int16_t ind = FFMIN(freq >> 8, 63);

The FFMIN is unneeded lsf is limited to LSFQ_MAX, if you want add an assert()


> +
> +        lsp[i] = base_cos[ind] + ((slope_cos[ind] * offset) >> 12);

lsp[i] =      base_cos[freq>>8]
         + ((slope_cos[freq>>8] * (freq&0xFF)) >> 12);

also the reference does >> 13 ?


[...]
> +        lq[i] >>= 15;
> +        lq[i] *= ma_predictor_sum_inv[ctx->prev_mode][i];                     // Q12
> +        lq[i] >>= 12;

lq[i]= ( (lq[i]>>15) * ma_predictor_sum_inv[ctx->prev_mode][i] )>>12

is IMHO slightly clearer


> +    }
> +
> +    /* Rotate lq_prev */
> +    for(i=0; i<10; i++)
> +    {
> +        for(k=MA_NP-1; k>0; k--)
> +            ctx->lq_prev[k][i] = ctx->lq_prev[k-1][i];
> +
> +        ctx->lq_prev[0][i] = lq[i];
> +    }
[...]
> +    /* Rotate lq_prev */
> +    for(i=0; i<10; i++)
> +    {
> +        for(k=MA_NP-1; k>0; k--)
> +            ctx->lq_prev[k][i] = ctx->lq_prev[k-1][i];
> +
> +        ctx->lq_prev[0][i] = lq[i];
> +    }

duplicate


[...]
> +/**
> + * \brief decodes polinomial coefficients from LSP

i->y


[...]
> +    init_get_bits(&gb, buf, buf_size);
> +
> +    frame_erasure = 1;
> +    for(i=0; i<buf_size; i++)
> +        if(get_bits(&gb,8))
> +	    frame_erasure=0;
> +
> +    if(frame_erasure)
> +        return 1;

for(i=0; i<buf_size; i++)
    or_off_all |= buf[i];
if(!or_off_all)
    return 1;


[...]
> +    int in_frame_size  = formats[ctx->format].input_frame_size;
> +    int out_frame_size = formats[ctx->format].output_frame_size;

int  in_frame_size = formats[ctx->format]. input_frame_size;
int out_frame_size = formats[ctx->format].output_frame_size;


[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

It is dangerous to be right in matters on which the established authorities
are wrong. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080320/4ba7f1f4/attachment.pgp>



More information about the ffmpeg-devel mailing list