[Ffmpeg-devel] [RFC] IMC decoder

Mon Oct 30 01:26:51 CET 2006

Hi

On Sun, Oct 29, 2006 at 08:35:11AM +0200, Kostya wrote:
> Here is Intel Music Codec decoder RE'd by Maxim Poliakovski, FFmpeg'ized
> by Benjamin Larsson, cleaned and fixed by me.
> 
> I haven't tested it on x86 but it works fine on PPC with samples from MPHQ.

[...]

> +    // MDCT tables
> +    float mdct_sine_window[COEFFS];
> +    float post_coef1[COEFFS];
> +    float post_coef2[COEFFS];
> +    float pre_coef1[COEFFS];
> +    float pre_coef2[COEFFS];
> +    float last_fft_im[COEFFS];

document these with doxygen, grouping them with @{ @}

[...]
> +static int imc_decode_init(AVCodecContext * avctx)
> +{
> +    int i, j;
> +    IMCContext *q = avctx->priv_data;
> +    double r1;
> +
> +    q->decoder_reset = 1;
> +
> +    for(i = 0; i < BANDS; i++)
> +        q->old_floor[i] = 1.0f;
> +
> +    /* Build mdct window, a simple sine window normalized with sqrt(2) */
> +    for(i = 0; i < COEFFS; i++)
> +        q->mdct_sine_window[i] = (float) ((sin(((float) i + 0.5) / (float) 512 * M_PI)) * sqrt(2.0));

hmm, this (and the code below) looks much more readable without the
redundant casts and ()
q->mdct_sine_window[i] = (float) ((sin(((float) i + 0.5) / (float) 512 * M_PI)) * sqrt(2.0));
vs.
q->mdct_sine_window[i] = sin((i + 0.5) / 512 * M_PI) * sqrt(2);

> +    for(i = 0; i < COEFFS/2; i++){
> +        double  cntf = (double)i;

redundant cast

> +        double  r2 = cntf / 512.0 * M_PI;
> +        double  r3 = r2 * 8.0;
> +        double  r4 = r2 * 2.0;
> +
> +        q->post_coef1[i] = (float) cos (r4);
> +        q->post_coef2[i] = (float) sin (r4);

redundant cast, and there are more later ...

> +
> +        r3 = (cntf * 4.0 + 1.0) / 2048.0 * M_PI * 2.0;

/2048 * 2 = /1024

> +        r1 = sin (r3);
> +        r2 = cos (r3);
> +
> +        if (i & 0x1)
> +        {
> +            q->pre_coef1[i] = (float)((r1 + r2) * sqrt(2.0));
> +            r4 = -(r1 - r2);
> +        }
> +        else
> +        {
> +            q->pre_coef1[i] = (float)-((r1 + r2) * sqrt(2.0));
> +            r4 = (r1 - r2);
> +        }
> +
> +        q->pre_coef2[i] = (float)(r4 * sqrt(2.0));

if(i&1){
    q->pre_coef1[i] = (r1 + r2) * sqrt(2.0);
    q->pre_coef2[i] =-(r1 - r2) * sqrt(2.0);
}else{
    q->pre_coef1[i] =-(r1 + r2) * sqrt(2.0);
    q->pre_coef2[i] = (r1 - r2) * sqrt(2.0);
}
note, this might give different rounding due to the removed cast to double
by storing into r4 on x86

> +        q->last_fft_im[i] = 0;
> +    }
> +    q->flcf1 = (float) (log2(10) * 0.05703125);
> +    q->flcf2 = (float) (log2(10) * 0.25);
> +
> +    /* Generate a square root table */
> +
> +    for(i = 0; i < 30; i++) {
> +        q->sqrt_tab[i] = sqrt((float)i);
> +    }
> +
> +    /* initialize the VLC tables */
> +    for(i = 0; i < 4 ; i++) {
> +        for(j = 0; j < 4; j++) {
> +            init_vlc (&q->huffman_vlc[i][j], 9, imc_huffman_sizes[i],
> +                     imc_huffman_lens[i][j], 1, 1,
> +                     imc_huffman_bits[i][j], 2, 2, 0);
> +        }
> +    }
> +    q->one_div_log2 = 1/log(2);
> +
> +    ff_fft_init(&q->fft, 7, 1);
> +    return 0;
> +}

this function always returns 0 so it could be changed to void

> +
> +
> +static int IMCConvertFloatToLong(float value) {
> +    if (value > 0)
> +        value += 0.5;
> +    else
> +        value -= 0.5;
> +    if (value < -32768.0)
> +        value = -32768.0;
> +    else
> +        if (value > 32767.0)
> +            value = 32767.0;
> +
> +        return (int)value;
> +}

sick ...
but i guess this cannot be changed without breaking the code due to
rounding differences?

> +
> +
> +static void imc_calculate_coeffs(IMCContext* q, float* flcoeffs1, float* flcoeffs2, int* bandWidthT,
> +                                float* flcoeffs3, float* flcoeffs5)
> +{
> +    float   workT1[BANDS];
> +    float   workT2[BANDS];
> +    float   workT3[BANDS];
> +    float   snr_limit = 1.e-30;
> +    float   accum = 0.0;
> +    int i, cnt2;
> +
> +    for(i = 0; i < BANDS; i++)
> +        flcoeffs5[i] = workT2[i] = 0.0;
> +
> +    for(i = 0; i < BANDS; i++) {
> +        if (bandWidthT[i]){
> +            workT1[i] = flcoeffs1[i] * flcoeffs1[i];
> +            flcoeffs3[i] = 2.0 * flcoeffs2[i];
> +        } else {
> +            workT1[i] = 0.0;
> +            flcoeffs3[i] = -30000.0;
> +        }
> +    }
> +
> +    for(i = 0; i < BANDS; i++) {
> +        workT3[i] = (float)bandWidthT[i] * workT1[i] * 0.01;
> +        if (workT3[i] <= snr_limit)
> +            workT3[i] = 0.0;
> +    }
> +
> +    for(i = 0; i < BANDS; i++) {
> +        cnt2 = i;
> +        while (cnt2 < cyclTab[i]) {
> +            flcoeffs5[cnt2] = flcoeffs5[cnt2] + workT3[i];
> +            cnt2++;
> +        }

for(cnt2 = i; cnt2 < cyclTab[i]; cnt2++)
    flcoeffs5[cnt2] = flcoeffs5[cnt2] + workT3[i];

also several of the for(i = 0; i < BANDS; i++) loops can be merged

> +        workT2[cnt2-1] = workT2[cnt2-1] + workT3[i];
> +    }
> +
> +    for(i = 1; i < BANDS; i++) {
> +        accum = (workT2[i-1] + accum) * imc_weights1[i-1];
> +        flcoeffs5[i] += accum;
> +    }
> +
> +    for(i = 0; i < BANDS; i++)
> +        workT2[i] = 0.0;
> +
> +    for(i = 0; i < BANDS; i++) {
> +        cnt2 = i-1;
> +        while (cnt2 > cyclTab2[i]) {
> +            flcoeffs5[cnt2] += workT3[i];
> +            cnt2--;
> +        }

this also can be done IMHO cleaner with a for() loop

[...]
> +static void imc_get_band_flag_bits(IMCContext* q, int* bandWidthT, int* bandFlagsBuf)
> +{
> +    int i;
> +
> +    for(i = 0; i < BANDS-1; i++) {
> +        if (bandWidthT[i])
> +            bandFlagsBuf[i] = get_bits1(&q->gb);
> +    }
> +}

this function is used just once and it's pretty small, not sure if having
this as a separate function helps readability or not ?

[...]
> +    for(i = 0; i < BANDS; i++) {
> +        if (q->flcoeffs1[i] > highest)
> +            highest = q->flcoeffs1[i];

FFMAX()

[...]
> +            for(j = band_tab[i]; j < (band_tab[i+1]-1); j += 2) {
> +                if(!get_bits1(&q->gb)){//0
> +                    q->skipFlagBits[i]++;
> +                    q->skipFlags[j]=1;
> +                    q->skipFlags[j+1]=1;
> +                    q->skipFlagCount[i] += 2;
> +                }else{
> +                    if(get_bits1(&q->gb)){//11
> +                        q->skipFlagBits[i] +=2;
> +                        q->skipFlags[j]=0;
> +                        q->skipFlags[j+1]=1;
> +                        q->skipFlagCount[i]++;
> +                    }else{
> +                        if(!get_bits1(&q->gb)){//100
> +                            q->skipFlagBits[i] +=3;
> +                            q->skipFlags[j]=1;
> +                            q->skipFlags[j+1]=0;
> +                            q->skipFlagCount[i]++;
> +                        }else{//101
> +                            q->skipFlagBits[i] +=3;
> +                            q->skipFlags[j]=0;
> +                            q->skipFlags[j+1]=0;
> +                        }

q->skipFlagBits[i] +=3; and q->skipFlags[j+1]=0; can be factored out of
the if()

[...]
> +    /* postrotation */
> +    for(i=0; i < COEFFS/2; i++){
> +        q->tmp[i].re = (q->samples[i].re * q->post_coef1[i]) + (-q->samples[i].im * q->post_coef2[i]);
> +        q->tmp[i].im = (-q->samples[i].im * q->post_coef1[i]) - (q->samples[i].re * q->post_coef2[i]);
> +    }
> +
> +    /* window and reorder */
> +    for(i = 0; i < COEFFS/2; i++){
> +        q->out_samples[i*2] = IMCConvertFloatToLong((q->mdct_sine_window[COEFFS-1-i*2] * q->last_fft_im[i]) + (q->mdct_sine_window[i*2] * q->tmp[i].re));
> +        q->out_samples[COEFFS-1-i*2] = IMCConvertFloatToLong((q->mdct_sine_window[i*2] * q->last_fft_im[i]) - (q->mdct_sine_window[COEFFS-1-i*2] * q->tmp[i].re));

float_to_int16() should be faster

also maybe these 2 loops can be merged, I am not sure (I didn't check
if anything overlaps ...)

[...]
> +    int16_t *outbuffer = (int16_t*) data;

redundant cast from void

> +
> +    /* swap the bitstream so we can use the internal bitreader */
> +    for(i = 0; i < buf_size / 2; i++)
> +        buf16[i] = bswap_16(buf16[i]);  //might be wrong on big endian

and hope that modifying the buffer is ok ...
this should at least get a FIXME note explaining that this is risky

[...]
> +    for(i = 0; i < COEFFS; i++){
> +        outbuffer[i] = q->out_samples[i];
> +    }

why is the stuff decoded into out_samples and then copied into outbuffer ?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In the past you could go to a library and read, borrow or copy any book
Today you'd get arrested for merely telling someone where the library is