[Ffmpeg-devel] [RFC] IMC decoder
Michael Niedermayer
michaelni
Mon Oct 30 01:26:51 CET 2006
Hi
On Sun, Oct 29, 2006 at 08:35:11AM +0200, Kostya wrote:
> Here is Intel Music Codec decoder RE'd by Maxim Poliakovski, FFmpeg'ized
> by Benjamin Larsson, cleaned and fixed by me.
>
> I haven't tested it on x86 but it works fine on PPC with samples from MPHQ.
[...]
> + // MDCT tables
> + float mdct_sine_window[COEFFS];
> + float post_coef1[COEFFS];
> + float post_coef2[COEFFS];
> + float pre_coef1[COEFFS];
> + float pre_coef2[COEFFS];
> + float last_fft_im[COEFFS];
doxyfy with @{ @}
[...]
> +static int imc_decode_init(AVCodecContext * avctx)
> +{
> + int i, j;
> + IMCContext *q = avctx->priv_data;
> + double r1;
> +
> + q->decoder_reset = 1;
> +
> + for(i = 0; i < BANDS; i++)
> + q->old_floor[i] = 1.0f;
> +
> + /* Build mdct window, a simple sine window normalized with sqrt(2) */
> + for(i = 0; i < COEFFS; i++)
> + q->mdct_sine_window[i] = (float) ((sin(((float) i + 0.5) / (float) 512 * M_PI)) * sqrt(2.0));
hmm this (and the code below) looks much more readable without the
redundant casts and ()
q->mdct_sine_window[i] = (float) ((sin(((float) i + 0.5) / (float) 512 * M_PI)) * sqrt(2.0));
vs.
q->mdct_sine_window[i] = sin((i + 0.5) / 512 * M_PI) * sqrt(2);
> + for(i = 0; i < COEFFS/2; i++){
> + double cntf = (double)i;
redundant cast
> + double r2 = cntf / 512.0 * M_PI;
> + double r3 = r2 * 8.0;
> + double r4 = r2 * 2.0;
> +
> + q->post_coef1[i] = (float) cos (r4);
> + q->post_coef2[i] = (float) sin (r4);
redundant cast, and there are more later ...
> +
> + r3 = (cntf * 4.0 + 1.0) / 2048.0 * M_PI * 2.0;
/2048 * 2 = /1024
> + r1 = sin (r3);
> + r2 = cos (r3);
> +
> + if (i & 0x1)
> + {
> + q->pre_coef1[i] = (float)((r1 + r2) * sqrt(2.0));
> + r4 = -(r1 - r2);
> + }
> + else
> + {
> + q->pre_coef1[i] = (float)-((r1 + r2) * sqrt(2.0));
> + r4 = (r1 - r2);
> + }
> +
> + q->pre_coef2[i] = (float)(r4 * sqrt(2.0));
if(i&1){
q->pre_coef1[i] = (r1 + r2) * sqrt(2.0);
q->pre_coef2[i] =-(r1 - r2) * sqrt(2.0);
}else{
q->pre_coef1[i] =-(r1 + r2) * sqrt(2.0);
q->pre_coef2[i] = (r1 - r2) * sqrt(2.0);
}
note, this might give different rounding due to the removed cast to double
by storing into r4 on x86
> + q->last_fft_im[i] = 0;
> + }
> + q->flcf1 = (float) (log2(10) * 0.05703125);
> + q->flcf2 = (float) (log2(10) * 0.25);
> +
> + /* Generate a square root table */
> +
> + for(i = 0; i < 30; i++) {
> + q->sqrt_tab[i] = sqrt((float)i);
> + }
> +
> + /* initialize the VLC tables */
> + for(i = 0; i < 4 ; i++) {
> + for(j = 0; j < 4; j++) {
> + init_vlc (&q->huffman_vlc[i][j], 9, imc_huffman_sizes[i],
> + imc_huffman_lens[i][j], 1, 1,
> + imc_huffman_bits[i][j], 2, 2, 0);
> + }
> + }
> + q->one_div_log2 = 1/log(2);
> +
> + ff_fft_init(&q->fft, 7, 1);
> + return 0;
> +}
this function always returns 0 so it could be changed to void
> +
> +
> +static int IMCConvertFloatToLong(float value) {
> + if (value > 0)
> + value += 0.5;
> + else
> + value -= 0.5;
> + if (value < -32768.0)
> + value = -32768.0;
> + else
> + if (value > 32767.0)
> + value = 32767.0;
> +
> + return (int)value;
> +}
sick ...
but i guess this cannot be changed without breaking the code due to
rounding differences?
> +
> +
> +static void imc_calculate_coeffs(IMCContext* q, float* flcoeffs1, float* flcoeffs2, int* bandWidthT,
> + float* flcoeffs3, float* flcoeffs5)
> +{
> + float workT1[BANDS];
> + float workT2[BANDS];
> + float workT3[BANDS];
> + float snr_limit = 1.e-30;
> + float accum = 0.0;
> + int i, cnt2;
> +
> + for(i = 0; i < BANDS; i++)
> + flcoeffs5[i] = workT2[i] = 0.0;
> +
> + for(i = 0; i < BANDS; i++) {
> + if (bandWidthT[i]){
> + workT1[i] = flcoeffs1[i] * flcoeffs1[i];
> + flcoeffs3[i] = 2.0 * flcoeffs2[i];
> + } else {
> + workT1[i] = 0.0;
> + flcoeffs3[i] = -30000.0;
> + }
> + }
> +
> + for(i = 0; i < BANDS; i++) {
> + workT3[i] = (float)bandWidthT[i] * workT1[i] * 0.01;
> + if (workT3[i] <= snr_limit)
> + workT3[i] = 0.0;
> + }
> +
> + for(i = 0; i < BANDS; i++) {
> + cnt2 = i;
> + while (cnt2 < cyclTab[i]) {
> + flcoeffs5[cnt2] = flcoeffs5[cnt2] + workT3[i];
> + cnt2++;
> + }
for(cnt2 = i; cnt2 < cyclTab[i]; cnt2++)
flcoeffs5[cnt2] = flcoeffs5[cnt2] + workT3[i];
also several of the for(i = 0; i < BANDS; i++) loops can be merged
> + workT2[cnt2-1] = workT2[cnt2-1] + workT3[i];
> + }
> +
> + for(i = 1; i < BANDS; i++) {
> + accum = (workT2[i-1] + accum) * imc_weights1[i-1];
> + flcoeffs5[i] += accum;
> + }
> +
> + for(i = 0; i < BANDS; i++)
> + workT2[i] = 0.0;
> +
> + for(i = 0; i < BANDS; i++) {
> + cnt2 = i-1;
> + while (cnt2 > cyclTab2[i]) {
> + flcoeffs5[cnt2] += workT3[i];
> + cnt2--;
> + }
this also can be done IMHO cleaner with a for() loop
[...]
> +static void imc_get_band_flag_bits(IMCContext* q, int* bandWidthT, int* bandFlagsBuf)
> +{
> + int i;
> +
> + for(i = 0; i < BANDS-1; i++) {
> + if (bandWidthT[i])
> + bandFlagsBuf[i] = get_bits1(&q->gb);
> + }
> +}
this function is used just once and it's pretty small, not sure if having
this as a separate function helps readability or not ?
[...]
> + for(i = 0; i < BANDS; i++) {
> + if (q->flcoeffs1[i] > highest)
> + highest = q->flcoeffs1[i];
FFMAX()
[...]
> + for(j = band_tab[i]; j < (band_tab[i+1]-1); j += 2) {
> + if(!get_bits1(&q->gb)){//0
> + q->skipFlagBits[i]++;
> + q->skipFlags[j]=1;
> + q->skipFlags[j+1]=1;
> + q->skipFlagCount[i] += 2;
> + }else{
> + if(get_bits1(&q->gb)){//11
> + q->skipFlagBits[i] +=2;
> + q->skipFlags[j]=0;
> + q->skipFlags[j+1]=1;
> + q->skipFlagCount[i]++;
> + }else{
> + if(!get_bits1(&q->gb)){//100
> + q->skipFlagBits[i] +=3;
> + q->skipFlags[j]=1;
> + q->skipFlags[j+1]=0;
> + q->skipFlagCount[i]++;
> + }else{//101
> + q->skipFlagBits[i] +=3;
> + q->skipFlags[j]=0;
> + q->skipFlags[j+1]=0;
> + }
q->skipFlagBits[i] +=3; and q->skipFlags[j+1]=0; can be factored out of
the if()
[...]
> + /* postrotation */
> + for(i=0; i < COEFFS/2; i++){
> + q->tmp[i].re = (q->samples[i].re * q->post_coef1[i]) + (-q->samples[i].im * q->post_coef2[i]);
> + q->tmp[i].im = (-q->samples[i].im * q->post_coef1[i]) - (q->samples[i].re * q->post_coef2[i]);
> + }
> +
> + /* window and reorder */
> + for(i = 0; i < COEFFS/2; i++){
> + q->out_samples[i*2] = IMCConvertFloatToLong((q->mdct_sine_window[COEFFS-1-i*2] * q->last_fft_im[i]) + (q->mdct_sine_window[i*2] * q->tmp[i].re));
> + q->out_samples[COEFFS-1-i*2] = IMCConvertFloatToLong((q->mdct_sine_window[i*2] * q->last_fft_im[i]) - (q->mdct_sine_window[COEFFS-1-i*2] * q->tmp[i].re));
float_to_int16() should be faster
also maybe these 2 loops can be merged, i am not sure (i didn't check
if anything overlaps ...)
[...]
> + int16_t *outbuffer = (int16_t*) data;
redundant cast from void
> +
> + /* swap the bitstream so we can use the internal bitreader */
> + for(i = 0; i < buf_size / 2; i++)
> + buf16[i] = bswap_16(buf16[i]); //might be wrong on big endian
and hope that modifying the buffer is ok ...
this should at least get a FIXME note explaining that this is risky
[...]
> + for(i = 0; i < COEFFS; i++){
> + outbuffer[i] = q->out_samples[i];
> + }
why is the stuff decoded into out_samples and then copied into outbuffer ?
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
In the past you could go to a library and read, borrow or copy any book
Today you'd get arrested for merely telling someone where the library is
More information about the ffmpeg-devel
mailing list