[FFmpeg-devel] [PATCH 1/3] Indeo 5 decoder: common functions

Fri Jan 15 00:04:20 CET 2010

On Sun, Jan 03, 2010 at 12:55:32PM +0200, Kostya wrote:
> Since Maxim's decoder seems to work fine and code is reasonably good
> too, I'm sending it here for review, so it can be polished and
> committed.
> 
[...]
> /**
>  *  information for Indeo tile
>  */
> typedef struct {
>     uint32_t    xpos;
>     uint32_t    ypos;
>     uint32_t    width;
>     uint32_t    height;
>     int32_t     is_empty;  ///< = 1 if this tile doesn't contain any data
>     uint32_t    data_size; ///< size of the data in bytes
>     uint32_t    num_MBs;   ///< number of macroblocks in this tile
>     IVIMbInfo   *mbs;      ///< array of macroblock descriptors
>     IVIMbInfo   *ref_mbs;  ///< ptr to the macroblock descriptors of the reference tile
> } IVITile;
> 
> 
> /**
>  *  information for Indeo wavelet band
>  */
> typedef struct {
>     uint8_t         plane;          ///< plane number this band belongs to
>     uint8_t         band_num;       ///< band number
>     uint32_t        width;
>     uint32_t        height;
>     const uint8_t   *data_ptr;      ///< ptr to the first byte of the band data
>     uint32_t        data_size;      ///< size of the band data
>     int16_t         *buf;           ///< pointer to the output buffer for this band
>     int16_t         *ref_buf;       ///< pointer to the reference frame buffer (for motion compensation)
>     int16_t         *bufs[3];       ///< array of pointers to the band buffers
>     uint32_t        pitch;          ///< pitch associated with the buffers above
>     uint8_t         is_empty;       ///< = 1 if this band doesn't contain any data
>     uint8_t         mb_size;        ///< macroblock size
>     uint8_t         blk_size;       ///< block size

>     uint8_t         mc_resolution;  ///< resolution of the motion compensation: 0 - fullpel, 1 - halfpel

is_halfpel is more verbose

>     int8_t          inherit_mv;
>     int8_t          inherit_qdelta;

doxy

>     int8_t          qdelta_present; ///< tells if Qdelta signal is present in the bitstream (Indeo5 only)

>     uint8_t         quant_mat;      ///< dequant matrix

how do you fit a matrix in a uint8_t ?

>     uint8_t         glob_quant;     ///< quant base for this band
>     const uint8_t   *scan;          ///< ptr to the scan pattern
> 
>     uint8_t         huff_sel;       ///< huffman table for this band
>     IVIHuffDesc     huff_desc;      ///< table descriptor associated with the selector above
>     VLC             *blk_vlc;       ///< ptr to the vlc table for decoding block data
>     VLC             blk_vlc_cust;   ///< custom block vlc table
> 
>     uint16_t        *dequant_intra; ///< ptr to dequant tables for intra blocks
>     uint16_t        *dequant_inter; ///< ptr dequant tables for inter blocks
>     uint8_t         num_corr;       ///< number of correction entries
>     uint8_t         corr[61*2];     ///< rvmap correction pairs
>     uint8_t         rvmap_sel;      ///< rvmap table selector
>     RVMapDesc       *rv_map;        ///< ptr to the RLE table for this band
>     uint16_t        num_tiles;      ///< number of tiles in this band
>     IVITile         *tiles;         ///< array of tile descriptors
>     void (*inv_transform)(int32_t *in, int16_t *out, uint32_t pitch, uint8_t *flags); ///< inverse transform function pointer
>     void (*dc_transform) (int32_t *in, int16_t *out, uint32_t pitch, int blk_size);   ///< dc transform function pointer, it may be NULL
>     uint8_t         is_2d_trans;    ///< 1 indicates that the two-dimensional inverse transform is used
> #ifdef IVI_DEBUG
>     uint16_t        checksum;        ///< for debug purposes
>     int32_t         checksum_present;
>     uint32_t        bufsize;         ///< band buffer size in bytes
> #endif

>     const uint8_t   *intra_base;
>     const uint8_t   *inter_base;
>     const uint8_t   *intra_scale;
>     const uint8_t   *inter_scale;

doxy

> } IVIBandDesc;

and are all thouse structs neeed? cant the bitstream be decoded more
directly without all the block, tile, ... intermediates?

[...]
> /**
>  *  Reverses "nbits" bits of the value "val" and returns the result
>  *  right-justified.
>  */
> static uint16_t inv_bits(const uint16_t val, const int nbits)
> {
>     uint16_t res;
> 
>     if (nbits <= 8) {
>         res = av_reverse[val & 0xFF] >> (8-nbits);

is the & ff needed here?

[...]
> int ff_ivi_huff_desc_cmp(const IVIHuffDesc *desc1, const IVIHuffDesc *desc2)
> {
>     if (desc1->num_rows != desc2->num_rows ||
>                            memcmp(desc1->xbits, desc2->xbits, desc1->num_rows))
>         return 1;
>     return 0;

return    desc1->num_rows != desc2->num_rows
       || memcmp (desc1->xbits, desc2->xbits, desc1->num_rows);

> }
> 
> void ff_ivi_huff_desc_copy(IVIHuffDesc *dst, const IVIHuffDesc *src)
> {
>     dst->num_rows = src->num_rows;
>     memcpy(dst->xbits, src->xbits, src->num_rows);
> }
> 
> int av_cold ff_ivi_init_planes(IVIPlaneDesc *planes, const IVIPicConfig *cfg)
> {
>     int         p, b;
>     uint32_t    b_width, b_height, align_fac, width_aligned, height_aligned, buf_size;
>     IVIBandDesc *band;
> 
>     ff_ivi_free_buffers(planes);
> 
>     /* fill in the descriptor of the luminance plane */
>     planes[0].width     = cfg->pic_width;
>     planes[0].height    = cfg->pic_height;
>     planes[0].num_bands = cfg->luma_bands;
> 
>     /* fill in the descriptors of the chrominance planes */
>     planes[1].width     = planes[2].width     = (cfg->pic_width  + 3) >> 2;
>     planes[1].height    = planes[2].height    = (cfg->pic_height + 3) >> 2;
>     planes[1].num_bands = planes[2].num_bands = cfg->chroma_bands;
> 
>     for (p = 0; p < 3; p++) {
>         planes[p].bands = av_mallocz(planes[p].num_bands * sizeof(IVIBandDesc));
> 
>         /* select band dimensions: if there is only one band then it
>          *  has the full size, if there are several bands each of them
>          *  has only half size */

>         b_width  = planes[p].num_bands == 1 ? planes[p].width  : planes[p].width  >> 1;
>         b_height = planes[p].num_bands == 1 ? planes[p].height : planes[p].height >> 1;

is the rounding correct? i mean not +1 )>>1 ?

> 
>         /* luma   band buffers will be aligned on 16x16 (max macroblock size) */
>         /* chroma band buffers will be aligned on   8x8 (max macroblock size) */
>         align_fac       = !p ? 15 : 7;

p ? 7:15

>         width_aligned   = (b_width  + align_fac) & ~align_fac;
>         height_aligned  = (b_height + align_fac) & ~align_fac;
>         buf_size        = width_aligned * height_aligned * sizeof(int16_t);
> 
>         for (b = 0; b < planes[p].num_bands; b++) {
>             band = &planes[p].bands[b]; /* select appropriate plane/band */
>             band->plane    = p;
>             band->band_num = b;
>             band->width    = b_width;
>             band->height   = b_height;
>             band->pitch    = width_aligned;

>             band->bufs[0]  = av_malloc(buf_size);
>             band->bufs[1]  = av_malloc(buf_size);
> 
>             /* allocate the 3rd band buffer for scalability mode */
>             if (cfg->luma_bands > 1)
>                 band->bufs[2] = av_malloc(buf_size);

malloc NULL checks

[...]
> int av_cold ff_ivi_init_tiles(IVIPlaneDesc *planes, const int tile_width,
>                               const int tile_height)
> {
>     int         p, b, x, y, x_tiles, y_tiles, t_width, t_height;
>     IVIBandDesc *band;
>     IVITile     *tile, *ref_tile;
> 
>     for (p = 0; p < 3; p++) {
>         t_width  = !p ? tile_width  : (tile_width  + 3) >> 2;
>         t_height = !p ? tile_height : (tile_height + 3) >> 2;
> 
>         for (b = 0; b < planes[p].num_bands; b++) {
>             band = &planes[p].bands[b];

>             x_tiles = IVI_NUM_TILES(band->width, t_width);
>             y_tiles = IVI_NUM_TILES(band->height, t_height);

vertical align

[...]
> void ff_ivi_put_pixels_8x8(int32_t *in, int16_t *out, uint32_t pitch,
>                            uint8_t *flags)

missing const
unused flags

[...]
> int ff_ivi_decode_blocks(GetBitContext *gb, IVIBandDesc *band, IVITile *tile)
> {
>     int         mbn, blk, num_blocks, num_coeffs, blk_size, scan_pos, run, val,
>                 pos, is_intra, mc_type, mv_x, mv_y, col_mask;
>     uint8_t     col_flags[8];
>     int32_t     prev_dc, trvec[64];
>     uint32_t    cbp, sym, lo, hi, quant, buf_offs, q;
>     IVIMbInfo   *mb;
>     RVMapDesc   *rvmap = band->rv_map;
>     void (*mc_with_delta_func)(int16_t *buf, int16_t *ref_buf, uint32_t pitch, int mc_type);
>     void (*mc_no_delta_func)  (int16_t *buf, int16_t *ref_buf, uint32_t pitch, int mc_type);
>     const uint8_t   *base_tab, *scale_tab;
> 
>     prev_dc = 0; /* init intra prediction for the DC coefficient */
> 
>     blk_size   = band->blk_size;
>     col_mask   = (blk_size == 8) ? 7 : 3; /* column mask for tracking non-zero coeffs */
>     num_blocks = (band->mb_size != blk_size) ? 4 : 1; /* number of blocks per mb */
>     num_coeffs = 16 << ((blk_size >> 2) & 2); /* 64 - for 8x8 block, 16 - for 4x4 */
>     if (blk_size == 8) {
>         mc_with_delta_func = ff_ivi_mc_8x8_delta;
>         mc_no_delta_func   = ff_ivi_mc_8x8_no_delta;
>     } else {
>         mc_with_delta_func = ff_ivi_mc_4x4_delta;
>         mc_no_delta_func   = ff_ivi_mc_4x4_no_delta;
>     }
> 
>     for (mbn = 0, mb = tile->mbs; mbn < tile->num_MBs; mb++, mbn++) {
>         is_intra = !mb->type;
>         cbp      = mb->cbp;
>         buf_offs = mb->buf_offs;
> 
>         quant = av_clip(band->glob_quant + mb->q_delta, 0, 23);
> 
>         base_tab  = (is_intra) ? band->intra_base  : band->inter_base;
>         scale_tab = (is_intra) ? band->intra_scale : band->inter_scale;
> 
>         if (!is_intra) {
>             mv_x = mb->mv_x;
>             mv_y = mb->mv_y;
>             if (!band->mc_resolution) {
>                 mc_type = 0; /* we have only fullpel vectors */
>             } else {
>                 mc_type = ((mv_y & 1) << 1) | (mv_x & 1);
>                 mv_x >>= 1;
>                 mv_y >>= 1; /* convert halfpel vectors into fullpel ones */
>             }
>         }
> 
>         for (blk = 0; blk < num_blocks; blk++) {
>             /* adjust block position in the buffer according to its number */
>             if (blk & 1) {
>                 buf_offs += blk_size;
>             } else if (blk == 2) {
>                 buf_offs -= blk_size;
>                 buf_offs += blk_size * band->pitch;
>             }
> 
>             if (cbp & 1) { /* block coded ? */
>                 scan_pos = -1;
>                 memset(trvec, 0, num_coeffs*sizeof(int32_t)); /* zero transform vector */
>                 memset(col_flags, 0, sizeof(col_flags));      /* zero column flags */
> 

>                 while (scan_pos <= num_coeffs) {
>                     sym = get_vlc2(gb, band->blk_vlc->table, IVI_VLC_BITS, 1);
>                     if (sym == rvmap->eob_sym)
>                         break; /* End of block */
> 
>                     if (sym == rvmap->esc_sym) { /* Escape - run/val explicitly coded using 3 vlc codes */
>                         run = get_vlc2(gb, band->blk_vlc->table, IVI_VLC_BITS, 1) + 1;
>                         lo  = get_vlc2(gb, band->blk_vlc->table, IVI_VLC_BITS, 1);
>                         hi  = get_vlc2(gb, band->blk_vlc->table, IVI_VLC_BITS, 1);
>                         val = IVI_TOSIGNED((hi << 6) | lo); /* merge them and convert into signed val */
>                     } else {
>                         run = rvmap->runtab[sym];
>                         val = rvmap->valtab[sym];
>                     }
> 
>                     /* de-zigzag and dequantize */
>                     scan_pos += run;
>                     pos = band->scan[scan_pos];

segfault? run isnt checked
some tests with a fuzzer might make sense

> 
>                 #ifdef IVI_DEBUG
>                     if (!val)
>                         av_log(NULL, AV_LOG_ERROR, "Val = 0 encountered!\n");
>                 #endif
> 
>                     q = (base_tab[pos] * scale_tab[quant]) >> 8;

>                     q += !q; // ensure the q always >= 1
>                     if (q != 1) {

if(q>1) {

>                         if (val > 0) {
>                             val = (val * q) + (q >> 1) - (q & 1);
>                         } else
>                             val = (val * q) - (q >> 1) + (q & 1);

val can be stored in abs+sign which would avoid the branch misprediction prone
check

[...]
>                 /* adjust block position in the buffer according with its number */
>                 if (blk & 1) {
>                     offs += band->blk_size;
>                 } else if (blk == 2) {
>                     offs -= band->blk_size;
>                     offs += band->blk_size * band->pitch;
>                 }

looks duplicated

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Rewriting code that is poorly written but fully understood is good.
Rewriting code that one doesnt understand is a sign that one is less smart
then the original author, trying to rewrite it will not make it better.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100115/08a6a412/attachment.pgp>