[FFmpeg-devel] Indeo3 replacement, take 3

Michael Niedermayer michaelni
Thu Nov 5 15:53:31 CET 2009


On Tue, Nov 03, 2009 at 02:52:55PM +0100, Maxim wrote:
[...]
> /**
>  *  VQ delta table entry types.
>  */
> enum {
>     DELTA_DYAD    = 0,
>     DELTA_QUAD    = 1,
>     RLE_ESC_F9    = 2,
>     RLE_ESC_FA    = 3,
>     RLE_ESC_FB    = 4,
>     RLE_ESC_FC    = 5,
>     RLE_ESC_FD    = 6,
>     RLE_ESC_FE    = 7,
>     RLE_ESC_FF    = 8,
>     RLE_FORBIDDEN = 9
> };

maybe these should be documented


> 
> 
> /**
>  *  Some constants for parsing frame bitstream flags.
>  */
> #define BS_8BIT_PEL     (1<<1)
> #define BS_KEYFRAME     (1<<2)
> #define BS_MV_Y_HALF    (1<<4)
> #define BS_MV_X_HALF    (1<<5)
> #define BS_NONREF       (1<<8)
> #define BS_BUFFER        9

same


> 
> 
> typedef struct Plane {
>     uint8_t         *buffers[2];
>     uint8_t         *pixels[2]; ///< pointer to the actual pixel data of the buffers above
>     uint32_t        width;
>     uint32_t        height;
>     uint32_t        pitch;
> } Plane;

Please use AVFrame


[...]
> //! static vq delta tables (initialized at startup)
> static int32_t  delta_tabs   [24][256];     ///< vq delta tables for all 4x4 block modes
> static uint8_t  selector_tabs[24][256];     ///< each entry says which delta type each code is of
> 
> //! vq delta tables for the mode 10 (8x8 blocks)
> static DECLARE_ALIGNED_8(uint64_t, delta_m10_tab[24][256]);
> 
> 
> /**
>  *  Build decoder delta tables dynamically.
>  */
> static av_cold void build_delta_tables(void)
> {
>     int             n, i, j, quads, swap;
>     int32_t         a, b, corr;
>     int32_t         *delta_tab, *quad_tab;
>     const vqtEntry  *rom_tab;
>     const int8_t    *delta_rom;
>     int8_t          *sel_tab;
>     int64_t         *delta_64b;
> 
>     for (n = 0; n < 24; n++) {
>         rom_tab   = &vq_delta_rom[n];
>         delta_rom = rom_tab->delta_tab;
> 
>         delta_tab = &delta_tabs   [n][0];
>         sel_tab   = &selector_tabs[n][0];
>         delta_64b = &delta_m10_tab[n][0];
> 
>         /* set all code selectors = forbidden */
>         memset(sel_tab, RLE_FORBIDDEN, 249);
> 
>         /* initialize selectors for RLE escape codes (0xF8...0xFF) */
>         sel_tab[249] = RLE_ESC_F9;
>         sel_tab[250] = RLE_ESC_FA;
>         sel_tab[251] = RLE_ESC_FB;
>         sel_tab[252] = RLE_ESC_FC;
>         sel_tab[253] = RLE_ESC_FD;
>         sel_tab[254] = RLE_ESC_FE;
>         sel_tab[255] = RLE_ESC_FF;
> 
>         for (i = 0; i < rom_tab->num_dyads; i++) {
>             /* get two vq deltas from the ROM table */
>             a = delta_rom[i * 2];
>             b = delta_rom[i * 2 + 1];
>             if (!HAVE_BIGENDIAN)
>                 FFSWAP(int32_t, a, b);
> 
>             /* concatenate two vq deltas to form a two-pixel corrector (dyad) */
>             delta_tab[i] = (a << 8) + b;

iam not sure if i would call this concatenation, considering these are
signed variables and can be negative

also it seems to me that the delta_rom tables are never used directly
but rather just the softSIMD twistet form of them, if so this is a
waste of memory, the softSIMD twiseted form could be stored directly


[...]
> static uint8_t requant_tab[8][128];
> 
> /**
>  *  Build the static requantization table.
>  *  This table is used to remap pixel values according to a specific
>  *  quant index and thus avoid overflows while adding deltas.
>  */
> static av_cold void build_requant_tab(void)
> {
>     int i;
> 
>     for (i = 0; i < 128; i++) {
>         requant_tab[0][i] = (i + 1) / 2 * 2 + 0;
>         requant_tab[1][i] = (i + 1) / 3 * 3 + 1;
>         requant_tab[2][i] = (i + 2) / 4 * 4 + 0;
>         requant_tab[3][i] = (i - 3) / 5 * 5 + 4;
>         requant_tab[4][i] = (i - 3) / 6 * 6 + 4;
>         requant_tab[5][i] = (i + 3) / 7 * 7 + 1;
>         requant_tab[6][i] = (i + 4) / 8 * 8 + 0;
>         requant_tab[7][i] = (i + 4) / 9 * 9 + 1;
>     }
> 
>     /* some last elements calculated above will have values >= 128 */
>     /* pixel values shall never exceed 127 so set them to non-overflowing values */
>     /* according with the quantization step of the respective section */
>     requant_tab[0][127] = 126;
>     requant_tab[1][119] = 118;
>     requant_tab[1][120] = 118;
>     requant_tab[2][126] = 124;
>     requant_tab[2][127] = 124;
>     requant_tab[6][124] = 120;
>     requant_tab[6][125] = 120;
>     requant_tab[6][126] = 120;
>     requant_tab[6][127] = 120;
> 

>     /* those two values are different in the binary decoder, no idea why */
>     /* patch them in order to make the output bitexact */
>     requant_tab[1][7] = 10;
>     requant_tab[4][8] = 10;

are these the only differences in the tables?
or just the only used by the files you tested?
also from the comment i assume your decoder is bitexact, is it?

and the code above is not thread safe (a simple solution is to
never write to any spot in a global array a value that differs
from what you previously wrote there)


[...]
> #define DEREF_16(v) *((uint16_t *)(v))
> #define DEREF_32(v) *((uint32_t *)(v))
> #define DEREF_64(v) *((uint64_t *)(v))

remove these macros please, they make the code harder to read


[...]
> /**
>  *  Requant a 8 pixel line by duplicating each even pixel as follows:
>  *  ABCDEFGH -> AACCEEGG
>  */
> static inline uint64_t requant(uint64_t a) {
> #if HAVE_BIGENDIAN
>     a &= 0xFF00FF00FF00FF00;
>     a |= a >> 8;
> #else
>     a &= 0x00FF00FF00FF00FF;
>     a |= a << 8;
> #endif
>     return a;
> }

i would not call this operation re-quantization


[...]
>     /* requantize the prediction if VQ index of this cell differs from VQ index */
>     /* of the predicted cell in order to avoid overflows. */
>     if (vq_index >= 8) {
>         for (x = 0; x < cell->width << 2; x++)
>             ref_block[x] = requant_tab[vq_index & 7][ref_block[x]];
>     }

this maybe can segfault (i assume that ref_block can drift out of 0..127
for damaged files or did i miss something that prevents this?)


[...]
>                         case DELTA_DYAD: /* apply VQ delta to two dyads (2+2 pixels) using softSIMD */
>                             if (sel[line & 1][*data_ptr] != DELTA_DYAD) {
>                                 av_log(avctx, AV_LOG_ERROR, "Mode %d: invalid VQ data!\n", mode);
>                                 return -1;
>                             }
> 
>                             DEREF_16(dst + line_offset    ) = DEREF_16(ref    ) + delta_tab[*data_ptr++];
>                             DEREF_16(dst + line_offset + 2) = DEREF_16(ref + 2) + delta_tab[code];
> 

>                             if (mode >= 3) {
>                                 /* odd lines are not coded but rather interpolated/replicated */
>                                 /* first line of the cell on the top of image? - replicate */
>                                 /* otherwise - interpolate */
>                                 if (is_top_of_cell && !cell->ypos) {
>                                     DEREF_32(dst) = DEREF_32(dst + row_offset);
>                                 } else
>                                     AVG_32(dst, dst + row_offset, ref);
>                             }
>                             break;
> 
>                         case DELTA_QUAD: /* apply VQ delta to 4 pixels at once using softSIMD */
>                             DEREF_32(dst + line_offset) = DEREF_32(ref) + delta_tab[code];
> 

>                             if (mode >= 3) {
>                                 if (is_top_of_cell && !cell->ypos) {
>                                     DEREF_32(dst) = DEREF_32(dst + row_offset);
>                                 } else
>                                     AVG_32(dst, dst + row_offset, ref);
>                             }

code duplication

>                             break;
> 

>                         case RLE_ESC_FF: /* apply null delta to all lines up to the 2nd line */
>                             if (line)
>                                 ERR_BAD_RLE(avctx, mode, 0xFF, line);
>                             num_lines = 2;
>                             copy_block4(dst, ref, row_offset, row_offset, num_lines << zoom_fac);
>                             break;
> 
>                         case RLE_ESC_FE: /* apply null delta to all lines up to the 3rd line */
>                             if (line >= 2)
>                                 ERR_BAD_RLE(avctx, mode, 0xFE, line);
>                             num_lines = 3 - line;
>                             copy_block4(dst, ref, row_offset, row_offset, num_lines << zoom_fac);
>                             break;
> 
>                         case RLE_ESC_FC:
>                             /* apply null delta to all remaining lines of this block
>                                and to whole next block */
>                             skip_flag  = 0;
>                             rle_blocks = 1;
>                             /*FALLTHROUGH*/
> 
>                         case RLE_ESC_FD: /* apply null delta to all remaining lines of this block */
>                             num_lines = 4 - line; /* go to process next block */
>                             copy_block4(dst, ref, row_offset, row_offset, num_lines << zoom_fac);
>                             break;

num_lines = 10 - "code" - line;
copy_block4(dst, ref, row_offset, row_offset, num_lines << zoom_fac);


[...]
>                                 if (is_top_of_cell && !cell->ypos) {
>                                     DEREF_64(ref) = DEREF_64(ref + row_offset);
>                                 } else
>                                     AVG_64(ref, ref - row_offset, ref + row_offset);
>                                 break;
> 
>                             case DELTA_QUAD:
>                                 DEREF_64(ref + row_offset) = pix64 + delta_m10[code];
> 
>                                 if (is_top_of_cell && !cell->ypos) {
>                                     DEREF_64(ref) = DEREF_64(ref + row_offset);
>                                 } else
>                                     AVG_64(ref, ref - row_offset, ref + row_offset);

code duplication


>                                 break;
> 
>                             case RLE_ESC_FF: /* apply null delta to all lines up to the 2nd line */
>                                 if (line)
>                                     ERR_BAD_RLE(avctx, mode, 0xFF, line);
>                                 fill_64(ref + row_offset, pix64, 3, row_offset);
>                                 AVG_64(ref, ref - row_offset, ref + row_offset);
>                                 num_lines = 2;
>                                 break;
> 
>                             case RLE_ESC_FE: /* apply null delta to all lines up to the 3rd line */
>                                 if (line >= 2)
>                                     ERR_BAD_RLE(avctx, mode, 0xFE, line);
>                                 num_lines = 3 - line;
>                                 if (is_top_of_cell) {
>                                     fill_64(ref + row_offset, pix64, 5, row_offset);
>                                     AVG_64(ref, ref - row_offset, ref + row_offset);
>                                 } else
>                                     fill_64(ref, pix64, num_lines << 1, row_offset);
>                                 break;
> 
>                             case RLE_ESC_FC:
>                                 /* apply null delta to all remaining lines of this block
>                                    and to whole next block */
>                                 rle_blocks = 1;
>                                 /*FALLTHROUGH*/
> 
>                             case RLE_ESC_FD: /* apply null delta to all remaining lines of this block */
>                                 num_lines = 4 - line; /* go to process next block */
>                                 if (is_top_of_cell) {
>                                     fill_64(ref + row_offset, pix64, 7, row_offset);
>                                     AVG_64(ref, ref - row_offset, ref + row_offset);
>                                 } else
>                                     fill_64(ref, pix64, num_lines << 1, row_offset);
>                                 break;

lots of code duplication


[...]
>                             case RLE_ESC_FF: /* skip all lines up to the 2nd one */
>                                 if (line)
>                                     ERR_BAD_RLE(avctx, mode, 0xFF, line);
>                                 num_lines = 2;
>                                 break;
> 
>                             case RLE_ESC_FE: /* skip all lines up to the 3rd one */
>                                 if (line >= 2)
>                                     ERR_BAD_RLE(avctx, mode, 0xFE, line);
>                                 num_lines = 3 - line;
>                                 break;
> 
>                             case RLE_ESC_FC: /* skip all remaining lines of this block and the whole next block */
>                                 rle_blocks = 1;
>                                 /*FALLTHROUGH*/
> 
>                             case RLE_ESC_FD: /* skip all remaining lines of this block */
>                                 num_lines = 4 - line;
>                                 break;

more code that can be factored


[...]
> /**
>  *  Decode a plane (Y, V or U)
>  */
> static int decode_plane(Indeo3DecodeContext *ctx, AVCodecContext *avctx, Plane *plane,
>                         const uint8_t *data, int32_t strip_width)
> {
>     GetBitContext   gb;
>     Cell            *curr_cell, *prev_cell;
>     int             num_vectors, code, bytes_used, need_resync, skip_bits;
>     const int8_t    *mc_vectors;
>     const uint8_t   *next_cell_data;
> 
>     /* each plane data starts with mc_vector_count field, */
>     /* an optional array of motion vectors followed by the vq data */

>     num_vectors = bytestream_get_le32(&data);
>     mc_vectors  = num_vectors ? data : 0;
> 
>     /* init the bitreader */
>     init_get_bits(&gb, &data[num_vectors * 2], ctx->data_size << 3);

the size is nonsense
and an access to this can segfault


>     skip_bits   = 0;
>     need_resync = 0;
> 
>     /* init binary tree decoding */
>     curr_cell = ctx->cell_stack;
> 
>     /* initialize the 1st cell and set its dimensions to whole plane */
>     curr_cell->xpos   = curr_cell->ypos = 0;
>     curr_cell->width  = plane->width  >> 2;
>     curr_cell->height = plane->height >> 2;
>     curr_cell->tree   = 0; // we are in the MC tree now
>     curr_cell->mv_ptr = 0; // no motion vector = INTRA cell
> 
>     /* process the plane's binary tree until it gets exhausted */
>     while (curr_cell >= ctx->cell_stack) {
>         if (need_resync && !(get_bits_count(&gb) & 7)) {
>             skip_bits_long(&gb, skip_bits);
>             skip_bits   = 0;
>             need_resync = 0;
>         }
>         switch (get_bits(&gb, 2)) {
>         case H_SPLIT:
>             /* split current cell into two vertical subcells */
>             prev_cell = curr_cell;
>             if (curr_cell >= &ctx->cell_stack[CELL_STACK_MAX]) {
>                 av_log(avctx, AV_LOG_ERROR, "Cell stack overflow (corrupted binary tree)!\n");
>                 return -1;
>             }
>             DUPLICATE_CELL(curr_cell);
>             SPLIT_CELL(prev_cell->height, curr_cell->height);
>             prev_cell->ypos   += curr_cell->height;
>             prev_cell->height -= curr_cell->height;
>             break;

this function likely can be implemented much cleaner recursively instead of
this synthetic stack thing.


>         case V_SPLIT:
>             /* split current strip */
>             prev_cell = curr_cell;
>             if (curr_cell >= &ctx->cell_stack[CELL_STACK_MAX]) {
>                 av_log(avctx, AV_LOG_ERROR, "Cell stack overflow (corrupted binary tree)!\n");
>                 return -1;
>             }
>             DUPLICATE_CELL(curr_cell);
>             if (curr_cell->width > strip_width) {

>                 SPLIT_STRIP(curr_cell->width, strip_width);

this is used just once, so its just obfuscation to use a macro


[...]
>     y_offset = bytestream_get_le32(&buf_ptr);
>     v_offset = bytestream_get_le32(&buf_ptr);
>     u_offset = bytestream_get_le32(&buf_ptr);

>     if (FFMAX3(y_offset, v_offset, u_offset) >= ctx->data_size - 16) {

ctx->data_size - 16 can overflow


[...]
>         /* convert four pixels at once using softSIMD */
>         for (x = 0; x < plane->width >> 2; x++)
>             *dst32++ = (*src32++ & 0x7F7F7F7F) << 1;

is the & needed, arent these bits 0 anyway?


[...]
> typedef struct {
>     const int8_t   *delta_tab;
>     uint8_t        num_dyads;
>     int8_t 	   quad_exp;

tabs


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many that live deserve death. And some that die deserve life. Can you give
it to them? Then do not be too eager to deal out death in judgement. For
even the very wise cannot see all ends. -- Gandalf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20091105/641de2f5/attachment.pgp>



More information about the ffmpeg-devel mailing list