[FFmpeg-devel] [PATCH][RFC] Indeo3 replacement
Michael Niedermayer
michaelni
Sun Jul 26 18:06:04 CEST 2009
On Sat, Jul 25, 2009 at 07:24:35PM +0200, Maxim wrote:
> Hi crews,
>
> as already volunteered I'd like to maintain indeo3 decoder in the
> future. Unfortunately ffmpeg's decoder is unmaintainable (Mike, sorry!)
> because nobody understands how it works. Therefore I want to submit a
> patch proposing a new source for this algorithm. Below its advantages in
> short:
>
> - deobfuscated algorithm
> - heavily commented source
> - decoding tables will be generated dynamically making "indeo3data.h"
> tiny compared to the existing one!
> - one huge code blob was split into several functions
>
> Disadvantages:
>
> - it was written fast, therefore it contains several simplifications that
> could be programmed more safely
> - may be split further
> - it was not tested on PPC yet
>
> There is already a documentation for this algorithm here: http://wiki.multimedia.cx/index.php?title=Indeo_3
>
> The attached files are the sources themselves because the new sources are VERY different from the existing ones!
>
> Plz be merciful to me and help me out to make this stuff good-looking!
> Waiting for reviews...
[...]
> //@{
> //! vq table selector codes
> #define DELTA_DYAD 0
> #define DELTA_QUAD 1
> #define RLE_ESC_F9 2
> #define RLE_ESC_FA 3
> #define RLE_ESC_FB 4
> #define RLE_ESC_FC 5
> #define RLE_ESC_FD 6
> #define RLE_ESC_FE 7
> #define RLE_ESC_FF 8
> #define RLE_FORBIDDEN 9
could be an enum
> //@}
>
>
> //@{
> //! some constants for parsing frame flags
> #define BS_8BIT_PEL 1<<1
> #define BS_KEYFRAME 1<<2
> #define BS_MV_Y_HALF 1<<4
> #define BS_MV_X_HALF 1<<5
> #define BS_BUFFER 9
> //@}
>
>
> typedef struct Plane {
> uint8_t *buffers[2];
> uint8_t *pixels[2]; ///< pointer to the actual pixel data of the buffers above
> uint32_t width;
> uint32_t height;
> uint32_t pitch;
> } Plane;
>
> #define CELL_STACK_MAX 20
>
> typedef struct Cell {
> int16_t xpos; ///< cell coordinates in 4x4 blocks
> int16_t ypos;
> int16_t width; ///< cell width in 4x4 blocks
> int16_t height; ///< cell height in 4x4 blocks
> uint8_t tree; ///< tree id: 0- MC tree, 1 - VQ tree
> const int8_t *mv_ptr; ///< ptr to the motion vector if any
> } Cell;
>
> typedef struct Indeo3DecodeContext {
> AVCodecContext *avctx;
> AVFrame frame;
>
> int16_t width, height;
please vertically align these declarations
> uint32_t frame_num;
> uint16_t frame_flags;
> uint32_t data_size;
> uint8_t cb_offset;
> uint8_t resync;
> const uint8_t *y_data_ptr;
> const uint8_t *v_data_ptr;
> const uint8_t *u_data_ptr;
> const uint8_t *alt_quant;
I think some of these could benefit from documentation
[...]
> static uint8_t requant_tab[8][128];
>
> /**
> * Build the static requantization table.
> * This table is used to remap pixel values according to a specific
> * quant index and thus avoid overflows while adding deltas.
> */
> static av_cold void build_requant_tab(void)
> {
> int i;
>
> for (i = 0; i < 128; i++) {
> requant_tab[0][i] = (i + 1) - ((i + 1) % 2);
> requant_tab[1][i] = (i + 2) - ((i + 1) % 3);
> requant_tab[2][i] = (i + 2) - ((i + 2) % 4);
> requant_tab[3][i] = (i + 1) - ((i - 3) % 5);
> requant_tab[4][i] = (i + 1) - ((i - 3) % 6);
> requant_tab[5][i] = (i + 4) - ((i + 3) % 7);
> requant_tab[6][i] = (i + 4) - ((i + 4) % 8);
> requant_tab[7][i] = (i + 5) - ((i + 4) % 9);
> }
does this look less ugly if written as
(i + C)/D*D + E
?
[...]
> /* FIXME: I know we already have a bitreader in ffmpeg */
> /* it should be adapted to read ahead only one byte */
> /* otherwise it won't work for indeo3 !!! */
elaborate please
[...]
> /**
> * Copy pixels of the cell(x + mv_x, y + mv_y) from the previous frame into
> * the cell(x, y) in the current frame.
> */
> static void copy_cell(Indeo3DecodeContext *ctx, Plane *plane, Cell *cell)
> {
> int y, buf_switch, mv_x, mv_y, offset;
> uint8_t *src, *dst;
>
> /* use BS_BUFFER flag for buffer switching */
> buf_switch = (ctx->frame_flags >> BS_BUFFER) & 1;
this should not be done per cell as it eats cpu cycles ...
>
> /* setup output and reference pointers */
> dst = &plane->pixels[buf_switch][(cell->ypos << 2) * plane->pitch + (cell->xpos << 2)];
> /* reference block = prev_frame(cell_xpos + mv_x, cell_ypos + mv_y) */
> mv_y = cell->mv_ptr[0];
> mv_x = cell->mv_ptr[1];
> offset = ((cell->ypos << 2) + mv_y) * plane->pitch + (cell->xpos << 2) + mv_x;
> src = &plane->pixels[buf_switch ^ 1][offset];
>
> for (y = cell->height << 2; y > 0; src += plane->pitch, dst += plane->pitch, y--)
> memcpy(dst, src, cell->width << 2);
> }
also, can't the dsputil block copy code be used?
>
>
> #define INTERPOLATE_32(dst, src, ref) *(dst) = ((*(src) + *(ref)) >> 1) & 0x7F7F7F7F
>
> /**
> * Interpolate a line in the 8x8 block
> * thisLine = average(thisLine-1, thisLine+1)
> */
> static void interpolate_64(int32_t *buf, const int32_t row_offset)
> {
> /* average 8 pels in the softSIMD fashion and */
> /* make sure that values are in the 7bit range */
> buf[0] = ((buf[-row_offset] + buf[row_offset]) >> 1) & 0x7F7F7F7F;
> buf++;
> buf[0] = ((buf[-row_offset] + buf[row_offset]) >> 1) & 0x7F7F7F7F;
> }
again, can't the dsputil 1/2 pel MC code be used?
>
>
> /**
> * Copy n lines filled with 32bit pixel values
> */
> static void copy_32(int32_t *dst, int32_t *src, int n, int row_offset)
> {
> for (; n > 0; dst += row_offset, src += row_offset, n--)
> *dst = *src;
> }
and again, dsputil ...
[...]
> /**
> * Decode a vector-quantized cell.
> * It consists of several routines, each of which handles one or more "modes"
> * with which a cell can be encoded.
> *
> * @param ctx [in] pointer to the decoder context
> * @param avctx [in] ptr to the AVCodecContext
> * @param plane [in] pointer to the plane descriptor
> * @param cell [in] pointer to the cell descriptor
> * @param data_ptr [in] pointer to the compressed data
> * @param last_ptr [out] position in the input buffer after decoding will be reported here
> * @return result code: 0 = OK, -1 = error
> */
> static int decode_cell(Indeo3DecodeContext *ctx, AVCodecContext *avctx, Plane *plane, Cell *cell,
> const uint8_t *data_ptr, const uint8_t **last_ptr)
> {
> int x, y, buf_switch, mv_x, mv_y, mode, vq_index, prim_indx, second_indx, rle_blocks;
> int row_offset, blk_row_offset, line, num_lines, is_first_row, is_top_of_cell, skip_flag;
> uint8_t code, *block, *ref_block, *prim_sel, *second_sel;
> int32_t *prim_delta, *second_delta, *delta_tab, *src32, *ref32, *delta_lo, *delta_hi;
> int32_t *block32, ref_hi, ref_lo;
> int16_t *src16, *ref16;
>
> /* use BS_BUFFER flag for buffer switching */
> buf_switch = (ctx->frame_flags >> BS_BUFFER) & 1;
duplicate
>
> /* get coding mode and VQ table index from the VQ descriptor byte */
> code = *data_ptr++;
> mode = code >> 4;
> vq_index = code & 0xF;
>
> /* setup output and reference pointers */
> block = &plane->pixels[buf_switch][(cell->ypos << 2) * plane->pitch + (cell->xpos << 2)];
> if (!cell->mv_ptr) {
> /* use previous line as reference for INTRA cells */
> ref_block = &block[-plane->pitch];
> } else {
> if (mode >= 10) {
> /* for mode 10 and 11 INTER first copy the predicted cell into the current one */
> /* so we don't need to do data copying for each RLE code later */
> copy_cell(ctx, plane, cell);
> } else {
> mv_y = cell->mv_ptr[0];
> mv_x = cell->mv_ptr[1];
> /* reference block = prev_frame(cell_xpos + mv_x, cell_ypos + mv_y) */
> ref_block = &plane->pixels[buf_switch ^ 1][((cell->ypos << 2) + mv_y) * plane->pitch + (cell->xpos << 2) + mv_x];
> }
> }
>
> /* select VQ tables as follows: */
> /* modes 0 and 3 use only the primary table for all lines in a block */
> /* while modes 1 and 4 switch between primary and secondary tables on alternate lines */
> if (mode == 1 || mode == 4) {
> code = ctx->alt_quant[vq_index];
> prim_indx = (code >> 4) + ctx->cb_offset;
> second_indx = (code & 0xF) + ctx->cb_offset;
>
> assert(prim_indx <= 23 && second_indx <= 23);
just to make sure, this cannot be false with ANY input?
>
> prim_delta = &delta_tabs [prim_indx] [0];
> prim_sel = &selector_tabs[prim_indx] [0];
> second_delta = &delta_tabs [second_indx][0];
> second_sel = &selector_tabs[second_indx][0];
> } else {
> vq_index += ctx->cb_offset;
> assert(vq_index <= 23);
>
> prim_delta = &delta_tabs [vq_index][0];
> prim_sel = &selector_tabs[vq_index][0];
> second_delta = prim_delta;
> second_sel = prim_sel;
> }
>
> /* requantize the prediction if VQ index of this cell differs from VQ index */
> /* of the predicted cell in order to avoid overflows. */
> /* FIXME: if (vq_index >= 8 && (mode == 0 || mode == 3 || mode == 10) [win32] */
> if (vq_index >= 8) {
> for (x = 0; x < cell->width << 2; x++)
> ref_block[x] = requant_tab[vq_index & 7][ref_block[x]];
> }
>
> /* convert the pixel offset into 4x4 block one */
> row_offset = plane->pitch >> 2;
> blk_row_offset = (plane->pitch - cell->width) << 2;
>
> rle_blocks = 0; // reset RLE block counter
>
> switch (mode) {
> case 0: /*------------------ MODES 0 & 1 (4x4 block processing) --------------------*/
> case 1:
> skip_flag = 0;
>
> for (y = 0; y < cell->height; y++) {
> for (x = 0; x < cell->width; x++) {
> /* address 4 pixels as one 32bit integer */
> ref32 = (int32_t *)ref_block;
> src32 = (int32_t *)block;
>
> if (rle_blocks > 0) {
> /* apply 0 delta to whole next block */
> if (cell->mv_ptr || !skip_flag)
> copy_32(src32, ref32, 4, row_offset);
> rle_blocks--;
> } else {
> for (line = 0; line < 4;) {
> num_lines = 1;
>
> code = *data_ptr++;
> /* select primary VQ table for odd, secondary for even lines */
> delta_tab = (line & 1) ? prim_delta : second_delta;
>
> /* switch on code type: dyad, quad or RLE escape codes */
> switch ((line & 1) ? prim_sel[code] : second_sel[code]) {
> case DELTA_DYAD: /* apply VQ delta to two dyads (2+2 pixels) using softSIMD */
> if (((line & 1) ? prim_sel[*data_ptr] : second_sel[*data_ptr]) != DELTA_DYAD) {
> av_log(avctx, AV_LOG_ERROR, "Mode 0/1: invalid VQ data!\n");
> return -1;
> }
> ref16 = (int16_t *)ref32;
> src16 = (int16_t *)src32;
> src16[0] = ref16[0] + delta_tab[*data_ptr++];
> src16[1] = ref16[1] + delta_tab[code];
> break;
>
> case DELTA_QUAD: /* apply VQ delta to 4 pixels at once using softSIMD */
> src32[0] = ref32[0] + delta_tab[code];
> break;
>
> case RLE_ESC_FF: /* apply null delta to all lines up to the 2nd line */
> //assert(line < 1);
> copy_32(src32, ref32, 2, row_offset);
> num_lines = 2;
> break;
>
> case RLE_ESC_FE: /* apply null delta to all lines up to the 3rd line */
> //assert(line < 2);
> copy_32(src32, ref32, 3 - line, row_offset);
> num_lines = 3 - line;
> break;
>
> case RLE_ESC_FC:
> /* apply null delta to all remaining lines of this block
> and to whole next block */
> skip_flag = 0;
> rle_blocks = 1;
>
> case RLE_ESC_FD: /* apply null delta to all remaining lines of this block */
> copy_32(src32, ref32, 4 - line, row_offset);
> num_lines = 4 - line; /* go to process next block */
> break;
>
> case RLE_ESC_FB: /* apply null delta to n blocks/skip n blocks */
> /* get next byte after the escape code 0xFB */
> code = *data_ptr++;
> rle_blocks = (code & 0x1F) - 1; /* set the block counter */
> if (code >= 64 || rle_blocks < 0) {
> av_log(avctx, AV_LOG_ERROR, "Mode 0/1: RLE-FB invalid counter: %d!\n", code);
> return -1;
> }
> skip_flag = code & 0x20;
> if (cell->mv_ptr || !skip_flag)
> copy_32(src32, ref32, 4 - line, row_offset);
> num_lines = 4 - line; /* go to process next block */
> break;
>
> case RLE_ESC_F9: /* skip this block and the next one */
> skip_flag = 1;
> rle_blocks = 1;
>
> case RLE_ESC_FA: /* skip this block (INTRA) or copy the reference block (INTER) */
> assert(!line);
> if (cell->mv_ptr)
> copy_32(src32, ref32, 4, row_offset);
> num_lines = 4;
> break;
>
> default:
> av_log(avctx, AV_LOG_ERROR, "Mode 0/1: unsupported RLE code: %d!\n",
> (line & 1) ? prim_sel[code] : second_sel[code]);
> return(-1);
> }// switch code
>
> /* move forward num_lines */
> line += num_lines;
> ref32 += row_offset * num_lines;
> src32 += row_offset * num_lines;
> }// for line
> }// if/else
>
> /* move to next block horizontal */
> ref_block += 4;
> block += 4;
> }// for x
>
> /* move to next line of blocks */
> ref_block += blk_row_offset;
> block += blk_row_offset;
> }// for y
> break;
>
> case 3: /*------------------ MODES 3 & 4 (4x8 block processing) --------------------*/
> case 4:
> if (cell->mv_ptr) {
> av_log(avctx, AV_LOG_ERROR, "Trying to use Mode 3/4 for an INTER cell!\n");
> return -1;
> }
> block32 = (int32_t *)block;
> blk_row_offset = (row_offset << 3) - cell->width;
> skip_flag = 0;
>
> for (y = 0, is_first_row = 1; y < cell->height; y += 2) {
> for (x = 0; x < cell->width; x++) {
> /* address 4 pixels as one 32bit integer */
> ref32 = &block32[-row_offset];
> src32 = &block32[row_offset];
>
> if (rle_blocks > 0) {
> /* apply 0 delta to whole next block */
> if (!skip_flag)
> copy_32(block32, ref32, 8, row_offset);
> rle_blocks--;
> } else {
> for(line = 0; line < 4;) {
> num_lines = 1;
> is_top_of_cell = is_first_row & (!line);
>
> code = *data_ptr++;
> /* select primary VQ table for odd, secondary for even lines */
> delta_tab = (line & 1) ? prim_delta : second_delta;
>
> /* switch on code type: dyad, quad or RLE escape codes */
> switch ((line & 1) ? prim_sel[code] : second_sel[code]) {
> case DELTA_DYAD: /* apply VQ delta to two dyads (2+2 pixels) using softSIMD */
> if (((line & 1) ? prim_sel[*data_ptr] : second_sel[*data_ptr]) != DELTA_DYAD) {
> av_log(avctx, AV_LOG_ERROR, "Mode 3/4: invalid VQ data!\n");
> return -1;
> }
> ref16 = (int16_t *)ref32;
> src16 = (int16_t *)src32;
> src16[0] = ref16[0] + delta_tab[*data_ptr++];
> src16[1] = ref16[1] + delta_tab[code];
>
> /* odd lines are not coded but rather interpolated/replicated */
> /* first line of the cell on the top of image? - replicate */
> /* otherwise - interpolate */
> if (is_top_of_cell && !cell->ypos) {
> src32[-row_offset] = src32[0];
> } else
> INTERPOLATE_32(src32 -row_offset, src32, ref32);
> break;
>
> case DELTA_QUAD: /* apply VQ delta to 4 pixels at once using softSIMD */
> src32[0] = ref32[0] + delta_tab[code];
> if (is_top_of_cell && !cell->ypos) {
> src32[-row_offset] = src32[0];
> } else
> INTERPOLATE_32(src32 -row_offset, src32, ref32);
> break;
>
> case RLE_ESC_FF: /* apply null delta to all lines up to the 2nd line */
> assert(line < 1);
> copy_32(src32 - row_offset, ref32, 4, row_offset);
> num_lines = 2;
> break;
>
> case RLE_ESC_FE: /* apply null delta to all lines up to the 3rd line */
> assert(line < 2);
> copy_32(src32 - row_offset, ref32, (3 - line) << 1, row_offset);
> num_lines = 3 - line;
> break;
>
> case RLE_ESC_FC:
> /* apply null delta to all remaining lines of this block
> and to whole next block */
> skip_flag = 0;
> rle_blocks = 1;
>
> case RLE_ESC_FD: /* apply null delta to all remaining lines of this block */
> copy_32(src32 - row_offset, ref32, (4 - line) << 1, row_offset);
> num_lines = 4 - line; /* go to process next block */
> break;
>
> case RLE_ESC_FB: /* apply null delta to n blocks/skip n blocks */
> /* get next byte after the escape code 0xFB */
> code = *data_ptr++;
> rle_blocks = (code & 0x1F) - 1; /* set the block counter */
> if (code >= 64 || rle_blocks < 0) {
> av_log(avctx, AV_LOG_ERROR, "Mode 3/4: RLE-FB invalid counter: %d!\n", code);
> return -1;
> }
> skip_flag = code & 0x20;
> if (!skip_flag)
> copy_32(src32 - row_offset, ref32, (4 - line) << 1, row_offset);
> num_lines = 4 - line; /* go to process next block */
> break;
>
> case RLE_ESC_F9: /* skip this block and the next one */
> skip_flag = 1;
> rle_blocks = 1;
>
> case RLE_ESC_FA: /* skip this block */
> assert(!line);
> num_lines = 4;
> break;
>
> default:
> av_log(avctx, AV_LOG_ERROR, "Mode 3/4: unsupported RLE code: %d!\n",
> (line & 1) ? prim_sel[code] : second_sel[code]);
> return(-1);
> }// switch code
>
> /* move to num_lines (even) */
> line += num_lines;
> ref32 += row_offset * (num_lines << 1);
> src32 += row_offset * (num_lines << 1);
> }// for line
> }// if/else
>
> /* move to next block horizontal */
> block32++;
> }// for x
>
> /* move to next line of blocks */
> block32 += blk_row_offset;
> is_first_row = 0;
> }// for y
> break;
looks very similar to the 4x4 code ...
[...]
> while (curr_cell >= ctx->cell_stack) {
> if (!curr_cell->tree) {
> /* MC tree codes */
> switch (get_bintree_code(&bitctx)) {
> case H_SPLIT:
> /* split current cell into two vertical subcells */
> prev_cell = curr_cell;
> assert(curr_cell < &ctx->cell_stack[CELL_STACK_MAX]);
> DUPLICATE_CELL(curr_cell);
> SPLIT_CELL(prev_cell->height, curr_cell->height);
> prev_cell->ypos += curr_cell->height;
> prev_cell->height -= curr_cell->height;
> break;
[...]
> } else {
> /* VQ tree codes */
> switch (get_bintree_code(&bitctx)) {
> case H_SPLIT:
> /* split current cell into two vertical subcells */
> prev_cell = curr_cell;
> assert(curr_cell < &ctx->cell_stack[CELL_STACK_MAX]);
> DUPLICATE_CELL(curr_cell);
> SPLIT_CELL(prev_cell->height, curr_cell->height);
> prev_cell->ypos += curr_cell->height;
> prev_cell->height -= curr_cell->height;
> break;
please get rid of all the duplicated code
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
No snowflake in an avalanche ever feels responsible. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090726/f81d9a43/attachment.pgp>
More information about the ffmpeg-devel
mailing list