[FFmpeg-devel] [PATCH 01/N] RV30/40 Decoder - Core

Sat Dec 8 05:32:07 CET 2007

On Fri, Dec 07, 2007 at 09:04:07AM +0200, Kostya wrote:
[...]
> > [...]
> > > /**
> > >  * Decode Levenstein (also known as Elias Gamma) code.
> > >  */
> > > int ff_rv34_get_gamma(GetBitContext *gb)
> > > {
> > >     int code = 1;
> > > 
> > >     while(!get_bits1(gb)){
> > >         code = (code << 1) | get_bits1(gb);
> > >     }
> > >     return code;
> > > }
> > > 
> > > /**
> > >  * Decode Levenstein (also known as Elias Gamma) code as signed integer.
> > >  */
> > > int ff_rv34_get_gamma_signed(GetBitContext *gb)
> > > {
> > 
> > >     int code;
> > > 
> > >     code = ff_rv34_get_gamma(gb);
> > 
> > can be merged
> > 
> > 
> > >     if(code & 1)
> > >         return -(code >> 1);
> > >     else
> > >         return code >> 1;
> > > }
> > 
> > please check if the following are identical (+-1 doesnt count as
> > different)
> > 
> > svq3_get_ue_golomb() and 
> > svq3_get_se_golomb()
> > 
> > and if they are identical post START/STOP_TIMER scores
> > also id like to see START/STOP_TIMER scores in that case for the
> > original vlc based code
> > 
> > the fastest should be placed in golomb.c/h
> > and used
> 
> I don't know when I can do benchmarks but switched to svq3_get_[su]e_golomb()
> anyway.

ok, just dont forget about the benchmarks please

[...]
> > > 
> > >     if(!avctx->slice_count){
> > >         slice_count = (*buf++) + 1;
> > >         slices_hdr = buf + 4;
> > >         buf += 8 * slice_count;
> > >     }else
> > >         slice_count = avctx->slice_count;
> > 
> > do we need avctx->slice_count support? i think that should be droped!
> > it cant break any compatibility as this is a new decoder ...
>  
> I've left it for now to be the same as rv10/20 decoder.
> I'd like to have them dropped simultaneously. 

ok

[...]
> /** essential slice information */
> typedef struct SliceInfo{
>     int type;              ///< slice type (intra, inter)
>     int size;              ///< size of the slice in bits
>     int quant;             ///< quantizer used for this slice
>     int vlc_set;           ///< VLCs used for this slice
>     int start, end;        ///< start and end macroblocks of the slice
>     int width;             ///< coded width
>     int height;            ///< coded height
> }SliceInfo;
> 
> /** decoder context */
> typedef struct RV34DecContext{
>     MpegEncContext s;
>     int8_t *intra_types_hist;///< old block types, used for prediction
>     int8_t *intra_types;     ///< block types
>     int block_start;         ///< start of slice in blocks
>     uint8_t *luma_dc_quant_i;///< luma subblock DC quantizer for intraframes
>     uint8_t *luma_dc_quant_p;///< luma subblock DC quantizer for interframes
> 
>     RV34VLC *cur_vlcs;       ///< VLC set used for current frame decoding
>     int bits;                ///< slice size in bits
>     H264PredContext h;       ///< functions for 4x4 and 16x16 intra block prediction
>     SliceInfo si;            ///< current slice information
>     uint8_t *slice_data;     ///< saved slice data
> 
>     int *mb_type;            ///< internal macroblock types
>     int block_type;          ///< current block type
>     int luma_vlc;            ///< which VLC set will be used for decoding of luma blocks
>     int chroma_vlc;          ///< which VLC set will be used for decoding of chroma blocks
>     int is16;                ///< current block has additional 16x16 specific features or not
>     int dmv[4][2];           ///< differential motion vectors for the current macroblock
> 
>     int rv30;                ///< indicates which RV variasnt is currently decoded
>     int rpr;                 ///< one field size in RV30 slice header
> 
>     int avail[4];            ///< whether left, top, top rights and top left MBs are available
> 
>     int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
>     int (*decode_mb_info)(struct RV34DecContext *r);
>     int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst);
>     void (*loop_filter)(struct RV34DecContext *r);
> }RV34DecContext;

SliceInfo.size  is redundant with bits
SliceInfo.quant with qscale
SliceInfo.start with block_start

slice_data is a very ugly way to pass an argument
id use:
    slice_data = buf + offset;
    rv34_decode_slice(r, r->si.size, r->si.end, &last, slice_data);
instead of
    r->slice_data = buf + offset;
    rv34_decode_slice(r, r->si.size, r->si.end, &last);
theres also only one use of it ...

[...]
> /** translation of RV30/40 macroblock types to lavc ones */
> static const int rv34_mb_type_to_lavc[12] = {
>     MB_TYPE_INTRA,
>     MB_TYPE_INTRA16x16,
>     MB_TYPE_16x16 | MB_TYPE_L0,
>     MB_TYPE_8x8   | MB_TYPE_L0,
>     MB_TYPE_16x16 | MB_TYPE_L0,
>     MB_TYPE_16x16 | MB_TYPE_L1,
>     MB_TYPE_SKIP,

>     MB_TYPE_DIRECT2,

MB_TYPE_DIRECT2 | MB_TYPE_16x16 | MB_TYPE_L0L1

>     MB_TYPE_16x8  | MB_TYPE_L0,
>     MB_TYPE_8x16  | MB_TYPE_L0,

>     MB_TYPE_DIRECT2,

MB_TYPE_16x16 | MB_TYPE_L0L1,

[...]
> /**
>  * Initialize all tables.
>  */
> static void rv34_init_tables()
> {
>     int i, j, k;
> 
>     for(i = 0; i < NUM_INTRA_TABLES; i++){
>         for(j = 0; j < 2; j++){
>             rv34_gen_vlc(rv34_table_intra_cbppat   [i][j], CBPPAT_VLC_SIZE,   &intra_vlcs[i].cbppattern[j],     NULL);
>             rv34_gen_vlc(rv34_table_intra_secondpat[i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].second_pattern[j], NULL);
>             rv34_gen_vlc(rv34_table_intra_thirdpat [i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].third_pattern[j],  NULL);
>             for(k = 0; k < 4; k++)
>                 rv34_gen_vlc(rv34_table_intra_cbp[i][j+k*2],  CBP_VLC_SIZE,      &intra_vlcs[i].cbp[j][k],         rv34_cbp_code);
>         }
>         for(j = 0; j < 4; j++)
>             rv34_gen_vlc(rv34_table_intra_firstpat[i][j], FIRSTBLK_VLC_SIZE, &intra_vlcs[i].first_pattern[j], NULL);
>         rv34_gen_vlc(rv34_intra_coeffvlc[i], COEFF_VLC_SIZE, &intra_vlcs[i].coefficient, NULL);
>     }
> 
>     for(i = 0; i < NUM_INTER_TABLES; i++){
>         rv34_gen_vlc(rv34_inter_cbppatvlc[i], CBPPAT_VLC_SIZE, &inter_vlcs[i].cbppattern[0], NULL);
>         for(j = 0; j < 4; j++)
>             rv34_gen_vlc(rv34_inter_cbpvlc[i][j], CBP_VLC_SIZE, &inter_vlcs[i].cbp[0][j], rv34_cbp_code);

the names 
rv34_inter_cbpvlc and rv34_table_intra_cbp are inconsistant with each other
rv34_inter_cbppatvlc and rv34_table_intra_cbppat are as well

[...]
> /**
>  * @defgroup transform RV30/40 inverse transform functions
>  * @{
>  */
> 
> static void rv34_row_transform(int temp[16], DCTELEM *block, const int offset)

sould be always_inline

[...]
> /**
>  * Dequantize ordinary 4x4 block.
>  * @todo optimize
>  */
> static inline void rv34_dequant4x4(DCTELEM *block, int offset, int Qdc, int Q)
> {
>     int i, j;
> 
>     block += offset;
>     block[0] = (block[0] * Qdc + 8) >> 4;
>     for(i = 0; i < 4; i++)
>         for(j = !i; j < 4; j++)
>             block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
> }
> 
> /**
>  * Dequantize 4x4 block of DC values for 16x16 macroblock.
>  * @todo optimize
>  */
> static inline void rv34_dequant4x4_16x16(DCTELEM *block, int offset, int Qdc, int Q)
> {
>     int i;
> 
>     block += offset;
>     for(i = 0; i < 3; i++)
>          block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Qdc + 8) >> 4;
>     for(; i < 16; i++)
>          block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Q + 8) >> 4;
> }
> /** @} */ //block functions

i think it would be more readable if the offset were added outside these
functions
rv34_dequant4x4_16x16(a,b,c,d)
is less clear about what it does than the equivalent
rv34_dequant4x4_16x16(a+b,c,d)

> 
> 

> /**
>  * @defgroup bitstream RV30/40 bitstream parsing
>  * @{
>  */
> 
> static inline int decode210(GetBitContext *gb){
>     if (get_bits1(gb))
>         return 0;
>     else
>         return 2 - get_bits1(gb);
> }

duplicate of the equally named function from vc1.c

[...]
> /**
>  * Decode quantizer difference and return modified quantizer.
>  */
> static inline int rv34_decode_dquant(GetBitContext *gb, int quant)
> {
>     if(get_bits1(gb))
>         return rv34_dquant_tab[get_bits1(gb)][quant];
>     else
>         return get_bits(gb, 5);
> }

btw, doesnt rv34 depend on h263* ? if no then ill accept the above as it would
be silly to add a dependancy just because of 5 lines of code but if it does
depend already then you really should use the table and ff_h263_decode_mba()
from h263 instead of duplicating it!

> 
> /** @} */ //bitstream functions
> 
> /**
>  * @defgroup mv motion vector related code (prediction, reconstruction, motion compensation)
>  * @{
>  */
> 

> /** macroblock partition width in 8x8 blocks */
> static const uint8_t part_sizes_w[RV34_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2 };
> 
> /** macroblock partition height in 8x8 blocks */
> static const uint8_t part_sizes_h[RV34_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2 };

these 2 tables can be overlapped to safe 7 bytes
no iam not serious, this is just a joke :)
but someone should write a linker which "compresses" const tables by by
attempting to overlap them

[...]
> /**
>  * Motion vector prediction for B-frames.
>  */
> static void rv34_pred_mv_b(RV34DecContext *r, int block_type)
> {
>     MpegEncContext *s = &r->s;
>     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
>     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
>     int A[2][2], B[2][2], C[2][2];
>     int no_A[2], no_B[2], no_C[2];
>     int c_mv_pos;
>     int mx[2], my[2];
>     int i, j;
> 
>     memset(A, 0, sizeof(A));
>     memset(B, 0, sizeof(B));
>     memset(C, 0, sizeof(C));
>     memset(mx, 0, sizeof(mx));
>     memset(my, 0, sizeof(my));
>     if(!r->avail[0])
>         no_A[0] = no_A[1] = 1;
>     else{
>         no_A[0] = no_A[1] = 0;
>         if(r->mb_type[mb_pos - 1] != RV34_MB_B_FORWARD  && r->mb_type[mb_pos - 1] != RV34_MB_B_BIDIR)
>             no_A[0] = 1;
>         if(r->mb_type[mb_pos - 1] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - 1] != RV34_MB_B_BIDIR)
>             no_A[1] = 1;
>         if(!no_A[0]){
>             A[0][0] = s->current_picture_ptr->motion_val[0][mv_pos - 1][0];
>             A[0][1] = s->current_picture_ptr->motion_val[0][mv_pos - 1][1];
>         }
>         if(!no_A[1]){
>             A[1][0] = s->current_picture_ptr->motion_val[1][mv_pos - 1][0];
>             A[1][1] = s->current_picture_ptr->motion_val[1][mv_pos - 1][1];
>         }
>     }
>     if(!r->avail[1]){
>         no_B[0] = no_B[1] = 1;
>     }else{
>         no_B[0] = no_B[1] = 0;
>         if(r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_FORWARD  && r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_BIDIR)
>             no_B[0] = 1;
>         if(r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_BIDIR)
>             no_B[1] = 1;
>         if(!no_B[0]){
>             B[0][0] = s->current_picture_ptr->motion_val[0][mv_pos - s->b8_stride][0];
>             B[0][1] = s->current_picture_ptr->motion_val[0][mv_pos - s->b8_stride][1];
>         }
>         if(!no_B[1]){
>             B[1][0] = s->current_picture_ptr->motion_val[1][mv_pos - s->b8_stride][0];
>             B[1][1] = s->current_picture_ptr->motion_val[1][mv_pos - s->b8_stride][1];
>         }
>     }
>     if(r->avail[2]){
>         no_C[0] = no_C[1] = 0;
>         if(r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_FORWARD  && r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_BIDIR)
>             no_C[0] = 1;
>         if(r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_BIDIR)
>             no_C[1] = 1;
>         c_mv_pos = mv_pos - s->b8_stride + 2;
>     }else if(r->avail[3]){
>         no_C[0] = no_C[1] = 0;
>         if(r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_FORWARD  && r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_BIDIR)
>             no_C[0] = 1;
>         if(r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_BIDIR)
>             no_C[1] = 1;
>         c_mv_pos = mv_pos - s->b8_stride - 1;
>     }else{
>         no_C[0] = no_C[1] = 1;
>         c_mv_pos = 0;
>     }
>     if(!no_C[0]){
>         C[0][0] = s->current_picture_ptr->motion_val[0][c_mv_pos][0];
>         C[0][1] = s->current_picture_ptr->motion_val[0][c_mv_pos][1];
>     }
>     if(!no_C[1]){
>         C[1][0] = s->current_picture_ptr->motion_val[1][c_mv_pos][0];
>         C[1][1] = s->current_picture_ptr->motion_val[1][c_mv_pos][1];
>     }

!no why? not yes!
this double negation obfuscated the code ...

[...]
>     default:
>         no_A[0] = no_A[1] = no_B[0] = no_B[1] = no_C[0] = no_C[1] = 1;
>     }
> 
>     mx[0] += r->dmv[0][0];
>     my[0] += r->dmv[0][1];
>     mx[1] += r->dmv[1][0];
>     my[1] += r->dmv[1][1];
>     for(j = 0; j < 2; j++){
>         for(i = 0; i < 2; i++){
>             s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][0] = mx[0];
>             s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][1] = my[0];
>             s->current_picture_ptr->motion_val[1][mv_pos + i + j*s->b8_stride][0] = mx[1];
>             s->current_picture_ptr->motion_val[1][mv_pos + i + j*s->b8_stride][1] = my[1];
>         }
>     }
> }

the no_A[0] = ... is useless

[...]
>     Y = s->dest[0] + xoff + yoff*s->linesize;
>     U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
>     V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;

vertical align nitpick
Y = s->dest[0] +  xoff     +  yoff    *s->  linesize;
U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;

>     if(block_type == RV34_MB_P_16x8){
>         qpel_mc[1][dxy](Y, srcY, s->linesize);
>         Y    += 8;
>         srcY += 8;
>         qpel_mc[1][dxy](Y, srcY, s->linesize);
>         chroma_mc[0]   (U, srcU, s->uvlinesize, 4, uvmx, uvmy);
>         chroma_mc[0]   (V, srcV, s->uvlinesize, 4, uvmx, uvmy);
>     }else if(block_type == RV34_MB_P_8x16){
>         qpel_mc[1][dxy](Y, srcY, s->linesize);
>         Y    += 8 * s->linesize;
>         srcY += 8 * s->linesize;
>         qpel_mc[1][dxy](Y, srcY, s->linesize);
>         chroma_mc[1]   (U, srcU, s->uvlinesize, 8, uvmx, uvmy);
>         chroma_mc[1]   (V, srcV, s->uvlinesize, 8, uvmx, uvmy);
>     }else if(block_type == RV34_MB_P_8x8){
>         qpel_mc[1][dxy](Y, srcY, s->linesize);
>         chroma_mc[1]   (U, srcU, s->uvlinesize, 4, uvmx, uvmy);
>         chroma_mc[1]   (V, srcV, s->uvlinesize, 4, uvmx, uvmy);
>     }else{
>         qpel_mc[0][dxy](Y, srcY, s->linesize);
>         chroma_mc[0]   (U, srcU, s->uvlinesize, 8, uvmx, uvmy);
>         chroma_mc[0]   (V, srcV, s->uvlinesize, 8, uvmx, uvmy);
>     }

if(block_type == RV34_MB_P_16x8){
    qpel_mc[1][dxy](Y, srcY, s->linesize);
    Y    += 8;
    srcY += 8;
}else if(block_type == RV34_MB_P_8x16){
    qpel_mc[1][dxy](Y, srcY, s->linesize);
    Y    += 8 * s->linesize;
    srcY += 8 * s->linesize;
}
qpel_mc[!is16x16][dxy](Y, srcY, s->linesize);
chroma_mc[2-width]   (U, srcU, s->uvlinesize, height*4, uvmx, uvmy);
chroma_mc[2-width]   (V, srcV, s->uvlinesize, height*4, uvmx, uvmy);

[...]
>     case RV34_MB_P_16x8:
>     case RV34_MB_P_8x16:

>     case RV34_MB_B_BIDIR:
>         rv34_pred_mv(r, block_type, 0);
>         rv34_pred_mv(r, block_type, 1);

i do not think that using the subblock number to indicate the mv direction is
a good idea, then i dont mind as long as it doesnt complicate the code ...

[...]
>         for(j = 0; j < 4; j++){
>             no_left = !r->avail[0];
>             YY = Y;
>             for(i = 0; i < 4; i++, cbp >>= 1, YY += 4){
>                 no_topright = no_up || (i==3 && j) || (i==3 && !j && (s->mb_x-1) == s->mb_width);
>                 rv34_pred_4x4_block(r, YY, s->linesize, ittrans[intra_types[i]], no_up, no_left, i || (j==3), no_topright);
>                 no_left = 0;

>                 if(!(cbp & 1)) continue;
>                 rv34_add_4x4_block(YY, s->linesize, s->block[(i>>1)+(j&2)], (i&1)*4+(j&1)*32);

if(cbp)
    rv34_add_4x4_block()

>             }
>             no_up = 0;
>             Y += s->linesize * 4;

Y += s->linesize * 4 - 4*4;
and YY becomes unneeded

[...]
>         dsp->add_pixels_clamped(s->block[0], Y, s->current_picture.linesize[0]);
>         dsp->add_pixels_clamped(s->block[1], Y + 8, s->current_picture.linesize[0]);
>         Y += s->current_picture.linesize[0] * 8;
>         dsp->add_pixels_clamped(s->block[2], Y, s->current_picture.linesize[0]);
>         dsp->add_pixels_clamped(s->block[3], Y + 8, s->current_picture.linesize[0]);

can be vertically aligned

[...]
>         if(!r->is16){
>             if(r->decode_intra_types(r, gb, intra_types) < 0)
>                 return -1;
>             r->luma_vlc   = 1;
>         }else{
>             t = get_bits(gb, 2);
>             for(i = 0; i < 16; i++)
>                 intra_types[(i & 3) + (i>>2) * s->b4_stride] = t;
>             r->luma_vlc   = 2;
>         }

if(!is16){
    B
}else{
    A
}

is less readable than
if(is16){
    A
}else{
    B
}
IMHO

[...]
>         if(!r->is16 && !(cbp & 1)) continue;
>         blknum = ((i & 2) >> 1) + ((i & 8) >> 2);
>         blkoff = ((i & 1) << 2) + ((i & 4) << 3);
>         if(cbp & 1)
>             rv34_decode_block(s->block[blknum] + blkoff, gb, r->cur_vlcs, r->luma_vlc, 0);
>         if((cbp & 1) || r->is16){

how exactly can this be false here? if it cant the check is useless

[...]
> static int check_slice_end(RV34DecContext *r, MpegEncContext *s)
> {
>     int bits;
>     if(r->s.mb_skip_run > 1)
>         return 0;
>     if(s->mb_y >= s->mb_height)
>         return 1;

and
>     while(!check_slice_end(r, s) && s->mb_num_left-- && s->mb_y < s->mb_height) {

hmm i dunno, maybe iam tired but this somehow looks hackish
cant you implement the mb_num_left-- if its needed at a sane point?

[...]
>     *last = 0;
>     if(s->mb_y >= s->mb_height)
>         *last = 1;
>     if(r->bits > get_bits_count(gb) && show_bits(gb, r->bits-get_bits_count(gb)))
>         *last = 1;
> 
>     return 0;

what about using the return value to return the "lastness" ...

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Good people do not need laws to tell them to act responsibly, while bad
people will find a way around the laws. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20071208/4639aa5f/attachment.pgp>