[FFmpeg-devel] [PATCH] RV30/40 decoder splitted

Thu Sep 20 07:27:02 CEST 2007

On Wed, Sep 19, 2007 at 11:05:35PM +0200, Michael Niedermayer wrote:
> Hi
> 
> On Wed, Sep 19, 2007 at 01:35:43PM +0300, Kostya wrote:
> > Here are split rv30 and rv40 decoders.
> > Due to the lack of time I have not committed all of this to rv40 SVN
> > and this patch lacks documentation. But all code should be here.
> > 
> > Please comment.
> 
> [...]
> > +    int maxbits = 0, realsize;
> > +    int ret;
> > +
> > +    realsize = 0;
> 
> the int and =0 is inconsistent

fixed 

> [...]
> > +    for(i = 0; i < NUM_INTRA_TABLES; i++){
> > +        for(j = 0; j < 2; j++)
> > +            rv34_gen_vlc(rv34_intra_cbppatvlc_pointers[i][j], CBPPAT_VLC_SIZE, &intra_vlcs[i].cbppattern[j]);
> > +        for(j = 0; j < 2; j++)
> > +            for(k = 0; k < 4; k++)
> > +                rv34_gen_vlc(rv34_intra_cbpvlc_pointers[i][j][k], CBP_VLC_SIZE, &intra_vlcs[i].cbp[j][k]);
> > +        for(j = 0; j < 4; j++)
> > +            rv34_gen_vlc(rv34_intra_firstpatvlc_pointers[i][j], FIRSTBLK_VLC_SIZE, &intra_vlcs[i].first_pattern[j]);
> > +        for(j = 0; j < 2; j++)
> > +            rv34_gen_vlc(rv34_intra_secondpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].second_pattern[j]);
> > +        for(j = 0; j < 2; j++)
> > +            rv34_gen_vlc(rv34_intra_thirdpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].third_pattern[j]);
> 
> all the for(j = 0; j < 2; j++) loops can be merged

done

> > +        rv34_gen_vlc(rv34_intra_coeffvlc_pointers[i], COEFF_VLC_SIZE, &intra_vlcs[i].coefficient);
> > +    }
> > +
> > +    for(i = 0; i < NUM_INTER_TABLES; i++){
> > +        rv34_gen_vlc(rv34_inter_cbppatvlc_pointers[i], CBPPAT_VLC_SIZE, &inter_vlcs[i].cbppattern[0]);
> > +        for(j = 0; j < 4; j++)
> > +            rv34_gen_vlc(rv34_inter_cbpvlc_pointers[i][j], CBP_VLC_SIZE, &inter_vlcs[i].cbp[0][j]);
> > +        for(j = 0; j < 2; j++)
> > +            rv34_gen_vlc(rv34_inter_firstpatvlc_pointers[i][j], FIRSTBLK_VLC_SIZE, &inter_vlcs[i].first_pattern[j]);
> > +        for(j = 0; j < 2; j++)
> > +            rv34_gen_vlc(rv34_inter_secondpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &inter_vlcs[i].second_pattern[j]);
> > +        for(j = 0; j < 2; j++)
> > +            rv34_gen_vlc(rv34_inter_thirdpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &inter_vlcs[i].third_pattern[j]);
> 
> same here

merged

> [...]
> > +/**
> > + * Real Video 4.0 inverse transform
> > + * Code is almost the same as in SVQ3, only scaling is different
> > + */
> > +static void rv34_intra_inv_transform(DCTELEM *block, const int offset){
> > +    int temp[16];
> > +    unsigned int i;
> 
> hmm

I will make patch to use SVQ3 transform with inplace dequantization and use it
for both SVQ3 and RV40 but it will take some time.

> [...]
> > +    for(mask = 8; mask; mask >>= 1, curshift++){
> > +        if(!(pattern & mask)) continue;
> > +        t = get_vlc2(gb, vlc->cbp[table][table2].table, vlc->cbp[table][table2].bits, 1);
> > +        cbp |= rv34_cbp_code[t] << curshift[0];
> > +    }
> 
> i think:
> 
> v= vlc->cbp[table][table2];
> for(i=0; i<4; i++){
>     if(pattern & (8>>i)){
>         t = get_vlc2(gb, v->table, v->bits, 1);
>         cbp |= rv34_cbp_code[t] << shifts[i];
>     }
> }
> 
> is clearer, though your variant is fine as well

I will leave it as is for now. 

> [...]
> > +/**
> > + * Decode quantizer difference and return modified quantizer
> > + */
> > +static inline int rv34_decode_dquant(GetBitContext *gb, int quant)
> > +{
> 
> > +    if(get_bits1(gb))
> > +        return av_clip(quant + rv34_dquant_tab[quant * 2 + get_bits1(gb)], 0, 31);
> 
> the cliping should be unneeded as the table already can ensure that no
> overflow happens

You are right, dropped av_clip()

> [...]
> > +/**
> > + * Decode motion vector differences
> > + * and perform motion vector reconstruction and motion compensation.
> > + */
> > +static int rv34_decode_mv(RV34DecContext *r, int block_type)
> > +{
> > +    MpegEncContext *s = &r->s;
> > +    GetBitContext *gb = &s->gb;
> > +    int i, j;
> > +
> > +    switch(block_type){
> > +    case RV34_MB_TYPE_INTRA:
> > +    case RV34_MB_TYPE_INTRA16x16:
> > +        for(j = 0; j < 2; j++){
> > +            for(i = 0; i < 2; i++){
> > +                s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride + i + j*s->b8_stride][0] = 0;
> > +                s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride + i + j*s->b8_stride][1] = 0;
> > +            }
> > +        }
> > +        return 0;
> > +    case RV34_MB_SKIP:
> > +        r->dmv[0][0] = 0;
> > +        r->dmv[0][1] = 0;
> > +        if(s->pict_type == P_TYPE){
> > +            rv34_pred_mv(r, block_type, 0);
> > +            rv34_mc(r, block_type, 0, 0, 0, 2, 2);
> > +            break;
> > +        }
> > +    case RV34_MB_B_INTERP:
> > +        r->dmv[0][0] = 0;
> > +        r->dmv[0][1] = 0;
> > +        r->dmv[1][0] = 0;
> > +        r->dmv[1][1] = 0;
> > +        rv34_pred_mv_b  (r, RV34_MB_B_INTERP);
> > +        rv34_mc_b       (r, RV34_MB_B_INTERP);
> > +        rv34_mc_b_interp(r, RV34_MB_B_INTERP);
> > +        break;
> > +    case RV34_MB_P_16x16:
> > +    case RV34_MB_P_MIX16x16:
> > +        r->dmv[0][0] = rv34_get_omega_signed(gb);
> > +        r->dmv[0][1] = rv34_get_omega_signed(gb);
> > +        rv34_pred_mv(r, block_type, 0);
> > +        rv34_mc(r, block_type, 0, 0, 0, 2, 2);
> > +        break;
> > +    case RV34_MB_B_FORWARD:
> > +    case RV34_MB_B_BACKWARD:
> > +        r->dmv[0][0] = rv34_get_omega_signed(gb);
> > +        r->dmv[0][1] = rv34_get_omega_signed(gb);
> > +        rv34_pred_mv_b  (r, block_type);
> > +        rv34_mc_b       (r, block_type);
> > +        break;
> > +    case RV34_MB_P_16x8:
> > +    case RV34_MB_P_8x16:
> > +    case RV34_MB_B_DIRECT:
> > +        r->dmv[0][0] = rv34_get_omega_signed(gb);
> > +        r->dmv[0][1] = rv34_get_omega_signed(gb);
> > +        r->dmv[1][0] = rv34_get_omega_signed(gb);
> > +        r->dmv[1][1] = rv34_get_omega_signed(gb);
> > +        rv34_pred_mv(r, block_type, 0);
> > +        rv34_pred_mv(r, block_type, 1);
> > +        if(block_type == RV34_MB_P_16x8){
> > +            rv34_mc(r, block_type, 0, 0, 0,            2, 1);
> > +            rv34_mc(r, block_type, 0, 8, s->b8_stride, 2, 1);
> > +        }
> > +        if(block_type == RV34_MB_P_8x16){
> > +            rv34_mc(r, block_type, 0, 0, 0, 1, 2);
> > +            rv34_mc(r, block_type, 8, 0, 1, 1, 2);
> > +        }
> > +        if(block_type == RV34_MB_B_DIRECT){
> > +            rv34_pred_mv_b  (r, block_type);
> > +            rv34_mc_b       (r, block_type);
> > +            rv34_mc_b_interp(r, block_type);
> > +        }
> > +        break;
> > +    case RV34_MB_P_8x8:
> > +        for(i=0;i< 4;i++){
> > +            r->dmv[i][0] = rv34_get_omega_signed(gb);
> > +            r->dmv[i][1] = rv34_get_omega_signed(gb);
> > +            rv34_pred_mv(r, block_type, i);
> > +            rv34_mc(r, block_type, (i&1)<<3, (i&2)<<2, (i&1)+(i>>1)*s->b8_stride, 1, 1);
> > +        }
> > +        break;
> > +    }
> > +
> > +    return 0;
> > +}
> 
> mv_count= tab[block_type];
> for(i=0; i<mv_count; i++){
>     r->dmv[i][0] = rv34_get_omega_signed(gb);
>     r->dmv[i][1] = rv34_get_omega_signed(gb);
> }

Will do this way. 

> [...]
> > +        if(no_up && no_left)
> > +            itype = DC_128_PRED8x8;
> > +        else if(no_up){
> > +            if(itype == PLANE_PRED8x8)itype = HOR_PRED8x8;
> > +            if(itype == VERT_PRED8x8) itype = HOR_PRED8x8;
> > +            if(itype == DC_PRED8x8)   itype = LEFT_DC_PRED8x8;
> > +        }else if(no_left){
> > +            if(itype == PLANE_PRED8x8)itype = VERT_PRED8x8;
> > +            if(itype == HOR_PRED8x8)  itype = VERT_PRED8x8;
> > +            if(itype == DC_PRED8x8)   itype = TOP_DC_PRED8x8;
> 
> > +        }
> > +        r->h.pred16x16[itype](Y, s->linesize);
> > +        dsp->add_pixels_clamped(s->block[0], Y, s->current_picture.linesize[0]);
> > +        dsp->add_pixels_clamped(s->block[1], Y + 8, s->current_picture.linesize[0]);
> > +        Y += s->current_picture.linesize[0] * 8;
> > +        dsp->add_pixels_clamped(s->block[2], Y, s->current_picture.linesize[0]);
> > +        dsp->add_pixels_clamped(s->block[3], Y + 8, s->current_picture.linesize[0]);
> > +
> > +        itype = ittrans16[intra_types[0]];
> > +        if(itype == PLANE_PRED8x8) itype = DC_PRED8x8;
> 
> > +        if(no_up && no_left)
> > +            itype = DC_128_PRED8x8;
> > +        else if(no_up){
> > +            if(itype == VERT_PRED8x8) itype = HOR_PRED8x8;
> > +            if(itype == DC_PRED8x8)   itype = LEFT_DC_PRED8x8;
> > +        }else if(no_left){
> > +            if(itype == HOR_PRED8x8)  itype = VERT_PRED8x8;
> > +            if(itype == DC_PRED8x8)   itype = TOP_DC_PRED8x8;
> > +        }
> 
> duplicate

Actually it's similar but not duplicate - case 3(PLANE_PRED) is treated as DC
prediction for chroma blocks, so on corner cases it results in different types
of prediction.

> [...]
> > +    if(!r->si.type && !r->rv30){
> > +        r->is16 = 0;
> > +        switch(decode210(gb)){
> > +        case 0: // 16x16 block
> > +            r->is16 = 1;
> > +            break;
> > +        case 1:
> > +            break;
> > +        case 2:
> > +            av_log(s->avctx, AV_LOG_ERROR, "Need DQUANT\n");
> > +            // q = decode_dquant(gb);
> > +            break;
> > +        }
> > +        s->current_picture_ptr->mb_type[mb_pos] = r->is16 ? MB_TYPE_INTRA16x16 : MB_TYPE_INTRA;
> > +        r->block_type = r->is16 ? RV34_MB_TYPE_INTRA16x16 : RV34_MB_TYPE_INTRA;
> > +    }else if(!r->si.type && r->rv30){
> > +        r->is16 = get_bits1(gb);
> > +        s->current_picture_ptr->mb_type[mb_pos] = r->is16 ? MB_TYPE_INTRA16x16 : MB_TYPE_INTRA;
> > +        r->block_type = r->is16 ? RV34_MB_TYPE_INTRA16x16 : RV34_MB_TYPE_INTRA;
> > +    }else{
> 
> if(!r->si.type){
>     r->is16= get_bits1(gb))
>     if(!r->is16 && !r->rv30){
>         int t=get_bits1(gb);
>         if(!t)
>             av_log(s->avctx, AV_LOG_ERROR, "Need DQUANT\n");
>     }
>     s->current_picture_ptr->mb_type[mb_pos] = r->is16 ?      MB_TYPE_INTRA16x16 :      MB_TYPE_INTRA;
>     r->block_type                           = r->is16 ? RV34_MB_TYPE_INTRA16x16 : RV34_MB_TYPE_INTRA;
> }else{

Will do this way. 

> [...]
> > +        if(!r->is16){
> > +            if(r->decode_intra_types(r, gb, intra_types) < 0)
> > +                return -1;
> > +            r->chroma_vlc = 0;
> > +            r->luma_vlc   = 1;
> > +        }else{
> > +            t = get_bits(gb, 2);
> > +            for(i = 0; i < 16; i++)
> > +                intra_types[(i & 3) + (i>>2) * r->intra_types_stride] = t;
> > +            r->chroma_vlc = 0;
> > +            r->luma_vlc   = 2;
> > +        }
> 
> chroma_vlc can be factored out of that if()

ok 

> [...]
> > +/**
> > + * Initialize decoder
> > + * @todo Maybe redone in some other way
> > + */
> > +int rv34_decode_init(AVCodecContext *avctx)
> > +{
> > +    RV34DecContext *r = avctx->priv_data;
> > +    MpegEncContext *s = &r->s;
> > +
> > +    static int tables_done = 0;
> > +
> > +    r->rv30 = (avctx->codec_id == CODEC_ID_RV30);
> 
> you set this to 0/1 in the specific init functions and here again ...

Leftover, dropped. 

> > +    MPV_decode_defaults(s);
> > +    s->avctx= avctx;
> > +    s->out_format = FMT_H263;
> > +    s->codec_id= avctx->codec_id;
> > +
> > +    s->width = avctx->width;
> > +    s->height = avctx->height;
> > +
> > +    r->s.avctx = avctx;
> > +    avctx->flags |= CODEC_FLAG_EMU_EDGE;
> > +    r->s.flags |= CODEC_FLAG_EMU_EDGE;
> > +    avctx->pix_fmt = PIX_FMT_YUV420P;
> > +    avctx->has_b_frames = 1;
> > +    s->low_delay = 0;
> > +
> > +    if (MPV_common_init(s) < 0)
> > +        return -1;
> > +
> > +    ff_h264_pred_init(&r->h, CODEC_ID_RV40);
> 
> shouldnt that be avctx->codec_id ?

For now there's no handling of CODEC_ID_RV30 in h264pred. 

> > +
> > +    r->intra_types_stride = (s->mb_width + 1) * 4;
> > +    r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(int));
> > +    r->intra_types = r->intra_types_hist + r->intra_types_stride * 4;
> > +
> 
> > +    r->mb_type = av_mallocz(r->s.mb_stride * r->s.mb_height * sizeof(int));
> 
> please use sizeof(*r->mb_type) it really can safe time if someone ever changes
> the type ...

done 

> [...]
> > +        //rv40_postprocess(r);
> 
> forgotten?

Left as a remainder that it should be implemented but it is not working.

> [...]
> > +/**
> > + * RV30 and RV40 Macroblock types
> > + * @{
> > + */
> > +enum RV40BlockTypes{
> > +    RV34_MB_TYPE_INTRA,      ///< Intra macroblock
> > +    RV34_MB_TYPE_INTRA16x16, ///< Intra macroblock with DCs in a separate 4x4 block
> > +    RV34_MB_P_16x16,         ///< P-frame macroblock, one motion frame
> > +    RV34_MB_P_8x8,           ///< P-frame macroblock, 8x8 motion compensation partitions
> > +    RV34_MB_B_FORWARD,       ///< B-frame macroblock, forward prediction
> > +    RV34_MB_B_BACKWARD,      ///< B-frame macroblock, backward prediction
> > +    RV34_MB_SKIP,            ///< Skipped block
> > +    RV34_MB_B_INTERP,        ///< Bidirectionally predicted B-frame macroblock, no motion vectors
> > +    RV34_MB_P_16x8,          ///< P-frame macroblock, 16x8 motion compensation partitions
> > +    RV34_MB_P_8x16,          ///< P-frame macroblock, 8x16 motion compensation partitions
> > +    RV34_MB_B_DIRECT,        ///< Bidirectionally predicted B-frame macroblock, two motion vectors
> > +    RV34_MB_P_MIX16x16,      ///< P-frame macroblock with DCs in a separate 4x4 block, one motion vector
> > +    RV34_MB_TYPES
> > +};
> > +/** @} */
> 
> for what is the @{} good here ?

looks like it does effectively nothing, removed 

> [...]
> > +/** Decoder context */
> > +typedef struct RV34DecContext{
> > +    MpegEncContext s;
> > +    int mb_bits;             ///< bits needed to read MB offet in slice header
> > +    int *intra_types_hist;   ///< old block types, used for prediction
> > +    int *intra_types;        ///< block types
> 
> > +    int intra_types_stride;  ///< stride for block types data
> 
> hmm, ignoring my reviews must be contagious
> expect to be flamed next time
> note: either fix what i complain about or say why it cant be done or that you
> dont want to do it, dont just ignore things

My changes have a great lag sometimes. Changed to b4_stride.  

> [...]
> > +/**
> > + * Common decoding functions
> > + */
> > +int rv34_get_start_offset(GetBitContext *gb, int blocks);
> > +int rv34_get_omega(GetBitContext *gb);
> > +int rv34_decode_init(AVCodecContext *avctx);
> > +int rv34_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size);
> > +int rv34_decode_end(AVCodecContext *avctx);
> 
> ff prefixes please we arent the official real video people for whom a rv
> prefix would maybe be ok

already done in SVN 

> [...]
> > +/**
> > + * This table is used for retrieving current intra type
> > + * basing on its neighbours and adjustment provided by
> > + * code read and decoded before.
> > + *
> > + * This is really three-dimensional matrix with dimensions
> > + * [-1..9][-1..9][0..9], first and second coordinates
> > + * are detemined by top and left neighbours (-1 if unavailable).
> > + */
> > +static const uint8_t rv30_itype_from_context[900] = {
> > +    0, 9, 9, 9, 9, 9, 9, 9, 9,
> 
> see previous reviews

It is not desirable. rv30_itype_code may be compacted easily because
values are read by pairs from it but this one has its data read from
different positions depending on rv30_itype_code, for example last
offsets may be 3 and 0 or 0 and 4 so packing it would be counterproductive.

> [...]
> > +/**
> > + * Get stored dimension from bitstream
> > + *
> > + * If the width/height is the standard one then it's coded as 3-bit index.
> > + * Otherwise it is coded as escaped 8-bit portions.
> > + */
> > +static int get_dimension(GetBitContext *gb, const int *dim1, const int *dim2)
> > +{
> > +    int val, t;
> > +
> > +    t = get_bits(gb, 3);
> > +    val = dim1[t];
> > +    if(!val && dim2)
> > +        val = dim2[(t*2 | get_bits1(gb)) & 3];
> > +    if(!val){
> > +        do{
> > +            t = get_bits(gb, 8);
> > +            val += t << 2;
> > +        }while(t == 0xFF);
> > +    }
> > +    return val;
> > +}
> 
> this function is duplicated, please remove ALL duplicated functions

Leftover from rv40, dropped. I think there are no more duplicated functions left. 

> [...]
> > +static int rv40_parse_slice_header(RV34DecContext *r, GetBitContext *gb, SliceInfo *si)
> > +{
> > +    int t, mb_bits;
> > +    int w = r->s.width, h = r->s.height;
> > +    int mb_size;
> > +
> > +    memset(si, 0, sizeof(SliceInfo));
> 
> > +    si->type = -1;
> > +    if(get_bits1(gb))
> > +        return -1;
> > +    si->type = get_bits(gb, 2);
> 
> hmm

All changes are and will be reflected in SOC-SVN, I see no sense submitting it here
unless RM demuxer problem is resolved.

> [...]
> -- 
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB