[FFmpeg-devel] [PATCH 01/N] RV30/40 Decoder - Core
Michael Niedermayer
michaelni
Sat Dec 8 05:32:07 CET 2007
On Fri, Dec 07, 2007 at 09:04:07AM +0200, Kostya wrote:
[...]
> > [...]
> > > /**
> > > * Decode Levenstein (also known as Elias Gamma) code.
> > > */
> > > int ff_rv34_get_gamma(GetBitContext *gb)
> > > {
> > > int code = 1;
> > >
> > > while(!get_bits1(gb)){
> > > code = (code << 1) | get_bits1(gb);
> > > }
> > > return code;
> > > }
> > >
> > > /**
> > > * Decode Levenstein (also known as Elias Gamma) code as signed integer.
> > > */
> > > int ff_rv34_get_gamma_signed(GetBitContext *gb)
> > > {
> >
> > > int code;
> > >
> > > code = ff_rv34_get_gamma(gb);
> >
> > can be merged
> >
> >
> > > if(code & 1)
> > > return -(code >> 1);
> > > else
> > > return code >> 1;
> > > }
> >
> > please check if the following are identical (+-1 doesnt count as
> > different)
> >
> > svq3_get_ue_golomb() and
> > svq3_get_se_golomb()
> >
> > and if they are identical post START/STOP_TIMER scores
> > also id like to see START/STOP_TIMER scores in that case for the
> > original vlc based code
> >
> > the fastest should be placed in golomb.c/h
> > and used
>
> I don't know when I can do benchmarks but switched to svq3_get_[su]e_golomb()
> anyway.
ok, just dont forget about the benchmarks please
[...]
> > >
> > > if(!avctx->slice_count){
> > > slice_count = (*buf++) + 1;
> > > slices_hdr = buf + 4;
> > > buf += 8 * slice_count;
> > > }else
> > > slice_count = avctx->slice_count;
> >
> > do we need avctx->slice_count support? i think that should be droped!
> > it cant break any compatibility as this is a new decoder ...
>
> I've left it for now to be the same as rv10/20 decoder.
> I'd like to have them dropped simultaneously.
ok
[...]
> /** essential slice information */
> typedef struct SliceInfo{
> int type; ///< slice type (intra, inter)
> int size; ///< size of the slice in bits
> int quant; ///< quantizer used for this slice
> int vlc_set; ///< VLCs used for this slice
> int start, end; ///< start and end macroblocks of the slice
> int width; ///< coded width
> int height; ///< coded height
> }SliceInfo;
>
> /** decoder context */
> typedef struct RV34DecContext{
> MpegEncContext s;
> int8_t *intra_types_hist;///< old block types, used for prediction
> int8_t *intra_types; ///< block types
> int block_start; ///< start of slice in blocks
> uint8_t *luma_dc_quant_i;///< luma subblock DC quantizer for intraframes
> uint8_t *luma_dc_quant_p;///< luma subblock DC quantizer for interframes
>
> RV34VLC *cur_vlcs; ///< VLC set used for current frame decoding
> int bits; ///< slice size in bits
> H264PredContext h; ///< functions for 4x4 and 16x16 intra block prediction
> SliceInfo si; ///< current slice information
> uint8_t *slice_data; ///< saved slice data
>
> int *mb_type; ///< internal macroblock types
> int block_type; ///< current block type
> int luma_vlc; ///< which VLC set will be used for decoding of luma blocks
> int chroma_vlc; ///< which VLC set will be used for decoding of chroma blocks
> int is16; ///< current block has additional 16x16 specific features or not
> int dmv[4][2]; ///< differential motion vectors for the current macroblock
>
> int rv30; ///< indicates which RV variasnt is currently decoded
> int rpr; ///< one field size in RV30 slice header
>
> int avail[4]; ///< whether left, top, top rights and top left MBs are available
>
> int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
> int (*decode_mb_info)(struct RV34DecContext *r);
> int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst);
> void (*loop_filter)(struct RV34DecContext *r);
> }RV34DecContext;
SliceInfo.size is redundant with bits
SliceInfo.quant with qscale
SliceInfo.start with block_start
slice_data is a very ugly way to pass an argument
id use:
slice_data = buf + offset;
rv34_decode_slice(r, r->si.size, r->si.end, &last, slice_data);
instead of
r->slice_data = buf + offset;
rv34_decode_slice(r, r->si.size, r->si.end, &last);
theres also only one use of it ...
[...]
> /** translation of RV30/40 macroblock types to lavc ones */
> static const int rv34_mb_type_to_lavc[12] = {
> MB_TYPE_INTRA,
> MB_TYPE_INTRA16x16,
> MB_TYPE_16x16 | MB_TYPE_L0,
> MB_TYPE_8x8 | MB_TYPE_L0,
> MB_TYPE_16x16 | MB_TYPE_L0,
> MB_TYPE_16x16 | MB_TYPE_L1,
> MB_TYPE_SKIP,
> MB_TYPE_DIRECT2,
MB_TYPE_DIRECT2 | MB_TYPE_16x16 | MB_TYPE_L0L1
> MB_TYPE_16x8 | MB_TYPE_L0,
> MB_TYPE_8x16 | MB_TYPE_L0,
> MB_TYPE_DIRECT2,
MB_TYPE_16x16 | MB_TYPE_L0L1,
[...]
> /**
> * Initialize all tables.
> */
> static void rv34_init_tables()
> {
> int i, j, k;
>
> for(i = 0; i < NUM_INTRA_TABLES; i++){
> for(j = 0; j < 2; j++){
> rv34_gen_vlc(rv34_table_intra_cbppat [i][j], CBPPAT_VLC_SIZE, &intra_vlcs[i].cbppattern[j], NULL);
> rv34_gen_vlc(rv34_table_intra_secondpat[i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].second_pattern[j], NULL);
> rv34_gen_vlc(rv34_table_intra_thirdpat [i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].third_pattern[j], NULL);
> for(k = 0; k < 4; k++)
> rv34_gen_vlc(rv34_table_intra_cbp[i][j+k*2], CBP_VLC_SIZE, &intra_vlcs[i].cbp[j][k], rv34_cbp_code);
> }
> for(j = 0; j < 4; j++)
> rv34_gen_vlc(rv34_table_intra_firstpat[i][j], FIRSTBLK_VLC_SIZE, &intra_vlcs[i].first_pattern[j], NULL);
> rv34_gen_vlc(rv34_intra_coeffvlc[i], COEFF_VLC_SIZE, &intra_vlcs[i].coefficient, NULL);
> }
>
> for(i = 0; i < NUM_INTER_TABLES; i++){
> rv34_gen_vlc(rv34_inter_cbppatvlc[i], CBPPAT_VLC_SIZE, &inter_vlcs[i].cbppattern[0], NULL);
> for(j = 0; j < 4; j++)
> rv34_gen_vlc(rv34_inter_cbpvlc[i][j], CBP_VLC_SIZE, &inter_vlcs[i].cbp[0][j], rv34_cbp_code);
the names
rv34_inter_cbpvlc and rv34_table_intra_cbp are inconsistant with each other
rv34_inter_cbppatvlc and rv34_table_intra_cbppat are as well
[...]
> /**
> * @defgroup transform RV30/40 inverse transform functions
> * @{
> */
>
> static void rv34_row_transform(int temp[16], DCTELEM *block, const int offset)
sould be always_inline
[...]
> /**
> * Dequantize ordinary 4x4 block.
> * @todo optimize
> */
> static inline void rv34_dequant4x4(DCTELEM *block, int offset, int Qdc, int Q)
> {
> int i, j;
>
> block += offset;
> block[0] = (block[0] * Qdc + 8) >> 4;
> for(i = 0; i < 4; i++)
> for(j = !i; j < 4; j++)
> block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
> }
>
> /**
> * Dequantize 4x4 block of DC values for 16x16 macroblock.
> * @todo optimize
> */
> static inline void rv34_dequant4x4_16x16(DCTELEM *block, int offset, int Qdc, int Q)
> {
> int i;
>
> block += offset;
> for(i = 0; i < 3; i++)
> block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Qdc + 8) >> 4;
> for(; i < 16; i++)
> block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Q + 8) >> 4;
> }
> /** @} */ //block functions
i think it would be more readable if the offset were added outside these
functions
rv34_dequant4x4_16x16(a,b,c,d)
is less clear about what it does than the equivalent
rv34_dequant4x4_16x16(a+b,c,d)
>
>
> /**
> * @defgroup bitstream RV30/40 bitstream parsing
> * @{
> */
>
> static inline int decode210(GetBitContext *gb){
> if (get_bits1(gb))
> return 0;
> else
> return 2 - get_bits1(gb);
> }
duplicate of the equally named function from vc1.c
[...]
> /**
> * Decode quantizer difference and return modified quantizer.
> */
> static inline int rv34_decode_dquant(GetBitContext *gb, int quant)
> {
> if(get_bits1(gb))
> return rv34_dquant_tab[get_bits1(gb)][quant];
> else
> return get_bits(gb, 5);
> }
btw, doesnt rv34 depend on h263* ? if no then ill accept the above as it would
be silly to add a dependancy just because of 5 lines of code but if it does
depend already then you really should use the table and ff_h263_decode_mba()
from h263 instead of duplicating it!
>
> /** @} */ //bitstream functions
>
> /**
> * @defgroup mv motion vector related code (prediction, reconstruction, motion compensation)
> * @{
> */
>
> /** macroblock partition width in 8x8 blocks */
> static const uint8_t part_sizes_w[RV34_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2 };
>
> /** macroblock partition height in 8x8 blocks */
> static const uint8_t part_sizes_h[RV34_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2 };
these 2 tables can be overlapped to safe 7 bytes
no iam not serious, this is just a joke :)
but someone should write a linker which "compresses" const tables by by
attempting to overlap them
[...]
> /**
> * Motion vector prediction for B-frames.
> */
> static void rv34_pred_mv_b(RV34DecContext *r, int block_type)
> {
> MpegEncContext *s = &r->s;
> int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
> int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
> int A[2][2], B[2][2], C[2][2];
> int no_A[2], no_B[2], no_C[2];
> int c_mv_pos;
> int mx[2], my[2];
> int i, j;
>
> memset(A, 0, sizeof(A));
> memset(B, 0, sizeof(B));
> memset(C, 0, sizeof(C));
> memset(mx, 0, sizeof(mx));
> memset(my, 0, sizeof(my));
> if(!r->avail[0])
> no_A[0] = no_A[1] = 1;
> else{
> no_A[0] = no_A[1] = 0;
> if(r->mb_type[mb_pos - 1] != RV34_MB_B_FORWARD && r->mb_type[mb_pos - 1] != RV34_MB_B_BIDIR)
> no_A[0] = 1;
> if(r->mb_type[mb_pos - 1] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - 1] != RV34_MB_B_BIDIR)
> no_A[1] = 1;
> if(!no_A[0]){
> A[0][0] = s->current_picture_ptr->motion_val[0][mv_pos - 1][0];
> A[0][1] = s->current_picture_ptr->motion_val[0][mv_pos - 1][1];
> }
> if(!no_A[1]){
> A[1][0] = s->current_picture_ptr->motion_val[1][mv_pos - 1][0];
> A[1][1] = s->current_picture_ptr->motion_val[1][mv_pos - 1][1];
> }
> }
> if(!r->avail[1]){
> no_B[0] = no_B[1] = 1;
> }else{
> no_B[0] = no_B[1] = 0;
> if(r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_FORWARD && r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_BIDIR)
> no_B[0] = 1;
> if(r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - s->mb_stride] != RV34_MB_B_BIDIR)
> no_B[1] = 1;
> if(!no_B[0]){
> B[0][0] = s->current_picture_ptr->motion_val[0][mv_pos - s->b8_stride][0];
> B[0][1] = s->current_picture_ptr->motion_val[0][mv_pos - s->b8_stride][1];
> }
> if(!no_B[1]){
> B[1][0] = s->current_picture_ptr->motion_val[1][mv_pos - s->b8_stride][0];
> B[1][1] = s->current_picture_ptr->motion_val[1][mv_pos - s->b8_stride][1];
> }
> }
> if(r->avail[2]){
> no_C[0] = no_C[1] = 0;
> if(r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_FORWARD && r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_BIDIR)
> no_C[0] = 1;
> if(r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - s->mb_stride + 1] != RV34_MB_B_BIDIR)
> no_C[1] = 1;
> c_mv_pos = mv_pos - s->b8_stride + 2;
> }else if(r->avail[3]){
> no_C[0] = no_C[1] = 0;
> if(r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_FORWARD && r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_BIDIR)
> no_C[0] = 1;
> if(r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_BACKWARD && r->mb_type[mb_pos - s->mb_stride - 1] != RV34_MB_B_BIDIR)
> no_C[1] = 1;
> c_mv_pos = mv_pos - s->b8_stride - 1;
> }else{
> no_C[0] = no_C[1] = 1;
> c_mv_pos = 0;
> }
> if(!no_C[0]){
> C[0][0] = s->current_picture_ptr->motion_val[0][c_mv_pos][0];
> C[0][1] = s->current_picture_ptr->motion_val[0][c_mv_pos][1];
> }
> if(!no_C[1]){
> C[1][0] = s->current_picture_ptr->motion_val[1][c_mv_pos][0];
> C[1][1] = s->current_picture_ptr->motion_val[1][c_mv_pos][1];
> }
!no why? not yes!
this double negation obfuscated the code ...
[...]
> default:
> no_A[0] = no_A[1] = no_B[0] = no_B[1] = no_C[0] = no_C[1] = 1;
> }
>
> mx[0] += r->dmv[0][0];
> my[0] += r->dmv[0][1];
> mx[1] += r->dmv[1][0];
> my[1] += r->dmv[1][1];
> for(j = 0; j < 2; j++){
> for(i = 0; i < 2; i++){
> s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][0] = mx[0];
> s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][1] = my[0];
> s->current_picture_ptr->motion_val[1][mv_pos + i + j*s->b8_stride][0] = mx[1];
> s->current_picture_ptr->motion_val[1][mv_pos + i + j*s->b8_stride][1] = my[1];
> }
> }
> }
the no_A[0] = ... is useless
[...]
> Y = s->dest[0] + xoff + yoff*s->linesize;
> U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
> V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
vertical align nitpick
Y = s->dest[0] + xoff + yoff *s-> linesize;
U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
> if(block_type == RV34_MB_P_16x8){
> qpel_mc[1][dxy](Y, srcY, s->linesize);
> Y += 8;
> srcY += 8;
> qpel_mc[1][dxy](Y, srcY, s->linesize);
> chroma_mc[0] (U, srcU, s->uvlinesize, 4, uvmx, uvmy);
> chroma_mc[0] (V, srcV, s->uvlinesize, 4, uvmx, uvmy);
> }else if(block_type == RV34_MB_P_8x16){
> qpel_mc[1][dxy](Y, srcY, s->linesize);
> Y += 8 * s->linesize;
> srcY += 8 * s->linesize;
> qpel_mc[1][dxy](Y, srcY, s->linesize);
> chroma_mc[1] (U, srcU, s->uvlinesize, 8, uvmx, uvmy);
> chroma_mc[1] (V, srcV, s->uvlinesize, 8, uvmx, uvmy);
> }else if(block_type == RV34_MB_P_8x8){
> qpel_mc[1][dxy](Y, srcY, s->linesize);
> chroma_mc[1] (U, srcU, s->uvlinesize, 4, uvmx, uvmy);
> chroma_mc[1] (V, srcV, s->uvlinesize, 4, uvmx, uvmy);
> }else{
> qpel_mc[0][dxy](Y, srcY, s->linesize);
> chroma_mc[0] (U, srcU, s->uvlinesize, 8, uvmx, uvmy);
> chroma_mc[0] (V, srcV, s->uvlinesize, 8, uvmx, uvmy);
> }
if(block_type == RV34_MB_P_16x8){
qpel_mc[1][dxy](Y, srcY, s->linesize);
Y += 8;
srcY += 8;
}else if(block_type == RV34_MB_P_8x16){
qpel_mc[1][dxy](Y, srcY, s->linesize);
Y += 8 * s->linesize;
srcY += 8 * s->linesize;
}
qpel_mc[!is16x16][dxy](Y, srcY, s->linesize);
chroma_mc[2-width] (U, srcU, s->uvlinesize, height*4, uvmx, uvmy);
chroma_mc[2-width] (V, srcV, s->uvlinesize, height*4, uvmx, uvmy);
[...]
> case RV34_MB_P_16x8:
> case RV34_MB_P_8x16:
> case RV34_MB_B_BIDIR:
> rv34_pred_mv(r, block_type, 0);
> rv34_pred_mv(r, block_type, 1);
i do not think that using the subblock number to indicate the mv direction is
a good idea, then i dont mind as long as it doesnt complicate the code ...
[...]
> for(j = 0; j < 4; j++){
> no_left = !r->avail[0];
> YY = Y;
> for(i = 0; i < 4; i++, cbp >>= 1, YY += 4){
> no_topright = no_up || (i==3 && j) || (i==3 && !j && (s->mb_x-1) == s->mb_width);
> rv34_pred_4x4_block(r, YY, s->linesize, ittrans[intra_types[i]], no_up, no_left, i || (j==3), no_topright);
> no_left = 0;
> if(!(cbp & 1)) continue;
> rv34_add_4x4_block(YY, s->linesize, s->block[(i>>1)+(j&2)], (i&1)*4+(j&1)*32);
if(cbp)
rv34_add_4x4_block()
> }
> no_up = 0;
> Y += s->linesize * 4;
Y += s->linesize * 4 - 4*4;
and YY becomes unneeded
[...]
> dsp->add_pixels_clamped(s->block[0], Y, s->current_picture.linesize[0]);
> dsp->add_pixels_clamped(s->block[1], Y + 8, s->current_picture.linesize[0]);
> Y += s->current_picture.linesize[0] * 8;
> dsp->add_pixels_clamped(s->block[2], Y, s->current_picture.linesize[0]);
> dsp->add_pixels_clamped(s->block[3], Y + 8, s->current_picture.linesize[0]);
can be vertically aligned
[...]
> if(!r->is16){
> if(r->decode_intra_types(r, gb, intra_types) < 0)
> return -1;
> r->luma_vlc = 1;
> }else{
> t = get_bits(gb, 2);
> for(i = 0; i < 16; i++)
> intra_types[(i & 3) + (i>>2) * s->b4_stride] = t;
> r->luma_vlc = 2;
> }
if(!is16){
B
}else{
A
}
is less readable than
if(is16){
A
}else{
B
}
IMHO
[...]
> if(!r->is16 && !(cbp & 1)) continue;
> blknum = ((i & 2) >> 1) + ((i & 8) >> 2);
> blkoff = ((i & 1) << 2) + ((i & 4) << 3);
> if(cbp & 1)
> rv34_decode_block(s->block[blknum] + blkoff, gb, r->cur_vlcs, r->luma_vlc, 0);
> if((cbp & 1) || r->is16){
how exactly can this be false here? if it cant the check is useless
[...]
> static int check_slice_end(RV34DecContext *r, MpegEncContext *s)
> {
> int bits;
> if(r->s.mb_skip_run > 1)
> return 0;
> if(s->mb_y >= s->mb_height)
> return 1;
and
> while(!check_slice_end(r, s) && s->mb_num_left-- && s->mb_y < s->mb_height) {
hmm i dunno, maybe iam tired but this somehow looks hackish
cant you implement the mb_num_left-- if its needed at a sane point?
[...]
> *last = 0;
> if(s->mb_y >= s->mb_height)
> *last = 1;
> if(r->bits > get_bits_count(gb) && show_bits(gb, r->bits-get_bits_count(gb)))
> *last = 1;
>
> return 0;
what about using the return value to return the "lastness" ...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Good people do not need laws to tell them to act responsibly, while bad
people will find a way around the laws. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20071208/4639aa5f/attachment.pgp>
More information about the ffmpeg-devel
mailing list