[FFmpeg-devel] [PATCH] RV40 Loop Filter, hopefully final version

Sun Nov 23 03:01:15 CET 2008

On Sat, Nov 22, 2008 at 04:11:15PM +0200, Kostya wrote:
> This has been tested and verified to be bitexact if correct
> MC functions are used.

> Index: rv40.c
> ===================================================================
> --- rv40.c	(revision 15827)
> +++ rv40.c	(working copy)

> @@ -285,7 +285,374 @@
>      }
>  }
>  
> +static inline void rv40_adaptive_loop_filter(uint8_t *src, const int step,
> +                                             const int stride, const int dmode,
> +                                             const int lim_q1, const int lim_p1,
> +                                             const int alpha,
> +                                             const int beta, const int beta2,
> +                                             const int chroma, const int edge)
> +{
> +    int diff_p1p0[4], diff_q1q0[4], diff_p1p2[4], diff_q1q2[4];
> +    int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
> +    uint8_t *ptr;
> +    int flag_strong0 = 1, flag_strong1 = 1;
> +    int strength0 = 3, strength1 = 3;
> +    int i;
> +    int lims;
> +
> +    for(i = 0, ptr = src; i < 4; i++, ptr += stride){
> +        diff_p1p0[i] = ptr[-2*step] - ptr[-1*step];
> +        diff_q1q0[i] = ptr[ 1*step] - ptr[ 0*step];
> +        s0 += diff_p1p0[i];
> +        s1 += diff_q1q0[i];
> +    }
> +    if(FFABS(s0) >= (beta<<2))
> +        strength0 = 1;
> +    if(FFABS(s1) >= (beta<<2))
> +        strength1 = 1;
> +    if(strength0 + strength1 <= 2)
> +        return;
> +
> +    for(i = 0, ptr = src; i < 4; i++, ptr += stride){
> +        diff_p1p2[i] = ptr[-2*step] - ptr[-3*step];
> +        diff_q1q2[i] = ptr[ 1*step] - ptr[ 2*step];
> +        s2 += diff_p1p2[i];
> +        s3 += diff_q1q2[i];
> +    }

The variable names of s0 s1 s2 s3 are poor, the give no hint on what
they are

> +
> +    if(!edge){
> +        flag_strong0 = flag_strong1 = 0;
> +    }else{
> +        flag_strong0 = (strength0 == 3) && (FFABS(s2) < beta2);
> +        flag_strong1 = (strength1 == 3) && (FFABS(s3) < beta2);
> +    }
> +
> +    lims = (lim_q1 + lim_p1 + strength0 + strength1) >> 1;
> +    if(flag_strong0 && flag_strong1){ /* strong filtering */
> +        for(i = 0; i < 4; i++, src += stride){
> +            int sflag, p0, q0, p1, q1;
> +            int t = src[0*step] - src[-1*step];
> +
> +            if(!t) continue;
> +            sflag = (alpha * FFABS(t)) >> 7;
> +            if(sflag > 1) continue;
> +

> +            p0 = (25*src[-3*step] + 26*src[-2*step]
> +                + 26*src[-1*step]
> +                + 26*src[ 0*step] + 25*src[ 1*step] + rv40_dither_l[dmode + i]) >> 7;
> +            q0 = (25*src[-2*step] + 26*src[-1*step]
> +                + 26*src[ 0*step]
> +                + 26*src[ 1*step] + 25*src[ 2*step] + rv40_dither_r[dmode + i]) >> 7;
> +            if(sflag){
> +                p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
> +                q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
> +            }
> +            p1 = (25*src[-4*step] + 26*src[-3*step]
> +                + 26*src[-2*step]
> +                + 26*p0           + 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
> +            q1 = (25*src[-1*step] + 26*q0
> +                + 26*src[ 1*step]
> +                + 26*src[ 2*step] + 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
> +            if(sflag){
> +                p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
> +                q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
> +            }
> +            src[-2*step] = p1;
> +            src[-1*step] = p0;
> +            src[ 0*step] = q0;
> +            src[ 1*step] = q1;
> +            if(!chroma){
> +                src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + 51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
> +                src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
> +            }

this looks much nicer than before ...

[...]
>  /**
> + * RV40 loop filtering function
> + */
> +static void rv40_loop_filter(RV34DecContext *r)
> +{
> +    MpegEncContext *s = &r->s;
> +    int mb_pos;
> +    int i, j, k;
> +    uint8_t *Y, *C;
> +    int alpha, beta, betaY, betaC;
> +    int q;
> +    int mbtype[4];   ///< current macroblock and its neighbours types

> +    /**
> +     * macroblock filtering strength
> +     * 2 for intra coded MB and MB with DCs coded separately, 1 otherwise
> +     */
> +    int strength[4];

so its just a flag? if so is_intra seems a better name, otherwise the comment
is not correct ...

> +    int clip[4];     ///< MB filter clipping value calculated from filtering strength
> +    /**
> +     * coded block patterns for luma part of current macroblock and its neighbours
> +     * Format:
> +     * LSB corresponds to the top left block,
> +     * each nibble represents one row of subblocks.
> +     */
> +    int cbp[4];
> +    /**
> +     * coded block patterns for chroma part of current macroblock and its neighbours
> +     * Format is the same as for luma with two subblocks in a row.
> +     */
> +    int uvcbp[4][2];
> +    /**
> +     * This mask represents the pattern of luma subblocks that should be filtered
> +     * in addition to the coded ones because because they lie at the edge of
> +     * 8x8 block with different enough motion vectors
> +     */
> +    int mvmasks[4];
> +
> +    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++){
> +        mb_pos = s->mb_y * s->mb_stride;
> +        for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++, mb_pos++){

> +            int btype = s->current_picture_ptr->mb_type[mb_pos];

i dont think btype is a good name for mb_type

[...]

> +            y_h_deblock =   cbp[POS_CUR]
> +                        | ((cbp[POS_BOTTOM]     & MASK_Y_TOP_ROW)  << 16)
[...]
[...]
> +                        |   mvmasks[POS_CUR]
> +                        | ((mvmasks[POS_BOTTOM] & MASK_Y_TOP_ROW)  << 16);
[...]
> +            cbp[POS_CUR] =   cbp[POS_CUR]
> +                         |  (cbp[POS_BOTTOM]                       << 16)
> +                         |   mvmasks[POS_CUR]
> +                         | ((mvmasks[POS_BOTTOM] & MASK_Y_TOP_ROW) << 16);

something looks redundant here
also i have my doubts about the need of the &

> +            /* Calculating chroma patterns is similar and easier since there is
> +             * no motion vector pattern for them.
> +             */
> +            for(i = 0; i < 2; i++){

> +                c_v_deblock[i] = ((uvcbp[POS_CUR][i]                        << 1) & ~MASK_C_RIGHT_COL)
> +                               | ((uvcbp[POS_LEFT][i]   & MASK_C_RIGHT_COL) >> 1)

> +                               | ((uvcbp[POS_BOTTOM][i] & MASK_C_TOP_ROW)   << 4)
> +                               | uvcbp[POS_CUR][i];
> +                c_h_deblock[i] = ((uvcbp[POS_BOTTOM][i] & MASK_C_TOP_ROW)   << 4)
> +                               | uvcbp[POS_CUR][i]
[...]
> +                uvcbp[POS_CUR][i] = ((uvcbp[POS_BOTTOM][i] & MASK_C_TOP_ROW) << 4) | uvcbp[POS_CUR][i];

more redudnandies

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Breaking DRM is a little like attempting to break through a door even
though the window is wide open and the only thing in the house is a bunch
of things you dont want and which you would get tomorrow for free anyway
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081123/d0561665/attachment.pgp>