[FFmpeg-devel] [PATCH i+1/N] RV40 loop filter

Sat Mar 8 18:21:57 CET 2008

On Sun, Feb 24, 2008 at 08:09:09PM +0100, Michael Niedermayer wrote:
> On Sat, Feb 23, 2008 at 12:58:44PM +0200, Kostya wrote:
> > Here is the monster.
> > 
> > I believe it's not too far from real loop filter used. 
> 
> [...]
> > +    t = src[0*step] - src[-1*step];
> > +    if(!t) return;
> > +    t = (mult * FFABS(t)) >> 7;
> > +    if(t > 3) return;
> > +    if(flag0 && flag1 && t > 2) return;
> > +    t = src[-1*step] - src[0*step];
> 
> t = src[-1*step] - src[0*step];
> if(!t) return;
> u = (mult * FFABS(t)) >> 7;
> if(u > 3 - (flag0 && flag1))
>     return;
 
done
 
> > +
> > +    if(flag0 && flag1)
> > +        diff = (src[-2*step] - src[1*step] + t*4 + 4) >> 3;
> > +    else
> > +        diff = (t + 1) >> 1;
> 
> if(flag0 && flag1)
>     t+= (src[-2*step] - src[1*step])>>2;
> diff = (t + 1) >> 1;

done
 
> [...]
> > +            if(!sflag){
> > +                src[-1*step] = p0;
> > +                src[ 0*step] = p1;
> > +            }else{
> > +                if((src[-1*step] - p0) >= -v88 && (src[-1*step] - p0) <= v88)
> > +                    src[-1*step] = p0;
> > +                else
> > +                    src[-1*step] = p1;
> > +                if((src[ 0*step] - p1) >= -v88 && (src[ 0*step] - p1) <= v88)
> > +                    src[ 0*step] = p1;
> > +                else
> > +                    src[ 0*step] = src[-1*step];
> > +            }
> 
> if(!sflag || FFABS(src[-1*step] - p0) <= v88)
>     src[-1*step] = p0;
> else
>     src[-1*step] = p1;
> 
> if(!sflag || FFABS(src[ 0*step] - p1) <= v88)
>     src[ 0*step] = p1;
> else
>     src[ 0*step] = src[-1*step];
> 
> and the last line looks suspicious

True, it should be p0 (the same way as in the code below that piece).
Done.

> [...]
> > +            if(!s->first_slice_line){
> > +                cbp2 = r->cbp_chroma[mb_pos - s->mb_stride] >> 2;
> > +                if(cbp & 1){
> > +                    lim1 = rv40_filter_clip_tbl[btype][q];
> > +                    if(!(cbp2 & 1))
> > +                        lim1 = lim0;
> > +                    rv40_h_loop_filter(U, s->uvlinesize, 0, lim0, lim1, alpha, beta, q, 1, 1);
> > +                }
> > +                if(cbp & 0x10){
> > +                    lim1 = rv40_filter_clip_tbl[btype][q];
> > +                    if(!(cbp2 & 0x10))
> > +                        lim1 = lim0;
> > +                    rv40_h_loop_filter(V, s->uvlinesize, 0, lim0, lim1, alpha, beta, q, 1, 1);
> > +                }
> > +            }
> > +            if(s->mb_x){
> > +                cbp2 = r->cbp_chroma[mb_pos - 1] >> 1;
> > +                if(cbp & 1){
> > +                    lim1 = rv40_filter_clip_tbl[btype][q];
> > +                    if(!(cbp2 & 1))
> > +                        lim1 = lim0;
> > +                    rv40_v_loop_filter(U, s->uvlinesize, 0, lim0, lim1, alpha, beta, q, 1, 1);
> > +                }
> > +                if(cbp & 0x10){
> > +                    lim1 = rv40_filter_clip_tbl[btype][q];
> > +                    if(!(cbp2 & 0x10))
> > +                        lim1 = lim0;
> > +                    rv40_v_loop_filter(V, s->uvlinesize, 0, lim0, lim1, alpha, beta, q, 1, 1);
> > +                }
> > +            }
> > +            if(!s->first_slice_line){
> > +                cbp2 = r->cbp_chroma[mb_pos - s->mb_stride] >> 3;
> > +                if(cbp & 1){
> > +                    lim1 = rv40_filter_clip_tbl[btype][q];
> > +                    if(!(cbp2 & 2))
> > +                        lim1 = lim0;
> > +                    rv40_h_loop_filter(U + 4, s->uvlinesize, 0, lim0, lim1, alpha, beta, q, 1, 1);
> > +                }
> > +                if(cbp & 0x10){
> > +                    lim1 = rv40_filter_clip_tbl[btype][q];
> > +                    if(!(cbp2 & 0x20))
> > +                        lim1 = lim0;
> > +                    rv40_h_loop_filter(V + 4, s->uvlinesize, 0, lim0, lim1, alpha, beta, q, 1, 1);
> > +                }
> > +            }
> 
> code duplication?

It is all done in order to prepare the edge filter calls.
I remade it with macros instead, to hide the common flow
(and I'm positive an inline function would be messier).

> [...]
> -- 
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
-------------- next part --------------
Index: libavcodec/rv40.c
===================================================================

--- libavcodec/rv40.c	(revision 12129)
+++ libavcodec/rv40.c	(working copy)
@@ -247,7 +247,235 @@
     return 0;
 }
 
+#define CLIP_SYMM(a, b) av_clip(a, -(b), b) /* calls av_clip() with the bounds -(b) and b; note that b is expanded twice */
 /**
+ * Weaker deblocking of one line of pixels: src[-1*step] and src[0*step] lie next to the edge, S0..S3 are pixel differences supplied by the caller.
+ */
+static inline void rv40_weak_loop_filter(uint8_t *src, const int step,
+                            const int flag0, const int flag1, const int mult,
+                            const int lim0, const int lim1, const int lim2, const int thr1,
+                            const int S0, const int S1, const int S2, const int S3)
+{
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* lookup table applied to every value written back (presumably clamps to the pixel range) */
+    int t, u, diff;
+
+    t = src[0*step] - src[-1*step]; /* step across the edge; with this sign the +diff/-diff updates below shrink it */
+    if(!t) return; /* both edge pixels are equal, nothing to do */
+    u = (mult * FFABS(t)) >> 7;
+    if(u > 3 - (flag0 && flag1)) return; /* scaled step above 3 (above 2 when both flags are set): leave the line untouched */
+
+    if(flag0 && flag1)
+        t += (src[-2*step] - src[1*step]) >> 2; /* both flags set: also use the next pixel on each side */
+    diff = CLIP_SYMM((t + 1) >> 1, lim2); /* correction for the two edge pixels, limited by lim2 */
+    src[-1*step] = cm[src[-1*step] + diff];
+    src[ 0*step] = cm[src[ 0*step] - diff];
+    if(FFABS(S1) <= thr1 && flag0){ /* second pixel on the src[-2*step] side, correction limited by lim1 */
+        t = (S0 + S1 - diff) >> 1;
+        src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim1)];
+    }
+    if(FFABS(S3) <= thr1 && flag1){ /* second pixel on the src[1*step] side, correction limited by lim0 */
+        t = (S2 + S3 + diff) >> 1;
+        src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim0)];
+    }
+}
+
+/**
+ * This macro is used for calculating 25*x0+26*x1+26*x2+26*x3+25*x4
+ * or 25*x0+26*x1+51*x2+26*x3 (26 times four consecutive values and src[last*step], minus src[last*step] and src[sub*step])
+ * @param  sub - index of the value with coefficient = 25
+ * @param last - index of the value with coefficient 25 or 51
+ */
+#define RV40_STRONG_FILTER(src, step, start, last, sub) \
+    (26*((src)[(start)*(step)] + (src)[((start)+1)*(step)] + (src)[((start)+2)*(step)] + (src)[((start)+3)*(step)] + (src)[(last)*(step)]) - (src)[(last)*(step)] - (src)[(sub)*(step)])
+/**
+ * Deblocking filter, the alternated version from JVT-A003r1 H.26L draft; filters four lines (stride apart) across one edge.
+ */
+static inline void rv40_adaptive_loop_filter(uint8_t *src, const int step, const int stride, const int dmode, const int lim0, const int lim1, const int mult, const int thr0, const int thr1, const int chroma, const int edge)
+{
+    int diffs[4][4]; /* per-line pixel differences on both sides of the edge, also handed to the weak filter */
+    int s0 = 0, s1 = 0, s2 = 0, s3 = 0; /* the same differences summed over the four lines */
+    uint8_t *ptr;
+    int flag0 = 1, flag1 = 1;
+    int llim0 = 3, llim1 = 3;
+    int i, t, sflag;
+    int p0, p1;
+    int lims;
+
+    for(i = 0, ptr = src; i < 4; i++, ptr += stride){
+        diffs[i][0] = ptr[-2*step] - ptr[-1*step];
+        diffs[i][1] = ptr[-2*step] - ptr[-3*step];
+        diffs[i][2] = ptr[ 1*step] - ptr[ 0*step];
+        diffs[i][3] = ptr[ 1*step] - ptr[ 2*step];
+        s0 += diffs[i][0];
+        s1 += diffs[i][1];
+        s2 += diffs[i][2];
+        s3 += diffs[i][3];
+    }
+    if(FFABS(s0) >= (thr0<<2)){ /* the negative-index side varies too much next to the edge */
+        llim0 = 1;
+        flag0 = 0;
+    }
+    if(FFABS(s2) >= (thr0<<2)){ /* same test for the non-negative-index side */
+        llim1 = 1;
+        flag1 = 0;
+    }
+    if(llim0 + llim1 == 2) /* both sides failed the test: leave the edge unfiltered */
+        return;
+
+    if(!edge) /* without edge set the strong filter below is never selected */
+        flag0 = flag1 = 0;
+    if(flag0 && FFABS(s1) >= thr1)
+        flag0 = 0;
+    if(flag1 && FFABS(s3) >= thr1)
+        flag1 = 0;
+
+    lims = (lim0 + lim1 + llim0 + llim1) >> 1;
+    if(flag0 + flag1 == 2){ /* strong filtering */
+        for(i = 0; i < 4; i++, src += stride){
+            t = src[0*step] - src[-1*step];
+            if(!t) continue;
+            sflag = (mult * FFABS(t)) >> 7;
+            if(sflag > 1) continue; /* only lines with sflag 0 or 1 are filtered */
+
+            p0 = (RV40_STRONG_FILTER(src, step, -3, 1, -3) + rv40_dither_l[dmode + i]) >> 7; /* window -3..1, centred on src[-1*step] */
+            p1 = (RV40_STRONG_FILTER(src, step, -2, 2, -2) + rv40_dither_r[dmode + i]) >> 7; /* mirrored window -2..2, centred on src[0*step] */
+            if(!sflag || FFABS(src[-1*step] - p0) <= lims) /* with sflag set, p0 is taken only if it stays within lims of the old value */
+                src[-1*step] = p0;
+            else
+                src[-1*step] = p1;
+            if(!sflag || FFABS(src[ 0*step] - p1) <= lims)
+                src[ 0*step] = p1;
+            else
+                src[ 0*step] = p0;
+            p0 = (RV40_STRONG_FILTER(src, step, -4, 0, -4) + rv40_dither_l[dmode + i]) >> 7; /* window -4..0, centred on src[-2*step]; reads the values just written */
+            p1 = (RV40_STRONG_FILTER(src, step, -1, 3, -1) + rv40_dither_r[dmode + i]) >> 7; /* window -1..3, centred on src[1*step] */
+            if(!sflag || FFABS(src[-2*step] - p0) <= lims)
+                src[-2*step] = p0;
+            else
+                src[-2*step] += lims;
+            if(!sflag || FFABS(src[ 1*step] - p1) <= lims)
+                src[ 1*step] = p1;
+            else
+                src[ 1*step] += lims;
+            if(!chroma){ /* chroma == 0: the third pixel on each side is smoothed as well */
+                src[-3*step] = (RV40_STRONG_FILTER(src, step, -4, -1, -3) + 64) >> 7;
+                src[ 2*step] = (RV40_STRONG_FILTER(src, step,  0,  0,  2) + 64) >> 7;
+            }
+        }
+    }else if(llim0 == 3 && llim1 == 3) /* both sides passed the thr0 test: weak filter with both flags and full limits */
+        for(i = 0; i < 4; i++, src += stride)
+            rv40_weak_loop_filter(src, step, 1, 1, mult, lim0, lim1, lims, thr1,
+                                  diffs[i][0], diffs[i][1], diffs[i][2], diffs[i][3]);
+    else /* exactly one side passed: weak filter with per-side flags and halved limits */
+        for(i = 0; i < 4; i++, src += stride)
+            rv40_weak_loop_filter(src, step, llim0==3, llim1==3, mult, lim0>>1, lim1>>1, lims>>1, thr1,
+                                  diffs[i][0], diffs[i][1], diffs[i][2], diffs[i][3]);
+}
+
+static void rv40_v_loop_filter(uint8_t *src, int stride, int dmode, int lim0, int lim1, int mult, int thr0, int thr1, int chroma, int edge){
+    rv40_adaptive_loop_filter(src, 1, stride, dmode, lim0, lim1, mult, thr0, thr1, chroma, edge); /* step = 1: filtered pixels are adjacent bytes, the four lines are stride apart */
+}
+static void rv40_h_loop_filter(uint8_t *src, int stride, int dmode, int lim0, int lim1, int mult, int thr0, int thr1, int chroma, int edge){
+    rv40_adaptive_loop_filter(src, stride, 1, dmode, lim0, lim1, mult, thr0, thr1, chroma, edge); /* step = stride: filtered pixels are stride apart, the four lines are adjacent bytes */
+}
+
+/**
+ * Common code used in calling loop filter
+ * @param    cond filter calling condition
+ * @param limcond lim1 modifying condition
+ * @param     dir filter direction (h or v)
+ */
+#define LUMA_EDGE_FILTER(cond, limcond, dir, dst, dither, edge) \
+    if(cond){\
+        lim1 = rv40_filter_clip_tbl[btype][q];\
+        if(!(limcond))\
+            lim1 = lim0;\
+        rv40_## dir ##_loop_filter(dst, s->linesize, dither, lim0, lim1, alpha, beta, q, 0, edge);\
+    }\
+
+#define CHROMA_EDGE_FILTER(cond, cbp2src, mask, limmask, dir, dstoff, dither, edge) \
+    if(cond){\
+        cbp2 = cbp2src;\
+        if(cbp & mask){\
+            lim1 = rv40_filter_clip_tbl[btype][q];\
+            if(!(cbp2 & limmask))\
+                lim1 = lim0;\
+            rv40_## dir ##_loop_filter(U + dstoff, s->uvlinesize, dither, lim0, lim1, alpha, beta, q, 1, edge);\
+        }\
+        if(cbp & (mask << 4)){\
+            lim1 = rv40_filter_clip_tbl[btype][q];\
+            if(!(cbp2 & (limmask << 4)))\
+                lim1 = lim0;\
+            rv40_## dir ##_loop_filter(V + dstoff, s->uvlinesize, dither, lim0, lim1, alpha, beta, q, 1, edge);\
+        }\
+    }\
+
+static void rv40_loop_filter(RV34DecContext *r) /* walks all macroblocks of the current picture and calls the edge filters for coded blocks */
+{
+    MpegEncContext *s = &r->s;
+    int mb_pos;
+    int i, j;
+    uint8_t *Y, *U, *V;
+    int alpha, beta, lim0, lim1;
+    int q, btype, cbp, cbp2;
+
+    s->first_slice_line = 1; /* NOTE(review): not cleared anywhere in this function, so the !s->first_slice_line filters never run unless a helper resets it - confirm */
+    s->mb_x= 0;
+    s->mb_y= 0;
+    mb_pos = 0;
+    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++){
+        ff_init_block_index(s);
+        mb_pos = s->mb_y * s->mb_stride;
+        for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++, mb_pos++){
+            ff_update_block_index(s);
+
+            q = s->current_picture_ptr->qscale_table[mb_pos];
+            cbp = r->cbp_luma[mb_pos];
+            alpha = rv40_alpha_tab[q];
+            beta  = rv40_beta_tab [q];
+            
+            btype = IS_INTRA(s->current_picture_ptr->mb_type[mb_pos]) ? 2 : 1;
+            
+            cbp2 = s->first_slice_line ? 0 : r->cbp_luma[mb_pos - s->mb_stride];
+            for(i = 0; i < 4; i++){ /* bits 0..3 of cbp: the first row of 4x4 luma blocks */
+                if(!((cbp >> i) & 1)) continue;
+                Y = s->dest[0] + i*4;
+                lim0 = rv40_filter_clip_tbl[1][q];
+                LUMA_EDGE_FILTER(!s->first_slice_line, (cbp2 >> (16 + i)) & 1, h, Y, i*4, 1); /* NOTE(review): tests bit 16+i of the upper macroblock's cbp - confirm the bit layout */
+                LUMA_EDGE_FILTER(s->mb_x || i, (!i && (r->cbp_luma[mb_pos - 1] & 8)) || (i && ((cbp >> (i-1)) & 1)),
+                                 v, Y, i*4, !i); /* i == 0: the neighbour is bit 3 of the macroblock to the left, only read when mb_x != 0 */
+                LUMA_EDGE_FILTER(1, (cbp >> (i+4)) & 1, h, Y+4*s->linesize, i*4, 0);
+            }
+            for(j = 4; j < 16; j += 4){ /* remaining three rows of blocks; j is the bit offset of the row */
+                for(i = 0; i < 4; i++){
+                    if(!((cbp >> (i + j)) & 1)) continue;
+                    Y = s->dest[0] + i*4 + j*s->linesize;
+                    lim0 = rv40_filter_clip_tbl[1][q];
+                    LUMA_EDGE_FILTER(i || s->mb_x, (!i && ((r->cbp_luma[mb_pos - 1] >> j) & 8)) || (i && ((cbp >> (i-1)) & 1)),
+                                     v, Y, i+j, !i); /* NOTE(review): for i != 0 the neighbour bit i-1 ignores the row offset j - confirm */
+                    LUMA_EDGE_FILTER(j != 12, (cbp >> (j + 4)) & 1, h, Y + 4 * s->linesize, i+j, 0);
+                }
+            }
+
+            cbp = r->cbp_chroma[mb_pos]; /* from here on cbp holds the chroma pattern */
+            lim0 = rv40_filter_clip_tbl[1][q];
+            U = s->dest[1];
+            V = s->dest[2];
+            CHROMA_EDGE_FILTER(!s->first_slice_line, r->cbp_chroma[mb_pos - s->mb_stride] >> 2, 0x01, 0x01, h, 0, 0, 1);
+            CHROMA_EDGE_FILTER(s->mb_x, r->cbp_chroma[mb_pos - 1] >> 1, 0x01, 0x01, v, 0, 0, 1);
+            CHROMA_EDGE_FILTER(!s->first_slice_line, r->cbp_chroma[mb_pos - s->mb_stride] >> 3, 0x01, 0x02, h, 4, 4, 1);
+            CHROMA_EDGE_FILTER(1, cbp, 0x02, 0x01, v, 4, 4, 0);
+            U += 4*s->uvlinesize; /* move on to the lower half of the chroma blocks */
+            V += 4*s->uvlinesize;
+            CHROMA_EDGE_FILTER(s->mb_x, r->cbp_chroma[mb_pos - 1] >> 3, 0x04, 0x01, v, 0, 8, 1);
+            CHROMA_EDGE_FILTER(1, cbp, 0x04, 0x01, h, 0, 8, 0);
+            CHROMA_EDGE_FILTER(1, cbp, 0x08, 0x02, h, 4, 12, 0);
+            CHROMA_EDGE_FILTER(1, cbp, 0x08, 0x04, v, 4, 12, 0);
+        }
+    }
+}
+
+/**
  * Initialize decoder.
  */
 static int rv40_decode_init(AVCodecContext *avctx)
@@ -261,6 +489,7 @@
     r->parse_slice_header = rv40_parse_slice_header;
     r->decode_intra_types = rv40_decode_intra_types;
     r->decode_mb_info     = rv40_decode_mb_info;
+    r->loop_filter        = rv40_loop_filter;
     r->luma_dc_quant_i = rv40_luma_dc_quant[0];
     r->luma_dc_quant_p = rv40_luma_dc_quant[1];
     return 0;