[FFmpeg-devel] [PATCH v2 3/4] hevcdec: move deblock template to h26x/h2656_deblock_template.c

Nuo Mi nuomi2021 at gmail.com
Sat Jan 6 12:51:31 EET 2024


---
 libavcodec/h26x/h2656_deblock_template.c | 99 ++++++++++++++++++++++++
 libavcodec/hevcdsp_template.c            | 96 ++++-------------------
 2 files changed, 116 insertions(+), 79 deletions(-)
 create mode 100644 libavcodec/h26x/h2656_deblock_template.c

diff --git a/libavcodec/h26x/h2656_deblock_template.c b/libavcodec/h26x/h2656_deblock_template.c
new file mode 100644
index 0000000000..8ed95e754d
--- /dev/null
+++ b/libavcodec/h26x/h2656_deblock_template.c
@@ -0,0 +1,99 @@
+
+/*
+ * HEVC/VVC deblocking dsp template
+ *
+ * Copyright (C) 2024 Nuo Mi
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static void FUNC(loop_filter_luma_strong)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
+    const int32_t tc, const int32_t tc2, const int tc3,   // clip magnitudes: tc bounds P2/Q2, tc2 bounds P1/Q1, tc3 bounds P0/Q0
+    const uint8_t no_p, const uint8_t no_q)               // when non-zero, the P (resp. Q) side is left unmodified
+{   // Strong luma deblocking filter applied to 4 consecutive lines; pix advances by ystride after each line.
+    for (int d = 0; d < 4; d++) {
+        const int p3 = P3;   // NOTE(review): P3..Q3 are macros from the including file (not visible here), presumably pix[] accesses offset by xstride - confirm
+        const int p2 = P2;   // all eight samples are copied to locals before any write-back, so the filters below only see unfiltered values
+        const int p1 = P1;
+        const int p0 = P0;
+        const int q0 = Q0;
+        const int q1 = Q1;
+        const int q2 = Q2;
+        const int q3 = Q3;
+        if (!no_p) {
+            P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc3, tc3);   // new value = original + (filtered - original), with the change limited to [-tc3, tc3]
+            P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);   // same scheme, change limited to [-tc2, tc2]
+            P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc, tc);   // same scheme, change limited to [-tc, tc]
+        }
+        if (!no_q) {
+            Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc3, tc3);   // Q side mirrors the P side with p/q roles swapped
+            Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
+            Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc, tc);
+        }
+        pix += ystride;   // step to the next of the four lines
+    }
+}
+
+static void FUNC(loop_filter_luma_weak)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
+    const int32_t tc, const int32_t beta, const uint8_t no_p, const uint8_t no_q, const int nd_p, const int nd_q)   // NOTE(review): beta is never read in this body; nd_p/nd_q > 1 additionally enable the P1/Q1 update
+{   // Weak luma deblocking filter applied to 4 consecutive lines; pix advances by ystride after each line.
+    const int tc_2 = tc >> 1;   // half of tc: clip bound for the P1/Q1 corrections
+    for (int d = 0; d < 4; d++) {
+        const int p2 = P2;   // NOTE(review): P2..Q2 are macros from the including file (not visible here), presumably pix[] accesses offset by xstride - confirm
+        const int p1 = P1;
+        const int p0 = P0;
+        const int q0 = Q0;
+        const int q1 = Q1;
+        const int q2 = Q2;
+        int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;   // rounded offset computed from the four samples p1, p0, q0, q1
+        if (abs(delta0) < 10 * tc) {   // a line whose offset magnitude reaches 10 * tc is left completely untouched
+            delta0 = av_clip(delta0, -tc, tc);   // limit the offset to [-tc, tc]
+            if (!no_p)
+                P0 = av_clip_pixel(p0 + delta0);   // P0 moves by +delta0; av_clip_pixel is defined elsewhere, presumably clamping to the valid sample range
+            if (!no_q)
+                Q0 = av_clip_pixel(q0 - delta0);   // Q0 moves by the opposite amount
+            if (!no_p && nd_p > 1) {   // second P sample is only touched when the caller requested it via nd_p
+                const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
+                P1 = av_clip_pixel(p1 + deltap1);
+            }
+            if (!no_q && nd_q > 1) {   // likewise for the second Q sample via nd_q
+                const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
+                Q1 = av_clip_pixel(q1 + deltaq1);
+            }
+        }
+        pix += ystride;   // step to the next of the four lines
+    }
+}
+
+static void FUNC(loop_filter_chroma_weak)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
+    const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)   // size: number of lines processed; tc: clip bound; no_p/no_q: non-zero leaves that side unmodified
+{   // Weak chroma deblocking filter: adjusts only P0 and Q0 on each of `size` consecutive lines.
+    for (int d = 0; d < size; d++) {
+        int delta0;
+        const int p1 = P1;   // NOTE(review): P1..Q1 are macros from the including file (not visible here), presumably pix[] accesses offset by xstride - confirm
+        const int p0 = P0;
+        const int q0 = Q0;
+        const int q1 = Q1;
+        delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);   // rounded offset from p1, p0, q0, q1, limited to [-tc, tc]
+        if (!no_p)
+            P0 = av_clip_pixel(p0 + delta0);   // av_clip_pixel is defined elsewhere, presumably clamping to the valid sample range
+        if (!no_q)
+            Q0 = av_clip_pixel(q0 - delta0);   // Q0 moves by the opposite amount
+        pix += ystride;   // step to the next line
+    }
+}
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index 2ff1776b92..0de14e9dcf 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -1319,19 +1319,20 @@ static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
 #define TQ2 pix[2  * xstride + 3 * ystride]
 #define TQ3 pix[3  * xstride + 3 * ystride]
 
+#include "h26x/h2656_deblock_template.c"
+
 static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
                                         ptrdiff_t _xstride, ptrdiff_t _ystride,
                                         int beta, const int *_tc,
                                         const uint8_t *_no_p, const uint8_t *_no_q)
 {
-    int d, j;
-    pixel *pix        = (pixel *)_pix;
     ptrdiff_t xstride = _xstride / sizeof(pixel);
     ptrdiff_t ystride = _ystride / sizeof(pixel);
 
     beta <<= BIT_DEPTH - 8;
 
-    for (j = 0; j < 2; j++) {
+    for (int j = 0; j < 2; j++) {
+        pixel* pix     = (pixel*)_pix + j * 4 * ystride;
         const int dp0  = abs(P2  - 2 * P1  + P0);
         const int dq0  = abs(Q2  - 2 * Q1  + Q0);
         const int dp3  = abs(TP2 - 2 * TP1 + TP0);
@@ -1342,10 +1343,7 @@ static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
         const int no_p = _no_p[j];
         const int no_q = _no_q[j];
 
-        if (d0 + d3 >= beta) {
-            pix += 4 * ystride;
-            continue;
-        } else {
+        if (d0 + d3 < beta) {
             const int beta_3 = beta >> 3;
             const int beta_2 = beta >> 2;
             const int tc25   = ((tc * 5 + 1) >> 1);
@@ -1353,63 +1351,16 @@ static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
             if (abs(P3  -  P0) + abs(Q3  -  Q0) < beta_3 && abs(P0  -  Q0) < tc25 &&
                 abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
                                       (d0 << 1) < beta_2 &&      (d3 << 1) < beta_2) {
-                // strong filtering
                 const int tc2 = tc << 1;
-                for (d = 0; d < 4; d++) {
-                    const int p3 = P3;
-                    const int p2 = P2;
-                    const int p1 = P1;
-                    const int p0 = P0;
-                    const int q0 = Q0;
-                    const int q1 = Q1;
-                    const int q2 = Q2;
-                    const int q3 = Q3;
-                    if (!no_p) {
-                        P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
-                        P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
-                        P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
-                    }
-                    if (!no_q) {
-                        Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
-                        Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
-                        Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
-                    }
-                    pix += ystride;
-                }
-            } else { // normal filtering
+                FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q);
+            } else {
                 int nd_p = 1;
                 int nd_q = 1;
-                const int tc_2 = tc >> 1;
                 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
                     nd_p = 2;
                 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
                     nd_q = 2;
-
-                for (d = 0; d < 4; d++) {
-                    const int p2 = P2;
-                    const int p1 = P1;
-                    const int p0 = P0;
-                    const int q0 = Q0;
-                    const int q1 = Q1;
-                    const int q2 = Q2;
-                    int delta0   = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
-                    if (abs(delta0) < 10 * tc) {
-                        delta0 = av_clip(delta0, -tc, tc);
-                        if (!no_p)
-                            P0 = av_clip_pixel(p0 + delta0);
-                        if (!no_q)
-                            Q0 = av_clip_pixel(q0 - delta0);
-                        if (!no_p && nd_p > 1) {
-                            const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
-                            P1 = av_clip_pixel(p1 + deltap1);
-                        }
-                        if (!no_q && nd_q > 1) {
-                            const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
-                            Q1 = av_clip_pixel(q1 + deltaq1);
-                        }
-                    }
-                    pix += ystride;
-                }
+                FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
             }
         }
     }
@@ -1419,32 +1370,19 @@ static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
                                           ptrdiff_t _ystride, const int *_tc,
                                           const uint8_t *_no_p, const uint8_t *_no_q)
 {
-    int d, j, no_p, no_q;
-    pixel *pix        = (pixel *)_pix;
+    int no_p, no_q;
     ptrdiff_t xstride = _xstride / sizeof(pixel);
     ptrdiff_t ystride = _ystride / sizeof(pixel);
+    const int size    = 4;
 
-    for (j = 0; j < 2; j++) {
+    for (int j = 0; j < 2; j++) {
+        pixel *pix   = (pixel *)_pix + j * size * ystride;
         const int tc = _tc[j] << (BIT_DEPTH - 8);
-        if (tc <= 0) {
-            pix += 4 * ystride;
-            continue;
-        }
-        no_p = _no_p[j];
-        no_q = _no_q[j];
-
-        for (d = 0; d < 4; d++) {
-            int delta0;
-            const int p1 = P1;
-            const int p0 = P0;
-            const int q0 = Q0;
-            const int q1 = Q1;
-            delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
-            if (!no_p)
-                P0 = av_clip_pixel(p0 + delta0);
-            if (!no_q)
-                Q0 = av_clip_pixel(q0 - delta0);
-            pix += ystride;
+        if (tc > 0) {
+            no_p = _no_p[j];
+            no_q = _no_q[j];
+
+            FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
         }
     }
 }
-- 
2.25.1



More information about the ffmpeg-devel mailing list