[FFmpeg-devel] [PATCH v2] avcodec/vvc: Don't use large array on stack

Fri Sep 20 06:43:41 EEST 2024

From: Zhao Zhili <zhilizhao at tencent.com>

tmp_array in dmvr_hv takes 33024 bytes on stack, which can be
dangerous.
---
 libavcodec/vvc/ctu.h             |  1 +
 libavcodec/vvc/dsp.h             |  2 +-
 libavcodec/vvc/inter.c           |  2 +-
 libavcodec/vvc/inter_template.c  | 12 +++++++-----
 libavcodec/x86/vvc/vvcdsp_init.c |  8 ++++----
 tests/checkasm/vvc_mc.c          | 17 +++++++++++++----
 6 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index eab4612561..eb3e51c7e5 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -385,6 +385,7 @@ typedef struct VVCLocalContext {
     DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
     DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
     DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR];
+    DECLARE_ALIGNED(32, int16_t, dmvr_tmp)[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
 
     struct {
         int sbt_num_fourths_tb0;                ///< SbtNumFourthsTb0
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index 635ebcafed..3594dfc5f5 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -99,7 +99,7 @@ typedef struct VVCInterDSPContext {
 
     int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
     void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
-        intptr_t mx, intptr_t my, int width);
+        intptr_t mx, intptr_t my, int width, int16_t *tmp);
 } VVCInterDSPContext;
 
 struct VVCLocalContext;
diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 64a9dd1e46..48b633d580 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -806,7 +806,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
         const int wrap_enabled  = fc->ps.pps->r->pps_ref_wraparound_enabled_flag;
 
         MC_EMULATED_EDGE_BILINEAR(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
-        fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w);
+        fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w, lc->dmvr_tmp);
     }
 
     min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h);
diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index c073a73e76..fad1ba801f 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -474,7 +474,8 @@ static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const i
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width,
+    int16_t *tmp)
 {
 #if BIT_DEPTH != 10
     const pixel *src            = (const pixel *)_src;
@@ -502,7 +503,8 @@ static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_s
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width,
+    int16_t *tmp)
 {
     const pixel *src            = (const pixel*)_src;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
@@ -520,7 +522,8 @@ static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width,
+    int16_t *tmp)
 {
     const pixel *src            = (pixel*)_src;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
@@ -539,9 +542,8 @@ static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
 
 //8.5.3.2.2 Luma sample bilinear interpolation process
 static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
-    const int height, const intptr_t mx, const intptr_t my, const int width)
+    const int height, const intptr_t mx, const intptr_t my, const int width, int16_t *tmp_array)
 {
-    int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
     int16_t *tmp                = tmp_array;
     const pixel *src            = (const pixel*)_src;
     const ptrdiff_t src_stride  = _src_stride / sizeof(pixel);
diff --git a/libavcodec/x86/vvc/vvcdsp_init.c b/libavcodec/x86/vvc/vvcdsp_init.c
index f3e2e3a27b..7ff3e2bdff 100644
--- a/libavcodec/x86/vvc/vvcdsp_init.c
+++ b/libavcodec/x86/vvc/vvcdsp_init.c
@@ -90,13 +90,13 @@ AVG_PROTOTYPES(12, avx2)
 
 #define DMVR_PROTOTYPES(bd, opt)                                                                    \
 void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,               \
-     int height, intptr_t mx, intptr_t my, int width);                                              \
+     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
 void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,             \
-     int height, intptr_t mx, intptr_t my, int width);                                              \
+     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
 void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,             \
-     int height, intptr_t mx, intptr_t my, int width);                                              \
+     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
 void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,            \
-     int height, intptr_t mx, intptr_t my, int width);                                              \
+     int height, intptr_t mx, intptr_t my, int width, int16_t *unused);                             \
 
 DMVR_PROTOTYPES( 8, avx2)
 DMVR_PROTOTYPES(10, avx2)
diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
index 754cf19065..591557dfa6 100644
--- a/tests/checkasm/vvc_mc.c
+++ b/tests/checkasm/vvc_mc.c
@@ -333,6 +333,7 @@ static void check_avg(void)
 }
 
 #define SR_RANGE 2
+#define DMVR_TMP_BUF_SIZE ((MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE * sizeof(int16_t))
 static void check_dmvr(void)
 {
     LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
@@ -340,14 +341,20 @@ static void check_dmvr(void)
     LOCAL_ALIGNED_32(uint8_t,  src0, [SRC_BUF_SIZE]);
     LOCAL_ALIGNED_32(uint8_t,  src1, [SRC_BUF_SIZE]);
     const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
+    int16_t *tmp0 = av_mallocz(DMVR_TMP_BUF_SIZE);
+    int16_t *tmp1 = av_mallocz(DMVR_TMP_BUF_SIZE);
 
     VVCDSPContext c;
     declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
-        intptr_t mx, intptr_t my, int width);
+        intptr_t mx, intptr_t my, int width, int16_t *tmp);
+
+    if (!tmp0 || !tmp1)
+        fail();
 
     for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
         ff_vvc_dsp_init(&c, bit_depth);
         randomize_pixels(src0, src1, SRC_BUF_SIZE);
+        randomize_buffers(tmp0, tmp1, DMVR_TMP_BUF_SIZE / 2, UINT32_MAX);
         for (int i = 0; i < 2; i++) {
             for (int j = 0; j < 2; j++) {
                 for (int h = 8; h <= 16; h *= 2) {
@@ -371,8 +378,8 @@ static void check_dmvr(void)
                         if (check_func(c.inter.dmvr[j][i], "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
                             memset(dst0, 0, DST_BUF_SIZE);
                             memset(dst1, 0, DST_BUF_SIZE);
-                            call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
-                            call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
+                            call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w, tmp0);
+                            call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w, tmp1);
                             for (int k = 0; k < pred_h; k++) {
                                 if (memcmp(dst0 + k * dst_stride, dst1 + k * dst_stride, pred_w * sizeof(int16_t))) {
                                     fail();
@@ -380,13 +387,15 @@ static void check_dmvr(void)
                                 }
                             }
 
-                            bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
+                            bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w, tmp1);
                         }
                     }
                 }
             }
         }
     }
+    av_free(tmp0);
+    av_free(tmp1);
     report("dmvr");
 }
 
-- 
2.39.3