[FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8

Rémi Denis-Courmont remi at remlab.net
Sat Jul 13 16:01:52 EEST 2024


---
 libavcodec/riscv/Makefile        |  3 +-
 libavcodec/riscv/h264addpx_rvv.S | 89 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/h264dsp_init.c  | 11 ++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/h264addpx_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 92e1544e76..0bbdd38116 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -31,7 +31,8 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o
 OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
 RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
 OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
-RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o
+RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
+                              riscv/h264idct_rvv.o
 OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
 RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
 OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
new file mode 100644
index 0000000000..fd36bd4896
--- /dev/null
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+
+        .macro  sx rd, addr /* store integer register rd to addr with the XLEN-wide store */
+#if (__riscv_xlen == 32)
+        sw      \rd, \addr /* 32-bit store */
+#elif (__riscv_xlen == 64)
+        sd      \rd, \addr /* 64-bit store */
+#else
+        sq      \rd, \addr /* any other XLEN: 128-bit store mnemonic */
+#endif
+        .endm
+
+func ff_h264_add_pixels4_8_rvv, zve32x /* a0 = dst, a1 = block, a2 = stride (order of the C prototype) */
+        vsetivli        zero, 4, e8, mf4, ta, ma /* vl = 4 */
+        vlse32.v        v8, (a0), a2 /* load 4 32-bit elements from dst, a2 bytes apart (4 bytes per row) */
+        vsetivli        zero, 4 * 4, e8, m1, ta, ma /* vl = 16: view v8 as 16 separate bytes */
+        vle16.v         v16, (a1) /* load 16 contiguous 16-bit values from block */
+        .equ    offset, 0
+        .rept   256 / __riscv_xlen /* zero the 32 bytes of block that were just loaded */
+        sx      zero, offset(a1)
+        .equ    offset, offset + (__riscv_xlen / 8)
+        .endr
+        vncvt.x.x.w     v24, v16 /* narrow 16-bit values to 8 bits, keeping the low byte */
+        vadd.vv         v8, v8, v24 /* byte-wise add, wraps modulo 256 (no saturation) */
+        vsetivli        zero, 4, e8, mf4, ta, ma /* back to vl = 4 for the strided store */
+        vsse32.v        v8, (a0), a2 /* write the 4 32-bit rows back to dst */
+        ret
+endfunc
+
+func ff_h264_add_pixels4_16_rvv, zve64x /* a0 = dst, a1 = block, a2 = stride (order of the C prototype) */
+        vsetivli        zero, 4, e16, mf2, ta, ma /* vl = 4 */
+        vlse64.v        v8, (a0), a2 /* load 4 64-bit elements from dst, a2 bytes apart (8 bytes per row) */
+        vsetivli        zero, 4 * 4, e16, m2, ta, ma /* vl = 16: view v8 as 16 separate 16-bit values */
+        vle32.v         v16, (a1) /* load 16 contiguous 32-bit values from block; NOTE(review): prototype says int16_t *, presumably 32-bit coefficients at high bit depth - confirm */
+        .equ    offset, 0
+        .rept   512 / __riscv_xlen /* zero the 64 bytes of block that were just loaded */
+        sx      zero, offset(a1)
+        .equ    offset, offset + (__riscv_xlen / 8)
+        .endr
+        vncvt.x.x.w     v24, v16 /* narrow 32-bit values to 16 bits, keeping the low half */
+        vadd.vv         v8, v8, v24 /* 16-bit add, wraps modulo 65536 (no saturation) */
+        vsetivli        zero, 4, e16, mf2, ta, ma /* back to vl = 4 for the strided store */
+        vsse64.v        v8, (a0), a2 /* write the 4 64-bit rows back to dst */
+        ret
+endfunc
+
+func ff_h264_add_pixels8_8_rvv, zve64x /* a0 = dst, a1 = block, a2 = stride (order of the C prototype) */
+        li      t0, 8 * 8 /* 64 exceeds the 5-bit immediate of vsetivli, so it is passed via a register */
+        vsetivli        zero, 8, e8, mf2, ta, ma /* vl = 8 */
+        vlse64.v        v8, (a0), a2 /* load 8 64-bit elements from dst, a2 bytes apart (8 bytes per row) */
+        vsetvli         zero, t0, e8, m4, ta, ma /* request vl = 64: view v8 as 64 separate bytes */
+        vle16.v         v16, (a1) /* load 64 contiguous 16-bit values from block */
+        .equ    offset, 0
+        .rept   1024 / __riscv_xlen /* zero the 128 bytes of block that were just loaded */
+        sx      zero, offset(a1)
+        .equ    offset, offset + (__riscv_xlen / 8)
+        .endr
+        vncvt.x.x.w     v24, v16 /* narrow 16-bit values to 8 bits, keeping the low byte */
+        vadd.vv         v8, v8, v24 /* byte-wise add, wraps modulo 256 (no saturation) */
+        vsetivli        zero, 8, e8, mf2, ta, ma /* back to vl = 8 for the strided store */
+        vsse64.v        v8, (a0), a2 /* write the 8 64-bit rows back to dst */
+        ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 825f34443b..a6d06b3ac4 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -61,6 +61,10 @@ void ff_h264_idct8_add_12_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_idct_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
 
+void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
+
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
 extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
 
@@ -96,6 +100,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
 #  if __riscv_xlen == 64
             dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
 #  endif
+            if (flags & AV_CPU_FLAG_RVV_I64)
+                dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
+            dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
         }
 
         if (bit_depth == 9) {
@@ -118,6 +125,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
                 dsp->h264_idct_add = ff_h264_idct_add_14_rvv;
             dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
         }
+        if (bit_depth > 8 && zvl128b) {
+            if (flags & AV_CPU_FLAG_RVV_I64)
+                dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
+        }
 
         dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
     }
-- 
2.45.2



More information about the ffmpeg-devel mailing list