[FFmpeg-cvslog] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8
Rémi Denis-Courmont
git at videolan.org
Tue Jul 16 17:28:05 EEST 2024
ffmpeg | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sat Jul 13 15:43:27 2024 +0300| [7744c08240808c8517a9c088b465c15235a34c86] | committer: Rémi Denis-Courmont
lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8
T-Head C908 (cycles):
h264_add_pixels4_8bpp_c: 93.5
h264_add_pixels4_8bpp_rvv_i32: 39.5
h264_add_pixels4_9bpp_c: 87.5
h264_add_pixels4_9bpp_rvv_i64: 50.5
h264_add_pixels4_10bpp_c: 87.5
h264_add_pixels4_10bpp_rvv_i64: 50.5
h264_add_pixels4_12bpp_c: 87.5
h264_add_pixels4_12bpp_rvv_i64: 50.5
h264_add_pixels4_14bpp_c: 87.5
h264_add_pixels4_14bpp_rvv_i64: 50.5
h264_add_pixels8_8bpp_c: 265.2
h264_add_pixels8_8bpp_rvv_i64: 84.5
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7744c08240808c8517a9c088b465c15235a34c86
---
libavcodec/riscv/Makefile | 3 +-
libavcodec/riscv/h264addpx_rvv.S | 89 ++++++++++++++++++++++++++++++++++++++++
libavcodec/riscv/h264dsp_init.c | 11 +++++
3 files changed, 102 insertions(+), 1 deletion(-)
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 92e1544e76..0bbdd38116 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -31,7 +31,8 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o
OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
-RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o
+RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
+ riscv/h264idct_rvv.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
new file mode 100644
index 0000000000..fd36bd4896
--- /dev/null
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+/* sx rd, addr: store integer register rd to addr using the XLEN-wide store for this target. */
+ .macro sx rd, addr
+#if (__riscv_xlen == 32)
+ sw \rd, \addr /* XLEN 32: 32-bit store */
+#elif (__riscv_xlen == 64)
+ sd \rd, \addr /* XLEN 64: 64-bit store */
+#else
+ sq \rd, \addr /* any other XLEN is treated as 128: 128-bit store */
+#endif
+ .endm
+/* 8-bit 4x4: dst (a0, row stride a2 bytes) += block (a1, 16 x 16-bit); block is zeroed after the load. */
+func ff_h264_add_pixels4_8_rvv, zve32x
+ vsetivli zero, 4, e8, mf4, ta, ma /* AVL 4, SEW 8, LMUL 1/4 */
+ vlse32.v v8, (a0), a2 /* strided load: four 32-bit words (4 bytes each) from a0, a2 bytes apart */
+ vsetivli zero, 4 * 4, e8, m1, ta, ma /* AVL 16, SEW 8, LMUL 1 (16 elements fit only if VLEN >= 128) */
+ vle16.v v16, (a1) /* load 16 contiguous 16-bit values from a1 */
+ .equ offset, 0
+ .rept 256 / __riscv_xlen /* 256 bits = the 32 bytes just loaded */
+ sx zero, offset(a1) /* overwrite them with zero, XLEN bits per store */
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow each 16-bit value to its low 8 bits */
+ vadd.vv v8, v8, v24 /* byte-wise add, wrapping modulo 256 (no saturation) */
+ vsetivli zero, 4, e8, mf4, ta, ma /* back to the 4-element configuration used for the load */
+ vsse32.v v8, (a0), a2 /* strided store: write the four updated words back to a0 */
+ ret
+endfunc
+/* 16-bit 4x4: dst (a0, row stride a2 bytes) += block (a1, 16 x 32-bit); block is zeroed after the load. */
+func ff_h264_add_pixels4_16_rvv, zve64x
+ vsetivli zero, 4, e16, mf2, ta, ma /* AVL 4, SEW 16, LMUL 1/2 */
+ vlse64.v v8, (a0), a2 /* strided load: four 64-bit words (4 x 16-bit each) from a0, a2 bytes apart */
+ vsetivli zero, 4 * 4, e16, m2, ta, ma /* AVL 16, SEW 16, LMUL 2 (16 elements fit only if VLEN >= 128) */
+ vle32.v v16, (a1) /* load 16 contiguous 32-bit values; NOTE(review): C prototype declares int16_t *block - confirm element type with callers */
+ .equ offset, 0
+ .rept 512 / __riscv_xlen /* 512 bits = the 64 bytes just loaded */
+ sx zero, offset(a1) /* overwrite them with zero, XLEN bits per store */
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow each 32-bit value to its low 16 bits */
+ vadd.vv v8, v8, v24 /* 16-bit element-wise add, wrapping modulo 65536 (no saturation) */
+ vsetivli zero, 4, e16, mf2, ta, ma /* back to the 4-element configuration used for the load */
+ vsse64.v v8, (a0), a2 /* strided store: write the four updated words back to a0 */
+ ret
+endfunc
+/* 8-bit 8x8: dst (a0, row stride a2 bytes) += block (a1, 64 x 16-bit); block is zeroed after the load. */
+func ff_h264_add_pixels8_8_rvv, zve64x
+ li t0, 8 * 8 /* AVL 64 goes through a register: vsetivli's immediate is only 5 bits */
+ vsetivli zero, 8, e8, mf2, ta, ma /* AVL 8, SEW 8, LMUL 1/2 */
+ vlse64.v v8, (a0), a2 /* strided load: eight 64-bit words (8 bytes each) from a0, a2 bytes apart */
+ vsetvli zero, t0, e8, m4, ta, ma /* AVL 64, SEW 8, LMUL 4 (64 elements fit only if VLEN >= 128) */
+ vle16.v v16, (a1) /* load 64 contiguous 16-bit values from a1 */
+ .equ offset, 0
+ .rept 1024 / __riscv_xlen /* 1024 bits = the 128 bytes just loaded */
+ sx zero, offset(a1) /* overwrite them with zero, XLEN bits per store */
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow each 16-bit value to its low 8 bits */
+ vadd.vv v8, v8, v24 /* byte-wise add, wrapping modulo 256 (no saturation) */
+ vsetivli zero, 8, e8, mf2, ta, ma /* back to the 8-element configuration used for the load */
+ vsse64.v v8, (a0), a2 /* strided store: write the eight updated words back to a0 */
+ ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 1fb73f810e..2787485647 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -61,6 +61,10 @@ void ff_h264_idct8_add_12_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
+
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -96,6 +100,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
# endif
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
+ dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
}
if (bit_depth == 9) {
@@ -118,6 +125,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct_add = ff_h264_idct_add_14_rvv;
dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
}
+ if (bit_depth > 8 && zvl128b) {
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
+ }
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
}
More information about the ffmpeg-cvslog
mailing list