[FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8
Rémi Denis-Courmont
remi at remlab.net
Sat Jul 13 16:01:52 EEST 2024
---
libavcodec/riscv/Makefile | 3 +-
libavcodec/riscv/h264addpx_rvv.S | 89 ++++++++++++++++++++++++++++++++
libavcodec/riscv/h264dsp_init.c | 11 ++++
3 files changed, 102 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/riscv/h264addpx_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 92e1544e76..0bbdd38116 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -31,7 +31,8 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o
OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
-RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o
+RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
+ riscv/h264idct_rvv.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
new file mode 100644
index 0000000000..fd36bd4896
--- /dev/null
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+
+ .macro sx rd, addr /* sx: store register rd to addr with the store instruction matching XLEN */
+#if (__riscv_xlen == 32)
+ sw \rd, \addr /* XLEN = 32: 32-bit store */
+#elif (__riscv_xlen == 64)
+ sd \rd, \addr /* XLEN = 64: 64-bit store */
+#else
+ sq \rd, \addr /* any other XLEN (presumably 128): 128-bit store */
+#endif
+ .endm
+
+func ff_h264_add_pixels4_8_rvv, zve32x /* add a 4x4 block of 16-bit coefficients to 4 rows of 4 bytes, then zero the block; assuming the standard calling convention, a0 = dst, a1 = block, a2 = stride */
+ vsetivli zero, 4, e8, mf4, ta, ma /* VL = 4: one vector element per row */
+ vlse32.v v8, (a0), a2 /* strided load: each 32-bit element is one row of 4 bytes, rows a2 bytes apart */
+ vsetivli zero, 4 * 4, e8, m1, ta, ma /* VL = 16: view the loaded rows as 16 individual bytes */
+ vle16.v v16, (a1) /* load the 16 contiguous 16-bit coefficients */
+ .equ offset, 0
+ .rept 256 / __riscv_xlen /* zero the 32 bytes at a1 with XLEN-wide scalar stores (after they were loaded above) */
+ sx zero, offset(a1)
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow 16-bit -> 8-bit, keeping only the low byte of each coefficient */
+ vadd.vv v8, v8, v24 /* wrapping (non-saturating) byte-wise add */
+ vsetivli zero, 4, e8, mf4, ta, ma /* back to the 4-row configuration used for the load */
+ vsse32.v v8, (a0), a2 /* strided store of the 4 updated rows */
+ ret
+endfunc
+
+func ff_h264_add_pixels4_16_rvv, zve64x /* 16-bit-sample variant: add a 4x4 block of 32-bit coefficients to 4 rows of 4 halfwords, then zero the block; assuming the standard calling convention, a0 = dst, a1 = block, a2 = stride */
+ vsetivli zero, 4, e16, mf2, ta, ma /* VL = 4: one vector element per row */
+ vlse64.v v8, (a0), a2 /* strided load: each 64-bit element is one row of 4 halfwords, rows a2 bytes apart */
+ vsetivli zero, 4 * 4, e16, m2, ta, ma /* VL = 16: view the loaded rows as 16 individual halfwords */
+ vle32.v v16, (a1) /* load 16 contiguous 32-bit coefficients (the prototype says int16_t *, but 64 bytes are read here) */
+ .equ offset, 0
+ .rept 512 / __riscv_xlen /* zero the 64 bytes at a1 with XLEN-wide scalar stores (after they were loaded above) */
+ sx zero, offset(a1)
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow 32-bit -> 16-bit, keeping only the low halfword of each coefficient */
+ vadd.vv v8, v8, v24 /* wrapping (non-saturating) halfword-wise add */
+ vsetivli zero, 4, e16, mf2, ta, ma /* back to the 4-row configuration used for the load */
+ vsse64.v v8, (a0), a2 /* strided store of the 4 updated rows */
+ ret
+endfunc
+
+func ff_h264_add_pixels8_8_rvv, zve64x /* add an 8x8 block of 16-bit coefficients to 8 rows of 8 bytes, then zero the block; assuming the standard calling convention, a0 = dst, a1 = block, a2 = stride */
+ li t0, 8 * 8 /* 64 elements: too large for vsetivli's 5-bit immediate, so it is passed to vsetvli in a register */
+ vsetivli zero, 8, e8, mf2, ta, ma /* VL = 8: one vector element per row */
+ vlse64.v v8, (a0), a2 /* strided load: each 64-bit element is one row of 8 bytes, rows a2 bytes apart */
+ vsetvli zero, t0, e8, m4, ta, ma /* VL = 64: view the loaded rows as 64 individual bytes */
+ vle16.v v16, (a1) /* load the 64 contiguous 16-bit coefficients */
+ .equ offset, 0
+ .rept 1024 / __riscv_xlen /* zero the 128 bytes at a1 with XLEN-wide scalar stores (after they were loaded above) */
+ sx zero, offset(a1)
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow 16-bit -> 8-bit, keeping only the low byte of each coefficient */
+ vadd.vv v8, v8, v24 /* wrapping (non-saturating) byte-wise add */
+ vsetivli zero, 8, e8, mf2, ta, ma /* back to the 8-row configuration used for the load */
+ vsse64.v v8, (a0), a2 /* strided store of the 8 updated rows */
+ ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 825f34443b..a6d06b3ac4 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -61,6 +61,10 @@ void ff_h264_idct8_add_12_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
+
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -96,6 +100,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
# if __riscv_xlen == 64
dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
# endif
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
+ dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
}
if (bit_depth == 9) {
@@ -118,6 +125,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct_add = ff_h264_idct_add_14_rvv;
dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
}
+ if (bit_depth > 8 && zvl128b) {
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
+ }
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
}
--
2.45.2
More information about the ffmpeg-devel
mailing list