[FFmpeg-devel] [PATCH] lavc/h264idct: fix RISC-V group multiplier
Rémi Denis-Courmont
remi at remlab.net
Sat Nov 16 16:25:54 EET 2024
The code at the branch target expects a SEW/LMUL ratio of 1 byte per
vector register (e8 with m1). We therefore have to set that same ratio
before branching: QEMU does not care, but real hardware does.
---
libavcodec/riscv/h264idct_rvv.S | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index fda1f37a3c..5adcfd295e 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -644,7 +644,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
sd a4, 24(sp)
ld a0, 0(a0) # dest[0]
addi a1, a1, 16 * 4 # &block_offset[16]
- vsetivli zero, 4, e8, mf4, ta, ma
+ vsetivli zero, 4, e8, m1, ta, ma
jal .Lidct4_add4_\depth\()_rvv
ld a4, 24(sp) # nnzc
@@ -655,7 +655,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
ld a0, 8(a0) # dest[1]
lla t0, ff_h264_scan8 + 32
.ifc \type, 8_422
- vsetivli zero, 4, e8, mf4, ta, ma
+ vsetivli zero, 4, e8, m1, ta, ma
jal .Lidct4_add4_\depth\()_rvv
ld a4, 24(sp) # nnzc
@@ -665,7 +665,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
addi a1, t5, (-8 - 4) * 4 # &block_offset[24]
ld a0, 0(a0) # dest[0]
lla t0, ff_h264_scan8 + 24
- vsetivli zero, 4, e8, mf4, ta, ma
+ vsetivli zero, 4, e8, m1, ta, ma
jal .Lidct4_add4_\depth\()_rvv
ld a4, 24(sp) # nnzc
@@ -679,7 +679,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
ld ra, 8(sp)
ld s0, 0(sp)
addi sp, sp, 32
- vsetivli zero, 4, e8, mf4, ta, ma
+ vsetivli zero, 4, e8, m1, ta, ma
j .Lidct4_add4_\depth\()_rvv
endfunc
.endm
--
2.45.2
More information about the ffmpeg-devel mailing list