[FFmpeg-devel] [PATCH 1/3] swscale/x86/input: fix nvXXToUV for unaligned dst pointers
Ramiro Polla
ramiro.polla at gmail.com
Sun Sep 1 16:09:33 EEST 2024
---
libswscale/x86/input.asm | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index 21cd8b37fd..516e4384b1 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -736,11 +736,11 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
packuswb m2, m3 ; (byte) { U0, ..., U15 }
packuswb m0, m1 ; (byte) { V0, ..., V15 }
%ifidn %2, nv12
- mova [dstUq+wq], m2
- mova [dstVq+wq], m0
+ mov%1 [dstUq+wq], m2
+ mov%1 [dstVq+wq], m0
%else ; nv21
- mova [dstVq+wq], m2
- mova [dstUq+wq], m0
+ mov%1 [dstVq+wq], m2
+ mov%1 [dstUq+wq], m0
%endif ; nv12/21
add wq, mmsize
jl .loop_%1
@@ -750,15 +750,18 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
; %1 = nr. of XMM registers
; %2 = nv12 or nv21
%macro NVXX_TO_UV_FN 2
-cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, tmp, src, w
%if ARCH_X86_64
movsxd wq, dword r5m
%else ; x86-32
mov wq, r5m
%endif
+ mov tmpq, srcq
+ or tmpq, dstUq
+ or tmpq, dstVq
add dstUq, wq
add dstVq, wq
- test srcq, 15
+ test tmpq, 15
lea srcq, [srcq+wq*2]
pcmpeqb m5, m5 ; (byte) { 0xff } x 16
psrlw m5, 8 ; (word) { 0x00ff } x 8
--
2.30.2
More information about the ffmpeg-devel mailing list