[FFmpeg-devel] [PATCH 1/3] swscale/x86/rgb2rgb: fix deinterleaveBytes for unaligned dst pointers

Ramiro Polla ramiro.polla at gmail.com
Sun Sep 1 16:09:33 EEST 2024


---
 libswscale/x86/input.asm | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index 21cd8b37fd..516e4384b1 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -736,11 +736,11 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
     packuswb       m2, m3                 ; (byte) { U0, ..., U15 }
     packuswb       m0, m1                 ; (byte) { V0, ..., V15 }
 %ifidn %2, nv12
-    mova   [dstUq+wq], m2
-    mova   [dstVq+wq], m0
+    mov%1  [dstUq+wq], m2
+    mov%1  [dstVq+wq], m0
 %else ; nv21
-    mova   [dstVq+wq], m2
-    mova   [dstUq+wq], m0
+    mov%1  [dstVq+wq], m2
+    mov%1  [dstUq+wq], m0
 %endif ; nv12/21
     add            wq, mmsize
     jl .loop_%1
@@ -750,15 +750,18 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
 ; %1 = nr. of XMM registers
 ; %2 = nv12 or nv21
 %macro NVXX_TO_UV_FN 2
-cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, tmp, src, w
 %if ARCH_X86_64
     movsxd         wq, dword r5m
 %else ; x86-32
     mov            wq, r5m
 %endif
+    mov          tmpq, srcq
+    or           tmpq, dstUq
+    or           tmpq, dstVq
     add         dstUq, wq
     add         dstVq, wq
-    test         srcq, 15
+    test         tmpq, 15
     lea          srcq, [srcq+wq*2]
     pcmpeqb        m5, m5                 ; (byte) { 0xff } x 16
     psrlw          m5, 8                  ; (word) { 0x00ff } x 8
-- 
2.30.2



More information about the ffmpeg-devel mailing list