[FFmpeg-cvslog] lavc/aarch64: Fix ff_pred16x16_plane_neon_10
    Bin Peng
    git at videolan.org
    Tue Jan  7 22:41:28 EET 2025
ffmpeg | branch: release/7.1 | Bin Peng <pengbin at visionular.com> | Fri Dec 13 22:19:47 2024 +0800| [54331d4305909afdb8957c8567626aeb139c4fb2] | committer: Martin Storsjö
lavc/aarch64: Fix ff_pred16x16_plane_neon_10
Fix test failure on aarch64:
./tests/checkasm/checkasm --test=h264pred 367840
Signed-off-by: Peng Bin <pengbin at visionular.com>
Signed-off-by: Martin Storsjö <martin at martin.st>
(cherry picked from commit 72a3656e8468a394373b6397aacc906d7f7794c2)
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=54331d4305909afdb8957c8567626aeb139c4fb2
---
 libavcodec/aarch64/h264pred_neon.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/libavcodec/aarch64/h264pred_neon.S b/libavcodec/aarch64/h264pred_neon.S
index 168f8191ad..d0999938ef 100644
--- a/libavcodec/aarch64/h264pred_neon.S
+++ b/libavcodec/aarch64/h264pred_neon.S
@@ -502,28 +502,27 @@ function ff_pred16x16_plane_neon_10, export=1
         add             v7.4h,  v7.4h,  v0.4h
         shl             v2.4h,  v7.4h,  #4
         ssubl           v2.4s,  v2.4h,  v3.4h
-        shl             v3.4h,  v4.4h,  #4
         ext             v0.16b, v0.16b, v0.16b, #14
-        ssubl           v6.4s,  v5.4h,  v3.4h
+        sxtl            v6.4s,  v5.4h          // c
 
         mov             v0.h[0],  wzr
         mul             v0.8h,  v0.8h,  v4.h[0]
         dup             v16.4s, v2.s[0]
         dup             v17.4s, v2.s[0]
-        dup             v2.8h,  v4.h[0]
-        dup             v3.4s,  v6.s[0]
-        shl             v2.8h,  v2.8h,  #3
+        dup             v2.8h,  v4.h[0]        // b
+        dup             v3.4s,  v6.s[0]        // c
+        sshll           v2.4s,  v2.4h,  #3     // b * 8
         saddw           v16.4s, v16.4s, v0.4h
         saddw2          v17.4s, v17.4s, v0.8h
-        saddw           v3.4s,  v3.4s,  v2.4h
+        sub             v3.4s,  v3.4s,  v2.4s
 
         mov             w3,      #16
         mvni            v4.8h,   #0xFC, lsl #8 // 1023 for clipping
 1:
         sqshrun         v0.4h,  v16.4s, #5
         sqshrun2        v0.8h,  v17.4s, #5
-        saddw           v16.4s, v16.4s, v2.4h
-        saddw           v17.4s, v17.4s, v2.4h
+        add             v16.4s, v16.4s, v2.4s
+        add             v17.4s, v17.4s, v2.4s
         sqshrun         v1.4h,  v16.4s, #5
         sqshrun2        v1.8h,  v17.4s, #5
         add             v16.4s, v16.4s, v3.4s
    
    
More information about the ffmpeg-cvslog mailing list