[FFmpeg-devel] [PATCH 1/3] x86/ac3dsp: reduce instruction count inside the float_to_fixed24 loop

James Almer jamrial at gmail.com
Wed Nov 22 21:49:11 EET 2023


Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/x86/ac3dsp.asm | 46 +++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index a95d359d95..42c8310462 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -77,16 +77,20 @@ AC3_EXPONENT_MIN
 INIT_XMM sse2
 cglobal float_to_fixed24, 3, 3, 9, dst, src, len
     movaps     m0, [pf_1_24]
+    shl      lenq, 2
+    add      srcq, lenq
+    add      dstq, lenq
+    neg      lenq
 .loop:
-    movaps     m1, [srcq    ]
-    movaps     m2, [srcq+16 ]
-    movaps     m3, [srcq+32 ]
-    movaps     m4, [srcq+48 ]
+    movaps     m1, [srcq+lenq    ]
+    movaps     m2, [srcq+lenq+16 ]
+    movaps     m3, [srcq+lenq+32 ]
+    movaps     m4, [srcq+lenq+48 ]
 %ifdef m8
-    movaps     m5, [srcq+64 ]
-    movaps     m6, [srcq+80 ]
-    movaps     m7, [srcq+96 ]
-    movaps     m8, [srcq+112]
+    movaps     m5, [srcq+lenq+64 ]
+    movaps     m6, [srcq+lenq+80 ]
+    movaps     m7, [srcq+lenq+96 ]
+    movaps     m8, [srcq+lenq+112]
 %endif
     mulps      m1, m0
     mulps      m2, m0
@@ -108,24 +112,20 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
     cvtps2dq   m7, m7
     cvtps2dq   m8, m8
 %endif
-    movdqa  [dstq    ], m1
-    movdqa  [dstq+16 ], m2
-    movdqa  [dstq+32 ], m3
-    movdqa  [dstq+48 ], m4
+    movdqa  [dstq+lenq    ], m1
+    movdqa  [dstq+lenq+16 ], m2
+    movdqa  [dstq+lenq+32 ], m3
+    movdqa  [dstq+lenq+48 ], m4
 %ifdef m8
-    movdqa  [dstq+64 ], m5
-    movdqa  [dstq+80 ], m6
-    movdqa  [dstq+96 ], m7
-    movdqa  [dstq+112], m8
-    add      srcq, 128
-    add      dstq, 128
-    sub      lenq, 32
+    movdqa  [dstq+lenq+64 ], m5
+    movdqa  [dstq+lenq+80 ], m6
+    movdqa  [dstq+lenq+96 ], m7
+    movdqa  [dstq+lenq+112], m8
+    add      lenq, 128
 %else
-    add      srcq, 64
-    add      dstq, 64
-    sub      lenq, 16
+    add      lenq, 64
 %endif
-    ja .loop
+    jl .loop
     RET
 
 ;------------------------------------------------------------------------------
-- 
2.42.1



More information about the ffmpeg-devel mailing list