[FFmpeg-devel] [PATCH 1/3] x86/ac3dsp: reduce instruction count inside the float_to_fixed24 loop
James Almer
jamrial at gmail.com
Wed Nov 22 21:49:11 EET 2023
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/ac3dsp.asm | 46 +++++++++++++++++++--------------------
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index a95d359d95..42c8310462 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -77,16 +77,20 @@ AC3_EXPONENT_MIN
INIT_XMM sse2
cglobal float_to_fixed24, 3, 3, 9, dst, src, len
movaps m0, [pf_1_24]
+ shl lenq, 2
+ add srcq, lenq
+ add dstq, lenq
+ neg lenq
.loop:
- movaps m1, [srcq ]
- movaps m2, [srcq+16 ]
- movaps m3, [srcq+32 ]
- movaps m4, [srcq+48 ]
+ movaps m1, [srcq+lenq ]
+ movaps m2, [srcq+lenq+16 ]
+ movaps m3, [srcq+lenq+32 ]
+ movaps m4, [srcq+lenq+48 ]
%ifdef m8
- movaps m5, [srcq+64 ]
- movaps m6, [srcq+80 ]
- movaps m7, [srcq+96 ]
- movaps m8, [srcq+112]
+ movaps m5, [srcq+lenq+64 ]
+ movaps m6, [srcq+lenq+80 ]
+ movaps m7, [srcq+lenq+96 ]
+ movaps m8, [srcq+lenq+112]
%endif
mulps m1, m0
mulps m2, m0
@@ -108,24 +112,20 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
cvtps2dq m7, m7
cvtps2dq m8, m8
%endif
- movdqa [dstq ], m1
- movdqa [dstq+16 ], m2
- movdqa [dstq+32 ], m3
- movdqa [dstq+48 ], m4
+ movdqa [dstq+lenq ], m1
+ movdqa [dstq+lenq+16 ], m2
+ movdqa [dstq+lenq+32 ], m3
+ movdqa [dstq+lenq+48 ], m4
%ifdef m8
- movdqa [dstq+64 ], m5
- movdqa [dstq+80 ], m6
- movdqa [dstq+96 ], m7
- movdqa [dstq+112], m8
- add srcq, 128
- add dstq, 128
- sub lenq, 32
+ movdqa [dstq+lenq+64 ], m5
+ movdqa [dstq+lenq+80 ], m6
+ movdqa [dstq+lenq+96 ], m7
+ movdqa [dstq+lenq+112], m8
+ add lenq, 128
%else
- add srcq, 64
- add dstq, 64
- sub lenq, 16
+ add lenq, 64
%endif
- ja .loop
+ jl .loop
RET
;------------------------------------------------------------------------------
--
2.42.1
More information about the ffmpeg-devel mailing list