[FFmpeg-devel] [PATCH 2/2] x86/dsputil: use HADDD where applicable
James Almer
jamrial at gmail.com
Sat Apr 12 01:00:21 CEST 2014
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/dsputil.asm | 23 +++--------------------
1 file changed, 3 insertions(+), 20 deletions(-)
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 8b938d4..c1ea9bf 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -50,14 +50,7 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
paddd m2, m1
add orderq, mmsize*2
jl .loop
-%if mmsize == 16
- movhlps m0, m2
- paddd m2, m0
- pshuflw m0, m2, 0x4e
-%else
- pshufw m0, m2, 0x4e
-%endif
- paddd m2, m0
+ HADDD m2, m0
movd eax, m2
%if mmsize == 8
emms
@@ -99,14 +92,7 @@ cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
mova [v1q + orderq + mmsize], m3
add orderq, mmsize*2
jl .loop
-%if mmsize == 16
- movhlps m0, m6
- paddd m6, m0
- pshuflw m0, m6, 0x4e
-%else
- pshufw m0, m6, 0x4e
-%endif
- paddd m6, m0
+ HADDD m6, m0
movd eax, m6
RET
%endmacro
@@ -200,10 +186,7 @@ SCALARPRODUCT_LOOP 4
SCALARPRODUCT_LOOP 2
SCALARPRODUCT_LOOP 0
.end:
- movhlps m0, m6
- paddd m6, m0
- pshuflw m0, m6, 0x4e
- paddd m6, m0
+ HADDD m6, m0
movd eax, m6
RET
--
1.8.3.2
More information about the ffmpeg-devel mailing list