[FFmpeg-devel] [PATCH 1/2] x86: move horizontal add macros to x86util
James Almer
jamrial at gmail.com
Thu Apr 17 01:15:35 CEST 2014
Also port the relevant AVX2/XOP optimizations from x264, with permission
from the corresponding authors to relicense them under the LGPL.
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/h264_intrapred_10bit.asm | 16 ----------------
libavutil/x86/x86util.asm | 33 +++++++++++++++++++++++++++++++++
2 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 40f1c9f..9dee577 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -171,22 +171,6 @@ PRED4x4_HD
;-----------------------------------------------------------------------------
; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
;-----------------------------------------------------------------------------
-%macro HADDD 2 ; sum junk
-%if mmsize == 16
- movhlps %2, %1
- paddd %1, %2
- pshuflw %2, %1, 0xE
- paddd %1, %2
-%else
- pshufw %2, %1, 0xE
- paddd %1, %2
-%endif
-%endmacro
-
-%macro HADDW 2
- pmaddwd %1, [pw_1]
- HADDD %1, %2
-%endmacro
INIT_MMX mmxext
cglobal pred4x4_dc_10, 3, 3
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index df58cad..67d7905 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -273,6 +273,39 @@
%endif
%endmacro
+%macro HADDD 2 ; sum junk -- horizontally add the dwords of %1 (sum); %2 (junk) is overwritten as scratch
+%if sizeof%1 == 32
+%define %2 xmm%2
+ vextracti128 %2, %1, 1 ; %2 = upper 128 bits of the 256-bit %1
+%define %1 xmm%1
+ paddd %1, %2 ; fold the extracted upper half into %1
+%endif
+%if mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
+ vphadddq %1, %1 ; XOP: add each adjacent dword pair into a qword
+%endif
+ movhlps %2, %1 ; low 64 bits of %2 = high 64 bits of %1
+ paddd %1, %2 ; fold the high 64 bits into the low 64 bits
+%endif
+%if notcpuflag(xop) || sizeof%1 != 16
+ PSHUFLW %2, %1, q0032 ; bring dword 1 of %1 down to dword 0 of %2 (assumes q0032 == 0x0E; defined elsewhere -- verify)
+ paddd %1, %2 ; dword 0 of %1 now holds the total
+%endif
+%undef %1
+%undef %2
+%endmacro
+
+%macro HADDW 2 ; reg, tmp -- horizontally add the words of reg; tmp is overwritten as scratch
+%if cpuflag(xop) && sizeof%1 == 16
+ vphaddwq %1, %1 ; XOP: sum each group of four adjacent words into a qword
+ movhlps %2, %1 ; low 64 bits of %2 = high 64 bits of %1
+ paddd %1, %2 ; low dword of %1 = sum of both qword partial sums
+%else
+ pmaddwd %1, [pw_1] ; multiply words by pw_1 and add adjacent pairs into dwords (assumes pw_1 is a vector of word 1s, declared elsewhere)
+ HADDD %1, %2 ; reduce the resulting dwords
+%endif
+%endmacro
+
%macro PALIGNR 4-5
%if cpuflag(ssse3)
%if %0==5
--
1.8.3.2
More information about the ffmpeg-devel
mailing list