[FFmpeg-cvslog] x86: move horizontal add macros to x86util
James Almer
git at videolan.org
Thu Apr 17 14:23:31 CEST 2014
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Apr 16 20:15:35 2014 -0300| [76ed71a72bffb45027923e4da5f6fc6a97bfb218] | committer: Michael Niedermayer
x86: move horizontal add macros to x86util
Also port relevant AVX2/XOP optimizations from x264 with permission
to relicense to LGPL from the corresponding authors
Signed-off-by: James Almer <jamrial at gmail.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=76ed71a72bffb45027923e4da5f6fc6a97bfb218
---
libavcodec/x86/h264_intrapred_10bit.asm | 16 ---------------
libavutil/x86/x86util.asm | 33 +++++++++++++++++++++++++++++++
2 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 40f1c9f..9dee577 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -171,22 +171,6 @@ PRED4x4_HD
;-----------------------------------------------------------------------------
; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
;-----------------------------------------------------------------------------
-%macro HADDD 2 ; sum junk
-%if mmsize == 16
- movhlps %2, %1
- paddd %1, %2
- pshuflw %2, %1, 0xE
- paddd %1, %2
-%else
- pshufw %2, %1, 0xE
- paddd %1, %2
-%endif
-%endmacro
-
-%macro HADDW 2
- pmaddwd %1, [pw_1]
- HADDD %1, %2
-%endmacro
INIT_MMX mmxext
cglobal pred4x4_dc_10, 3, 3
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index df58cad..67d7905 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -273,6 +273,39 @@
%endif
%endmacro
+%macro HADDD 2 ; sum junk
+%if sizeof%1 == 32
+%define %2 xmm%2
+ vextracti128 %2, %1, 1
+%define %1 xmm%1
+ paddd %1, %2
+%endif
+%if mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
+ vphadddq %1, %1
+%endif
+ movhlps %2, %1
+ paddd %1, %2
+%endif
+%if notcpuflag(xop) || sizeof%1 != 16
+ PSHUFLW %2, %1, q0032
+ paddd %1, %2
+%endif
+%undef %1
+%undef %2
+%endmacro
+
+%macro HADDW 2 ; reg, tmp
+%if cpuflag(xop) && sizeof%1 == 16
+ vphaddwq %1, %1
+ movhlps %2, %1
+ paddd %1, %2
+%else
+ pmaddwd %1, [pw_1]
+ HADDD %1, %2
+%endif
+%endmacro
+
%macro PALIGNR 4-5
%if cpuflag(ssse3)
%if %0==5
More information about the ffmpeg-cvslog
mailing list