[FFmpeg-devel] [PATCH 4/9] x86: proresdsp: simple_idct: free or use 1 xmm reg
Christophe Gisquet
christophe.gisquet at gmail.com
Fri Oct 9 23:53:41 CEST 2015
m15 is zeroed but never used. Where it is not needed (prores), decrease
the number of xmm regs used by 1; otherwise (simple_idct10), make use of
it to hold the rounder in the row pass.
---
libavcodec/x86/proresdsp.asm | 8 ++++----
libavcodec/x86/simple_idct10.asm | 9 +++++----
libavcodec/x86/simple_idct10_template.asm | 17 ++++++++---------
3 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm
index 18cf15b..3fb71ba 100644
--- a/libavcodec/x86/proresdsp.asm
+++ b/libavcodec/x86/proresdsp.asm
@@ -37,17 +37,17 @@ cextern pw_1019
section .text align=16
-%macro idct_put_fn 1
-cglobal prores_idct_put_10, 4, 4, %1
+%macro idct_put_fn 0
+cglobal prores_idct_put_10, 4, 4, 15
IDCT_PUT_FN pw_1, 15, pw_88, 18, pw_4, pw_1019, r3
RET
%endmacro
INIT_XMM sse2
-idct_put_fn 16
+idct_put_fn
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
-idct_put_fn 16
+idct_put_fn
%endif
%endif
diff --git a/libavcodec/x86/simple_idct10.asm b/libavcodec/x86/simple_idct10.asm
index 77db0a7..725de82 100644
--- a/libavcodec/x86/simple_idct10.asm
+++ b/libavcodec/x86/simple_idct10.asm
@@ -37,17 +37,18 @@ pd_round: times 4 dd 1<<(13-1)
section .text align=16
-%macro idct_put_fn 1
-cglobal simple_idct10_put, 3, 3, %1
+%macro idct_put_fn 0
+cglobal simple_idct10_put, 3, 3, 16
+ mova m15, [pd_round]
IDCT_PUT_FN "", 13, pw_8, 18, 0, pw_1023
RET
%endmacro
INIT_XMM sse2
-idct_put_fn 16
+idct_put_fn
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
-idct_put_fn 16
+idct_put_fn
%endif
%endif
diff --git a/libavcodec/x86/simple_idct10_template.asm b/libavcodec/x86/simple_idct10_template.asm
index 86c2765..d4a08f8 100644
--- a/libavcodec/x86/simple_idct10_template.asm
+++ b/libavcodec/x86/simple_idct10_template.asm
@@ -90,14 +90,14 @@ cextern w7_min_w5
pmaddwd m1, [w4_plus_w2]
%ifstr %1
; 1<<(%1-1)
- paddd m2, [pd_round]
- paddd m3, [pd_round]
- paddd m4, [pd_round]
- paddd m5, [pd_round]
- paddd m6, [pd_round]
- paddd m7, [pd_round]
- paddd m0, [pd_round]
- paddd m1, [pd_round]
+ paddd m2, m15
+ paddd m3, m15
+ paddd m4, m15
+ paddd m5, m15
+ paddd m6, m15
+ paddd m7, m15
+ paddd m0, m15
+ paddd m1, m15
%endif
; a0: -1*row[0]-1*row[2]
@@ -237,7 +237,6 @@ cextern w7_min_w5
%macro IDCT_PUT_FN 6-7
movsxd r1, r1d
- pxor m15, m15 ; zero
; for (i = 0; i < 8; i++)
; idctRowCondDC(block + i*8);
--
2.6.0
More information about the ffmpeg-devel
mailing list