[FFmpeg-devel] [PATCH 02/15] vp9/x86: make cglobal statement more conservative in register allocation.
Clément Bœsch
u at pkh.me
Sat Dec 27 17:31:21 CET 2014
On Sat, Dec 27, 2014 at 11:02:37AM -0500, Ronald S. Bultje wrote:
> ---
> libavcodec/x86/vp9lpf.asm | 21 ++++++++++++++++-----
> 1 file changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
> index e0f7386..c62ac46 100644
> --- a/libavcodec/x86/vp9lpf.asm
> +++ b/libavcodec/x86/vp9lpf.asm
> @@ -307,7 +307,20 @@ SECTION .text
> %endif
> %endmacro
>
> -%macro LOOPFILTER 2 ; %1=v/h %2=size1
> +%macro LOOPFILTER 3 ; %1=v/h %2=size1 %3=stack
> +%if UNIX64
> +cglobal vp9_loop_filter_%1_%2_16, 5, 9, 16, %3, dst, stride, E, I, H, mstride, dst2, stride3, mstride3
> +%else
> +%if WIN64
> +cglobal vp9_loop_filter_%1_%2_16, 4, 8, 16, %3, dst, stride, E, I, mstride, dst2, stride3, mstride3
> +%else
> +cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, stride3, mstride3
> +%define Ed dword r2m
> +%define Id dword r3m
> +%endif
> +%define Hd dword r4m
So every 32-bit arch ends up here, right?
> +%endif
> +
> mov mstrideq, strideq
> neg mstrideq
>
> @@ -795,10 +808,8 @@ SECTION .text
>
> %macro LPF_16_VH 2
> INIT_XMM %2
> -cglobal vp9_loop_filter_v_%1_16, 5,10,16, dst, stride, E, I, H, mstride, dst2, stride3, mstride3
> - LOOPFILTER v, %1
> -cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, dst2, stride3, mstride3
> - LOOPFILTER h, %1
> +LOOPFILTER v, %1, 0
> +LOOPFILTER h, %1, 256
Should be OK, assuming 0 is indeed the default stack size (x86inc seems to
suggest it is set to 16 or 32 somehow).
> %endmacro
>
> %macro LPF_16_VH_ALL_OPTS 1
--
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141227/9f00706f/attachment.asc>
More information about the ffmpeg-devel
mailing list