[MPlayer-dev-eng] Fix libmpcodecs inline asm on ICL

Fri May 2 06:54:42 CEST 2014

On Mon, Apr 14, 2014 at 03:13:22PM +1000, Matt Oliver wrote:
> This is a patch to enable the Intel compiler (ICL) on Windows to compile the
> libmpcodecs inline asm functions. It is based on similar work previously
> done in FFmpeg (and in fact was initially posted there) and extends some
> previous patches that were applied to libmpcodecs a few months ago. This is
> required to compile FFmpeg under ICL with asm and was previously posted
> (probably incorrectly) on the FFmpeg mailing list, but I'm now posting it
> here in the correct location.
> 
> Matt

>  vf_fspp.c |   18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
> b54f42435c23df435d2243e8e11f2be539ee014c  4-4-Fix-libmpcodecs-inline-asm-on-ICL-part-2.patch
> From 0a314fb3faa063a008e654862375e2ef8a674658 Mon Sep 17 00:00:00 2001
> From: Matt Oliver <protogonoi at gmail.com>
> Date: Sun, 9 Feb 2014 22:25:15 +1100
> Subject: [PATCH] Fix libmpcodecs inline asm on ICL.
> 
> ---
>  libavfilter/libmpcodecs/vf_fspp.c | 18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/libavfilter/libmpcodecs/vf_fspp.c b/libavfilter/libmpcodecs/vf_fspp.c
> index dadfae7..65af77d 100644
> --- a/libavfilter/libmpcodecs/vf_fspp.c
> +++ b/libavfilter/libmpcodecs/vf_fspp.c
> @@ -1598,6 +1598,10 @@ static void column_fidct_mmx(int16_t* thr_adr,  int16_t *data,  int16_t *output,
>  
>          : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps)
>          : "d"(thr_adr)
> +          NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,MM_2,MM_FIX_1_414213562_A,MM_FIX_1_414213562,MM_FIX_0_382683433,
> +          ff_MM_FIX_0_541196100,MM_FIX_1_306562965,MM_FIX_0_847759065)
> +          NAMED_CONSTRAINTS_ADD(MM_FIX_0_566454497,MM_FIX_0_198912367,MM_FIX_2_613125930,MM_FIX_1_847759065,
> +          MM_FIX_1_082392200,ff_MM_FIX_0_541196100,MM_FIX_1_306562965)
>          : "%"REG_a
>          );
>  }
> @@ -1867,6 +1871,8 @@ static void row_idct_mmx (int16_t* workspace,
>  
>          : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps)
>          : "a"(output_stride*sizeof(short))
> +        NAMED_CONSTRAINTS_ADD(MM_FIX_1_414213562_A,MM_FIX_2_613125930,MM_FIX_1_847759065,MM_FIX_1_082392200,
> +        MM_FIX_1_414213562,MM_DESCALE_RND)
>          : "%"REG_d
>          );
>  }
> @@ -1974,10 +1980,10 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
>          "movd (%%"REG_S",%%"REG_a",2), %%mm3    \n\t" //5
>          "paddw %%mm4, %%mm1            \n\t"
>  
> -        "movq %%mm5, 0*8+%3            \n\t" //t7
> +        "movq %%mm5, %3                \n\t" //t7
>          "punpcklbw %%mm7, %%mm3        \n\t"
>  
> -        "movq %%mm6, 1*8+%3            \n\t" //t6
> +        "movq %%mm6, %4                \n\t" //t6
>          "movq %%mm2, %%mm4             \n\t"
>  
>          "movd (%%"REG_S"), %%mm5           \n\t" //3
> @@ -2023,7 +2029,7 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
>          "psubw %%mm1, %%mm5            \n\t" //d1
>          "movq %%mm0, %%mm6             \n\t"
>  
> -        "movq 1*8+%3, %%mm1            \n\t"
> +        "movq %4, %%mm1                \n\t"
>          "punpcklwd %%mm5, %%mm0        \n\t"
>  
>          "punpckhwd %%mm5, %%mm6        \n\t"
> @@ -2047,7 +2053,7 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
>          "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
>          "psllw $2, %%mm3              \n\t" //t10
>  
> -        "movq 0*8+%3, %%mm2           \n\t"
> +        "movq %3, %%mm2               \n\t"
>          "psllw $2, %%mm4              \n\t" //t11
>  
>          "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3

> @@ -2110,8 +2116,8 @@ static void row_fdct_mmx(int16_t *data,  const uint8_t *pixels,  int line_size,
>          "dec %%"REG_c"                   \n\t"
>          "jnz 6b                  \n\t"
>  
> -        : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps)
> -        : "a"(line_size)
> +        : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps), "=o"(temps[1])
> +        : NAMED_CONSTRAINTS(ff_MM_FIX_0_707106781,ff_MM_FIX_0_541196100,MM_FIX_0_382683433,MM_FIX_1_306562965)
>          : "%"REG_d);

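line_size is lost here; this doesn't work. The "a"(line_size) input operand
is replaced by NAMED_CONSTRAINTS(...), but the asm still uses REG_a as the
line stride.
Make sure you test the code ...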

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/mplayer-dev-eng/attachments/20140502/9383db2a/attachment.asc>