[MPlayer-dev-eng] Fix libmpcodecs inline asm on ICL
Michael Niedermayer
michaelni at gmx.at
Fri May 2 06:54:42 CEST 2014
On Mon, Apr 14, 2014 at 03:13:22PM +1000, Matt Oliver wrote:
> This is a patch to enable the Intel compiler on Windows to compile the
> libmpcodecs inline asm functions. It is based on similar work previously
> done in FFmpeg (and in fact was initially posted there) and extends some
> previous patches that were applied to libmpcodecs a few months ago. This is
> required to compile ffmpeg under icl with asm and was previously posted
> (probably incorrectly) on the ffmpeg mailing list, but I'm now posting it
> here in the correct location.
>
> Matt
> vf_fspp.c | 18 ++++++++++++------
> 1 file changed, 12 insertions(+), 6 deletions(-)
> b54f42435c23df435d2243e8e11f2be539ee014c 4-4-Fix-libmpcodecs-inline-asm-on-ICL-part-2.patch
> From 0a314fb3faa063a008e654862375e2ef8a674658 Mon Sep 17 00:00:00 2001
> From: Matt Oliver <protogonoi at gmail.com>
> Date: Sun, 9 Feb 2014 22:25:15 +1100
> Subject: [PATCH] Fix libmpcodecs inline asm on ICL.
>
> ---
> libavfilter/libmpcodecs/vf_fspp.c | 18 ++++++++++++------
> 1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/libavfilter/libmpcodecs/vf_fspp.c b/libavfilter/libmpcodecs/vf_fspp.c
> index dadfae7..65af77d 100644
> --- a/libavfilter/libmpcodecs/vf_fspp.c
> +++ b/libavfilter/libmpcodecs/vf_fspp.c
> @@ -1598,6 +1598,10 @@ static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output,
>
> : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps)
> : "d"(thr_adr)
> + NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,MM_2,MM_FIX_1_414213562_A,MM_FIX_1_414213562,MM_FIX_0_382683433,
> + ff_MM_FIX_0_541196100,MM_FIX_1_306562965,MM_FIX_0_847759065)
> + NAMED_CONSTRAINTS_ADD(MM_FIX_0_566454497,MM_FIX_0_198912367,MM_FIX_2_613125930,MM_FIX_1_847759065,
> + MM_FIX_1_082392200,ff_MM_FIX_0_541196100,MM_FIX_1_306562965)
> : "%"REG_a
> );
> }
> @@ -1867,6 +1871,8 @@ static void row_idct_mmx (int16_t* workspace,
>
> : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps)
> : "a"(output_stride*sizeof(short))
> + NAMED_CONSTRAINTS_ADD(MM_FIX_1_414213562_A,MM_FIX_2_613125930,MM_FIX_1_847759065,MM_FIX_1_082392200,
> + MM_FIX_1_414213562,MM_DESCALE_RND)
> : "%"REG_d
> );
> }
> @@ -1974,10 +1980,10 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
> "movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5
> "paddw %%mm4, %%mm1 \n\t"
>
> - "movq %%mm5, 0*8+%3 \n\t" //t7
> + "movq %%mm5, %3 \n\t" //t7
> "punpcklbw %%mm7, %%mm3 \n\t"
>
> - "movq %%mm6, 1*8+%3 \n\t" //t6
> + "movq %%mm6, %4 \n\t" //t6
> "movq %%mm2, %%mm4 \n\t"
>
> "movd (%%"REG_S"), %%mm5 \n\t" //3
> @@ -2023,7 +2029,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
> "psubw %%mm1, %%mm5 \n\t" //d1
> "movq %%mm0, %%mm6 \n\t"
>
> - "movq 1*8+%3, %%mm1 \n\t"
> + "movq %4, %%mm1 \n\t"
> "punpcklwd %%mm5, %%mm0 \n\t"
>
> "punpckhwd %%mm5, %%mm6 \n\t"
> @@ -2047,7 +2053,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
> "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
> "psllw $2, %%mm3 \n\t" //t10
>
> - "movq 0*8+%3, %%mm2 \n\t"
> + "movq %3, %%mm2 \n\t"
> "psllw $2, %%mm4 \n\t" //t11
>
> "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3
> @@ -2110,8 +2116,8 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
> "dec %%"REG_c" \n\t"
> "jnz 6b \n\t"
>
> - : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps)
> - : "a"(line_size)
> + : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps), "=o"(temps[1])
> + : NAMED_CONSTRAINTS(ff_MM_FIX_0_707106781,ff_MM_FIX_0_541196100,MM_FIX_0_382683433,MM_FIX_1_306562965)
> : "%"REG_d);
line_size is lost here (the "a"(line_size) input operand was removed, yet the asm still indexes with REG_a), so this doesn't work.
Make sure you test the code ...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/mplayer-dev-eng/attachments/20140502/9383db2a/attachment.asc>
More information about the MPlayer-dev-eng
mailing list