[FFmpeg-devel] [PATCH] Remove mmx.h from dsputil_mmx.c
Michael Niedermayer
michaelni
Wed Mar 11 03:04:46 CET 2009
On Tue, Mar 10, 2009 at 09:19:56PM -0400, Alex Converse wrote:
> This is my first mmx patch so please bear with me.
> diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
> index 2f47f5f..3771c5f 100644
> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -28,7 +28,6 @@
> #include "libavcodec/mpegvideo.h"
> #include "libavcodec/simple_idct.h"
> #include "dsputil_mmx.h"
> -#include "mmx.h"
> #include "vp3dsp_mmx.h"
> #include "vp3dsp_sse2.h"
> #include "vp6dsp_mmx.h"
> @@ -278,16 +277,33 @@ static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) =
>
> void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
> {
> + const DCTELEM *p = block;
> + uint8_t *pix = pixels;
> int i;
>
> - movq_m2r(*vector128, mm1);
> - for (i = 0; i < 8; i++) {
> - movq_m2r(*(block), mm0);
> - packsswb_m2r(*(block + 4), mm0);
> - block += 8;
> - paddb_r2r(mm1, mm0);
> - movq_r2m(mm0, *pixels);
> - pixels += line_size;
> + __asm__ volatile ("movq %0, %%mm0" : : "m" (*vector128));
> + for (i = 0; i < 8; i+=4) {
> + __asm__ volatile (
> + "movq (%1), %%mm1 \n\t"
> + "movq 16(%1), %%mm2 \n\t"
> + "movq 32(%1), %%mm3 \n\t"
> + "movq 48(%1), %%mm4 \n\t"
> + "packsswb 8(%1), %%mm1 \n\t"
> + "packsswb 24(%1), %%mm2 \n\t"
> + "packsswb 40(%1), %%mm3 \n\t"
> + "packsswb 56(%1), %%mm4 \n\t"
> + "paddb %%mm0, %%mm1 \n\t"
> + "paddb %%mm0, %%mm2 \n\t"
> + "paddb %%mm0, %%mm3 \n\t"
> + "paddb %%mm0, %%mm4 \n\t"
> + "movq %%mm1, (%0) \n\t"
> + "movq %%mm2, (%0, %2) \n\t"
> + "movq %%mm3, (%0, %2, 2) \n\t"
> + "movq %%mm4, (%0, %3) \n\t"
> + ::"r" (pix), "r" (p), "r"((x86_reg)line_size), "r"((x86_reg)3*line_size)
> + :"memory");
> + p += 32;
> + pix += 4*line_size;
1. The for() loop should not be in C; the loop should be inside the asm() block.
2. A benchmark is always needed when changing optimized code.
Putting START_TIMER/STOP_TIMER around the point that calls the changed
function is likely the easiest way to do it.
[..]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Observe your enemies, for they first find out your faults. -- Antisthenes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090311/b736c60b/attachment.pgp>
More information about the ffmpeg-devel mailing list.