[Mplayer-cvslog] CVS: main/libavcodec/i386 dsputil_mmx.c,NONE,1.1

Nick Kurshev nick at mplayer.dev.hu
Tue Jul 10 23:34:17 CEST 2001


Update of /cvsroot/mplayer/main/libavcodec/i386
In directory mplayer:/var/tmp.root/cvs-serv2779/main/libavcodec/i386

Added Files:
	dsputil_mmx.c 
Log Message:
Added MMX-optimized pixel operations. This speeds up decoding by 6% on a K7 system.

--- NEW FILE ---
/*
 * DSP utils
 * Copyright (c) 2000 Gerard Lantau.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
 */

/* pixel operations */

/*
 cropTable[0..255] = idx;
 cropTable[256..MAX_NEG_CROP(384)] = 255;
*/

/*
 * Store an 8x8 block of coefficients into a picture as unsigned bytes,
 * saturating every value to the range 0..255 (MMX implementation).
 *
 * block     - 64 coefficients, stored row after row. The code reads them
 *             as 16-bit signed words (assumes DCTELEM is 16 bits wide --
 *             TODO confirm against the DCTELEM typedef).
 * pixels    - address of the top-left destination pixel; 8 bytes are
 *             written on each of 8 consecutive rows.
 * line_size - distance in bytes between the starts of two consecutive
 *             destination rows.
 */
void put_pixels_clamped(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    const DCTELEM *p;
    UINT8 *pix;
    int i;
    
    p = block;
    pix = pixels;
    /* each pass converts 32 coefficients into 4 rows; 2 passes do the block */
    for(i=0;i<2;i++) {
	/*
	 * The source pointer is handed over in a register and every offset
	 * is written as an explicit displacement.  Gluing a number in front
	 * of an "m" operand (e.g. "8%4") only assembles to the intended
	 * address when the compiler prints the operand as a bare "(%reg)";
	 * any operand that already carries a displacement or a symbol
	 * would silently produce a wrong address or invalid assembly.
	 */
	__asm __volatile(
		"movq	(%4), %%mm0\n\t"	/* row 0, words 0-3 */
		"movq	8(%4), %%mm1\n\t"	/* row 0, words 4-7 */
		"movq	16(%4), %%mm2\n\t"	/* row 1, words 0-3 */
		"movq	24(%4), %%mm3\n\t"	/* row 1, words 4-7 */
		"movq	32(%4), %%mm4\n\t"	/* row 2, words 0-3 */
		"movq	40(%4), %%mm5\n\t"	/* row 2, words 4-7 */
		"movq	48(%4), %%mm6\n\t"	/* row 3, words 0-3 */
		"movq	56(%4), %%mm7\n\t"	/* row 3, words 4-7 */
		/* signed words -> unsigned bytes with saturation */
		"packuswb %%mm1, %%mm0\n\t"
		"packuswb %%mm3, %%mm2\n\t"
		"packuswb %%mm5, %%mm4\n\t"
		"packuswb %%mm7, %%mm6\n\t"
		"movq	%%mm0, %0\n\t"
		"movq	%%mm2, %1\n\t"
		"movq	%%mm4, %2\n\t"
		"movq	%%mm6, %3\n\t"
		:"=m"(*pix), "=m"(*(pix+line_size))
		,"=m"(*(pix+line_size*2)), "=m"(*(pix+line_size*3))
		:"r"(p)
		/* the operands name single bytes only; "memory" covers the
		   full 8-byte stores and the 64-byte read through p */
		:"memory");
        pix += line_size*4;
        p += 32;
    }
    /* leave MMX state so that later floating-point code works */
    emms();
}

/*
 * Add an 8x8 block of coefficients to the pixels already present in a
 * picture and store the sums back as unsigned bytes, saturating every
 * result to the range 0..255 (MMX implementation).
 *
 * block     - 64 coefficients, stored row after row. The code reads them
 *             as 16-bit signed words (assumes DCTELEM is 16 bits wide --
 *             TODO confirm against the DCTELEM typedef).
 * pixels    - address of the top-left pixel; 8 bytes are read and then
 *             rewritten on each of 8 consecutive rows.
 * line_size - distance in bytes between the starts of two consecutive
 *             picture rows.
 */
void add_pixels_clamped(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    const DCTELEM *p;
    UINT8 *pix;
    int i;
    
    p = block;
    pix = pixels;
    /* each pass handles 16 coefficients and 2 rows; 4 passes do the block */
    for(i=0;i<4;i++) {
	/*
	 * Notes on the operands:
	 *  - the picture rows are read before they are written, so they
	 *    are declared read-write ("+m") rather than write-only;
	 *  - the coefficient pointer is passed in a register with explicit
	 *    displacements, because prefixing a number to an "m" operand
	 *    ("8%2") yields a wrong address whenever the compiler prints
	 *    the operand with its own displacement or as a symbol;
	 *  - mm7 is cleared inside this statement, since register contents
	 *    are not guaranteed to survive from one asm statement to the
	 *    next.
	 */
	__asm __volatile(
		"pxor	%%mm7, %%mm7\n\t"	/* mm7 = 0, used to widen bytes */
		"movq	(%2), %%mm0\n\t"	/* row 0, words 0-3 */
		"movq	8(%2), %%mm1\n\t"	/* row 0, words 4-7 */
		"movq	16(%2), %%mm2\n\t"	/* row 1, words 0-3 */
		"movq	24(%2), %%mm3\n\t"	/* row 1, words 4-7 */
		"movq	%0, %%mm4\n\t"		/* 8 pixels of row 0 */
		"movq	%1, %%mm6\n\t"		/* 8 pixels of row 1 */
		/* row 0: zero-extend pixels to words, saturating signed add */
		"movq	%%mm4, %%mm5\n\t"
		"punpcklbw %%mm7, %%mm4\n\t"
		"punpckhbw %%mm7, %%mm5\n\t"
		"paddsw	%%mm4, %%mm0\n\t"
		"paddsw	%%mm5, %%mm1\n\t"
		/* row 1: same treatment */
		"movq	%%mm6, %%mm5\n\t"
		"punpcklbw %%mm7, %%mm6\n\t"
		"punpckhbw %%mm7, %%mm5\n\t"
		"paddsw	%%mm6, %%mm2\n\t"
		"paddsw	%%mm5, %%mm3\n\t"
		/* signed words -> unsigned bytes with saturation */
		"packuswb %%mm1, %%mm0\n\t"
		"packuswb %%mm3, %%mm2\n\t"
		"movq	%%mm0, %0\n\t"
		"movq	%%mm2, %1\n\t"
		:"+m"(*pix), "+m"(*(pix+line_size))
		:"r"(p)
		/* the operands name single bytes only; "memory" covers the
		   full 8-byte accesses and the 32-byte read through p */
		:"memory");
        pix += line_size*2;
        p += 16;
    }
    /* leave MMX state so that later floating-point code works */
    emms();
}




More information about the MPlayer-cvslog mailing list