[Mplayer-cvslog] CVS: main/libavcodec/i386 dsputil_mmx.c,NONE,1.1
Nick Kurshev
nick at mplayer.dev.hu
Tue Jul 10 23:34:17 CEST 2001
Update of /cvsroot/mplayer/main/libavcodec/i386
In directory mplayer:/var/tmp.root/cvs-serv2779/main/libavcodec/i386
Added Files:
dsputil_mmx.c
Log Message:
Added MMX-optimized pixel operations. They speed up decoding by 6% on a K7 system.
--- NEW FILE ---
/*
* DSP utils
* Copyright (c) 2000 Gerard Lantau.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* MMX optimization by Nick Kurshev <nickols_k at mail.ru>
*/
/* pixel operations */
/*
cropTable[0..255] = idx;
cropTable[256..MAX_NEG_CROP(384)] = 255;
*/
/*
 * Store an 8x8 block of coefficients as pixels.
 *
 * Each coefficient is treated as a signed 16-bit word and saturated to the
 * unsigned 8-bit range by packuswb before being written.
 *
 * block     - 64 coefficients, read as 8 consecutive rows of 8 values
 * pixels    - address of the first destination row; 8 bytes are written per row
 * line_size - distance in bytes between consecutive destination rows
 */
void put_pixels_clamped(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    const DCTELEM *p;
    UINT8 *pix;
    int i;

    p = block;
    pix = pixels;
    /* two passes, each converting four rows (32 coefficients) */
    for(i=0;i<2;i++) {
        /*
         * The source pointer is passed in a register and every displacement
         * is written out explicitly.  Pasting a displacement in front of an
         * "m" operand (e.g. "8%4") only works when the compiler happens to
         * print the operand as a bare "(%reg)"; any other addressing form
         * yields a wrong address or an assembler error.
         * The "memory" clobber covers the loads done through the register
         * and the 8-byte stores behind the byte-sized "=m" operands.
         */
        __asm __volatile(
            "movq   (%4), %%mm0\n\t"
            "movq  8(%4), %%mm1\n\t"
            "movq 16(%4), %%mm2\n\t"
            "movq 24(%4), %%mm3\n\t"
            "movq 32(%4), %%mm4\n\t"
            "movq 40(%4), %%mm5\n\t"
            "movq 48(%4), %%mm6\n\t"
            "movq 56(%4), %%mm7\n\t"
            /* pack two 4-word halves into one row of 8 saturated bytes */
            "packuswb %%mm1, %%mm0\n\t"
            "packuswb %%mm3, %%mm2\n\t"
            "packuswb %%mm5, %%mm4\n\t"
            "packuswb %%mm7, %%mm6\n\t"
            "movq %%mm0, %0\n\t"
            "movq %%mm2, %1\n\t"
            "movq %%mm4, %2\n\t"
            "movq %%mm6, %3\n\t"
            :"=m"(*pix), "=m"(*(pix+line_size))
            ,"=m"(*(pix+line_size*2)), "=m"(*(pix+line_size*3))
            :"r"(p)
            :"memory");
        pix += line_size*4;
        p += 32;
    }
    emms();
}
/*
 * Add an 8x8 block of coefficients to the pixels already in the destination.
 *
 * Each destination byte is zero-extended to a 16-bit word, added to the
 * matching coefficient with signed saturation (paddsw), then saturated back
 * to the unsigned 8-bit range (packuswb) and stored in place.
 *
 * block     - 64 coefficients, read as 8 consecutive rows of 8 values
 * pixels    - address of the first row to update; 8 bytes are updated per row
 * line_size - distance in bytes between consecutive rows
 */
void add_pixels_clamped(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    const DCTELEM *p;
    UINT8 *pix;
    int i;

    p = block;
    pix = pixels;
    /*
     * mm7 = 0, used below to zero-extend bytes to words.  This relies on
     * nothing else touching mm7 between the asm statements of this function.
     */
    __asm __volatile("pxor %%mm7, %%mm7":::"memory");
    /* four passes, each updating two rows (16 coefficients) */
    for(i=0;i<4;i++) {
        /*
         * The two pixel rows are read before they are written, so they are
         * declared read-write ("+m") rather than write-only.  The coefficient
         * pointer is passed in a register with explicit displacements:
         * pasting a displacement in front of an "m" operand (e.g. "8%2")
         * breaks as soon as the operand is not printed as a bare "(%reg)".
         */
        __asm __volatile(
            "movq   (%2), %%mm0\n\t"
            "movq  8(%2), %%mm1\n\t"
            "movq 16(%2), %%mm2\n\t"
            "movq 24(%2), %%mm3\n\t"
            "movq %0, %%mm4\n\t"
            "movq %1, %%mm6\n\t"
            /* first row: widen 8 bytes to 2x4 words and add */
            "movq %%mm4, %%mm5\n\t"
            "punpcklbw %%mm7, %%mm4\n\t"
            "punpckhbw %%mm7, %%mm5\n\t"
            "paddsw %%mm4, %%mm0\n\t"
            "paddsw %%mm5, %%mm1\n\t"
            /* second row: same treatment */
            "movq %%mm6, %%mm5\n\t"
            "punpcklbw %%mm7, %%mm6\n\t"
            "punpckhbw %%mm7, %%mm5\n\t"
            "paddsw %%mm6, %%mm2\n\t"
            "paddsw %%mm5, %%mm3\n\t"
            /* narrow back to bytes with unsigned saturation and store */
            "packuswb %%mm1, %%mm0\n\t"
            "packuswb %%mm3, %%mm2\n\t"
            "movq %%mm0, %0\n\t"
            "movq %%mm2, %1\n\t"
            :"+m"(*pix), "+m"(*(pix+line_size))
            :"r"(p)
            :"memory");
        pix += line_size*2;
        p += 16;
    }
    emms();
}
More information about the MPlayer-cvslog
mailing list