[Ffmpeg-cvslog] CVS: ffmpeg/libavcodec/i386 cputest.c, 1.14, 1.15 dsputil_mmx.c, 1.106, 1.107 dsputil_mmx_avg.h, 1.26, 1.27 dsputil_mmx_rnd.h, 1.19, 1.20 fdct_mmx.c, 1.17, 1.18 fft_sse.c, 1.3, 1.4 h264dsp_mmx.c, 1.4, 1.5 idct_mmx_xvid.c, 1.2, 1.3 motion_est_mmx.c, 1.16, 1.17 mpegvideo_mmx.c, 1.34, 1.35 mpegvideo_mmx_template.c, 1.26, 1.27 simple_idct_mmx.c, 1.13, 1.14 vp3dsp_mmx.c, 1.5, 1.6 vp3dsp_sse2.c, 1.7, 1.8
Diego Biurrun CVS
diego
Sat Dec 17 19:15:12 CET 2005
- Previous message: [Ffmpeg-cvslog] CVS: ffmpeg/libavcodec/ppc dsputil_altivec.c, 1.27, 1.28 dsputil_h264_altivec.c, 1.1, 1.2 dsputil_h264_template_altivec.c, 1.1, 1.2 dsputil_ppc.c, 1.32, 1.33 fft_altivec.c, 1.9, 1.10 gcc_fixes.h, 1.5, 1.6 gmc_altivec.c, 1.10, 1.11 idct_altivec.c, 1.8, 1.9 mpegvideo_altivec.c, 1.13, 1.14 mpegvideo_ppc.c, 1.12, 1.13
- Next message: [Ffmpeg-cvslog] CVS: ffmpeg/libavcodec h263dec.c,1.171,1.172
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/ffmpeg/ffmpeg/libavcodec/i386
In directory mail:/var2/tmp/cvs-serv18206/libavcodec/i386
Modified Files:
cputest.c dsputil_mmx.c dsputil_mmx_avg.h dsputil_mmx_rnd.h
fdct_mmx.c fft_sse.c h264dsp_mmx.c idct_mmx_xvid.c
motion_est_mmx.c mpegvideo_mmx.c mpegvideo_mmx_template.c
simple_idct_mmx.c vp3dsp_mmx.c vp3dsp_sse2.c
Log Message:
COSMETICS: Remove all trailing whitespace.
Index: cputest.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/cputest.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- cputest.c 13 Dec 2004 16:11:38 -0000 1.14
+++ cputest.c 17 Dec 2005 18:14:32 -0000 1.15
@@ -29,28 +29,28 @@
int eax, ebx, ecx, edx;
int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
long a, c;
-
+
__asm__ __volatile__ (
/* See if CPUID instruction is supported ... */
/* ... Get copies of EFLAGS into eax and ecx */
"pushf\n\t"
"pop %0\n\t"
"mov %0, %1\n\t"
-
+
/* ... Toggle the ID bit in one copy and store */
/* to the EFLAGS reg */
"xor $0x200000, %0\n\t"
"push %0\n\t"
"popf\n\t"
-
+
/* ... Get the (hopefully modified) EFLAGS */
"pushf\n\t"
"pop %0\n\t"
: "=a" (a), "=c" (c)
:
- : "cc"
+ : "cc"
);
-
+
if (a == c)
return 0; /* CPUID not supported */
@@ -60,9 +60,9 @@
cpuid(1, eax, ebx, ecx, std_caps);
if (std_caps & (1<<23))
rval |= MM_MMX;
- if (std_caps & (1<<25))
+ if (std_caps & (1<<25))
rval |= MM_MMXEXT | MM_SSE;
- if (std_caps & (1<<26))
+ if (std_caps & (1<<26))
rval |= MM_SSE2;
}
@@ -103,18 +103,18 @@
According to the table, the only CPU which supports level
2 is also the only one which supports extended CPUID levels.
*/
- if (eax < 2)
+ if (eax < 2)
return rval;
if (ext_caps & (1<<24))
rval |= MM_MMXEXT;
}
#if 0
- av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
- (rval&MM_MMX) ? "MMX ":"",
- (rval&MM_MMXEXT) ? "MMX2 ":"",
- (rval&MM_SSE) ? "SSE ":"",
- (rval&MM_SSE2) ? "SSE2 ":"",
- (rval&MM_3DNOW) ? "3DNow ":"",
+ av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+ (rval&MM_MMX) ? "MMX ":"",
+ (rval&MM_MMXEXT) ? "MMX2 ":"",
+ (rval&MM_SSE) ? "SSE ":"",
+ (rval&MM_SSE2) ? "SSE2 ":"",
+ (rval&MM_3DNOW) ? "3DNow ":"",
(rval&MM_3DNOWEXT) ? "3DNowExt ":"");
#endif
return rval;
Index: dsputil_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx.c,v
retrieving revision 1.106
retrieving revision 1.107
diff -u -d -r1.106 -r1.107
--- dsputil_mmx.c 12 Nov 2005 05:23:25 -0000 1.106
+++ dsputil_mmx.c 17 Dec 2005 18:14:33 -0000 1.107
@@ -602,9 +602,9 @@
const int strength= ff_h263_loop_filter_strength[qscale];
asm volatile(
-
+
H263_LOOP_FILTER
-
+
"movq %%mm3, %1 \n\t"
"movq %%mm4, %2 \n\t"
"movq %%mm5, %0 \n\t"
@@ -634,7 +634,7 @@
"movd %%mm1, %2 \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
"movd %%mm1, %3 \n\t"
-
+
: "=m" (*(uint32_t*)(dst + 0*dst_stride)),
"=m" (*(uint32_t*)(dst + 1*dst_stride)),
"=m" (*(uint32_t*)(dst + 2*dst_stride)),
@@ -650,14 +650,14 @@
const int strength= ff_h263_loop_filter_strength[qscale];
uint64_t temp[4] __attribute__ ((aligned(8)));
uint8_t *btemp= (uint8_t*)temp;
-
+
src -= 2;
transpose4x4(btemp , src , 8, stride);
transpose4x4(btemp+4, src + 4*stride, 8, stride);
asm volatile(
H263_LOOP_FILTER // 5 3 4 6
-
+
: "+m" (temp[0]),
"+m" (temp[1]),
"+m" (temp[2]),
@@ -796,7 +796,7 @@
"psrlq $32, %%mm7\n" /* shift hi dword to lo */
"paddd %%mm7,%%mm1\n"
"movd %%mm1,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -856,7 +856,7 @@
"psrlq $32, %%mm7\n" /* shift hi dword to lo */
"paddd %%mm7,%%mm1\n"
"movd %%mm1,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -919,7 +919,7 @@
"psrldq $4, %%xmm7\n" /* shift hi dword to lo */
"paddd %%xmm1,%%xmm7\n"
"movd %%xmm7,%3\n"
- : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp)
: "r" ((long)line_size));
return tmp;
}
@@ -930,7 +930,7 @@
"movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n"
-
+
"movq (%0),%%mm0\n"
"movq %%mm0, %%mm1\n"
"psllq $8, %%mm0\n"
@@ -944,9 +944,9 @@
"punpckhbw %%mm7,%%mm3\n"
"psubw %%mm1, %%mm0\n"
"psubw %%mm3, %%mm2\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq %%mm4, %%mm1\n"
"psllq $8, %%mm4\n"
@@ -968,14 +968,14 @@
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
"add %2,%0\n"
"1:\n"
-
+
"movq (%0),%%mm0\n"
"movq %%mm0, %%mm1\n"
"psllq $8, %%mm0\n"
@@ -997,13 +997,13 @@
"pcmpgtw %%mm5, %%mm1\n\t"
"pxor %%mm3, %%mm4\n"
"pxor %%mm1, %%mm5\n"
- "psubw %%mm3, %%mm4\n"
+ "psubw %%mm3, %%mm4\n"
"psubw %%mm1, %%mm5\n"
"paddw %%mm4, %%mm5\n"
"paddw %%mm5, %%mm6\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq %%mm4, %%mm1\n"
"psllq $8, %%mm4\n"
@@ -1025,7 +1025,7 @@
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
@@ -1038,12 +1038,12 @@
"punpcklwd %%mm7,%%mm0\n"
"punpckhwd %%mm7,%%mm6\n"
"paddd %%mm0, %%mm6\n"
-
+
"movq %%mm6,%%mm0\n"
"psrlq $32, %%mm6\n"
"paddd %%mm6,%%mm0\n"
"movd %%mm0,%1\n"
- : "+r" (pix1), "=r"(tmp)
+ : "+r" (pix1), "=r"(tmp)
: "r" ((long)line_size) , "g" (h-2)
: "%ecx");
return tmp;
@@ -1056,7 +1056,7 @@
"movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n"
-
+
"movq (%0),%%mm0\n"
"movq 1(%0),%%mm1\n"
"movq %%mm0, %%mm2\n"
@@ -1067,9 +1067,9 @@
"punpckhbw %%mm7,%%mm3\n"
"psubw %%mm1, %%mm0\n"
"psubw %%mm3, %%mm2\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq 1(%0),%%mm1\n"
"movq %%mm4, %%mm5\n"
@@ -1088,14 +1088,14 @@
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
"add %2,%0\n"
"1:\n"
-
+
"movq (%0),%%mm0\n"
"movq 1(%0),%%mm1\n"
"movq %%mm0, %%mm2\n"
@@ -1118,9 +1118,9 @@
"psubw %%mm1, %%mm5\n"
"paddw %%mm4, %%mm5\n"
"paddw %%mm5, %%mm6\n"
-
+
"add %2,%0\n"
-
+
"movq (%0),%%mm4\n"
"movq 1(%0),%%mm1\n"
"movq %%mm4, %%mm5\n"
@@ -1139,7 +1139,7 @@
"pcmpgtw %%mm2, %%mm1\n\t"
"pxor %%mm3, %%mm0\n"
"pxor %%mm1, %%mm2\n"
- "psubw %%mm3, %%mm0\n"
+ "psubw %%mm3, %%mm0\n"
"psubw %%mm1, %%mm2\n"
"paddw %%mm0, %%mm2\n"
"paddw %%mm2, %%mm6\n"
@@ -1152,12 +1152,12 @@
"punpcklwd %%mm7,%%mm0\n"
"punpckhwd %%mm7,%%mm6\n"
"paddd %%mm0, %%mm6\n"
-
+
"movq %%mm6,%%mm0\n"
"psrlq $32, %%mm6\n"
"paddd %%mm6,%%mm0\n"
"movd %%mm0,%1\n"
- : "+r" (pix1), "=r"(tmp)
+ : "+r" (pix1), "=r"(tmp)
: "r" ((long)line_size) , "g" (h-2)
: "%ecx");
return tmp + hf_noise8_mmx(pix+8, line_size, h);
@@ -1186,10 +1186,10 @@
static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0), %%mm2\n"\
"movq 8(%0), %%mm3\n"\
@@ -1213,7 +1213,7 @@
"paddw %%mm2, " #in0 "\n"\
"paddw " #in0 ", %%mm6\n"
-
+
asm volatile (
"movl %3,%%ecx\n"
"pxor %%mm6,%%mm6\n"
@@ -1224,11 +1224,11 @@
"subl $2, %%ecx\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
@@ -1239,7 +1239,7 @@
"psrlq $16, %%mm0\n"
"paddw %%mm6,%%mm0\n"
"movd %%mm0,%1\n"
- : "+r" (pix), "=r"(tmp)
+ : "+r" (pix), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp & 0xFFFF;
@@ -1248,10 +1248,10 @@
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0), " #out0 "\n"\
"movq 8(%0), " #out1 "\n"\
@@ -1271,16 +1271,16 @@
"subl $2, %%ecx\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
"movd %%mm6,%1\n"
- : "+r" (pix), "=r"(tmp)
+ : "+r" (pix), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -1289,11 +1289,11 @@
static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix1) & 7) == 0);
assert( (((int)pix2) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0),%%mm2\n"\
"movq (%1)," #out0 "\n"\
@@ -1324,7 +1324,7 @@
"paddw %%mm2, " #in0 "\n"\
"paddw " #in0 ", %%mm6\n"
-
+
asm volatile (
"movl %4,%%ecx\n"
"pxor %%mm6,%%mm6\n"
@@ -1344,11 +1344,11 @@
"pxor %%mm7, %%mm1\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
@@ -1359,7 +1359,7 @@
"psrlq $16, %%mm0\n"
"paddw %%mm6,%%mm0\n"
"movd %%mm0,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp & 0x7FFF;
@@ -1368,11 +1368,11 @@
static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
-
+
assert( (((int)pix1) & 7) == 0);
assert( (((int)pix2) & 7) == 0);
assert((line_size &7) ==0);
-
+
#define SUM(in0, in1, out0, out1) \
"movq (%0)," #out0 "\n"\
"movq (%1),%%mm2\n"\
@@ -1408,16 +1408,16 @@
"pxor %%mm7, %%mm1\n"
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
"1:\n"
-
+
SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-
+
SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
+
"subl $2, %%ecx\n"
"jnz 1b\n"
"movd %%mm6,%2\n"
- : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+ : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h)
: "%ecx");
return tmp;
@@ -1449,7 +1449,7 @@
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
long i=0;
uint8_t l, lt;
-
+
asm volatile(
"1: \n\t"
"movq -1(%1, %0), %%mm0 \n\t" // LT
@@ -1462,7 +1462,7 @@
"movq %%mm4, %%mm5 \n\t" // L
"pmaxub %%mm1, %%mm4 \n\t" // max(T, L)
"pminub %%mm5, %%mm1 \n\t" // min(T, L)
- "pminub %%mm2, %%mm4 \n\t"
+ "pminub %%mm2, %%mm4 \n\t"
"pmaxub %%mm1, %%mm4 \n\t"
"psubb %%mm4, %%mm3 \n\t" // dst - pred
"movq %%mm3, (%3, %0) \n\t"
@@ -1475,9 +1475,9 @@
l= *left;
lt= *left_top;
-
+
dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);
-
+
*left_top= src1[w-1];
*left = src2[w-1];
}
@@ -1521,7 +1521,7 @@
"psubw " #a ", " #z " \n\t"\
"pmaxsw " #z ", " #a " \n\t"\
"paddusw " #a ", " #sum " \n\t"
-
+
#define SBUTTERFLY(a,b,t,n)\
"movq " #a ", " #t " \n\t" /* abcd */\
"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
@@ -1548,7 +1548,7 @@
static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
uint64_t temp[16] __align8;
int sum=0;
-
+
assert(h==8);
diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -1556,38 +1556,38 @@
asm volatile(
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 112(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 112(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 120(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 120(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
"movq %%mm7, %%mm5 \n\t"//FIXME remove
"movq %%mm6, %%mm7 \n\t"
"movq %%mm0, %%mm6 \n\t"
// STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
-
+
LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
// LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, 64(%1) \n\t"
MMABS(%%mm0, %%mm7)
@@ -1600,10 +1600,10 @@
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM(%%mm1, %%mm7, %%mm0)
"movq %%mm0, 64(%1) \n\t"
-
+
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, (%1) \n\t"
MMABS(%%mm0, %%mm7)
@@ -1617,7 +1617,7 @@
MMABS_SUM(%%mm1, %%mm7, %%mm0)
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM(%%mm1, %%mm7, %%mm0)
-
+
"movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
@@ -1625,7 +1625,7 @@
"psrlq $16, %%mm0 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
-
+
: "=r" (sum)
: "r"(temp)
);
@@ -1635,7 +1635,7 @@
static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
uint64_t temp[16] __align8;
int sum=0;
-
+
assert(h==8);
diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -1643,38 +1643,38 @@
asm volatile(
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 112(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 112(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
-
+
"movq %%mm7, 120(%1) \n\t"
-
+
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
-
+
"movq 120(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
"movq %%mm7, %%mm5 \n\t"//FIXME remove
"movq %%mm6, %%mm7 \n\t"
"movq %%mm0, %%mm6 \n\t"
// STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
-
+
LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
// LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, 64(%1) \n\t"
MMABS_MMX2(%%mm0, %%mm7)
@@ -1687,10 +1687,10 @@
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
"movq %%mm0, 64(%1) \n\t"
-
+
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
-
+
HADAMARD48
"movq %%mm7, (%1) \n\t"
MMABS_MMX2(%%mm0, %%mm7)
@@ -1704,13 +1704,13 @@
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
-
+
"pshufw $0x0E, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"pshufw $0x01, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
-
+
: "=r" (sum)
: "r"(temp)
);
@@ -2405,7 +2405,7 @@
static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
long i=0;
-
+
assert(ABS(scale) < 256);
scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
@@ -2413,11 +2413,11 @@
"pcmpeqw %%mm6, %%mm6 \n\t" // -1w
"psrlw $15, %%mm6 \n\t" // 1w
"pxor %%mm7, %%mm7 \n\t"
- "movd %4, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
+ "movd %4, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
"1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
"movq 8(%1, %0), %%mm1 \n\t"
"pmulhw %%mm5, %%mm0 \n\t"
"pmulhw %%mm5, %%mm1 \n\t"
@@ -2444,7 +2444,7 @@
"paddd %%mm6, %%mm7 \n\t"
"psrld $2, %%mm7 \n\t"
"movd %%mm7, %0 \n\t"
-
+
: "+r" (i)
: "r"(basis), "r"(rem), "r"(weight), "g"(scale)
);
@@ -2453,21 +2453,21 @@
static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
long i=0;
-
+
if(ABS(scale) < 256){
scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
asm volatile(
"pcmpeqw %%mm6, %%mm6 \n\t" // -1w
"psrlw $15, %%mm6 \n\t" // 1w
- "movd %3, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
+ "movd %3, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
"1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
"movq 8(%1, %0), %%mm1 \n\t"
"pmulhw %%mm5, %%mm0 \n\t"
"pmulhw %%mm5, %%mm1 \n\t"
- "paddw %%mm6, %%mm0 \n\t"
+ "paddw %%mm6, %%mm0 \n\t"
"paddw %%mm6, %%mm1 \n\t"
"psraw $1, %%mm0 \n\t"
"psraw $1, %%mm1 \n\t"
@@ -2478,19 +2478,19 @@
"add $16, %0 \n\t"
"cmp $128, %0 \n\t" //FIXME optimize & bench
" jb 1b \n\t"
-
+
: "+r" (i)
: "r"(basis), "r"(rem), "g"(scale)
);
}else{
for(i=0; i<8*8; i++){
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
- }
+ }
}
}
#include "h264dsp_mmx.c"
-
+
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
@@ -2563,7 +2563,7 @@
add_pixels_clamped_mmx(block, dest, line_size);
}
#endif
-
+
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
mm_flags = mm_support();
@@ -2701,14 +2701,14 @@
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
-
+
c->add_bytes= add_bytes_mmx;
#ifdef CONFIG_ENCODERS
c->diff_bytes= diff_bytes_mmx;
-
+
c->hadamard8_diff[0]= hadamard8_diff16_mmx;
c->hadamard8_diff[1]= hadamard8_diff_mmx;
-
+
c->pix_norm1 = pix_norm1_mmx;
c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx;
c->sse[1] = sse8_mmx;
@@ -2719,19 +2719,19 @@
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->vsad[0] = vsad16_mmx;
}
-
+
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->try_8x8basis= try_8x8basis_mmx;
}
c->add_8x8basis= add_8x8basis_mmx;
-
+
#endif //CONFIG_ENCODERS
c->h263_v_loop_filter= h263_v_loop_filter_mmx;
- c->h263_h_loop_filter= h263_h_loop_filter_mmx;
+ c->h263_h_loop_filter= h263_h_loop_filter_mmx;
c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx;
c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
-
+
if (mm_flags & MM_MMXEXT) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
@@ -2945,7 +2945,7 @@
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
}
}
-
+
#ifdef CONFIG_ENCODERS
dsputil_init_pix_mmx(c, avctx);
#endif //CONFIG_ENCODERS
Index: dsputil_mmx_avg.h
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx_avg.h,v
retrieving revision 1.26
retrieving revision 1.27
diff -u -d -r1.26 -r1.27
--- dsputil_mmx_avg.h 17 Apr 2005 13:30:45 -0000 1.26
+++ dsputil_mmx_avg.h 17 Dec 2005 18:14:33 -0000 1.27
@@ -21,7 +21,7 @@
* mostly rewritten by Michael Niedermayer <michaelni at gmx.at>
* and improved by Zdenek Kabelac <kabi at users.sf.net>
*/
-
+
/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
clobber bug - now it will work with 2.95.2 and also with -fPIC
*/
@@ -100,7 +100,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
}
@@ -147,7 +147,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -217,7 +217,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -272,7 +272,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
}
@@ -324,7 +324,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -412,7 +412,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -466,7 +466,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
@@ -539,13 +539,13 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
//the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
:"r"(src1Stride), "r"(dstStride)
:"memory");*/
}
-
+
/* GL: this function does incorrect rounding if overflow */
static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
@@ -746,7 +746,7 @@
:"%"REG_a, "memory");
}
-// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter
+// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter
static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
MOVQ_BONE(mm6);
Index: dsputil_mmx_rnd.h
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -d -r1.19 -r1.20
--- dsputil_mmx_rnd.h 6 Sep 2005 21:25:35 -0000 1.19
+++ dsputil_mmx_rnd.h 17 Dec 2005 18:14:33 -0000 1.20
@@ -197,7 +197,7 @@
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"((long)src1Stride), "D"((long)dstStride)
- :"memory");
+ :"memory");
}
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
Index: fdct_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/fdct_mmx.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -d -r1.17 -r1.18
--- fdct_mmx.c 1 Aug 2005 20:07:04 -0000 1.17
+++ fdct_mmx.c 17 Dec 2005 18:14:33 -0000 1.18
@@ -5,7 +5,7 @@
* SSE2 optimization is Copyright (c) 2004 Denes Balatoni.
*
* from fdctam32.c - AP922 MMX(3D-Now) forward-DCT
- *
+ *
* Intel Application Note AP-922 - fast, precise implementation of DCT
* http://developer.intel.com/vtune/cbts/appnotes.htm
*
@@ -51,7 +51,7 @@
static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
-struct
+struct
{
const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16);
} fdct_r_row_sse2 ATTR_ALIGN(16)=
@@ -61,90 +61,90 @@
//static const long fdct_r_row_sse2[4] ATTR_ALIGN(16) = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff table
- 16384, 16384, 22725, 19266,
- 16384, 16384, 12873, 4520,
- 21407, 8867, 19266, -4520,
- -8867, -21407, -22725, -12873,
- 16384, -16384, 12873, -22725,
- -16384, 16384, 4520, 19266,
- 8867, -21407, 4520, -12873,
- 21407, -8867, 19266, -22725,
+ 16384, 16384, 22725, 19266,
+ 16384, 16384, 12873, 4520,
+ 21407, 8867, 19266, -4520,
+ -8867, -21407, -22725, -12873,
+ 16384, -16384, 12873, -22725,
+ -16384, 16384, 4520, 19266,
+ 8867, -21407, 4520, -12873,
+ 21407, -8867, 19266, -22725,
- 22725, 22725, 31521, 26722,
- 22725, 22725, 17855, 6270,
- 29692, 12299, 26722, -6270,
- -12299, -29692, -31521, -17855,
- 22725, -22725, 17855, -31521,
- -22725, 22725, 6270, 26722,
- 12299, -29692, 6270, -17855,
- 29692, -12299, 26722, -31521,
+ 22725, 22725, 31521, 26722,
+ 22725, 22725, 17855, 6270,
+ 29692, 12299, 26722, -6270,
+ -12299, -29692, -31521, -17855,
+ 22725, -22725, 17855, -31521,
+ -22725, 22725, 6270, 26722,
+ 12299, -29692, 6270, -17855,
+ 29692, -12299, 26722, -31521,
- 21407, 21407, 29692, 25172,
- 21407, 21407, 16819, 5906,
- 27969, 11585, 25172, -5906,
- -11585, -27969, -29692, -16819,
- 21407, -21407, 16819, -29692,
- -21407, 21407, 5906, 25172,
- 11585, -27969, 5906, -16819,
- 27969, -11585, 25172, -29692,
+ 21407, 21407, 29692, 25172,
+ 21407, 21407, 16819, 5906,
+ 27969, 11585, 25172, -5906,
+ -11585, -27969, -29692, -16819,
+ 21407, -21407, 16819, -29692,
+ -21407, 21407, 5906, 25172,
+ 11585, -27969, 5906, -16819,
+ 27969, -11585, 25172, -29692,
- 19266, 19266, 26722, 22654,
- 19266, 19266, 15137, 5315,
- 25172, 10426, 22654, -5315,
- -10426, -25172, -26722, -15137,
- 19266, -19266, 15137, -26722,
- -19266, 19266, 5315, 22654,
- 10426, -25172, 5315, -15137,
- 25172, -10426, 22654, -26722,
+ 19266, 19266, 26722, 22654,
+ 19266, 19266, 15137, 5315,
+ 25172, 10426, 22654, -5315,
+ -10426, -25172, -26722, -15137,
+ 19266, -19266, 15137, -26722,
+ -19266, 19266, 5315, 22654,
+ 10426, -25172, 5315, -15137,
+ 25172, -10426, 22654, -26722,
- 16384, 16384, 22725, 19266,
- 16384, 16384, 12873, 4520,
- 21407, 8867, 19266, -4520,
- -8867, -21407, -22725, -12873,
- 16384, -16384, 12873, -22725,
- -16384, 16384, 4520, 19266,
- 8867, -21407, 4520, -12873,
- 21407, -8867, 19266, -22725,
+ 16384, 16384, 22725, 19266,
+ 16384, 16384, 12873, 4520,
+ 21407, 8867, 19266, -4520,
+ -8867, -21407, -22725, -12873,
+ 16384, -16384, 12873, -22725,
+ -16384, 16384, 4520, 19266,
+ 8867, -21407, 4520, -12873,
+ 21407, -8867, 19266, -22725,
- 19266, 19266, 26722, 22654,
- 19266, 19266, 15137, 5315,
- 25172, 10426, 22654, -5315,
- -10426, -25172, -26722, -15137,
- 19266, -19266, 15137, -26722,
- -19266, 19266, 5315, 22654,
- 10426, -25172, 5315, -15137,
- 25172, -10426, 22654, -26722,
+ 19266, 19266, 26722, 22654,
+ 19266, 19266, 15137, 5315,
+ 25172, 10426, 22654, -5315,
+ -10426, -25172, -26722, -15137,
+ 19266, -19266, 15137, -26722,
+ -19266, 19266, 5315, 22654,
+ 10426, -25172, 5315, -15137,
+ 25172, -10426, 22654, -26722,
- 21407, 21407, 29692, 25172,
- 21407, 21407, 16819, 5906,
- 27969, 11585, 25172, -5906,
- -11585, -27969, -29692, -16819,
- 21407, -21407, 16819, -29692,
- -21407, 21407, 5906, 25172,
- 11585, -27969, 5906, -16819,
- 27969, -11585, 25172, -29692,
+ 21407, 21407, 29692, 25172,
+ 21407, 21407, 16819, 5906,
+ 27969, 11585, 25172, -5906,
+ -11585, -27969, -29692, -16819,
+ 21407, -21407, 16819, -29692,
+ -21407, 21407, 5906, 25172,
+ 11585, -27969, 5906, -16819,
+ 27969, -11585, 25172, -29692,
- 22725, 22725, 31521, 26722,
- 22725, 22725, 17855, 6270,
- 29692, 12299, 26722, -6270,
- -12299, -29692, -31521, -17855,
- 22725, -22725, 17855, -31521,
- -22725, 22725, 6270, 26722,
- 12299, -29692, 6270, -17855,
- 29692, -12299, 26722, -31521,
+ 22725, 22725, 31521, 26722,
+ 22725, 22725, 17855, 6270,
+ 29692, 12299, 26722, -6270,
+ -12299, -29692, -31521, -17855,
+ 22725, -22725, 17855, -31521,
+ -22725, 22725, 6270, 26722,
+ 12299, -29692, 6270, -17855,
+ 29692, -12299, 26722, -31521,
};
-struct
+struct
{
const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16);
} tab_frw_01234567_sse2 ATTR_ALIGN(16) =
{{
-//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = { // forward_dct coeff table
+//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = { // forward_dct coeff table
#define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
C4, C4, C5, C7, C2, C6, C3, -C7, \
-C4, C4, C7, C3, C6, -C2, C7, -C5, \
- C4, -C4, C5, -C1, C2, -C6, C3, -C1,
-// c1..c7 * cos(pi/4) * 2^15
+ C4, -C4, C5, -C1, C2, -C6, C3, -C1,
+// c1..c7 * cos(pi/4) * 2^15
#define C1 22725
#define C2 21407
#define C3 19266
@@ -355,17 +355,17 @@
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
- "movdqa \\t+48(%1), %%xmm7 \n\t"
+ "movdqa \\t+48(%1), %%xmm7 \n\t"
"movdqa \\t(%1), %%xmm4 \n\t"
- "movdqa \\t+16(%1), %%xmm5 \n\t"
+ "movdqa \\t+16(%1), %%xmm5 \n\t"
".endm \n\t"
".macro FDCT_ROW_SSE2_H2 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
- "movdqa \\t+48(%1), %%xmm7 \n\t"
+ "movdqa \\t+48(%1), %%xmm7 \n\t"
".endm \n\t"
- ".macro FDCT_ROW_SSE2 i \n\t"
+ ".macro FDCT_ROW_SSE2 i \n\t"
"movq %%xmm2, %%xmm1 \n\t"
"pshuflw $27, %%xmm0, %%xmm0 \n\t"
"paddsw %%xmm0, %%xmm1 \n\t"
@@ -376,7 +376,7 @@
"pmaddwd %%xmm1, %%xmm7 \n\t"
"pmaddwd %%xmm5, %%xmm2 \n\t"
"pmaddwd %%xmm4, %%xmm1 \n\t"
- "paddd %%xmm7, %%xmm3 \n\t"
+ "paddd %%xmm7, %%xmm3 \n\t"
"paddd %%xmm2, %%xmm1 \n\t"
"paddd %%xmm6, %%xmm3 \n\t"
"paddd %%xmm6, %%xmm1 \n\t"
@@ -384,8 +384,8 @@
"psrad %3, %%xmm1 \n\t"
"packssdw %%xmm3, %%xmm1 \n\t"
"movdqa %%xmm1, \\i(%4) \n\t"
- ".endm \n\t"
- "movdqa (%2), %%xmm6 \n\t"
+ ".endm \n\t"
+ "movdqa (%2), %%xmm6 \n\t"
"FDCT_ROW_SSE2_H1 0 0 \n\t"
"FDCT_ROW_SSE2 0 \n\t"
"FDCT_ROW_SSE2_H2 64 0 \n\t"
@@ -411,7 +411,7 @@
}
static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
-{
+{
pshufw_m2r(*(in + 4), mm5, 0x1B);
movq_m2r(*(in + 0), mm0);
movq_r2r(mm0, mm1);
@@ -454,7 +454,7 @@
}
static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
-{
+{
//FIXME reorder (i dont have a old mmx only cpu here to benchmark ...)
movd_m2r(*(in + 6), mm1);
punpcklwd_m2r(*(in + 4), mm1);
@@ -547,7 +547,7 @@
}
}
-void ff_fdct_sse2(int16_t *block)
+void ff_fdct_sse2(int16_t *block)
{
int64_t align_tmp[16] ATTR_ALIGN(8);
int16_t * const block_tmp= (int16_t*)align_tmp;
Index: fft_sse.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/fft_sse.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- fft_sse.c 13 Mar 2004 21:43:23 -0000 1.3
+++ fft_sse.c 17 Dec 2005 18:14:33 -0000 1.4
@@ -23,13 +23,13 @@
#include <xmmintrin.h>
-static const float p1p1p1m1[4] __attribute__((aligned(16))) =
+static const float p1p1p1m1[4] __attribute__((aligned(16))) =
{ 1.0, 1.0, 1.0, -1.0 };
-static const float p1p1m1p1[4] __attribute__((aligned(16))) =
+static const float p1p1m1p1[4] __attribute__((aligned(16))) =
{ 1.0, 1.0, -1.0, 1.0 };
-static const float p1p1m1m1[4] __attribute__((aligned(16))) =
+static const float p1p1m1m1[4] __attribute__((aligned(16))) =
{ 1.0, 1.0, -1.0, -1.0 };
#if 0
@@ -107,27 +107,27 @@
a = *(__m128 *)p;
b = *(__m128 *)q;
-
+
/* complex mul */
c = *(__m128 *)cptr;
/* cre*re cim*re */
- t1 = _mm_mul_ps(c,
- _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0)));
+ t1 = _mm_mul_ps(c,
+ _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0)));
c = *(__m128 *)(cptr + 2);
/* -cim*im cre*im */
t2 = _mm_mul_ps(c,
- _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1)));
+ _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1)));
b = _mm_add_ps(t1, t2);
-
+
/* butterfly */
*(__m128 *)p = _mm_add_ps(a, b);
*(__m128 *)q = _mm_sub_ps(a, b);
-
+
p += 2;
q += 2;
cptr += 4;
} while (--k);
-
+
p += nloops;
q += nloops;
} while (--j);
Index: h264dsp_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/h264dsp_mmx.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- h264dsp_mmx.c 27 Oct 2005 06:45:29 -0000 1.4
+++ h264dsp_mmx.c 17 Dec 2005 18:14:33 -0000 1.5
@@ -384,7 +384,7 @@
"psraw $5, %%mm6 \n\t"\
"packuswb %%mm6, %%mm6 \n\t"\
OP(%%mm6, (%1), A, d)\
- "add %3, %1 \n\t"
+ "add %3, %1 \n\t"
#define QPEL_H264HV(A,B,C,D,E,F,OF)\
"movd (%0), "#F" \n\t"\
@@ -399,7 +399,7 @@
"paddw "#F", "#A" \n\t"\
"paddw "#A", %%mm6 \n\t"\
"movq %%mm6, "#OF"(%1) \n\t"
-
+
#define QPEL_H264(OPNAME, OP, MMX)\
static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=4;\
Index: idct_mmx_xvid.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/idct_mmx_xvid.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- idct_mmx_xvid.c 10 Sep 2005 19:03:37 -0000 1.2
+++ idct_mmx_xvid.c 17 Dec 2005 18:14:33 -0000 1.3
@@ -72,13 +72,13 @@
//-----------------------------------------------------------------------------
-static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
+static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
13036,13036,13036,13036, // tg * (2<<16) + 0.5
27146,27146,27146,27146, // tg * (2<<16) + 0.5
-21746,-21746,-21746,-21746, // tg * (2<<16) + 0.5
23170,23170,23170,23170}; // cos * (2<<15) + 0.5
-static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = {
+static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = {
65536,65536,
3597,3597,
2260,2260,
@@ -148,7 +148,7 @@
//-----------------------------------------------------------------------------
// Table for rows 0,4 - constants are multiplied by cos_4_16
-static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
+static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
16384,16384,16384,-16384, // movq-> w06 w04 w02 w00
21407,8867,8867,-21407, // w07 w05 w03 w01
16384,-16384,16384,16384, // w14 w12 w10 w08
@@ -190,7 +190,7 @@
//-----------------------------------------------------------------------------
// %3 for rows 0,4 - constants are multiplied by cos_4_16
-static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
+static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
16384,21407,16384,8867, // movq-> w05 w04 w01 w00
16384,8867,-16384,-21407, // w07 w06 w03 w02
16384,-8867,16384,-21407, // w13 w12 w09 w08
@@ -501,7 +501,7 @@
DCT_8_INV_ROW_MMX(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
DCT_8_INV_ROW_MMX(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
DCT_8_INV_ROW_MMX(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
-
+
//# Process the columns (4 at a time)
DCT_8_INV_COL(0(%0), 0(%0))
DCT_8_INV_COL(8(%0), 8(%0))
@@ -524,7 +524,7 @@
DCT_8_INV_ROW_XMM(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
DCT_8_INV_ROW_XMM(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
DCT_8_INV_ROW_XMM(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
-
+
//# Process the columns (4 at a time)
DCT_8_INV_COL(0(%0), 0(%0))
DCT_8_INV_COL(8(%0), 8(%0))
Index: motion_est_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/motion_est_mmx.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -d -r1.16 -r1.17
--- motion_est_mmx.c 11 Oct 2004 02:19:29 -0000 1.16
+++ motion_est_mmx.c 17 Dec 2005 18:14:33 -0000 1.17
@@ -393,7 +393,7 @@
c->sad[0]= sad16_mmx2;
c->sad[1]= sad8_mmx2;
-
+
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->pix_abs[0][1] = sad16_x2_mmx2;
c->pix_abs[0][2] = sad16_y2_mmx2;
Index: mpegvideo_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/mpegvideo_mmx.c,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -d -r1.34 -r1.35
--- mpegvideo_mmx.c 11 Oct 2004 02:19:29 -0000 1.34
+++ mpegvideo_mmx.c 17 Dec 2005 18:14:33 -0000 1.35
@@ -40,7 +40,7 @@
qmul = qscale << 1;
assert(s->block_last_index[n]>=0 || s->h263_aic);
-
+
if (!s->h263_aic) {
if (n < 4)
level = block[0] * s->y_dc_scale;
@@ -116,7 +116,7 @@
qadd = (qscale - 1) | 1;
assert(s->block_last_index[n]>=0 || s->h263_aic);
-
+
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
asm volatile(
@@ -209,7 +209,7 @@
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
- if (n < 4)
+ if (n < 4)
block0 = block[0] * s->y_dc_scale;
else
block0 = block[0] * s->c_dc_scale;
@@ -263,7 +263,7 @@
"js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
- );
+ );
block[0]= block0;
}
@@ -339,13 +339,13 @@
long nCoeffs;
const uint16_t *quant_matrix;
int block0;
-
+
assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
- if (n < 4)
+ if (n < 4)
block0 = block[0] * s->y_dc_scale;
else
block0 = block[0] * s->c_dc_scale;
@@ -394,7 +394,7 @@
"jng 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory"
- );
+ );
block[0]= block0;
//Note, we dont do mismatch control for intra as errors cannot accumulate
}
@@ -404,7 +404,7 @@
{
long nCoeffs;
const uint16_t *quant_matrix;
-
+
assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
@@ -470,13 +470,13 @@
"psrlq $15, %%mm7 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"movd %%mm0, 124(%0, %3) \n\t"
-
+
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
: "%"REG_a, "memory"
);
}
-/* draw the edges of width 'w' of an image of size width, height
+/* draw the edges of width 'w' of an image of size width, height
this mmx version can only handle w==8 || w==16 */
static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
{
@@ -491,7 +491,7 @@
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
+ "punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
@@ -512,7 +512,7 @@
asm volatile(
"1: \n\t"
"movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
+ "punpcklbw %%mm0, %%mm0 \n\t"
"punpcklwd %%mm0, %%mm0 \n\t"
"punpckldq %%mm0, %%mm0 \n\t"
"movq %%mm0, -8(%0) \n\t"
@@ -525,12 +525,12 @@
"movq %%mm1, 8(%0, %2) \n\t"
"add %1, %0 \n\t"
"cmp %3, %0 \n\t"
- " jb 1b \n\t"
+ " jb 1b \n\t"
: "+r" (ptr)
: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
);
}
-
+
for(i=0;i<w;i+=4) {
/* top and bottom (and hopefully also the corners) */
ptr= buf - (i + 1) * wrap - w;
@@ -694,7 +694,7 @@
{
if (mm_flags & MM_MMX) {
const int dct_algo = s->avctx->dct_algo;
-
+
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
@@ -703,7 +703,7 @@
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
draw_edges = draw_edges_mmx;
-
+
if (mm_flags & MM_SSE2) {
s->denoise_dct= denoise_dct_sse2;
} else {
Index: mpegvideo_mmx_template.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c,v
retrieving revision 1.26
retrieving revision 1.27
diff -u -d -r1.26 -r1.27
--- mpegvideo_mmx_template.c 21 Sep 2005 21:17:09 -0000 1.26
+++ mpegvideo_mmx_template.c 17 Dec 2005 18:14:33 -0000 1.27
@@ -52,7 +52,7 @@
int level=0, q; //=0 is cuz gcc says uninitalized ...
const uint16_t *qmat, *bias;
__align8 int16_t temp_block[64];
-
+
assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
//s->fdct (block);
@@ -88,7 +88,7 @@
} else
/* For AIC we skip quant/dequant of INTRADC */
level = (block[0] + 4)>>3;
-
+
block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q;
last_non_zero_p1 = 1;
@@ -101,7 +101,7 @@
}
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
-
+
asm volatile(
"movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1
SPREADW(%%mm3)
@@ -116,16 +116,16 @@
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
- "pxor %%mm1, %%mm0 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
- "por %%mm0, %%mm4 \n\t"
- "pxor %%mm1, %%mm0 \n\t"
+ "por %%mm0, %%mm4 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
- "movq (%4, %%"REG_a"), %%mm1 \n\t"
+ "movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
@@ -142,7 +142,7 @@
asm volatile(
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
- "psubusw %%mm1, %%mm4 \n\t"
+ "psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
: "=g" (*overflow)
@@ -160,18 +160,18 @@
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
"pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
- "pxor %%mm1, %%mm0 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
"movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0]
"paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
"movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i]
"pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
- "por %%mm0, %%mm4 \n\t"
- "pxor %%mm1, %%mm0 \n\t"
+ "por %%mm0, %%mm4 \n\t"
+ "pxor %%mm1, %%mm0 \n\t"
"psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
"movq %%mm0, (%5, %%"REG_a") \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
- "movq (%4, %%"REG_a"), %%mm1 \n\t"
+ "movq (%4, %%"REG_a"), %%mm1 \n\t"
"movq %%mm7, (%1, %%"REG_a") \n\t" // 0
"pandn %%mm1, %%mm0 \n\t"
PMAXW(%%mm0, %%mm3)
@@ -188,7 +188,7 @@
asm volatile(
"movd %1, %%mm1 \n\t" // max_qcoeff
SPREADW(%%mm1)
- "psubusw %%mm1, %%mm4 \n\t"
+ "psubusw %%mm1, %%mm4 \n\t"
"packuswb %%mm4, %%mm4 \n\t"
"movd %%mm4, %0 \n\t" // *overflow
: "=g" (*overflow)
@@ -201,135 +201,135 @@
if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
if(last_non_zero_p1 <= 1) goto end;
- block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
- block[0x20] = temp_block[0x10];
+ block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
+ block[0x20] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
- block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
- block[0x09] = temp_block[0x03];
+ block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
+ block[0x09] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
- block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
- block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
+ block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
+ block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
- block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
- block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
- block[0x0C] = temp_block[0x05];
+ block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
+ block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
+ block[0x0C] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
- block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
- block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
- block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
- block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
+ block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
+ block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
+ block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
+ block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
- block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
- block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
- block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
- block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
+ block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
+ block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
+ block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
+ block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
- block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
- block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
- block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
- block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
+ block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
+ block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
+ block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
+ block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
- block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
- block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
- block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
- block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
+ block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
+ block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
+ block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
+ block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
- block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
- block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
- block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
- block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
+ block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+ block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
+ block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
- block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
- block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
- block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
+ block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
+ block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
+ block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
if(last_non_zero_p1 <= 1) goto end;
- block[0x04] = temp_block[0x01];
- block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+ block[0x04] = temp_block[0x01];
+ block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
- block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
- block[0x05] = temp_block[0x03];
+ block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
+ block[0x05] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
- block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
- block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+ block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
+ block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
- block[0x1C] = temp_block[0x19];
- block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
- block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
+ block[0x1C] = temp_block[0x19];
+ block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
+ block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
- block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
- block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
- block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
- block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
+ block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
+ block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
+ block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+ block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
- block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
- block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
- block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
- block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
+ block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
+ block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
+ block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
+ block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
- block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
- block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
- block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
- block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
+ block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
+ block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+ block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
+ block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
- block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
- block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
- block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
- block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
+ block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
+ block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+ block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
+ block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
- block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
- block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
- block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
- block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
+ block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
+ block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
+ block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
- block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
- block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
- block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+ block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
+ block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
+ block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}else{
if(last_non_zero_p1 <= 1) goto end;
- block[0x01] = temp_block[0x01];
- block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+ block[0x01] = temp_block[0x01];
+ block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
- block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
- block[0x03] = temp_block[0x03];
+ block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
+ block[0x03] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
- block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
- block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+ block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
+ block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
- block[0x19] = temp_block[0x19];
- block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
- block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
+ block[0x19] = temp_block[0x19];
+ block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
+ block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
- block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
- block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
- block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
- block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
+ block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
+ block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
+ block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+ block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
- block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
- block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
- block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
- block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
+ block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
+ block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
+ block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
+ block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
- block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
- block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
- block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
- block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
+ block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
+ block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+ block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
+ block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
- block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
- block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
- block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
- block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
+ block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
+ block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+ block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
+ block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
- block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
- block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
- block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
- block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
+ block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+ block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
+ block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
- block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
- block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
- block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+ block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
+ block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
+ block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}
end:
Index: simple_idct_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/simple_idct_mmx.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- simple_idct_mmx.c 18 May 2004 17:09:46 -0000 1.13
+++ simple_idct_mmx.c 17 Dec 2005 18:14:33 -0000 1.14
@@ -60,19 +60,19 @@
C4, C4, C4, C4,
C4, -C4, C4, -C4,
-
+
C2, C6, C2, C6,
C6, -C2, C6, -C2,
-
+
C1, C3, C1, C3,
C5, C7, C5, C7,
-
+
C3, -C7, C3, -C7,
-C1, -C5, -C1, -C5,
-
+
C5, -C1, C5, -C1,
C7, C3, C7, C3,
-
+
C7, -C5, C7, -C5,
C3, -C1, C3, -C1
};
@@ -357,7 +357,7 @@
"movd %%mm4, 64+" #dst " \n\t"\
"movd %%mm5, 80+" #dst " \n\t"\
-
+
#define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
"movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
"movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
@@ -857,7 +857,7 @@
"packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
"movd %%mm6, 48+" #dst " \n\t"\
"movd %%mm1, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
+ "movd %%mm5, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -924,7 +924,7 @@
"packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
"movd %%mm6, 48+" #dst " \n\t"\
"movd %%mm1, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
+ "movd %%mm5, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
@@ -1137,8 +1137,8 @@
"packssdw %%mm1, %%mm6 \n\t" /* A3+B3 a3+b3 */\
"movq %%mm6, 48+" #dst " \n\t"\
"movq %%mm6, 64+" #dst " \n\t"\
- "movq %%mm5, 80+" #dst " \n\t"
-
+ "movq %%mm5, 80+" #dst " \n\t"
+
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -1214,7 +1214,7 @@
"packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
"movd %%mm4, 64+" #dst " \n\t"\
"movd %%mm5, 80+" #dst " \n\t"
-
+
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -1256,7 +1256,7 @@
"movq %%mm0, 32+" #dst " \n\t"\
"movq %%mm4, 48+" #dst " \n\t"\
"movq %%mm4, 64+" #dst " \n\t"\
- "movq %%mm0, 80+" #dst " \n\t"
+ "movq %%mm0, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
@@ -1277,7 +1277,7 @@
12 32 16 36 52 72 56 76
05 45 07 47 25 65 27 67
15 35 17 37 55 75 57 77
-
+
Temp
00 04 10 14 20 24 30 34
40 44 50 54 60 64 70 74
Index: vp3dsp_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/vp3dsp_mmx.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- vp3dsp_mmx.c 1 Jun 2005 21:19:00 -0000 1.5
+++ vp3dsp_mmx.c 17 Dec 2005 18:14:33 -0000 1.6
@@ -208,7 +208,7 @@
I(1) = d1 c1 b1 a1
I(2) = d2 c2 b2 a2
I(3) = d3 c3 b3 a3
-
+
J(4) = h0 g0 f0 e0
J(5) = h1 g1 f1 e1
J(6) = h2 g2 f2 e2
Index: vp3dsp_sse2.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/vp3dsp_sse2.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- vp3dsp_sse2.c 14 Aug 2005 15:42:40 -0000 1.7
+++ vp3dsp_sse2.c 17 Dec 2005 18:14:33 -0000 1.8
@@ -36,21 +36,21 @@
};
static const unsigned int __align16 eight_data[] =
-{
- 0x00080008,
+{
0x00080008,
- 0x00080008,
- 0x00080008
-};
+ 0x00080008,
+ 0x00080008,
+ 0x00080008
+};
static const unsigned short __align16 SSE2_idct_data[7 * 8] =
{
- 64277,64277,64277,64277,64277,64277,64277,64277,
- 60547,60547,60547,60547,60547,60547,60547,60547,
- 54491,54491,54491,54491,54491,54491,54491,54491,
- 46341,46341,46341,46341,46341,46341,46341,46341,
- 36410,36410,36410,36410,36410,36410,36410,36410,
- 25080,25080,25080,25080,25080,25080,25080,25080,
+ 64277,64277,64277,64277,64277,64277,64277,64277,
+ 60547,60547,60547,60547,60547,60547,60547,60547,
+ 54491,54491,54491,54491,54491,54491,54491,54491,
+ 46341,46341,46341,46341,46341,46341,46341,46341,
+ 36410,36410,36410,36410,36410,36410,36410,36410,
+ 25080,25080,25080,25080,25080,25080,25080,25080,
12785,12785,12785,12785,12785,12785,12785,12785
};
@@ -820,6 +820,6 @@
SSE2_Row_IDCT();
SSE2_Transpose();
-
+
SSE2_Column_IDCT();
}
- Previous message: [Ffmpeg-cvslog] CVS: ffmpeg/libavcodec/ppc dsputil_altivec.c, 1.27, 1.28 dsputil_h264_altivec.c, 1.1, 1.2 dsputil_h264_template_altivec.c, 1.1, 1.2 dsputil_ppc.c, 1.32, 1.33 fft_altivec.c, 1.9, 1.10 gcc_fixes.h, 1.5, 1.6 gmc_altivec.c, 1.10, 1.11 idct_altivec.c, 1.8, 1.9 mpegvideo_altivec.c, 1.13, 1.14 mpegvideo_ppc.c, 1.12, 1.13
- Next message: [Ffmpeg-cvslog] CVS: ffmpeg/libavcodec h263dec.c,1.171,1.172
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the ffmpeg-cvslog
mailing list