[FFmpeg-cvslog] rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2	functions.
    Ronald S. Bultje 
    git at videolan.org
       
    Fri May 27 02:53:54 CEST 2011
    
    
  
ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Tue May 24 18:28:40 2011 -0400| [522d65ba259a263d0cd91db27b79c07e13d7fcf2] | committer: Ronald S. Bultje
rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2 functions.
Many functions have such a prefix, but do not actually use any
instructions or features from that set, thus giving the false
impression that swscale is highly optimized for a particular
system, whereas in reality it is not.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=522d65ba259a263d0cd91db27b79c07e13d7fcf2
---
 libswscale/x86/rgb2rgb.c          |    2 +-
 libswscale/x86/rgb2rgb_template.c |   74 ++++++++++++++++++++-----------------
 2 files changed, 41 insertions(+), 35 deletions(-)
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index cf901af..97c50dd 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -111,7 +111,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_AMD3DNOW
 #define COMPILE_TEMPLATE_MMX2 0
-#define COMPILE_TEMPLATE_SSE2 1
+#define COMPILE_TEMPLATE_SSE2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNOW
 #include "rgb2rgb_template.c"
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index ce635df..70673f7 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -30,15 +30,8 @@
 #undef MOVNTQ
 #undef EMMS
 #undef SFENCE
-#undef MMREG_SIZE
 #undef PAVGB
 
-#if COMPILE_TEMPLATE_SSE2
-#define MMREG_SIZE 16
-#else
-#define MMREG_SIZE 8
-#endif
-
 #if COMPILE_TEMPLATE_AMD3DNOW
 #define PREFETCH  "prefetch"
 #define PAVGB     "pavgusb"
@@ -64,6 +57,10 @@
 #define SFENCE " # nop"
 #endif
 
+#if !COMPILE_TEMPLATE_SSE2
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+
 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     uint8_t *dest = dst;
@@ -1513,7 +1510,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
 {
     long x,y;
@@ -1530,7 +1529,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
     dst+= dstStride;
 
     for (y=1; y<srcHeight; y++) {
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
         const x86_reg mmxSize= srcWidth&~15;
         __asm__ volatile(
             "mov           %4, %%"REG_a"            \n\t"
@@ -1564,17 +1562,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
             "punpckhbw              %%mm3, %%mm7    \n\t"
             "punpcklbw              %%mm2, %%mm4    \n\t"
             "punpckhbw              %%mm2, %%mm6    \n\t"
-#if 1
             MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
             MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#else
-            "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-            "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-#endif
             "add                       $8, %%"REG_a"            \n\t"
             "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
             "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
@@ -1584,12 +1575,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
                "g" (-mmxSize)
             : "%"REG_a
         );
-#else
-        const x86_reg mmxSize=1;
-
-        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
-        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
-#endif
 
         for (x=mmxSize-1; x<srcWidth-1; x++) {
             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
@@ -1605,7 +1590,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
     }
 
     // last line
-#if 1
     dst[0]= src[0];
 
     for (x=0; x<srcWidth-1; x++) {
@@ -1613,18 +1597,14 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
     }
     dst[2*srcWidth-1]= src[srcWidth-1];
-#else
-    for (x=0; x<srcWidth; x++) {
-        dst[2*x+0]=
-        dst[2*x+1]= src[x];
-    }
-#endif
 
     __asm__ volatile(EMMS"       \n\t"
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
@@ -1728,6 +1708,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 2.
@@ -1978,7 +1959,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
 
      rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
 }
+#endif /* !COMPILE_TEMPLATE_SSE2 */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
                                     long width, long height, long src1Stride,
                                     long src2Stride, long dstStride)
@@ -2048,7 +2031,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
             ::: "memory"
             );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        uint8_t *dst1, uint8_t *dst2,
                                        long width, long height,
@@ -2228,6 +2214,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
 {
@@ -2266,6 +2253,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
     }
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2311,6 +2299,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
         count++;
     }
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2365,6 +2354,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
     }
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2411,6 +2401,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
         count++;
     }
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2492,6 +2483,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
                                  long lumStride, long chromStride, long srcStride)
@@ -2514,6 +2506,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
@@ -2540,6 +2533,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
+#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  long width, long height,
                                  long lumStride, long chromStride, long srcStride)
@@ -2562,9 +2556,13 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_SSE2 */
 
 static inline void RENAME(rgb2rgb_init)(void)
 {
+#if !COMPILE_TEMPLATE_SSE2
+#if !COMPILE_TEMPLATE_AMD3DNOW
     rgb15to16          = RENAME(rgb15to16);
     rgb15tobgr24       = RENAME(rgb15tobgr24);
     rgb15to32          = RENAME(rgb15to32);
@@ -2588,14 +2586,22 @@ static inline void RENAME(rgb2rgb_init)(void)
     yuv422ptoyuy2      = RENAME(yuv422ptoyuy2);
     yuv422ptouyvy      = RENAME(yuv422ptouyvy);
     yuy2toyv12         = RENAME(yuy2toyv12);
-    planar2x           = RENAME(planar2x);
-    rgb24toyv12        = RENAME(rgb24toyv12);
-    interleaveBytes    = RENAME(interleaveBytes);
     vu9_to_vu12        = RENAME(vu9_to_vu12);
     yvu9_to_yuy2       = RENAME(yvu9_to_yuy2);
-
-    uyvytoyuv420       = RENAME(uyvytoyuv420);
     uyvytoyuv422       = RENAME(uyvytoyuv422);
-    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
     yuyvtoyuv422       = RENAME(yuyvtoyuv422);
+#endif /* !COMPILE_TEMPLATE_SSE2 */
+
+#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+    planar2x           = RENAME(planar2x);
+#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+    rgb24toyv12        = RENAME(rgb24toyv12);
+
+    yuyvtoyuv420       = RENAME(yuyvtoyuv420);
+    uyvytoyuv420       = RENAME(uyvytoyuv420);
+#endif /* COMPILE_TEMPLATE_SSE2 */
+
+#if !COMPILE_TEMPLATE_AMD3DNOW
+    interleaveBytes    = RENAME(interleaveBytes);
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 }
    
    
More information about the ffmpeg-cvslog
mailing list