[FFmpeg-cvslog] postproc: Replaced inline asm for prefetching with	prefetch functions
    Tucker DiNapoli 
    git at videolan.org
       
    Wed Apr 22 23:51:21 CEST 2015
    
    
  
ffmpeg | branch: master | Tucker DiNapoli <t.dinapoli42 at gmail.com> | Wed Apr 22 16:27:26 2015 -0400| [6264b6227c779af9d2520722f6acb45a2c51cdfd] | committer: Michael Niedermayer
postproc: Replaced inline asm for prefetching with prefetch functions
Prefetching functions are defined in postprocess_template using the
RENAME macro so that prefetching is used when available. For x86
targets inline asm is used and the functions are non-empty only for
cpus where prefetching is available. For non x86 targets the gcc bultin
prefetch is used if it is available, otherwise no prefetching is done.
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6264b6227c779af9d2520722f6acb45a2c51cdfd
---
 libpostproc/postprocess.c          |   31 ---------
 libpostproc/postprocess_template.c |  126 ++++++++++++++++++++----------------
 2 files changed, 71 insertions(+), 86 deletions(-)
diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 9d89782..af70bb3 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -168,37 +168,6 @@ static const char * const replaceTable[]=
     NULL //End Marker
 };
 
-
-#if ARCH_X86 && HAVE_INLINE_ASM
-static inline void prefetchnta(const void *p)
-{
-    __asm__ volatile(   "prefetchnta (%0)\n\t"
-        : : "r" (p)
-    );
-}
-
-static inline void prefetcht0(const void *p)
-{
-    __asm__ volatile(   "prefetcht0 (%0)\n\t"
-        : : "r" (p)
-    );
-}
-
-static inline void prefetcht1(const void *p)
-{
-    __asm__ volatile(   "prefetcht1 (%0)\n\t"
-        : : "r" (p)
-    );
-}
-
-static inline void prefetcht2(const void *p)
-{
-    __asm__ volatile(   "prefetcht2 (%0)\n\t"
-        : : "r" (p)
-    );
-}
-#endif
-
 /* The horizontal functions exist only in C because the MMX
  * code is faster with vertical filters and transposing. */
 
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 16e441a..e153b13 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3242,6 +3242,69 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride)
 #endif
 }
 
+#if ARCH_X86 && TEMPLATE_PP_MMXEXT
+static inline void RENAME(prefetchnta)(const void *p)
+{
+    __asm__ volatile(   "prefetchnta (%0)\n\t"
+        : : "r" (p)
+    );
+}
+
+static inline void RENAME(prefetcht0)(const void *p)
+{
+    __asm__ volatile(   "prefetcht0 (%0)\n\t"
+        : : "r" (p)
+    );
+}
+
+static inline void RENAME(prefetcht1)(const void *p)
+{
+    __asm__ volatile(   "prefetcht1 (%0)\n\t"
+        : : "r" (p)
+    );
+}
+
+static inline void RENAME(prefetcht2)(const void *p)
+{
+    __asm__ volatile(   "prefetcht2 (%0)\n\t"
+        : : "r" (p)
+    );
+}
+#elif !ARCH_X86 && AV_GCC_VERSION_AT_LEAST(3,2)
+static inline void RENAME(prefetchnta)(const void *p)
+{
+    __builtin_prefetch(p,0,0);
+}
+static inline void RENAME(prefetcht0)(const void *p)
+{
+    __builtin_prefetch(p,0,1);
+}
+static inline void RENAME(prefetcht1)(const void *p)
+{
+    __builtin_prefetch(p,0,2);
+}
+static inline void RENAME(prefetcht2)(const void *p)
+{
+    __builtin_prefetch(p,0,3);
+}
+#else
+static inline void RENAME(prefetchnta)(const void *p)
+{
+    return;
+}
+static inline void RENAME(prefetcht0)(const void *p)
+{
+    return;
+}
+static inline void RENAME(prefetcht1)(const void *p)
+{
+    return;
+}
+static inline void RENAME(prefetcht2)(const void *p)
+{
+    return;
+}
+#endif
 /**
  * Filter array of bytes (Y or U or V values)
  */
@@ -3368,34 +3431,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         // finish 1 block before the next otherwise we might have a problem
         // with the L1 Cache of the P4 ... or only a few blocks at a time or something
         for(x=0; x<width; x+=BLOCK_SIZE){
-
-#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
-/*
-            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-            __asm__(
-                "mov %4, %%"REG_a"              \n\t"
-                "shr $2, %%"REG_a"              \n\t"
-                "and $6, %%"REG_a"              \n\t"
-                "add %5, %%"REG_a"              \n\t"
-                "mov %%"REG_a", %%"REG_d"       \n\t"
-                "imul %1, %%"REG_a"             \n\t"
-                "imul %3, %%"REG_d"             \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                "add %1, %%"REG_a"              \n\t"
-                "add %3, %%"REG_d"              \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
-                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
-                : "%"REG_a, "%"REG_d
-            );
-#endif
+            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
                               srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
@@ -3474,33 +3513,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             uint8_t *dstBlockStart = dstBlock;
             const uint8_t *srcBlockStart = srcBlock;
           for(; x < endx; x+=BLOCK_SIZE){
-#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
-/*
-            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-            __asm__(
-                "mov %4, %%"REG_a"              \n\t"
-                "shr $2, %%"REG_a"              \n\t"
-                "and $6, %%"REG_a"              \n\t"
-                "add %5, %%"REG_a"              \n\t"
-                "mov %%"REG_a", %%"REG_d"       \n\t"
-                "imul %1, %%"REG_a"             \n\t"
-                "imul %3, %%"REG_d"             \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                "add %1, %%"REG_a"              \n\t"
-                "add %3, %%"REG_d"              \n\t"
-                "prefetchnta 32(%%"REG_a", %0)  \n\t"
-                "prefetcht0 32(%%"REG_d", %2)   \n\t"
-                :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
-                "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
-                : "%"REG_a, "%"REG_d
-            );
-#endif
+            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+            RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+            RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
                               srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
    
    
More information about the ffmpeg-cvslog
mailing list