[FFmpeg-cvslog] Automatically change MANGLE() into named inline asm operands when direct symbol references in inline asm are not supported.

Matt Oliver git at videolan.org
Wed Mar 19 00:35:45 CET 2014


ffmpeg | branch: master | Matt Oliver <protogonoi at gmail.com> | Tue Mar 18 15:53:26 2014 +1100| [823674751196e382c1d6334b8c92839f95d0ba9e] | committer: Michael Niedermayer

Automatically change MANGLE() into named inline asm operands when direct symbol references in inline asm are not supported.

This is part of the patch set for Intel C inline asm support on Windows.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=823674751196e382c1d6334b8c92839f95d0ba9e
---
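[Editor's note] For readers unfamiliar with the mechanism, the following is an illustrative sketch, not part of the commit, of how a call site behaves under the new macros in libavutil/x86/asm.h. The function name splat_pb_80 and the simplified declaration of ff_pb_80 are made up for the example; ff_pb_80 itself is one of the constants referenced in the dsputil_mmx.c hunk below.

    #include <stdint.h>
    #include "libavutil/x86/asm.h"

    extern const uint64_t ff_pb_80;   /* declaration simplified for the example */

    static void splat_pb_80(uint8_t *dst)
    {
        /* With HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS:
         *   MANGLE(ff_pb_80)                -> "ff_pb_80" (possibly underscore-prefixed),
         *   NAMED_CONSTRAINTS_ADD(...)      -> nothing.
         * Without it:
         *   MANGLE(ff_pb_80)                -> "%[ff_pb_80]",
         *   NAMED_CONSTRAINTS_ADD(ff_pb_80) -> , [ff_pb_80] "m"(ff_pb_80).
         * MMX state handling (emms) is omitted for brevity. */
        __asm__ volatile(
            "movq "MANGLE(ff_pb_80)", %%mm0 \n\t"
            "movq %%mm0, (%0)               \n\t"
            :
            : "r"(dst)
              NAMED_CONSTRAINTS_ADD(ff_pb_80)
            : "memory");
    }

Either way the asm template itself is unchanged; only the operand list grows on compilers without direct symbol references, which is exactly the pattern the hunks below apply throughout the tree.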

 configure                          |    3 +++
 libavcodec/x86/cabac.h             |    3 ++-
 libavcodec/x86/cavsdsp.c           |    2 ++
 libavcodec/x86/dsputil_mmx.c       |    1 +
 libavcodec/x86/h264_i386.h         |    2 ++
 libavcodec/x86/idct_sse2_xvid.c    |    2 +-
 libavcodec/x86/lpc.c               |    3 +++
 libavcodec/x86/motion_est.c        |    3 ++-
 libavcodec/x86/simple_idct.c       |    1 +
 libavcodec/x86/vc1dsp_mmx.c        |    6 ++++++
 libavutil/x86/asm.h                |   36 +++++++++++++++++++++++++++++++++++-
 libpostproc/postprocess_template.c |    7 +++++++
 libswresample/x86/resample_mmx.h   |    2 ++
 libswscale/x86/rgb2rgb_template.c  |   11 +++++++++++
 libswscale/x86/swscale_template.c  |   12 ++++++++++++
 libswscale/x86/yuv2rgb_template.c  |    9 +++++++++
 16 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index 3939cfa..b41e0dc 100755
--- a/configure
+++ b/configure
@@ -1691,6 +1691,7 @@ TOOLCHAIN_FEATURES="
     ibm_asm
     inline_asm_labels
     inline_asm_nonlocal_labels
+    inline_asm_direct_symbol_refs
     pragma_deprecated
     rsync_contimeout
     symver_asm_label
@@ -4306,6 +4307,8 @@ EOF
     # check whether xmm clobbers are supported
     check_inline_asm xmm_clobbers '"":::"%xmm0"'
 
+    check_inline_asm inline_asm_direct_symbol_refs '"movl test, %eax"'
+
     # check whether binutils is new enough to compile SSSE3/MMXEXT
     enabled ssse3  && check_inline_asm ssse3_inline  '"pabsw %xmm0, %xmm0"'
     enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"'
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 3a82c1e..0a68b7b 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -110,7 +110,7 @@
         "2:                                                             \n\t"
 
 #else /* BROKEN_RELOCATIONS */
-#define TABLES_ARG
+#define TABLES_ARG NAMED_CONSTRAINTS_ADD(ff_h264_cabac_tables)
 #define RIP_ARG
 
 #if HAVE_FAST_CMOV
@@ -184,6 +184,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
     __asm__ volatile(
         "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
         : "=&r"(tables)
+        : NAMED_CONSTRAINTS(ff_h264_cabac_tables)
     );
 #endif
 
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index aaa09d1..805e120 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -309,6 +309,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
         \
         : "+a"(src), "+c"(dst)\
         : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
+          NAMED_CONSTRAINTS_ADD(MUL2)\
         : "memory"\
      );\
      if(h==16){\
@@ -324,6 +325,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
             \
            : "+a"(src), "+c"(dst)\
            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD),  "m"(MUL1)\
+             NAMED_CONSTRAINTS_ADD(MUL2)\
            : "memory"\
         );\
      }\
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 47835ac..420a4ef 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -123,6 +123,7 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
         put_signed_pixels_clamped_mmx_half(64)
         : "+&r" (pixels), "=&r" (line_skip3)
         : "r" (block), "r" (line_skip)
+          NAMED_CONSTRAINTS_ADD(ff_pb_80)
         : "memory");
 }
 
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 0dc0a7c..9d811f0 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -55,6 +55,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
     __asm__ volatile(
         "lea   "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
         : "=&r"(tables)
+        : NAMED_CONSTRAINTS(ff_h264_cabac_tables)
     );
 #endif
 
@@ -130,6 +131,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
     __asm__ volatile(
         "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
         : "=&r"(tables)
+        : NAMED_CONSTRAINTS(ff_h264_cabac_tables)
     );
 #endif
 
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index d9bc841..e1878fa 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -381,7 +381,7 @@ inline void ff_idct_xvid_sse2(short *block)
     iLLM_PASS("%0")
     "6:                                                          \n\t"
     : "+r"(block)
-    :
+    : NAMED_CONSTRAINTS(m127,iTab1,walkenIdctRounders,iTab2,iTab3,iTab4,tan3,tan1,tan2,sqrt2)
     : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" ,
                    "%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,)
 #if ARCH_X86_64
diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c
index 9682733..bff2635 100644
--- a/libavcodec/x86/lpc.c
+++ b/libavcodec/x86/lpc.c
@@ -72,6 +72,7 @@ static void lpc_apply_welch_window_sse2(const int32_t *data, int len,
         "3:                                    \n\t"
         :"+&r"(i), "+&r"(j)
         :"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len)
+         NAMED_CONSTRAINTS_ADD(pd_1,pd_2)
          XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
                                     "%xmm5", "%xmm6", "%xmm7")
     );
@@ -116,6 +117,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
                 "movsd     %%xmm2, 16(%1)           \n\t"
                 :"+&r"(i)
                 :"r"(autoc+j), "r"(data+len), "r"(data+len-j)
+                 NAMED_CONSTRAINTS_ADD(pd_1)
                 :"memory"
             );
         } else {
@@ -139,6 +141,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
                 "movsd     %%xmm1, %2               \n\t"
                 :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
                 :"r"(data+len), "r"(data+len-j)
+                 NAMED_CONSTRAINTS_ADD(pd_1)
             );
         }
     }
diff --git a/libavcodec/x86/motion_est.c b/libavcodec/x86/motion_est.c
index 2b11cf9..3df471e 100644
--- a/libavcodec/x86/motion_est.c
+++ b/libavcodec/x86/motion_est.c
@@ -193,7 +193,8 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
         "sub $2, %0                     \n\t"
         " jg 1b                         \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2)
-        : "r" ((x86_reg) stride));
+        : "r" ((x86_reg) stride)
+          NAMED_CONSTRAINTS_ADD(bone));
 }
 
 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index e10dc7b..3ae30f3 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -1143,6 +1143,7 @@ Temp
 
 "9: \n\t"
                 :: "r" (block), "r" (temp), "r" (coeffs)
+                   NAMED_CONSTRAINTS_ADD(wm1010,d40000)
                 : "%eax"
         );
 }
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 5ceacd3..942aa42 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -110,6 +110,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
         : "+r"(src), "+r"(dst)
         : "r"(stride), "r"(-2*stride),
           "m"(shift), "m"(rnd), "r"(9*stride-4)
+          NAMED_CONSTRAINTS_ADD(ff_pw_9)
         : "%"REG_c, "memory"
     );
 }
@@ -154,6 +155,7 @@ static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
         "jnz 1b                            \n\t"\
         : "+r"(h), "+r" (src),  "+r" (dst)\
         : "r"(stride), "m"(rnd)\
+          NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\
         : "memory"\
     );\
 }
@@ -212,6 +214,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
         : "+r"(src),  "+r"(dst)\
         : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
           "g"(stride-offset)\
+          NAMED_CONSTRAINTS_ADD(ff_pw_9)\
         : "%"REG_c, "memory"\
     );\
 }
@@ -314,6 +317,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src,      \
         : "+r"(h), "+r" (src),  "+r" (dst)                              \
         : "r"(src_stride), "r"(3*src_stride),                           \
           "m"(rnd), "m"(shift)                                          \
+          NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_53,ff_pw_18)              \
         : "memory"                                                      \
     );                                                                  \
 }
@@ -351,6 +355,7 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride,    \
         "jnz 1b                    \n\t"                                \
         : "+r"(h), "+r" (src),  "+r" (dst)                              \
         : "r"(stride), "m"(rnd)                                         \
+          NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_18,ff_pw_53,ff_pw_128)    \
         : "memory"                                                      \
     );                                                                  \
 }
@@ -386,6 +391,7 @@ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,         \
         "jnz 1b                    \n\t"                                \
         : "+r"(h), "+r" (src),  "+r" (dst)                              \
         : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd)             \
+          NAMED_CONSTRAINTS_ADD(ff_pw_53,ff_pw_18,ff_pw_3)              \
         : "memory"                                                      \
     );                                                                  \
 }
diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h
index 70ccac7..5328e2b 100644
--- a/libavutil/x86/asm.h
+++ b/libavutil/x86/asm.h
@@ -107,6 +107,40 @@ typedef int x86_reg;
 #    define LOCAL_MANGLE(a) #a
 #endif
 
-#define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
+#if HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS
+#   define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
+#   define NAMED_CONSTRAINTS_ADD(...)
+#   define NAMED_CONSTRAINTS(...)
+#else
+    /* When direct symbol references are used in code passed to a compiler that does not support them
+     *  then these references need to be converted to named asm constraints instead.
+     * Instead of returning a direct symbol MANGLE now returns a named constraint for that specific symbol.
+     * In order for this to work there must also be a corresponding entry in the asm-interface. To add this
+     *  entry use the macro NAMED_CONSTRAINTS() and pass in a list of each symbol reference used in the
+     *  corresponding block of code. (e.g. NAMED_CONSTRAINTS(var1,var2,var3) where var1 is the first symbol etc. ).
+     * If there are already existing constraints then use NAMED_CONSTRAINTS_ADD to add to the existing constraint list.
+     */
+#   define MANGLE(a) "%["#a"]"
+    // Intel/MSVC does not correctly expand va-args so we need a rather ugly hack in order to get it to work
+#   define FE_0(P,X) P(X)
+#   define FE_1(P,X,X1) P(X), FE_0(P,X1)
+#   define FE_2(P,X,X1,X2) P(X), FE_1(P,X1,X2)
+#   define FE_3(P,X,X1,X2,X3) P(X), FE_2(P,X1,X2,X3)
+#   define FE_4(P,X,X1,X2,X3,X4) P(X), FE_3(P,X1,X2,X3,X4)
+#   define FE_5(P,X,X1,X2,X3,X4,X5) P(X), FE_4(P,X1,X2,X3,X4,X5)
+#   define FE_6(P,X,X1,X2,X3,X4,X5,X6) P(X), FE_5(P,X1,X2,X3,X4,X5,X6)
+#   define FE_7(P,X,X1,X2,X3,X4,X5,X6,X7) P(X), FE_6(P,X1,X2,X3,X4,X5,X6,X7)
+#   define FE_8(P,X,X1,X2,X3,X4,X5,X6,X7,X8) P(X), FE_7(P,X1,X2,X3,X4,X5,X6,X7,X8)
+#   define FE_9(P,X,X1,X2,X3,X4,X5,X6,X7,X8,X9) P(X), FE_8(P,X1,X2,X3,X4,X5,X6,X7,X8,X9)
+#   define GET_FE_IMPL(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
+#   define GET_FE(A) GET_FE_IMPL A
+#   define GET_FE_GLUE(x, y) x y
+#   define FOR_EACH_VA(P,...) GET_FE_GLUE(GET_FE((__VA_ARGS__,FE_9,FE_8,FE_7,FE_6,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)), (P,__VA_ARGS__))
+#   define NAME_CONSTRAINT(x) [x] "m"(x)
+    // Parameters are a list of each symbol reference required
+#   define NAMED_CONSTRAINTS_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
+    // Same but without comma for when there are no previously defined constraints
+#   define NAMED_CONSTRAINTS(...) FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
+#endif
 
 #endif /* AVUTIL_X86_ASM_H */
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 74b0ab4..3991ae9 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -490,6 +490,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
 
         :
         : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
+          NAMED_CONSTRAINTS_ADD(b01)
         : "%"REG_a, "%"REG_c
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -755,6 +756,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 
         :
         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
+          NAMED_CONSTRAINTS_ADD(b80,b00,b01)
         : "%"REG_a, "%"REG_c
     );
 
@@ -1042,6 +1044,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 
         : "+r" (src)
         : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
+          NAMED_CONSTRAINTS_ADD(w05,w20)
         : "%"REG_a
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -1313,6 +1316,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
 
         "1:                        \n\t"
         : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
+          NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08)
         : "%"REG_a, "%"REG_d, "%"REG_SP
     );
 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
@@ -2446,6 +2450,7 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
         "4:                                     \n\t"
 
         :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
+          NAMED_CONSTRAINTS_ADD(b80)
         : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -2790,6 +2795,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 
             : "+&r"(src)
             : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
+              NAMED_CONSTRAINTS_ADD(w04)
         );
 
         src+= step; // src points to begin of the 8x8 Block
@@ -3061,6 +3067,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 
             : "+r" (temp_src)
             : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
+              NAMED_CONSTRAINTS_ADD(w05,w20)
             : "%"REG_a
         );
     }
diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h
index fab52f7..f366cc7 100644
--- a/libswresample/x86/resample_mmx.h
+++ b/libswresample/x86/resample_mmx.h
@@ -46,6 +46,7 @@ __asm__ volatile(\
     : "r" (((uint8_t*)(src+sample_index))-len),\
       "r" (((uint8_t*)filter)-len),\
       "r" (dst+dst_index)\
+      NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
 );
 
 #define COMMON_CORE_INT16_SSE2 \
@@ -69,4 +70,5 @@ __asm__ volatile(\
     : "r" (((uint8_t*)(src+sample_index))-len),\
       "r" (((uint8_t*)filter)-len),\
       "r" (dst+dst_index)\
+      NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
 );
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 455e7c2..b68824d 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -163,6 +163,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "movq       %%mm5, %%mm7    \n\t"
             STORE_BGR24_MMX
             :: "r"(dest), "r"(s)
+              NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
             :"memory");
         dest += 24;
         s += 32;
@@ -785,6 +786,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
 
             :"=m"(*d)
             :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+             NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi)
             :"memory");
         /* borrowed 32 to 24 */
         __asm__ volatile(
@@ -801,6 +803,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             STORE_BGR24_MMX
 
             :: "r"(d), "m"(*s)
+              NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
             :"memory");
         d += 24;
         s += 8;
@@ -890,6 +893,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             :"=m"(*d)
             :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+             NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi)
             :"memory");
         /* borrowed 32 to 24 */
         __asm__ volatile(
@@ -906,6 +910,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             STORE_BGR24_MMX
 
             :: "r"(d), "m"(*s)
+              NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
             :"memory");
         d += 24;
         s += 8;
@@ -966,6 +971,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
             "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
             PACK_RGB32
             ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
+              NAMED_CONSTRAINTS_ADD(mul15_hi)
             :"memory");
         d += 16;
         s += 4;
@@ -1009,6 +1015,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
             "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
             PACK_RGB32
             ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
+              NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi)
             :"memory");
         d += 16;
         s += 4;
@@ -1133,6 +1140,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
         "2:                                             \n\t"
         : "+a" (mmx_size)
         : "r" (src-mmx_size), "r"(dst-mmx_size)
+          NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b)
     );
 
     __asm__ volatile(SFENCE:::"memory");
@@ -1468,6 +1476,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
             :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
                "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
                "g" (-mmxSize)
+               NAMED_CONSTRAINTS_ADD(mmx_ff)
             : "%"REG_a
         );
 
@@ -1689,6 +1698,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
                 "add                        $8,      %%"REG_a"  \n\t"
                 " js                        1b                  \n\t"
                 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
+                  NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
                 : "%"REG_a, "%"REG_d
             );
             ydst += lumStride;
@@ -1837,6 +1847,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "add                        $4, %%"REG_a"       \n\t"
             " js                        1b                  \n\t"
             : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
+              NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
             : "%"REG_a, "%"REG_d
         );
 
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index c7a1bb4..71a60bc 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -172,6 +172,7 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
         :: "r" (&c->redDither),                   \
             "m" (dummy), "m" (dummy), "m" (dummy),\
             "r" (dest), "m" (dstW_reg), "m"(uv_off) \
+            NAMED_CONSTRAINTS_ADD(bF8,bFC) \
         : "%"REG_a, "%"REG_d, "%"REG_S            \
     );
 
@@ -680,6 +681,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
        "r" (dest), "m" (dstW_reg), "m"(uv_off)
+       NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
     );
 }
@@ -704,6 +706,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
        "r" (dest),  "m" (dstW_reg), "m"(uv_off)
+       NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
     : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
     );
 }
@@ -931,6 +934,7 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
            "a" (&c->redDither)
+           NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
     );
 }
 
@@ -960,6 +964,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
            "a" (&c->redDither)
+           NAMED_CONSTRAINTS_ADD(bF8)
     );
 }
 
@@ -989,6 +994,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
         "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
            "a" (&c->redDither)
+           NAMED_CONSTRAINTS_ADD(bF8,bFC)
     );
 }
 
@@ -1262,6 +1268,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
+               NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
         );
     } else {
         const int16_t *ubuf1 = ubuf[1];
@@ -1276,6 +1283,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
+               NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
         );
     }
 }
@@ -1307,6 +1315,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
+               NAMED_CONSTRAINTS_ADD(bF8)
         );
     } else {
         const int16_t *ubuf1 = ubuf[1];
@@ -1327,6 +1336,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
+               NAMED_CONSTRAINTS_ADD(bF8)
         );
     }
 }
@@ -1358,6 +1368,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
+               NAMED_CONSTRAINTS_ADD(bF8,bFC)
         );
     } else {
         const int16_t *ubuf1 = ubuf[1];
@@ -1378,6 +1389,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
             "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
+               NAMED_CONSTRAINTS_ADD(bF8,bFC)
         );
     }
 }
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index c879102..d29e3a4 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -134,10 +134,18 @@
     "add  $4, %0\n\t"                            \
     "js   1b\n\t"                                \
 
+#if COMPILE_TEMPLATE_MMXEXT
+#define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ADD(mask1101,mask0110,mask0100,mask0010,mask1001)
+#else
+#define RGB_PACK24_B_OPERANDS
+#endif
+
 #define YUV2RGB_OPERANDS                                          \
         : "+r" (index), "+r" (image)                              \
         : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
           "r" (py - 2*index)                                      \
+          NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \
+          RGB_PACK24_B_OPERANDS                                   \
         : "memory"                                                \
         );                                                        \
     }                                                             \
@@ -146,6 +154,7 @@
         : "+r" (index), "+r" (image)                              \
         : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
           "r" (py - 2*index), "r" (pa - 2*index)                  \
+          NAMED_CONSTRAINTS_ADD(mmx_00ffw)                        \
         : "memory"                                                \
         );                                                        \
     }                                                             \



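[Editor's note] As a closing illustration, not part of the commit, here is a worked expansion of the variadic helper added to libavutil/x86/asm.h. The symbols ff_pw_128 and ff_pw_9 are taken from the vc1dsp_mmx.c hunk above, and the trace assumes HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS is 0.

    /* Worked expansion sketch of the macros added to libavutil/x86/asm.h.
     *
     * NAMED_CONSTRAINTS_ADD(ff_pw_128, ff_pw_9)
     *   -> , FOR_EACH_VA(NAME_CONSTRAINT, ff_pw_128, ff_pw_9)
     *
     * FOR_EACH_VA appends FE_9..FE_0 after the caller's arguments, so with two
     * arguments FE_1 lands in the NAME slot of GET_FE_IMPL:
     *   -> , FE_1(NAME_CONSTRAINT, ff_pw_128, ff_pw_9)
     *   -> , NAME_CONSTRAINT(ff_pw_128), FE_0(NAME_CONSTRAINT, ff_pw_9)
     *   -> , [ff_pw_128] "m"(ff_pw_128), [ff_pw_9] "m"(ff_pw_9)
     *
     * Each listed symbol becomes a named "m" operand that the asm template can
     * reach through MANGLE(sym), which expands to "%[sym]" in this configuration.
     * The GET_FE_GLUE indirection forces an extra macro rescan so that ICC/MSVC,
     * whose preprocessors pass __VA_ARGS__ to nested macros as a single argument,
     * still select the correct FE_N. */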