[FFmpeg-devel] [PATCH] Fix compilation with llvm-gcc on x86_64

Sun Nov 1 17:45:11 CET 2009

Apparently llvm dislikes +g constraints with int operands.  This uses
a temporary x86_reg variable to avoid that situation.  Using a cast is
not an option as output operands must be lvalues with llvm.

Note: I have not inspected the generated asm.
---
 libavcodec/x86/dsputil_mmx.c              |   15 ++++++++----
 libavcodec/x86/dsputil_mmx_avg_template.c |   33 +++++++++++++++++++---------
 libavcodec/x86/dsputil_mmx_rnd_template.c |   18 ++++++++++-----
 libavutil/internal.h                      |    2 +-
 4 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index af33707..ac83fa7 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -358,6 +358,7 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
 
 static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
          ASMALIGN(3)
@@ -376,7 +377,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
          "add %%"REG_a", %2             \n\t"
          "subl $4, %0                   \n\t"
          "jnz 1b                        \n\t"
-         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "+g"(hr), "+r" (pixels),  "+r" (block)
          : "r"((x86_reg)line_size)
          : "%"REG_a, "memory"
         );
@@ -384,6 +385,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
 
 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
          ASMALIGN(3)
@@ -402,7 +404,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
          "add %%"REG_a", %2             \n\t"
          "subl $4, %0                   \n\t"
          "jnz 1b                        \n\t"
-         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "+g"(hr), "+r" (pixels),  "+r" (block)
          : "r"((x86_reg)line_size)
          : "%"REG_a, "memory"
         );
@@ -410,6 +412,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
 
 static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
          ASMALIGN(3)
@@ -436,7 +439,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
          "add %%"REG_a", %2             \n\t"
          "subl $4, %0                   \n\t"
          "jnz 1b                        \n\t"
-         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "+g"(hr), "+r" (pixels),  "+r" (block)
          : "r"((x86_reg)line_size)
          : "%"REG_a, "memory"
         );
@@ -444,6 +447,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
 
 static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
          "1:                            \n\t"
          "movdqu (%1), %%xmm0           \n\t"
@@ -458,7 +462,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
          "lea (%1,%3,4), %1             \n\t"
          "lea (%2,%3,4), %2             \n\t"
          "jnz 1b                        \n\t"
-         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "+g"(hr), "+r" (pixels),  "+r" (block)
          : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
          : "memory"
         );
@@ -466,6 +470,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
 
 static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
          "1:                            \n\t"
          "movdqu (%1), %%xmm0           \n\t"
@@ -484,7 +489,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
          "lea (%1,%3,4), %1             \n\t"
          "lea (%2,%3,4), %2             \n\t"
          "jnz 1b                        \n\t"
-         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "+g"(hr), "+r" (pixels),  "+r" (block)
          : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
          : "memory"
         );
diff --git a/libavcodec/x86/dsputil_mmx_avg_template.c b/libavcodec/x86/dsputil_mmx_avg_template.c
index 8220867..5126011 100644
--- a/libavcodec/x86/dsputil_mmx_avg_template.c
+++ b/libavcodec/x86/dsputil_mmx_avg_template.c
@@ -29,6 +29,7 @@
  */
 static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "1:                             \n\t"
@@ -50,7 +51,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
@@ -335,6 +336,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
 
 static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "1:                             \n\t"
@@ -368,7 +370,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
@@ -551,6 +553,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr
 /* GL: this function does incorrect rounding if overflow */
 static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_BONE(mm6);
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
@@ -581,13 +584,14 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
 
 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "movq (%1), %%mm0               \n\t"
@@ -611,7 +615,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D" (block)
+        :"+g"(hr), "+S"(pixels), "+D" (block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
@@ -619,6 +623,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
 /* GL: this function does incorrect rounding if overflow */
 static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_BONE(mm6);
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
@@ -645,13 +650,14 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D" (block)
+        :"+g"(hr), "+S"(pixels), "+D" (block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
 
 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "1:                             \n\t"
@@ -673,13 +679,14 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
 
 static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "1:                             \n\t"
@@ -705,13 +712,14 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
 
 static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "movq (%1), %%mm0               \n\t"
@@ -743,7 +751,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a, "memory");
 }
@@ -752,6 +760,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
  * used for B-frames so it does not matter. */
 static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_BONE(mm6);
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
@@ -786,7 +795,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
         "add %%"REG_a", %2              \n\t"
         "subl $4, %0                    \n\t"
         "jnz 1b                         \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r" ((x86_reg)line_size)
         :"%"REG_a,  "memory");
 }
@@ -848,6 +857,7 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin
 
 #define QPEL_2TAP_L3(OPNAME) \
 static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+    x86_reg hr = h;\
     __asm__ volatile(\
         "1:                    \n\t"\
         "movq   (%1,%2), %%mm0 \n\t"\
@@ -863,13 +873,14 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride
         "add   %5, %1          \n\t"\
         "decl  %0              \n\t"\
         "jnz   1b              \n\t"\
-        :"+g"(h), "+r"(src)\
+        :"+g"(hr), "+r"(src)\
         :"r"((x86_reg)off1), "r"((x86_reg)off2),\
          "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
         :"memory"\
     );\
 }\
 static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+    x86_reg hr = h;\
     __asm__ volatile(\
         "1:                    \n\t"\
         "movq   (%1,%2), %%mm0 \n\t"\
@@ -880,7 +891,7 @@ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride,
         "add   %5, %1          \n\t"\
         "decl  %0              \n\t"\
         "jnz   1b              \n\t"\
-        :"+g"(h), "+r"(src)\
+        :"+g"(hr), "+r"(src)\
         :"r"((x86_reg)off1), "r"((x86_reg)off2),\
          "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
         :"memory"\
diff --git a/libavcodec/x86/dsputil_mmx_rnd_template.c b/libavcodec/x86/dsputil_mmx_rnd_template.c
index 2fc1756..6755b11 100644
--- a/libavcodec/x86/dsputil_mmx_rnd_template.c
+++ b/libavcodec/x86/dsputil_mmx_rnd_template.c
@@ -27,6 +27,7 @@
 // put_pixels
 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_BFE(mm6);
     __asm__ volatile(
         "lea    (%3, %3), %%"REG_a"     \n\t"
@@ -52,7 +53,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
         "add    %%"REG_a", %2           \n\t"
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
         :REG_a, "memory");
 }
@@ -109,6 +110,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
 
 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_BFE(mm6);
     __asm__ volatile(
         "lea        (%3, %3), %%"REG_a" \n\t"
@@ -148,7 +150,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
         "add    %%"REG_a", %2           \n\t"
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
         :REG_a, "memory");
 }
@@ -204,6 +206,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
 
 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_BFE(mm6);
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
@@ -226,13 +229,14 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
         "add    %%"REG_a", %2           \n\t"
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
         :REG_a, "memory");
 }
 
 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_ZERO(mm7);
     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
     __asm__ volatile(
@@ -292,7 +296,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
 
         "subl   $2, %0                  \n\t"
         "jnz    1b                      \n\t"
-        :"+g"(h), "+S"(pixels)
+        :"+g"(hr), "+S"(pixels)
         :"D"(block), "r"((x86_reg)line_size)
         :REG_a, "memory");
 }
@@ -456,6 +460,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
 
 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_BFE(mm6);
     __asm__ volatile(
         "lea    (%3, %3), %%"REG_a"     \n\t"
@@ -488,7 +493,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
 
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
-        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"+g"(hr), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
         :REG_a, "memory");
 }
@@ -496,6 +501,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
 // this routine is 'slightly' suboptimal but mostly unused
 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
+    x86_reg hr = h;
     MOVQ_ZERO(mm7);
     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
     __asm__ volatile(
@@ -563,7 +569,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
 
         "subl   $2, %0                  \n\t"
         "jnz    1b                      \n\t"
-        :"+g"(h), "+S"(pixels)
+        :"+g"(hr), "+S"(pixels)
         :"D"(block), "r"((x86_reg)line_size)
         :REG_a, "memory");
 }
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 141186b..717cc8f 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -133,7 +133,7 @@ extern const uint32_t ff_inverse[257];
         __asm__ volatile(\
             "mull %3"\
             :"=d"(ret), "=a"(dmy)\
-            :"1"(a), "g"(ff_inverse[b])\
+            :"1"((int)a), "g"(ff_inverse[b])\
             );\
         ret;\
     })
-- 
1.6.5.2