[FFmpeg-devel] [PATCH] Fix compilation with llvm-gcc on x86_64
Mans Rullgard
mans
Sun Nov 1 17:45:11 CET 2009
Apparently llvm dislikes +g constraints with int operands. This uses
a temporary x86_reg variable to avoid that situation. Using a cast
is not an option as output operands must be lvalues with llvm.
Note: I have not inspected the generated asm.
---
libavcodec/x86/dsputil_mmx.c | 15 ++++++++----
libavcodec/x86/dsputil_mmx_avg_template.c | 33 +++++++++++++++++++---------
libavcodec/x86/dsputil_mmx_rnd_template.c | 18 ++++++++++-----
libavutil/internal.h | 2 +-
4 files changed, 45 insertions(+), 23 deletions(-)
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index af33707..ac83fa7 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -358,6 +358,7 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
@@ -376,7 +377,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
+ : "+g"(hr), "+r" (pixels), "+r" (block)
: "r"((x86_reg)line_size)
: "%"REG_a, "memory"
);
@@ -384,6 +385,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
@@ -402,7 +404,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
+ : "+g"(hr), "+r" (pixels), "+r" (block)
: "r"((x86_reg)line_size)
: "%"REG_a, "memory"
);
@@ -410,6 +412,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3)
@@ -436,7 +439,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
+ : "+g"(hr), "+r" (pixels), "+r" (block)
: "r"((x86_reg)line_size)
: "%"REG_a, "memory"
);
@@ -444,6 +447,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"1: \n\t"
"movdqu (%1), %%xmm0 \n\t"
@@ -458,7 +462,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
"lea (%1,%3,4), %1 \n\t"
"lea (%2,%3,4), %2 \n\t"
"jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
+ : "+g"(hr), "+r" (pixels), "+r" (block)
: "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
: "memory"
);
@@ -466,6 +470,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"1: \n\t"
"movdqu (%1), %%xmm0 \n\t"
@@ -484,7 +489,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
"lea (%1,%3,4), %1 \n\t"
"lea (%2,%3,4), %2 \n\t"
"jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
+ : "+g"(hr), "+r" (pixels), "+r" (block)
: "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
: "memory"
);
diff --git a/libavcodec/x86/dsputil_mmx_avg_template.c b/libavcodec/x86/dsputil_mmx_avg_template.c
index 8220867..5126011 100644
--- a/libavcodec/x86/dsputil_mmx_avg_template.c
+++ b/libavcodec/x86/dsputil_mmx_avg_template.c
@@ -29,6 +29,7 @@
*/
static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
@@ -50,7 +51,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
@@ -335,6 +336,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
@@ -368,7 +370,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
@@ -551,6 +553,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr
/* GL: this function does incorrect rounding if overflow */
static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_BONE(mm6);
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
@@ -581,13 +584,14 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
@@ -611,7 +615,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D" (block)
+ :"+g"(hr), "+S"(pixels), "+D" (block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
@@ -619,6 +623,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
/* GL: this function does incorrect rounding if overflow */
static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_BONE(mm6);
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
@@ -645,13 +650,14 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D" (block)
+ :"+g"(hr), "+S"(pixels), "+D" (block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
@@ -673,13 +679,14 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t"
@@ -705,13 +712,14 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
@@ -743,7 +751,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
@@ -752,6 +760,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
* used for B-frames so it does not matter. */
static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_BONE(mm6);
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
@@ -786,7 +795,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r" ((x86_reg)line_size)
:"%"REG_a, "memory");
}
@@ -848,6 +857,7 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin
#define QPEL_2TAP_L3(OPNAME) \
static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+ x86_reg hr = h;\
__asm__ volatile(\
"1: \n\t"\
"movq (%1,%2), %%mm0 \n\t"\
@@ -863,13 +873,14 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride
"add %5, %1 \n\t"\
"decl %0 \n\t"\
"jnz 1b \n\t"\
- :"+g"(h), "+r"(src)\
+ :"+g"(hr), "+r"(src)\
:"r"((x86_reg)off1), "r"((x86_reg)off2),\
"r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
:"memory"\
);\
}\
static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+ x86_reg hr = h;\
__asm__ volatile(\
"1: \n\t"\
"movq (%1,%2), %%mm0 \n\t"\
@@ -880,7 +891,7 @@ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride,
"add %5, %1 \n\t"\
"decl %0 \n\t"\
"jnz 1b \n\t"\
- :"+g"(h), "+r"(src)\
+ :"+g"(hr), "+r"(src)\
:"r"((x86_reg)off1), "r"((x86_reg)off2),\
"r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
:"memory"\
diff --git a/libavcodec/x86/dsputil_mmx_rnd_template.c b/libavcodec/x86/dsputil_mmx_rnd_template.c
index 2fc1756..6755b11 100644
--- a/libavcodec/x86/dsputil_mmx_rnd_template.c
+++ b/libavcodec/x86/dsputil_mmx_rnd_template.c
@@ -27,6 +27,7 @@
// put_pixels
static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_BFE(mm6);
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
@@ -52,7 +53,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size)
:REG_a, "memory");
}
@@ -109,6 +110,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_BFE(mm6);
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
@@ -148,7 +150,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size)
:REG_a, "memory");
}
@@ -204,6 +206,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_BFE(mm6);
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
@@ -226,13 +229,14 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
"add %%"REG_a", %2 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size)
:REG_a, "memory");
}
static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
__asm__ volatile(
@@ -292,7 +296,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
"subl $2, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels)
+ :"+g"(hr), "+S"(pixels)
:"D"(block), "r"((x86_reg)line_size)
:REG_a, "memory");
}
@@ -456,6 +460,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_BFE(mm6);
__asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t"
@@ -488,7 +493,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
"subl $4, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
+ :"+g"(hr), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size)
:REG_a, "memory");
}
@@ -496,6 +501,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
// this routine is 'slightly' suboptimal but mostly unused
static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
+ x86_reg hr = h;
MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
__asm__ volatile(
@@ -563,7 +569,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
"subl $2, %0 \n\t"
"jnz 1b \n\t"
- :"+g"(h), "+S"(pixels)
+ :"+g"(hr), "+S"(pixels)
:"D"(block), "r"((x86_reg)line_size)
:REG_a, "memory");
}
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 141186b..717cc8f 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -133,7 +133,7 @@ extern const uint32_t ff_inverse[257];
__asm__ volatile(\
"mull %3"\
:"=d"(ret), "=a"(dmy)\
- :"1"(a), "g"(ff_inverse[b])\
+ :"1"((int)a), "g"(ff_inverse[b])\
);\
ret;\
})
--
1.6.5.2
More information about the ffmpeg-devel
mailing list