[FFmpeg-devel] [PATCH 06/14] [inline assembly] add mmx/xmm clobbers in fdct.c
frederic.recoules at univ-grenoble-alpes.fr
frederic.recoules at univ-grenoble-alpes.fr
Sun Apr 26 22:44:16 EEST 2020
From: Frédéric Recoules <frederic.recoules at orange.fr>
---
libavcodec/x86/fdct.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/libavcodec/x86/fdct.c b/libavcodec/x86/fdct.c
index 112566ded0..2635d58cbf 100644
--- a/libavcodec/x86/fdct.c
+++ b/libavcodec/x86/fdct.c
@@ -288,7 +288,7 @@ TABLE_SSE2
#define S(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
-#define FDCT_COL(cpu, mm, mov)\
+#define FDCT_COL(cpu, mm, mov, MM) \
static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int offset)\
{\
__asm__ volatile (\
@@ -369,11 +369,13 @@ static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int
#mov" %%"#mm"3, 112(%3) \n\t" \
: \
: "r" (in + offset), "r" (fdct_tg_all_16), "r" (fdct_one_corr), \
- "r" (out + offset), "r" (ocos_4_16)); \
+ "r" (out + offset), "r" (ocos_4_16) \
+ MM##_CLOBBERS_ONLY(#mm"0", #mm"1", #mm"2", #mm"3", \
+ #mm"4", #mm"5", #mm"6", #mm"7") ); \
}
-FDCT_COL(mmx, mm, movq)
-FDCT_COL(sse2, xmm, movdqa)
+FDCT_COL(mmx, mm, movq, MMX)
+FDCT_COL(sse2, xmm, movdqa, XMM)
static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
@@ -484,7 +486,9 @@ static av_always_inline void fdct_row_mmxext(const int16_t *in, int16_t *out,
"movq %%mm3, (%3) \n\t"
"movq %%mm7, 8(%3) \n\t"
:
- : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
+ : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out)
+ MMX_CLOBBERS_ONLY("%mm0", "%mm1", "%mm2", "%mm3",
+ "%mm4", "%mm5", "%mm6", "%mm7") );
}
static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
@@ -535,7 +539,9 @@ static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const
"movq %%mm3, 0(%3) \n\t"
"movq %%mm7, 8(%3) \n\t"
:
- : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out));
+ : "r" (in), "r" (table), "r" (fdct_r_row), "r" (out)
+ MMX_CLOBBERS_ONLY("%mm0", "%mm1", "%mm2", "%mm3",
+ "%mm4", "%mm5", "%mm6", "%mm7") );
}
void ff_fdct_mmx(int16_t *block)
--
2.17.1
More information about the ffmpeg-devel mailing list