[FFmpeg-devel] [PATCH 7/7] Moved conditional inline asm into a macro to clean up code
Tucker DiNapoli
t.dinapoli42 at gmail.com
Fri Mar 27 21:51:48 CET 2015
From: Tucker DiNapoli <T.DiNapoli42 at gmail.com>
---
libpostproc/postprocess.c | 2 +-
libpostproc/postprocess_template.c | 41 ++++++++++++++++++++++++--------------
2 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 9d89782..b8740db 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -117,7 +117,7 @@ const char *postproc_license(void)
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
-//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
+#define BLOCKS_PER_ITERATION 1 //1 for now to keep old code working
#if ARCH_X86 && HAVE_INLINE_ASM
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 794ea17..34c1cc1 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -1449,7 +1449,11 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
*/
static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
{
+ int block_index;
+ uint8_t *src_base = src;
+ for(block_index=0;block_index<BLOCKS_PER_ITERATION; block_index++){
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
+ src = src_base;
src+= 4*stride;
__asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1476,6 +1480,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
);
#else
int a, b, x;
+ src = src_base;
src+= 4*stride;
for(x=0; x<2; x++){
@@ -1491,6 +1496,8 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
src += 4;
}
#endif
+ src_base += 8;
+ }
}
/**
@@ -1502,6 +1509,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
*/
static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
{
+
#if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= stride*3;
__asm__ volatile(
@@ -2539,7 +2547,6 @@ Switch between
#endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
}
#endif //TEMPLATE_PP_ALTIVEC
-
#if TEMPLATE_PP_MMX
/**
* accurate deblock filter
@@ -3082,7 +3089,6 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
-
/**
* Copy a block from src to dst and fixes the blacklevel.
* levelFix == 0 -> do not touch the brightness & contrast
@@ -3175,9 +3181,10 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
: "%"REG_d
);
#else //TEMPLATE_PP_MMX && HAVE_6REGS
- for(i=0; i<8; i++)
+ for(i=0; i<8; i++){
memcpy( &(dst[dstStride*i]),
&(src[srcStride*i]), BLOCK_SIZE);
+ }
#endif //TEMPLATE_PP_MMX && HAVE_6REGS
}else{
#if TEMPLATE_PP_MMX && HAVE_6REGS
@@ -3241,7 +3248,21 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride)
}
#endif
}
-
+#undef mmx_pack_qp
+#if TEMPLATE_PP_MMX
+#define mmx_pack_qp(QP, pQP) \
+ __asm__ volatile( \
+ "movd %1, %%mm7 \n\t" \
+ "packuswb %%mm7, %%mm7 \n\t" /*0, 0, 0, QP, 0, 0, 0, QP*/ \
+ "packuswb %%mm7, %%mm7 \n\t" /* 0,QP, 0, QP, 0,QP, 0, QP*/ \
+ "packuswb %%mm7, %%mm7 \n\t" /*QP,..., QP*/ \
+ "movq %%mm7, %0 \n\t" \
+ : "=m" (pQP) \
+ : "r" (QP) \
+ );
+#else
+#define mmx_pack_qp(QP,pQP)
+#endif
/**
* Filter array of bytes (Y or U or V values)
*/
@@ -3457,17 +3478,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
}
c.QP_block[qp_index]= QP;
c.nonBQP_block[qp_index]= nonBQP;
-#if TEMPLATE_PP_MMX
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
- "movq %%mm7, %0 \n\t"
- : "=m" (c.pQPb_block[qp_index])
- : "r" (QP)
- );
-#endif
+ mmx_pack_qp(QP, c.pQPb_block[qp_index]);
}
qp_index = 0;
for(; x < endx; x+=BLOCK_SIZE){
--
2.3.3
More information about the ffmpeg-devel
mailing list