[FFmpeg-devel] [PATCH 4/6] pp: move optim templating config to template itself.
Michael Niedermayer
michaelni at gmx.at
Thu Nov 15 01:10:51 CET 2012
On Wed, Nov 14, 2012 at 11:29:58PM +0100, Clément Bœsch wrote:
> Also avoid messing up the HAVE_* flags.
> ---
> libpostproc/postprocess.c | 73 ++--------
> libpostproc/postprocess_altivec_template.c | 2 +-
> libpostproc/postprocess_template.c | 226 +++++++++++++++++------------
> 3 files changed, 146 insertions(+), 155 deletions(-)
>
> diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
> index f0d97d3..7001778 100644
> --- a/libpostproc/postprocess.c
> +++ b/libpostproc/postprocess.c
> @@ -538,84 +538,33 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
> //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
> //Plain C versions
> //we always compile C for testing which needs bitexactness
> -#define COMPILE_C
> +#define COMPILE_C 1
> +#include "postprocess_template.c"
>
> #if HAVE_ALTIVEC
> -#define COMPILE_ALTIVEC
> +#define COMPILE_ALTIVEC 1
> +#include "postprocess_altivec_template.c"
> +#include "postprocess_template.c"
> #endif //HAVE_ALTIVEC
>
> #if ARCH_X86 && HAVE_INLINE_ASM
>
> #if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
> -#define COMPILE_MMX
> -#endif
> -
> -#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
> -#define COMPILE_MMX2
> -#endif
> -
> -#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
> -#define COMPILE_3DNOW
> -#endif
> -#endif /* ARCH_X86 */
> -
> -#undef HAVE_MMX_INLINE
> -#define HAVE_MMX_INLINE 0
> -#undef HAVE_MMXEXT_INLINE
> -#define HAVE_MMXEXT_INLINE 0
> -#undef HAVE_AMD3DNOW_INLINE
> -#define HAVE_AMD3DNOW_INLINE 0
> -#undef HAVE_ALTIVEC
> -#define HAVE_ALTIVEC 0
> -
> -#ifdef COMPILE_C
> -#define RENAME(a) a ## _C
> +#define COMPILE_MMX 1
> #include "postprocess_template.c"
> #endif
>
> -#ifdef COMPILE_ALTIVEC
> -#undef RENAME
> -#undef HAVE_ALTIVEC
> -#define HAVE_ALTIVEC 1
> -#define RENAME(a) a ## _altivec
> -#include "postprocess_altivec_template.c"
> -#include "postprocess_template.c"
> -#endif
> -
> -//MMX versions
> -#ifdef COMPILE_MMX
> -#undef RENAME
> -#undef HAVE_MMX_INLINE
> -#define HAVE_MMX_INLINE 1
> -#define RENAME(a) a ## _MMX
> -#include "postprocess_template.c"
> -#endif
> -
> -//MMX2 versions
> -#ifdef COMPILE_MMX2
> -#undef RENAME
> -#undef HAVE_MMX_INLINE
> -#undef HAVE_MMXEXT_INLINE
> -#define HAVE_MMX_INLINE 1
> -#define HAVE_MMXEXT_INLINE 1
> -#define RENAME(a) a ## _MMX2
> +#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
> +#define COMPILE_MMXEXT 1
> #include "postprocess_template.c"
> #endif
>
> -//3DNOW versions
> -#ifdef COMPILE_3DNOW
> -#undef RENAME
> -#undef HAVE_MMX_INLINE
> -#undef HAVE_MMXEXT_INLINE
> -#undef HAVE_AMD3DNOW_INLINE
> -#define HAVE_MMX_INLINE 1
> -#define HAVE_MMXEXT_INLINE 0
> -#define HAVE_AMD3DNOW_INLINE 1
> -#define RENAME(a) a ## _3DNow
> +#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
> +#define COMPILE_3DNOW 1
> #include "postprocess_template.c"
> #endif
>
> -// minor note: the HAVE_xyz is messed up after that line so do not use it.
> +#endif /* ARCH_X86 && HAVE_INLINE_ASM */
>
> static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
> const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
> diff --git a/libpostproc/postprocess_altivec_template.c b/libpostproc/postprocess_altivec_template.c
> index 3a37562..fa6ebe2 100644
> --- a/libpostproc/postprocess_altivec_template.c
> +++ b/libpostproc/postprocess_altivec_template.c
> @@ -825,7 +825,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
> #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
> #define do_a_deblock_altivec(a...) do_a_deblock_C(a)
>
> -static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
> +static inline void tempNoiseReducer_altivec(uint8_t *src, int stride,
> uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
> {
> const vector signed char neg1 = vec_splat_s8(-1);
> diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
> index f45ccf8..6611670 100644
> --- a/libpostproc/postprocess_template.c
> +++ b/libpostproc/postprocess_template.c
> @@ -25,37 +25,72 @@
>
> #include "libavutil/x86/asm.h"
>
> +#ifdef COMPILE_C
> +# define RENAME(a) a ## _C
> +#else
> +# define COMPILE_C 0
> +#endif
> +
> +#ifdef COMPILE_ALTIVEC
> +# define COMPILE_ALTIVEC 1
this looks wrong
> +# define RENAME(a) a ## _altivec
> +#else
> +# define COMPILE_ALTIVEC 0
> +#endif
> +
> +#ifdef COMPILE_MMX
> +# define RENAME(a) a ## _MMX
> +#else
> +# define COMPILE_MMX 0
> +#endif
> +
> +#ifdef COMPILE_MMXEXT
> +# undef COMPILE_MMX
> +# define COMPILE_MMX 1
> +# define RENAME(a) a ## _MMX2
> +#else
> +# define COMPILE_MMXEXT 0
> +#endif
> +
> +#ifdef COMPILE_3DNOW
> +# undef COMPILE_MMX
> +# define COMPILE_MMX 1
> +# define RENAME(a) a ## _3DNow
> +#else
> +# define COMPILE_3DNOW 0
> +#endif
> +
> #undef REAL_PAVGB
> #undef PAVGB
> #undef PMINUB
> #undef PMAXUB
>
> -#if HAVE_MMXEXT_INLINE
> +#if COMPILE_MMXEXT
> #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
> -#elif HAVE_AMD3DNOW_INLINE
> +#elif COMPILE_3DNOW
> #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
> #endif
> #define PAVGB(a,b) REAL_PAVGB(a,b)
>
> -#if HAVE_MMXEXT_INLINE
> +#if COMPILE_MMXEXT
> #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
> -#elif HAVE_MMX_INLINE
> +#elif COMPILE_MMX
> #define PMINUB(b,a,t) \
> "movq " #a ", " #t " \n\t"\
> "psubusb " #b ", " #t " \n\t"\
> "psubb " #t ", " #a " \n\t"
> #endif
>
> -#if HAVE_MMXEXT_INLINE
> +#if COMPILE_MMXEXT
> #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
> -#elif HAVE_MMX_INLINE
> +#elif COMPILE_MMX
> #define PMAXUB(a,b) \
> "psubusb " #a ", " #b " \n\t"\
> "paddb " #a ", " #b " \n\t"
> #endif
>
> //FIXME? |255-0| = 1 (should not be a problem ...)
> -#if HAVE_MMX_INLINE
> +#if COMPILE_MMX
These changes dont feel correct
For the template in its instance of compilation the question is
do i HAVE MMX/MMX2/3dnow/SSE on the target i get compiled for
or to say it differently
one could compile for MMX2 but not MMX in this case COMPILE_MMX2
would be set but not MMX. Yet the template should have its define
for MMX+MMX2 set. Aka i get COMPILED for MMX2 and HAVE MMX+MMX2
If you use COMPILE_* for this then COMPILE_* has a different semantic
meaning inside and outside of teh template, this appears bad to me.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Avoid a single point of failure, be that a person or equipment.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20121115/d02946f5/attachment.asc>
More information about the ffmpeg-devel
mailing list