[FFmpeg-devel] [PATCH] x86/intmath: add sse optimized av_clipf and av_clipd

Thu Jan 7 11:26:05 CET 2016

On Thu, Jan 7, 2016 at 4:36 AM, James Almer <jamrial at gmail.com> wrote:
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> I could also include stdlib.h inside the __GNUC__ section if that's
> preferred, since other compilers don't need it.
>
>  libavutil/x86/intmath.h | 33 +++++++++++++++++++++++++++++++++
>  1 file changed, 33 insertions(+)
>
> diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
> index 611ef88..2b2c869 100644
> --- a/libavutil/x86/intmath.h
> +++ b/libavutil/x86/intmath.h
> @@ -22,6 +22,7 @@
>  #define AVUTIL_X86_INTMATH_H
>
>  #include <stdint.h>
> +#include <stdlib.h>
>  #if HAVE_FAST_CLZ
>  #if defined(_MSC_VER)
>  #include <intrin.h>
> @@ -98,6 +99,38 @@ static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigne
>
>  #endif /* __BMI2__ */
>
> +#if defined(__SSE2__)
> +
> +#define av_clipd av_clipd_sse2
> +static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax)
> +{
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +    if (amin > amax) abort();
> +#endif
> +    __asm__ ("minsd %2, %0 \n\t"
> +             "maxsd %1, %0 \n\t"
> +             : "+x"(a) : "xm"(amin), "xm"(amax));
> +    return a;
> +}
> +
> +#endif /* __SSE2__ */
> +
> +#if defined(__SSE__)
> +
> +#define av_clipf av_clipf_sse
> +static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax)
> +{
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +    if (amin > amax) abort();
> +#endif
> +    __asm__ ("minss %2, %0 \n\t"
> +             "maxss %1, %0 \n\t"
> +             : "+x"(a) : "xm"(amin), "xm"(amax));
> +    return a;
> +}
> +
> +#endif /* __SSE__ */
> +
>  #endif /* __GNUC__ */
>
>  #endif /* AVUTIL_X86_INTMATH_H */

Strictly speaking, this needs a check for HAVE_INLINE_ASM, even though
all GNUC-compatible compilers should probably have it.

- Hendrik