[FFmpeg-devel] [PATCH 1/2] libavutil/cpu: Adds av_cpu_has_fast_gather to detect cpus with avx fast gather instruction

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Mon Jun 14 14:41:03 EEST 2021


Alan Kelly:
> Broadwell and later have fast gather instructions.
> ---
>  This is so that the avx2 version of ff_hscale8to15X which uses gather
>  instructions is only selected on machines where it will actually be
>  faster.
>  libavutil/cpu.c          |  6 ++++++
>  libavutil/cpu.h          |  6 ++++++
>  libavutil/cpu_internal.h |  1 +
>  libavutil/x86/cpu.c      | 18 ++++++++++++++++++
>  4 files changed, 31 insertions(+)
> 
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> index 8960415d00..0a723eeb7a 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -49,6 +49,12 @@
>  
>  static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1);
>  
> +int av_cpu_has_fast_gather(void){
> +    if (ARCH_X86)
> +        return ff_cpu_has_fast_gather();
> +    return 0;
> +}
> +
>  static int get_cpu_flags(void)
>  {
>      if (ARCH_MIPS)
> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> index b555422dae..faf3a221f4 100644
> --- a/libavutil/cpu.h
> +++ b/libavutil/cpu.h
> @@ -72,6 +72,7 @@
>  #define AV_CPU_FLAG_MMI          (1 << 0)
>  #define AV_CPU_FLAG_MSA          (1 << 1)
>  
> +int av_cpu_has_fast_gather(void);
>  /**
>   * Return the flags which specify extensions supported by the CPU.
>   * The returned value is affected by av_force_cpu_flags() if that was used
> @@ -107,6 +108,11 @@ int av_cpu_count(void);
>   *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
>   *  present.
>   */
> +
> +/**
> + * Returns true if the cpu has fast gather instructions.
> + * Broadwell and later cpus have fast gather
> + */

You added the documentation to av_cpu_max_align(), not
av_cpu_has_fast_gather().

>  size_t av_cpu_max_align(void);
>  
>  #endif /* AVUTIL_CPU_H */
> diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
> index 889764320b..92525df0c1 100644
> --- a/libavutil/cpu_internal.h
> +++ b/libavutil/cpu_internal.h
> @@ -46,6 +46,7 @@ int ff_get_cpu_flags_aarch64(void);
>  int ff_get_cpu_flags_arm(void);
>  int ff_get_cpu_flags_ppc(void);
>  int ff_get_cpu_flags_x86(void);
> +int ff_cpu_has_fast_gather(void);
>  
>  size_t ff_get_cpu_max_align_mips(void);
>  size_t ff_get_cpu_max_align_aarch64(void);
> diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
> index bcd41a50a2..9724e0017b 100644
> --- a/libavutil/x86/cpu.c
> +++ b/libavutil/x86/cpu.c
> @@ -270,3 +270,21 @@ size_t ff_get_cpu_max_align_x86(void)
>  
>      return 8;
>  }
> +
> +int ff_cpu_has_fast_gather(void){
> +    int eax, ebx, ecx;
> +    int max_std_level, std_caps = 0;
> +    int family = 0, model = 0;
> +    cpuid(0, max_std_level, ebx, ecx, std_caps);
> +
> +    if (max_std_level >= 1) {
> +        cpuid(1, eax, ebx, ecx, std_caps);
> +        family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
> +        model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
> +        // Broadwell and later
> +        if(family == 6 && model >= 70){
> +          return 1;
> +        }
> +    }
> +    return 0;
> +}
> 

The usual way to signal that a processor supports a feature but executes
it slowly is a CPU flag; see AV_CPU_FLAG_(AVX|SSE2|SSE3)SLOW. That way
one also avoids adding a new public function that is completely useless
when not on X86.

- Andreas


More information about the ffmpeg-devel mailing list