[FFmpeg-devel] [PATCH 1/2] libavutil/cpu: Adds av_cpu_has_fast_gather to detect cpus with avx fast gather instruction
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Mon Jun 14 14:41:03 EEST 2021
Alan Kelly:
> Broadwell and later have fast gather instructions.
> ---
> This is so that the avx2 version of ff_hscale8to15X which uses gather
> instructions is only selected on machines where it will actually be
> faster.
> libavutil/cpu.c | 6 ++++++
> libavutil/cpu.h | 6 ++++++
> libavutil/cpu_internal.h | 1 +
> libavutil/x86/cpu.c | 18 ++++++++++++++++++
> 4 files changed, 31 insertions(+)
>
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> index 8960415d00..0a723eeb7a 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -49,6 +49,12 @@
>
> static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1);
>
> +int av_cpu_has_fast_gather(void){
> + if (ARCH_X86)
> + return ff_cpu_has_fast_gather();
> + return 0;
> +}
> +
> static int get_cpu_flags(void)
> {
> if (ARCH_MIPS)
> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> index b555422dae..faf3a221f4 100644
> --- a/libavutil/cpu.h
> +++ b/libavutil/cpu.h
> @@ -72,6 +72,7 @@
> #define AV_CPU_FLAG_MMI (1 << 0)
> #define AV_CPU_FLAG_MSA (1 << 1)
>
> +int av_cpu_has_fast_gather(void);
> /**
> * Return the flags which specify extensions supported by the CPU.
> * The returned value is affected by av_force_cpu_flags() if that was used
> @@ -107,6 +108,11 @@ int av_cpu_count(void);
> * av_set_cpu_flags_mask(), then this function will behave as if AVX is not
> * present.
> */
> +
> +/**
> + * Returns true if the cpu has fast gather instructions.
> + * Broadwell and later cpus have fast gather
> + */
You added the documentation to av_cpu_max_align(), not
av_cpu_has_fast_gather().
> size_t av_cpu_max_align(void);
>
> #endif /* AVUTIL_CPU_H */
> diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
> index 889764320b..92525df0c1 100644
> --- a/libavutil/cpu_internal.h
> +++ b/libavutil/cpu_internal.h
> @@ -46,6 +46,7 @@ int ff_get_cpu_flags_aarch64(void);
> int ff_get_cpu_flags_arm(void);
> int ff_get_cpu_flags_ppc(void);
> int ff_get_cpu_flags_x86(void);
> +int ff_cpu_has_fast_gather(void);
>
> size_t ff_get_cpu_max_align_mips(void);
> size_t ff_get_cpu_max_align_aarch64(void);
> diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
> index bcd41a50a2..9724e0017b 100644
> --- a/libavutil/x86/cpu.c
> +++ b/libavutil/x86/cpu.c
> @@ -270,3 +270,21 @@ size_t ff_get_cpu_max_align_x86(void)
>
> return 8;
> }
> +
> +int ff_cpu_has_fast_gather(void){
> + int eax, ebx, ecx;
> + int max_std_level, std_caps = 0;
> + int family = 0, model = 0;
> + cpuid(0, max_std_level, ebx, ecx, std_caps);
> +
> + if (max_std_level >= 1) {
> + cpuid(1, eax, ebx, ecx, std_caps);
> + family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
> + model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
> + // Broadwell and later
> + if(family == 6 && model >= 70){
> + return 1;
> + }
> + }
> + return 0;
> +}
>
The usual way to signal that a processor supports a feature but executes
it slowly is a CPU flag; see AV_CPU_FLAG_(AVX|SSE2|SSE3)SLOW. That way one
also avoids adding a new public function that is completely useless on
anything other than x86.
- Andreas
More information about the ffmpeg-devel mailing list