[FFmpeg-devel] [PATCH 1/2] libavutil/cpu: Add av_cpu_has_fast_gather() to detect CPUs with fast AVX2 gather instructions
Alan Kelly
alankelly at google.com
Mon Jun 14 14:14:06 EEST 2021
Broadwell and later have fast gather instructions.
---
This is so that the avx2 version of ff_hscale8to15X which uses gather
instructions is only selected on machines where it will actually be
faster.
libavutil/cpu.c | 6 ++++++
libavutil/cpu.h | 6 ++++++
libavutil/cpu_internal.h | 1 +
libavutil/x86/cpu.c | 18 ++++++++++++++++++
4 files changed, 31 insertions(+)
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 8960415d00..0a723eeb7a 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -49,6 +49,12 @@
static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1);
+/**
+ * Public query for fast gather support.
+ * Forwards to the x86 implementation on x86 builds and reports 0 on every
+ * other architecture.
+ */
+int av_cpu_has_fast_gather(void)
+{
+    /* ARCH_X86 is tested with a plain if, like the other per-architecture
+     * dispatch in this file. */
+    if (ARCH_X86)
+        return ff_cpu_has_fast_gather();
+
+    return 0;
+}
+
static int get_cpu_flags(void)
{
if (ARCH_MIPS)
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index b555422dae..faf3a221f4 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -72,6 +72,7 @@
#define AV_CPU_FLAG_MMI (1 << 0)
#define AV_CPU_FLAG_MSA (1 << 1)
+/**
+ * Check whether the CPU has fast gather instructions.
+ * Broadwell and later CPUs have fast gather.
+ *
+ * @return non-zero if the CPU has fast gather instructions, 0 otherwise
+ */
+int av_cpu_has_fast_gather(void);
+
/**
* Return the flags which specify extensions supported by the CPU.
* The returned value is affected by av_force_cpu_flags() if that was used
@@ -107,6 +108,11 @@ int av_cpu_count(void);
* av_set_cpu_flags_mask(), then this function will behave as if AVX is not
* present.
*/
size_t av_cpu_max_align(void);
#endif /* AVUTIL_CPU_H */
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 889764320b..92525df0c1 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -46,6 +46,7 @@ int ff_get_cpu_flags_aarch64(void);
int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void);
+int ff_cpu_has_fast_gather(void);
size_t ff_get_cpu_max_align_mips(void);
size_t ff_get_cpu_max_align_aarch64(void);
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index bcd41a50a2..9724e0017b 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -270,3 +270,21 @@ size_t ff_get_cpu_max_align_x86(void)
return 8;
}
+
+/**
+ * Report whether gather instructions are considered fast on the running CPU.
+ *
+ * The decision is made only from the family and model fields of CPUID
+ * leaf 1: family 6 parts from Broadwell onwards are reported as fast.
+ * This function does not check that gather instructions are available at
+ * all; it only classifies the CPU model.
+ *
+ * NOTE(review): assumes the cpuid() helper is always defined when this file
+ * is built -- confirm for configurations without CPUID support.
+ *
+ * @return 1 if gather is considered fast, 0 otherwise
+ */
+int ff_cpu_has_fast_gather(void)
+{
+    int eax, ebx, ecx, edx;
+    int max_std_level, family, model;
+
+    cpuid(0, max_std_level, ebx, ecx, edx);
+    if (max_std_level < 1)
+        return 0;
+
+    cpuid(1, eax, ebx, ecx, edx);
+    family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+    model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
+    if (family != 6)
+        return 0;
+
+    /* Model numbers are not ordered by generation: Broadwell client is
+     * 0x3D (61), which is lower than the Haswell GT3e model 0x46 (70).
+     * A plain "model >= 70" test therefore misses the former and wrongly
+     * accepts the latter. */
+    if (model == 0x3D)
+        return 1;
+
+    /* 0x46 is still Haswell; everything above it is Broadwell or later. */
+    return model > 0x46;
+}
--
2.32.0.272.g935e593368-goog
More information about the ffmpeg-devel
mailing list