[FFmpeg-devel] [PATCH 02/11] avutil: add AVX-512 flags
James Almer
jamrial at gmail.com
Fri Nov 10 03:54:28 EET 2017
On 11/9/2017 8:58 AM, James Darnley wrote:
> ---
> libavutil/cpu.c | 6 +++++-
> libavutil/cpu.h | 1 +
> libavutil/tests/cpu.c | 1 +
> libavutil/x86/cpu.h | 2 ++
> 4 files changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> index c8401b8258..6548cc3042 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -80,7 +80,8 @@ void av_force_cpu_flags(int arg){
> AV_CPU_FLAG_XOP |
> AV_CPU_FLAG_FMA3 |
> AV_CPU_FLAG_FMA4 |
> - AV_CPU_FLAG_AVX2 ))
> + AV_CPU_FLAG_AVX2 |
> + AV_CPU_FLAG_AVX512 ))
> && !(arg & AV_CPU_FLAG_MMX)) {
> av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n");
> arg |= AV_CPU_FLAG_MMX;
> @@ -126,6 +127,7 @@ int av_parse_cpu_flags(const char *s)
> #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
> #define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | AV_CPU_FLAG_BMI1)
> #define CPUFLAG_AESNI (AV_CPU_FLAG_AESNI | CPUFLAG_SSE42)
> +#define CPUFLAG_AVX512 (AV_CPU_FLAG_AVX512 | CPUFLAG_AVX2)
> static const AVOption cpuflags_opts[] = {
> { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
> #if ARCH_PPC
> @@ -154,6 +156,7 @@ int av_parse_cpu_flags(const char *s)
> { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT }, .unit = "flags" },
> { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" },
> { "aesni" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AESNI }, .unit = "flags" },
> + { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX512 }, .unit = "flags" },
> #elif ARCH_ARM
> { "armv5te", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE }, .unit = "flags" },
> { "armv6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6 }, .unit = "flags" },
> @@ -216,6 +219,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
> { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT }, .unit = "flags" },
> { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" },
> { "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI }, .unit = "flags" },
> + { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 }, .unit = "flags" },
>
> #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
> #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE
> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> index 9e5d40affe..91523f3f5a 100644
> --- a/libavutil/cpu.h
> +++ b/libavutil/cpu.h
> @@ -55,6 +55,7 @@
> #define AV_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions
> #define AV_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1
> #define AV_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2
> +#define AV_CPU_FLAG_AVX512 0x100000 ///< AVX-512 functions
Nit: "AVX-512 functions: requires OS support even if YMM/ZMM registers
aren't used"
That would be more in line with the comments on the AVX and AVX2 lines.
>
> #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
> #define AV_CPU_FLAG_VSX 0x0002 ///< ISA 2.06
> diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
> index f02a54cbbb..ce45b715a0 100644
> --- a/libavutil/tests/cpu.c
> +++ b/libavutil/tests/cpu.c
> @@ -73,6 +73,7 @@ static const struct {
> { AV_CPU_FLAG_BMI1, "bmi1" },
> { AV_CPU_FLAG_BMI2, "bmi2" },
> { AV_CPU_FLAG_AESNI, "aesni" },
> + { AV_CPU_FLAG_AVX512, "avx512" },
> #endif
> { 0 }
> };
> diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
> index 309b8e746c..7f4e5d08bb 100644
> --- a/libavutil/x86/cpu.h
> +++ b/libavutil/x86/cpu.h
> @@ -50,6 +50,7 @@
> #define X86_FMA4(flags) CPUEXT(flags, FMA4)
> #define X86_AVX2(flags) CPUEXT(flags, AVX2)
> #define X86_AESNI(flags) CPUEXT(flags, AESNI)
> +#define X86_AVX512(flags) CPUEXT(flags, AVX512)
>
> #define EXTERNAL_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
> #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT)
> @@ -79,6 +80,7 @@
> #define EXTERNAL_AVX2_FAST(flags) CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, AVX2, AVX)
> #define EXTERNAL_AVX2_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX)
> #define EXTERNAL_AESNI(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
> +#define EXTERNAL_AVX512(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
>
> #define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
> #define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
>
This is missing a minor version bump and an APIchanges entry.
LGTM aside from the above.
More information about the ffmpeg-devel
mailing list