[FFmpeg-devel] [PATCH 2/5] configure: Add detection of assembler support for SVE/SVE2
Martin Storsjö
martin at martin.st
Tue Sep 17 15:14:15 EEST 2024
It turns out that recent versions of MS armasm64 do support some
SVE instructions, but not all of them. Test for one of the
instructions that it currently doesn't support.
---
Just as a disclaimer, I'm not currently actively planning on writing
SVE/SVE2 optimizations. However, related projects such as x264 and
dav1d do have a few functions using these extensions, so we might just
as well add the framework support for these features in ffmpeg,
as functions needing this support will come sooner or later
anyway.
In the related projects, there's no real use of longer vectors
(as there's very little such HW available anyway), but SVE gives
widening loads (used in a couple of places in x264) and 16 bit dot
products (used in dav1d), which can be useful with 128 bit vectors.
---
configure | 14 +++++++++++++-
ffbuild/arch.mak | 2 ++
libavutil/aarch64/asm.S | 18 ++++++++++++++++++
3 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/configure b/configure
index da36419f2d..d05c4a5a51 100755
--- a/configure
+++ b/configure
@@ -466,6 +466,8 @@ Optimization options (experts only):
--disable-neon disable NEON optimizations
--disable-dotprod disable DOTPROD optimizations
--disable-i8mm disable I8MM optimizations
+ --disable-sve disable SVE optimizations
+ --disable-sve2 disable SVE2 optimizations
--disable-inline-asm disable use of inline assembly
--disable-x86asm disable use of standalone x86 assembly
--disable-mipsdsp disable MIPS DSP ASE R1 optimizations
@@ -2163,6 +2165,8 @@ ARCH_EXT_LIST_ARM="
vfp
vfpv3
setend
+ sve
+ sve2
"
ARCH_EXT_LIST_MIPS="
@@ -2435,6 +2439,8 @@ TOOLCHAIN_FEATURES="
as_arch_directive
as_archext_dotprod_directive
as_archext_i8mm_directive
+ as_archext_sve_directive
+ as_archext_sve2_directive
as_dn_directive
as_fpu_directive
as_func
@@ -2755,6 +2761,8 @@ vfpv3_deps="vfp"
setend_deps="arm"
dotprod_deps="aarch64 neon"
i8mm_deps="aarch64 neon"
+sve_deps="aarch64 neon"
+sve2_deps="aarch64 neon sve"
map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
@@ -6223,9 +6231,11 @@ if enabled aarch64; then
# internal assembler in clang 3.3 does not support this instruction
enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
- archext_list="dotprod i8mm"
+ archext_list="dotprod i8mm sve sve2"
enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b'
+ enabled sve && check_archext_insn sve 'whilelt p0.s, x0, x1'
+ enabled sve2 && check_archext_insn sve2 'sqrdmulh z0.s, z0.s, z0.s'
# Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
# assembly support the feature out of the box. Skip this for the features
@@ -7913,6 +7923,8 @@ if enabled aarch64; then
echo "NEON enabled ${neon-no}"
echo "DOTPROD enabled ${dotprod-no}"
echo "I8MM enabled ${i8mm-no}"
+ echo "SVE enabled ${sve-no}"
+ echo "SVE2 enabled ${sve2-no}"
fi
if enabled arm; then
echo "ARMv5TE enabled ${armv5te-no}"
diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak
index 3fc40e5e5d..af71aacfd2 100644
--- a/ffbuild/arch.mak
+++ b/ffbuild/arch.mak
@@ -3,6 +3,8 @@ OBJS-$(HAVE_ARMV6) += $(ARMV6-OBJS) $(ARMV6-OBJS-yes)
OBJS-$(HAVE_ARMV8) += $(ARMV8-OBJS) $(ARMV8-OBJS-yes)
OBJS-$(HAVE_VFP) += $(VFP-OBJS) $(VFP-OBJS-yes)
OBJS-$(HAVE_NEON) += $(NEON-OBJS) $(NEON-OBJS-yes)
+OBJS-$(HAVE_SVE) += $(SVE-OBJS) $(SVE-OBJS-yes)
+OBJS-$(HAVE_SVE2) += $(SVE2-OBJS) $(SVE2-OBJS-yes)
OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes)
OBJS-$(HAVE_MIPSDSP) += $(MIPSDSP-OBJS) $(MIPSDSP-OBJS-yes)
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index 1840f9fb01..50ce7d4dfd 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -56,8 +56,26 @@
#define DISABLE_I8MM
#endif
+#if HAVE_AS_ARCHEXT_SVE_DIRECTIVE
+#define ENABLE_SVE .arch_extension sve
+#define DISABLE_SVE .arch_extension nosve
+#else
+#define ENABLE_SVE
+#define DISABLE_SVE
+#endif
+
+#if HAVE_AS_ARCHEXT_SVE2_DIRECTIVE
+#define ENABLE_SVE2 .arch_extension sve2
+#define DISABLE_SVE2 .arch_extension nosve2
+#else
+#define ENABLE_SVE2
+#define DISABLE_SVE2
+#endif
+
DISABLE_DOTPROD
DISABLE_I8MM
+DISABLE_SVE
+DISABLE_SVE2
/* Support macros for
--
2.34.1
More information about the ffmpeg-devel
mailing list