[FFmpeg-devel] [PATCH 1/4] configure: aarch64: Support assembling the dotprod and i8mm arch extensions
Martin Storsjö
martin at martin.st
Fri May 26 11:03:12 EEST 2023
These are available since ARMv8.4-a and ARMv8.6-a respectively,
but can also be available optionally since ARMv8.2-a.
Check if these are available for use unconditionally (e.g. if compiling
with -march=armv8.6-a), or if they can be enabled with specific
assembler directives.
Use ".arch_extension <ext>" for enabling a specific extension in
assembly; the same can also be achieved with ".arch armv8.2-a+<ext>",
but with .arch_extension it is easier to combine multiple separate
features.
Enabling these extensions requires setting a base architecture level
of armv8.2-a with .arch. Don't add ".arch armv8.2-a" unless necessary;
if the base level is high enough (which might unlock other extensions
without .arch_extension), we don't want to lower it.
Only add .arch/.arch_extension if needed; e.g. current clang fails
to recognize the dotprod and i8mm features in .arch_extension, but
can successfully assemble these instructions if they are part of the
baseline set with -march.
---
configure | 77 ++++++++++++++++++++++++++++++++++++++++-
libavutil/aarch64/asm.S | 13 +++++++
2 files changed, 89 insertions(+), 1 deletion(-)
diff --git a/configure b/configure
index 87f7afc2e1..3c7473efb2 100755
--- a/configure
+++ b/configure
@@ -454,6 +454,8 @@ Optimization options (experts only):
--disable-armv6t2 disable armv6t2 optimizations
--disable-vfp disable VFP optimizations
--disable-neon disable NEON optimizations
+ --disable-dotprod disable DOTPROD optimizations
+ --disable-i8mm disable I8MM optimizations
--disable-inline-asm disable use of inline assembly
--disable-x86asm disable use of standalone x86 assembly
--disable-mipsdsp disable MIPS DSP ASE R1 optimizations
@@ -1154,6 +1156,41 @@ check_insn(){
check_as ${1}_external "$2"
}
+check_archext_insn(){
+ log check_archext_insn "$@"
+ feature="$1"
+ base_arch="$2"
+ archext="$3"
+ instr="$4"
+ # Check if the assembly is accepted unconditionally in either inline or
+ # external assembly.
+ check_inline_asm ${feature}_inline "\"$instr\""
+ check_as ${feature}_external "$instr"
+
+ enabled_any ${feature}_inline ${feature}_external || disable ${feature}
+
+ if disabled ${feature}_external; then
+ # If not accepted unconditionally, check if we can assemble it
+ # with a suitable .arch_extension directive.
+ test_as <<EOF && enable ${feature} as_archext_${archext}_directive
+.arch_extension $archext
+$instr
+EOF
+ if disabled ${feature}; then
+ # If the base arch level is too low, .arch_extension can require setting
+ # a higher arch level with .arch too. Only do this if strictly needed;
+ # if the base level is e.g. armv8.4-a and some features are available
+ # without any .arch_extension, we don't want to set ".arch armv8.2-a"
+ # for some other .arch_extension.
+ test_as <<EOF && enable ${feature} as_archext_${archext}_directive as_archext_${archext}_needs_arch
+.arch $base_arch
+.arch_extension $archext
+$instr
+EOF
+ fi
+ fi
+}
+
check_x86asm(){
log check_x86asm "$@"
name=$1
@@ -2059,6 +2096,8 @@ ARCH_EXT_LIST_ARM="
armv6
armv6t2
armv8
+ dotprod
+ i8mm
neon
vfp
vfpv3
@@ -2322,6 +2361,10 @@ SYSTEM_LIBRARIES="
TOOLCHAIN_FEATURES="
as_arch_directive
+ as_archext_dotprod_directive
+ as_archext_dotprod_needs_arch
+ as_archext_i8mm_directive
+ as_archext_i8mm_needs_arch
as_dn_directive
as_fpu_directive
as_func
@@ -2622,6 +2665,8 @@ intrinsics_neon_deps="neon"
vfp_deps_any="aarch64 arm"
vfpv3_deps="vfp"
setend_deps="arm"
+dotprod_deps="aarch64 neon"
+i8mm_deps="aarch64 neon"
map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
@@ -5979,12 +6024,26 @@ check_inline_asm inline_asm_labels '"1:\n"'
check_inline_asm inline_asm_nonlocal_labels '"Label:\n"'
if enabled aarch64; then
+ check_as as_arch_directive ".arch armv8.2-a"
+
enabled armv8 && check_insn armv8 'prfm pldl1strm, [x0]'
# internal assembler in clang 3.3 does not support this instruction
enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
enabled vfp && check_insn vfp 'fmadd d0, d0, d1, d2'
- map 'enabled_any ${v}_external ${v}_inline || disable $v' $ARCH_EXT_LIST_ARM
+ archext_list="dotprod i8mm"
+ enabled dotprod && check_archext_insn dotprod armv8.2-a dotprod 'udot v0.4s, v0.16b, v0.16b'
+ enabled i8mm && check_archext_insn i8mm armv8.2-a i8mm 'usdot v0.4s, v0.16b, v0.16b'
+
+ # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
+ # assembly supports the feature out of the box. Skip this for the features
+ # checked with check_archext_insn above; for those, it is checked separately
+ # whether they can be built out of the box or enabled with an
+ # .arch_extension directive.
+ for v in $ARCH_EXT_LIST_ARM; do
+ is_in $v $archext_list && continue
+ enabled_any ${v}_external ${v}_inline || disable $v
+ done
elif enabled alpha; then
@@ -6013,6 +6072,12 @@ EOF
warn "Compiler does not indicate floating-point ABI, guessing $fpabi."
fi
+ # Test for various instruction sets, testing support both in inline and
+ # external assembly. This sets the ${v}_inline or ${v}_external flags
+ # if the instruction can be used unconditionally in either inline or
+ # external assembly. This means that if the ${v}_external feature is set,
+ # that feature can be used unconditionally in various support macros
+ # anywhere in external assembly, in any function.
enabled armv5te && check_insn armv5te 'qadd r0, r0, r0'
enabled armv6 && check_insn armv6 'sadd16 r0, r0, r0'
enabled armv6t2 && check_insn armv6t2 'movt r0, #0'
@@ -6021,6 +6086,14 @@ EOF
enabled vfpv3 && check_insn vfpv3 'vmov.f32 s0, #1.0'
enabled setend && check_insn setend 'setend be'
+ # If neither inline nor external assembly can use the feature by default,
+ # disable the main unsuffixed feature (e.g. HAVE_NEON).
+ #
+ # For targets that support runtime CPU feature detection, don't disable
+ # the main feature flag - there we assume that all supported toolchains
+ # can assemble code for all instruction set features (e.g. NEON) with
+ # suitable assembly flags (such as ".fpu neon"); we don't check
+ # specifically that they really do.
[ $target_os = linux ] || [ $target_os = android ] ||
map 'enabled_any ${v}_external ${v}_inline || disable $v' \
$ARCH_EXT_LIST_ARM
@@ -7601,6 +7674,8 @@ fi
if enabled aarch64; then
echo "NEON enabled ${neon-no}"
echo "VFP enabled ${vfp-no}"
+ echo "DOTPROD enabled ${dotprod-no}"
+ echo "I8MM enabled ${i8mm-no}"
fi
if enabled arm; then
echo "ARMv5TE enabled ${armv5te-no}"
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index a7782415d7..7cf907f93c 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -36,6 +36,19 @@
# define __has_feature(x) 0
#endif
+#if HAVE_AS_ARCH_DIRECTIVE
+#if HAVE_AS_ARCHEXT_DOTPROD_NEEDS_ARCH || HAVE_AS_ARCHEXT_I8MM_NEEDS_ARCH
+ .arch armv8.2-a
+#endif
+#endif
+
+#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
+ .arch_extension dotprod
+#endif
+#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
+ .arch_extension i8mm
+#endif
+
/* Support macros for
* - Armv8.3-A Pointer Authentication and
--
2.37.1 (Apple Git-137.1)
More information about the ffmpeg-devel
mailing list