[FFmpeg-devel] [PATCH 1/4] configure: aarch64: Support assembling the dotprod and i8mm arch extensions

Martin Storsjö martin at martin.st
Fri May 26 11:03:12 EEST 2023


These are available since ARMv8.4-a and ARMv8.6-a respectively,
but can also be available optionally since ARMv8.2-a.

Check if these are available for use unconditionally (e.g. if compiling
with -march=armv8.6-a), or if they can be enabled with specific
assembler directives.

Use ".arch_extension <ext>" for enabling a specific extension in
assembly; the same can also be achieved with ".arch armv8.2-a+<ext>",
but .arch_extension makes it easier to combine multiple separate
features.

Enabling these extensions requires setting a base architecture level
of armv8.2-a with .arch. Don't add ".arch armv8.2-a" unless necessary;
if the base level is high enough (which might unlock other extensions
without .arch_extension), we don't want to lower it.

Only add .arch/.arch_extension if needed; e.g. current clang fails
to recognize the dotprod and i8mm features in .arch_extension, but
can successfully assemble these instructions if they are part of the
baseline set with -march.
---
 configure               | 77 ++++++++++++++++++++++++++++++++++++++++-
 libavutil/aarch64/asm.S | 13 +++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 87f7afc2e1..3c7473efb2 100755
--- a/configure
+++ b/configure
@@ -454,6 +454,8 @@ Optimization options (experts only):
   --disable-armv6t2        disable armv6t2 optimizations
   --disable-vfp            disable VFP optimizations
   --disable-neon           disable NEON optimizations
+  --disable-dotprod        disable DOTPROD optimizations
+  --disable-i8mm           disable I8MM optimizations
   --disable-inline-asm     disable use of inline assembly
   --disable-x86asm         disable use of standalone x86 assembly
   --disable-mipsdsp        disable MIPS DSP ASE R1 optimizations
@@ -1154,6 +1156,41 @@ check_insn(){
     check_as ${1}_external "$2"
 }
 
+check_archext_insn(){
+    log check_archext_insn "$@"
+    feature="$1"
+    base_arch="$2"
+    archext="$3"
+    instr="$4"
+    # Check if the assembly is accepted unconditionally in either inline or
+    # external assembly.
+    check_inline_asm ${feature}_inline "\"$instr\""
+    check_as ${feature}_external "$instr"
+
+    enabled_any ${feature}_inline ${feature}_external || disable ${feature}
+
+    if disabled ${feature}_external; then
+        # If not accepted unconditionally, check if we can assemble it
+        # with a suitable .arch_extension directive.
+        test_as <<EOF && enable ${feature} as_archext_${archext}_directive
+.arch_extension $archext
+$instr
+EOF
+        if disabled ${feature}; then
+            # If the base arch level is too low, .arch_extension can require setting
+            # a higher arch level with .arch too. Only do this if strictly needed;
+            # if the base level is e.g. armv8.4-a and some features are available
+            # without any .arch_extension, we don't want to set ".arch armv8.2-a"
+            # for some other .arch_extension.
+            test_as <<EOF && enable ${feature} as_archext_${archext}_directive as_archext_${archext}_needs_arch
+.arch $base_arch
+.arch_extension $archext
+$instr
+EOF
+        fi
+    fi
+}
+
 check_x86asm(){
     log check_x86asm "$@"
     name=$1
@@ -2059,6 +2096,8 @@ ARCH_EXT_LIST_ARM="
     armv6
     armv6t2
     armv8
+    dotprod
+    i8mm
     neon
     vfp
     vfpv3
@@ -2322,6 +2361,10 @@ SYSTEM_LIBRARIES="
 
 TOOLCHAIN_FEATURES="
     as_arch_directive
+    as_archext_dotprod_directive
+    as_archext_dotprod_needs_arch
+    as_archext_i8mm_directive
+    as_archext_i8mm_needs_arch
     as_dn_directive
     as_fpu_directive
     as_func
@@ -2622,6 +2665,8 @@ intrinsics_neon_deps="neon"
 vfp_deps_any="aarch64 arm"
 vfpv3_deps="vfp"
 setend_deps="arm"
+dotprod_deps="aarch64 neon"
+i8mm_deps="aarch64 neon"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
@@ -5979,12 +6024,26 @@ check_inline_asm inline_asm_labels '"1:\n"'
 check_inline_asm inline_asm_nonlocal_labels '"Label:\n"'
 
 if enabled aarch64; then
+    check_as as_arch_directive ".arch armv8.2-a"
+
     enabled armv8 && check_insn armv8 'prfm   pldl1strm, [x0]'
     # internal assembler in clang 3.3 does not support this instruction
     enabled neon && check_insn neon 'ext   v0.8B, v0.8B, v1.8B, #1'
     enabled vfp  && check_insn vfp  'fmadd d0,    d0,    d1,    d2'
 
-    map 'enabled_any ${v}_external ${v}_inline || disable $v' $ARCH_EXT_LIST_ARM
+    archext_list="dotprod i8mm"
+    enabled dotprod && check_archext_insn dotprod armv8.2-a dotprod 'udot v0.4s, v0.16b, v0.16b'
+    enabled i8mm    && check_archext_insn i8mm    armv8.2-a i8mm    'usdot v0.4s, v0.16b, v0.16b'
+
+    # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
+    # assembly supports the feature out of the box. Skip this for the features
+    # checked with check_archext_insn above; that function checks separately
+    # whether they can be built out of the box or enabled with an
+    # .arch_extension directive.
+    for v in $ARCH_EXT_LIST_ARM; do
+        is_in $v $archext_list && continue
+        enabled_any ${v}_external ${v}_inline || disable $v
+    done
 
 elif enabled alpha; then
 
@@ -6013,6 +6072,12 @@ EOF
         warn "Compiler does not indicate floating-point ABI, guessing $fpabi."
     fi
 
+    # Test for various instruction sets, testing support both in inline and
+    # external assembly. This sets the ${v}_inline or ${v}_external flags
+    # if the instruction can be used unconditionally in either inline or
+    # external assembly. This means that if the ${v}_external feature is set,
+    # that feature can be used unconditionally in various support macros
+    # anywhere in external assembly, in any function.
     enabled armv5te && check_insn armv5te 'qadd r0, r0, r0'
     enabled armv6   && check_insn armv6   'sadd16 r0, r0, r0'
     enabled armv6t2 && check_insn armv6t2 'movt r0, #0'
@@ -6021,6 +6086,14 @@ EOF
     enabled vfpv3   && check_insn vfpv3   'vmov.f32 s0, #1.0'
     enabled setend  && check_insn setend  'setend be'
 
+    # If neither inline nor external assembly can use the feature by default,
+    # disable the main unsuffixed feature (e.g. HAVE_NEON).
+    #
+    # For targets that support runtime CPU feature detection, don't disable
+    # the main feature flag - there we assume that all supported toolchains
+    # can assemble code for all instruction set features (e.g. NEON) with
+    # suitable assembly flags (such as ".fpu neon"); we don't check
+    # specifically that they really do.
     [ $target_os = linux ] || [ $target_os = android ] ||
         map 'enabled_any ${v}_external ${v}_inline || disable $v' \
             $ARCH_EXT_LIST_ARM
@@ -7601,6 +7674,8 @@ fi
 if enabled aarch64; then
     echo "NEON enabled              ${neon-no}"
     echo "VFP enabled               ${vfp-no}"
+    echo "DOTPROD enabled           ${dotprod-no}"
+    echo "I8MM enabled              ${i8mm-no}"
 fi
 if enabled arm; then
     echo "ARMv5TE enabled           ${armv5te-no}"
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index a7782415d7..7cf907f93c 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -36,6 +36,19 @@
 #   define __has_feature(x) 0
 #endif
 
+#if HAVE_AS_ARCH_DIRECTIVE
+#if HAVE_AS_ARCHEXT_DOTPROD_NEEDS_ARCH || HAVE_AS_ARCHEXT_I8MM_NEEDS_ARCH
+        .arch           armv8.2-a
+#endif
+#endif
+
+#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
+        .arch_extension dotprod
+#endif
+#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
+        .arch_extension i8mm
+#endif
+
 
 /* Support macros for
  *   - Armv8.3-A Pointer Authentication and
-- 
2.37.1 (Apple Git-137.1)



More information about the ffmpeg-devel mailing list