[FFmpeg-devel] [PATCH v3 1/7] avutil: [loongarch] Add support for loongarch SIMD.
Hao Chen
chenhao at loongson.cn
Wed Dec 15 05:51:04 EET 2021
From: Shiyou Yin <yinshiyou-hf at loongson.cn>
LSX and LASX is loongarch SIMD extention.
They are enabled by default if compiler support it, and can be disabled
with '--disable-lsx' '--disable-lasx'.
Change-Id: Ie2608ea61dbd9b7fffadbf0ec2348bad6c124476
---
Makefile | 2 +-
configure | 20 +++++++++--
ffbuild/arch.mak | 4 ++-
ffbuild/common.mak | 8 +++++
libavutil/cpu.c | 7 ++++
libavutil/cpu.h | 4 +++
libavutil/cpu_internal.h | 2 ++
libavutil/loongarch/Makefile | 1 +
libavutil/loongarch/cpu.c | 69 ++++++++++++++++++++++++++++++++++++
libavutil/loongarch/cpu.h | 31 ++++++++++++++++
libavutil/tests/cpu.c | 3 ++
tests/checkasm/checkasm.c | 3 ++
12 files changed, 150 insertions(+), 4 deletions(-)
create mode 100644 libavutil/loongarch/Makefile
create mode 100644 libavutil/loongarch/cpu.c
create mode 100644 libavutil/loongarch/cpu.h
diff --git a/Makefile b/Makefile
index 26c9107237..5b20658b52 100644
--- a/Makefile
+++ b/Makefile
@@ -89,7 +89,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \
ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \
ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \
MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \
- MMI-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
+ MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
define RESET
$(1) :=
diff --git a/configure b/configure
index a7593ec2db..c4afde4c5c 100755
--- a/configure
+++ b/configure
@@ -452,7 +452,9 @@ Optimization options (experts only):
--disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations
--disable-msa disable MSA optimizations
--disable-mipsfpu disable floating point MIPS optimizations
- --disable-mmi disable Loongson SIMD optimizations
+ --disable-mmi disable Loongson MMI optimizations
+ --disable-lsx disable Loongson LSX optimizations
+ --disable-lasx disable Loongson LASX optimizations
--disable-fast-unaligned consider unaligned accesses slow
Developer options (useful when working on FFmpeg itself):
@@ -2081,6 +2083,8 @@ ARCH_EXT_LIST_LOONGSON="
loongson2
loongson3
mmi
+ lsx
+ lasx
"
ARCH_EXT_LIST_X86_SIMD="
@@ -2617,6 +2621,10 @@ power8_deps="vsx"
loongson2_deps="mips"
loongson3_deps="mips"
+mmi_deps_any="loongson2 loongson3"
+lsx_deps="loongarch"
+lasx_deps="lsx"
+
mips32r2_deps="mips"
mips32r5_deps="mips"
mips32r6_deps="mips"
@@ -2625,7 +2633,6 @@ mips64r6_deps="mips"
mipsfpu_deps="mips"
mipsdsp_deps="mips"
mipsdspr2_deps="mips"
-mmi_deps_any="loongson2 loongson3"
msa_deps="mipsfpu"
cpunop_deps="i686"
@@ -6134,6 +6141,9 @@ EOF
;;
esac
+elif enabled loongarch; then
+ enabled lsx && check_inline_asm lsx '"vadd.b $vr0, $vr1, $vr2"' '-mlsx' && append LSXFLAGS '-mlsx'
+ enabled lasx && check_inline_asm lasx '"xvadd.b $xr0, $xr1, $xr2"' '-mlasx' && append LASXFLAGS '-mlasx'
fi
check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)"
@@ -7484,6 +7494,10 @@ if enabled ppc; then
echo "PPC 4xx optimizations ${ppc4xx-no}"
echo "dcbzl available ${dcbzl-no}"
fi
+if enabled loongarch; then
+ echo "LSX enabled ${lsx-no}"
+ echo "LASX enabled ${lasx-no}"
+fi
echo "debug symbols ${debug-no}"
echo "strip symbols ${stripping-no}"
echo "optimize for size ${small-no}"
@@ -7645,6 +7659,8 @@ ASMSTRIPFLAGS=$ASMSTRIPFLAGS
X86ASMFLAGS=$X86ASMFLAGS
MSAFLAGS=$MSAFLAGS
MMIFLAGS=$MMIFLAGS
+LSXFLAGS=$LSXFLAGS
+LASXFLAGS=$LASXFLAGS
BUILDSUF=$build_suffix
PROGSSUF=$progs_suffix
FULLNAME=$FULLNAME
diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak
index e09006efca..997e31e85e 100644
--- a/ffbuild/arch.mak
+++ b/ffbuild/arch.mak
@@ -8,7 +8,9 @@ OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes)
OBJS-$(HAVE_MIPSDSP) += $(MIPSDSP-OBJS) $(MIPSDSP-OBJS-yes)
OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes)
OBJS-$(HAVE_MSA) += $(MSA-OBJS) $(MSA-OBJS-yes)
-OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
+OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
+OBJS-$(HAVE_LSX) += $(LSX-OBJS) $(LSX-OBJS-yes)
+OBJS-$(HAVE_LASX) += $(LASX-OBJS) $(LASX-OBJS-yes)
OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes)
diff --git a/ffbuild/common.mak b/ffbuild/common.mak
index 268ae61154..0eb831d434 100644
--- a/ffbuild/common.mak
+++ b/ffbuild/common.mak
@@ -59,6 +59,8 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC)
COMPILE_NVCC = $(call COMPILE,NVCC)
COMPILE_MMI = $(call COMPILE,CC,MMIFLAGS)
COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
+COMPILE_LSX = $(call COMPILE,CC,LSXFLAGS)
+COMPILE_LASX = $(call COMPILE,CC,LASXFLAGS)
%_mmi.o: %_mmi.c
$(COMPILE_MMI)
@@ -66,6 +68,12 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
%_msa.o: %_msa.c
$(COMPILE_MSA)
+%_lsx.o: %_lsx.c
+ $(COMPILE_LSX)
+
+%_lasx.o: %_lasx.c
+ $(COMPILE_LASX)
+
%.o: %.c
$(COMPILE_C)
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 4627af4f23..63efb97ffd 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -62,6 +62,8 @@ static int get_cpu_flags(void)
return ff_get_cpu_flags_ppc();
if (ARCH_X86)
return ff_get_cpu_flags_x86();
+ if (ARCH_LOONGARCH)
+ return ff_get_cpu_flags_loongarch();
return 0;
}
@@ -168,6 +170,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
#elif ARCH_MIPS
{ "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" },
{ "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" },
+#elif ARCH_LOONGARCH
+ { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" },
+ { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" },
#endif
{ NULL },
};
@@ -253,6 +258,8 @@ size_t av_cpu_max_align(void)
return ff_get_cpu_max_align_ppc();
if (ARCH_X86)
return ff_get_cpu_max_align_x86();
+ if (ARCH_LOONGARCH)
+ return ff_get_cpu_max_align_loongarch();
return 8;
}
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index afea0640b4..ae443eccad 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -72,6 +72,10 @@
#define AV_CPU_FLAG_MMI (1 << 0)
#define AV_CPU_FLAG_MSA (1 << 1)
+//Loongarch SIMD extension.
+#define AV_CPU_FLAG_LSX (1 << 0)
+#define AV_CPU_FLAG_LASX (1 << 1)
+
/**
* Return the flags which specify extensions supported by the CPU.
* The returned value is affected by av_force_cpu_flags() if that was used
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 889764320b..e207b2d480 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -46,11 +46,13 @@ int ff_get_cpu_flags_aarch64(void);
int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void);
+int ff_get_cpu_flags_loongarch(void);
size_t ff_get_cpu_max_align_mips(void);
size_t ff_get_cpu_max_align_aarch64(void);
size_t ff_get_cpu_max_align_arm(void);
size_t ff_get_cpu_max_align_ppc(void);
size_t ff_get_cpu_max_align_x86(void);
+size_t ff_get_cpu_max_align_loongarch(void);
#endif /* AVUTIL_CPU_INTERNAL_H */
diff --git a/libavutil/loongarch/Makefile b/libavutil/loongarch/Makefile
new file mode 100644
index 0000000000..2addd9351c
--- /dev/null
+++ b/libavutil/loongarch/Makefile
@@ -0,0 +1 @@
+OBJS += loongarch/cpu.o
diff --git a/libavutil/loongarch/cpu.c b/libavutil/loongarch/cpu.c
new file mode 100644
index 0000000000..e4b240bc44
--- /dev/null
+++ b/libavutil/loongarch/cpu.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Loongson Technology Corporation Limited
+ * Contributed by Shiyou Yin <yinshiyou-hf at loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "cpu.h"
+
+#define LOONGARCH_CFG2 0x2
+#define LOONGARCH_CFG2_LSX (1 << 6)
+#define LOONGARCH_CFG2_LASX (1 << 7)
+
+static int cpu_flags_cpucfg(void)
+{
+ int flags = 0;
+ uint32_t cfg2 = 0;
+
+ __asm__ volatile(
+ "cpucfg %0, %1 \n\t"
+ : "+&r"(cfg2)
+ : "r"(LOONGARCH_CFG2)
+ );
+
+ if (cfg2 & LOONGARCH_CFG2_LSX)
+ flags |= AV_CPU_FLAG_LSX;
+
+ if (cfg2 & LOONGARCH_CFG2_LASX)
+ flags |= AV_CPU_FLAG_LASX;
+
+ return flags;
+}
+
+int ff_get_cpu_flags_loongarch(void)
+{
+#if defined __linux__
+ return cpu_flags_cpucfg();
+#else
+ /* Assume no SIMD ASE supported */
+ return 0;
+#endif
+}
+
+size_t ff_get_cpu_max_align_loongarch(void)
+{
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_LASX)
+ return 32;
+ if (flags & AV_CPU_FLAG_LSX)
+ return 16;
+
+ return 8;
+}
diff --git a/libavutil/loongarch/cpu.h b/libavutil/loongarch/cpu.h
new file mode 100644
index 0000000000..1a445c69bc
--- /dev/null
+++ b/libavutil/loongarch/cpu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Loongson Technology Corporation Limited
+ * Contributed by Shiyou Yin <yinshiyou-hf at loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_LOONGARCH_CPU_H
+#define AVUTIL_LOONGARCH_CPU_H
+
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+
+#define have_lsx(flags) CPUEXT(flags, LSX)
+#define have_lasx(flags) CPUEXT(flags, LASX)
+
+#endif /* AVUTIL_LOONGARCH_CPU_H */
diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index c853371fb3..0a6c0cd32e 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c
@@ -77,6 +77,9 @@ static const struct {
{ AV_CPU_FLAG_BMI2, "bmi2" },
{ AV_CPU_FLAG_AESNI, "aesni" },
{ AV_CPU_FLAG_AVX512, "avx512" },
+#elif ARCH_LOONGARCH
+ { AV_CPU_FLAG_LSX, "lsx" },
+ { AV_CPU_FLAG_LASX, "lasx" },
#endif
{ 0 }
};
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index b1353f7cbe..90d080de02 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -236,6 +236,9 @@ static const struct {
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
{ "AVX-512", "avx512", AV_CPU_FLAG_AVX512 },
+#elif ARCH_LOONGARCH
+ { "LSX", "lsx", AV_CPU_FLAG_LSX },
+ { "LASX", "lasx", AV_CPU_FLAG_LASX },
#endif
{ NULL }
};
--
2.20.1
More information about the ffmpeg-devel
mailing list