[FFmpeg-devel] [PATCH] Clean up av_get_cpu_flag()
Janne Grunau
janne-ffmpeg
Thu Sep 9 19:59:45 CEST 2010
On Wed, Sep 08, 2010 at 09:58:57PM +0100, Måns Rullgård wrote:
> "Ronald S. Bultje" <rsbultje at gmail.com> writes:
>
> > Hi,
> >
> > 2010/9/8 Måns Rullgård <mans at mansr.com>:
> >> "Ronald S. Bultje" <rsbultje at gmail.com> writes:
> >>> On Wed, Sep 8, 2010 at 3:46 PM, Mans Rullgard <mans at mansr.com> wrote:
> >>>> +/* The following CPU-specific functions shall not be called directly. */
> >>>> +int av_get_cpu_flags_arm(void);
> >>>> +int av_get_cpu_flags_ppc(void);
> >>>> +int av_get_cpu_flags_x86(void);
> >>>
> >>> So remove the av_ prefix? Looks nice otherwise.
> >>
> >> ff_ prefix instead?
done
> > Probably yes, and then put them in a private header also if wanted (or
>
> I thought a separate header for those functions seemed overkill.
I moved the prototypes to libavutil/internal.h
I've also added the caching of the flags as requested by Michael in the
other thread.
Janne
-------------- next part --------------
commit ee7051494374a84729788eafe09724aabc0e27b7
Author: Janne Grunau <janne at grunau.be>
Date: Thu Sep 9 19:55:21 2010 +0200
Clean up av_get_cpu_flag()
Instead of defining functions in per-arch header files included
by the main cpu.c, define them normally and call them from the
generic one and cache the results.
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 545e1c2..22b5452 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -57,6 +57,10 @@ OBJS = adler32.o \
tree.o \
utils.o \
+OBJS-$(ARCH_ARM) += arm/cpu.o
+OBJS-$(ARCH_PPC) += ppc/cpu.o
+OBJS-$(ARCH_X86) += x86/cpu.o
+
TESTPROGS = adler32 aes base64 cpu crc des lls md5 pca sha softfloat tree
TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c
new file mode 100644
index 0000000..742c3e4
--- /dev/null
+++ b/libavutil/arm/cpu.c
@@ -0,0 +1,25 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "config.h"
+
+int ff_get_cpu_flags_arm(void)
+{
+ return HAVE_IWMMXT * AV_CPU_FLAG_IWMMXT; /* config.h setting only, nothing is probed */
+}
diff --git a/libavutil/arm/cpu.h b/libavutil/arm/cpu.h
deleted file mode 100644
index 45ab761..0000000
--- a/libavutil/arm/cpu.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_ARM_CPU_H
-#define AVUTIL_ARM_CPU_H
-
-#include "config.h"
-
-int av_get_cpu_flags(void)
-{
- return HAVE_IWMMXT * AV_CPU_FLAG_IWMMXT;
-}
-
-#endif /* AVUTIL_ARM_CPU_H */
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index fb37edb..77359bc 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -18,19 +18,23 @@
#include "cpu.h"
#include "config.h"
+#include "internal.h"
-#if ARCH_ARM
-# include "arm/cpu.h"
-#elif ARCH_PPC
-# include "ppc/cpu.h"
-#elif ARCH_X86
-# include "x86/cpu.h"
-#else
int av_get_cpu_flags(void)
{
- return 0;
+ static int cpu_flags; // cached detection result; 0 means "not probed yet"
+
+ if (!cpu_flags) {
+ if (ARCH_ARM) cpu_flags = ff_get_cpu_flags_arm();
+ if (ARCH_PPC) cpu_flags = ff_get_cpu_flags_ppc();
+ if (ARCH_X86) cpu_flags = ff_get_cpu_flags_x86();
+
+ // use AV_CPU_FLAG_FORCE as sentinel for empty cpu flags, so the probe is not repeated
+ if (!cpu_flags)
+ cpu_flags = AV_CPU_FLAG_FORCE;
+ }
+ return cpu_flags & ~AV_CPU_FLAG_FORCE; // the sentinel bit is never reported
}
-#endif
#ifdef TEST
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 53d2b94..099eacb 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -231,4 +231,9 @@
type ff_##name args
#endif
+/* Per-architecture CPU flag probes; do not call directly, use av_get_cpu_flags(). */
+int ff_get_cpu_flags_arm(void);
+int ff_get_cpu_flags_ppc(void);
+int ff_get_cpu_flags_x86(void);
+
#endif /* AVUTIL_INTERNAL_H */
diff --git a/libavutil/ppc/cpu.c b/libavutil/ppc/cpu.c
new file mode 100644
index 0000000..9dc9471
--- /dev/null
+++ b/libavutil/ppc/cpu.c
@@ -0,0 +1,84 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef __APPLE__
+#undef _POSIX_C_SOURCE
+#include <sys/sysctl.h>
+#elif defined(__OpenBSD__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#elif defined(__AMIGAOS4__)
+#include <exec/exec.h>
+#include <interfaces/exec.h>
+#include <proto/exec.h>
+#endif /* __APPLE__ */
+
+#include "libavutil/cpu.h"
+#include "config.h"
+
+/**
+ * Return AV_CPU_FLAG_ALTIVEC or 0 (always 0 unless HAVE_ALTIVEC). AltiVec is
+ * probed via the OS (AmigaOS 4, Apple, OpenBSD) or the PVR, else assumed present.
+ */
+int ff_get_cpu_flags_ppc(void)
+{
+#if HAVE_ALTIVEC
+#ifdef __AMIGAOS4__
+ ULONG result = 0;
+ extern struct ExecIFace *IExec;
+
+ IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
+ if (result == VECTORTYPE_ALTIVEC)
+ return AV_CPU_FLAG_ALTIVEC;
+ return 0;
+#elif defined(__APPLE__) || defined(__OpenBSD__)
+#ifdef __OpenBSD__
+ int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC};
+#else
+ int sels[2] = {CTL_HW, HW_VECTORUNIT};
+#endif
+ int has_vu = 0;
+ size_t len = sizeof(has_vu);
+ int err;
+
+ err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
+
+ if (err == 0)
+ return has_vu ? AV_CPU_FLAG_ALTIVEC : 0;
+ return 0;
+#elif CONFIG_RUNTIME_CPUDETECT
+ int proc_ver;
+ // Support of mfspr PVR emulation added in Linux 2.6.17.
+ __asm__ volatile("mfspr %0, 287" : "=r" (proc_ver));
+ proc_ver >>= 16;
+ if (proc_ver & 0x8000 ||
+ proc_ver == 0x000c ||
+ proc_ver == 0x0039 || proc_ver == 0x003c ||
+ proc_ver == 0x0044 || proc_ver == 0x0045 ||
+ proc_ver == 0x0070)
+ return AV_CPU_FLAG_ALTIVEC;
+ return 0;
+#else
+ // Since we were compiled for AltiVec, just assume we have it
+ // until someone comes up with a proper way (not involving signal hacks).
+ return AV_CPU_FLAG_ALTIVEC;
+#endif /* __AMIGAOS4__ */
+#endif /* HAVE_ALTIVEC */
+ return 0;
+}
diff --git a/libavutil/ppc/cpu.h b/libavutil/ppc/cpu.h
deleted file mode 100644
index 684361c..0000000
--- a/libavutil/ppc/cpu.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_PPC_CPU_H
-#define AVUTIL_PPC_CPU_H
-
-#ifdef __APPLE__
-#undef _POSIX_C_SOURCE
-#include <sys/sysctl.h>
-#elif defined(__OpenBSD__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <machine/cpu.h>
-#elif defined(__AMIGAOS4__)
-#include <exec/exec.h>
-#include <interfaces/exec.h>
-#include <proto/exec.h>
-#endif /* __APPLE__ */
-
-#include "config.h"
-
-/**
- * This function MAY rely on signal() or fork() in order to make sure AltiVec
- * is present.
- */
-int av_get_cpu_flags(void)
-{
-#if HAVE_ALTIVEC
-#ifdef __AMIGAOS4__
- ULONG result = 0;
- extern struct ExecIFace *IExec;
-
- IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
- if (result == VECTORTYPE_ALTIVEC)
- return AV_CPU_FLAG_ALTIVEC;
- return 0;
-#elif defined(__APPLE__) || defined(__OpenBSD__)
-#ifdef __OpenBSD__
- int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC};
-#else
- int sels[2] = {CTL_HW, HW_VECTORUNIT};
-#endif
- int has_vu = 0;
- size_t len = sizeof(has_vu);
- int err;
-
- err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
-
- if (err == 0)
- return has_vu ? AV_CPU_FLAG_ALTIVEC : 0;
- return 0;
-#elif CONFIG_RUNTIME_CPUDETECT
- int proc_ver;
- // Support of mfspr PVR emulation added in Linux 2.6.17.
- __asm__ volatile("mfspr %0, 287" : "=r" (proc_ver));
- proc_ver >>= 16;
- if (proc_ver & 0x8000 ||
- proc_ver == 0x000c ||
- proc_ver == 0x0039 || proc_ver == 0x003c ||
- proc_ver == 0x0044 || proc_ver == 0x0045 ||
- proc_ver == 0x0070)
- return AV_CPU_FLAG_ALTIVEC;
- return 0;
-#else
- // Since we were compiled for AltiVec, just assume we have it
- // until someone comes up with a proper way (not involving signal hacks).
- return AV_CPU_FLAG_ALTIVEC;
-#endif /* __AMIGAOS4__ */
-#endif /* HAVE_ALTIVEC */
- return 0;
-}
-
-#endif /* AVUTIL_PPC_CPU_H */
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
new file mode 100644
index 0000000..4df3286
--- /dev/null
+++ b/libavutil/x86/cpu.c
@@ -0,0 +1,123 @@
+/*
+ * CPU detection code, extracted from mmx.h
+ * (c)1997-99 by H. Dietz and R. Fisher
+ * Converted to C and improved by Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include "libavutil/x86_cpu.h"
+#include "libavutil/cpu.h"
+
+/* ebx saving is necessary for PIC (gcc seems unable to see it alone): it is kept in the "S" register across cpuid */
+#define cpuid(index,eax,ebx,ecx,edx)\
+ __asm__ volatile\
+ ("mov %%"REG_b", %%"REG_S"\n\t"\
+ "cpuid\n\t"\
+ "xchg %%"REG_b", %%"REG_S\
+ : "=a" (eax), "=S" (ebx),\
+ "=c" (ecx), "=d" (edx)\
+ : "0" (index));
+
+/* Probe the x86 SIMD extensions through CPUID; returns an OR of AV_CPU_FLAG_* bits (0 if CPUID is missing). */
+int ff_get_cpu_flags_x86(void)
+{
+ int rval = 0;
+ int eax, ebx, ecx, edx;
+ int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
+ int family=0, model=0;
+ union { int i[3]; char c[12]; } vendor;
+
+#if ARCH_X86_32
+ x86_reg a, c;
+ __asm__ volatile (
+ /* See if CPUID instruction is supported ... */
+ /* ... Get copies of EFLAGS into eax and ecx */
+ "pushfl\n\t"
+ "pop %0\n\t"
+ "mov %0, %1\n\t"
+
+ /* ... Toggle the ID bit in one copy and store */
+ /* to the EFLAGS reg */
+ "xor $0x200000, %0\n\t"
+ "push %0\n\t"
+ "popfl\n\t"
+
+ /* ... Get the (hopefully modified) EFLAGS */
+ "pushfl\n\t"
+ "pop %0\n\t"
+ : "=a" (a), "=c" (c)
+ :
+ : "cc"
+ );
+
+ if (a == c)
+ return 0; /* CPUID not supported */
+#endif
+
+ cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
+
+ if(max_std_level >= 1){
+ cpuid(1, eax, ebx, ecx, std_caps);
+ family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
+ model = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
+ if (std_caps & (1<<23))
+ rval |= AV_CPU_FLAG_MMX;
+ if (std_caps & (1<<25))
+ rval |= AV_CPU_FLAG_MMX2
+#if HAVE_SSE
+ | AV_CPU_FLAG_SSE;
+ if (std_caps & (1<<26))
+ rval |= AV_CPU_FLAG_SSE2;
+ if (ecx & 1)
+ rval |= AV_CPU_FLAG_SSE3;
+ if (ecx & 0x00000200 )
+ rval |= AV_CPU_FLAG_SSSE3;
+ if (ecx & 0x00080000 )
+ rval |= AV_CPU_FLAG_SSE4;
+ if (ecx & 0x00100000 )
+ rval |= AV_CPU_FLAG_SSE42;
+#endif
+ ;
+ }
+
+ cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
+
+ if(max_ext_level >= 0x80000001){
+ cpuid(0x80000001, eax, ebx, ecx, ext_caps);
+ if (ext_caps & (1U<<31)) /* unsigned constant: 1<<31 would overflow int */
+ rval |= AV_CPU_FLAG_3DNOW;
+ if (ext_caps & (1<<30))
+ rval |= AV_CPU_FLAG_3DNOWEXT;
+ if (ext_caps & (1<<23))
+ rval |= AV_CPU_FLAG_MMX;
+ if (ext_caps & (1<<22))
+ rval |= AV_CPU_FLAG_MMX2;
+ }
+
+ if (!strncmp(vendor.c, "GenuineIntel", 12) &&
+ family == 6 && (model == 9 || model == 13 || model == 14)) {
+ /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
+ * theoretically support sse2, but it's usually slower than mmx,
+ * so report the SSE2SLOW/SSE3SLOW flags in place of SSE2/SSE3. */
+ if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2;
+ if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3;
+ }
+
+ return rval;
+}
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
deleted file mode 100644
index 052d2c5..0000000
--- a/libavutil/x86/cpu.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * CPU detection code, extracted from mmx.h
- * (c)1997-99 by H. Dietz and R. Fisher
- * Converted to C and improved by Fabrice Bellard.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_X86_CPU_H2
-#define AVUTIL_X86_CPU_H2
-
-#include <stdlib.h>
-#include "libavutil/x86_cpu.h"
-#include "libavutil/cpu.h"
-
-/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
-#define cpuid(index,eax,ebx,ecx,edx)\
- __asm__ volatile\
- ("mov %%"REG_b", %%"REG_S"\n\t"\
- "cpuid\n\t"\
- "xchg %%"REG_b", %%"REG_S\
- : "=a" (eax), "=S" (ebx),\
- "=c" (ecx), "=d" (edx)\
- : "0" (index));
-
-/* Function to test if multimedia instructions are supported... */
-int av_get_cpu_flags(void)
-{
- int rval = 0;
- int eax, ebx, ecx, edx;
- int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
- int family=0, model=0;
- union { int i[3]; char c[12]; } vendor;
-
-#if ARCH_X86_32
- x86_reg a, c;
- __asm__ volatile (
- /* See if CPUID instruction is supported ... */
- /* ... Get copies of EFLAGS into eax and ecx */
- "pushfl\n\t"
- "pop %0\n\t"
- "mov %0, %1\n\t"
-
- /* ... Toggle the ID bit in one copy and store */
- /* to the EFLAGS reg */
- "xor $0x200000, %0\n\t"
- "push %0\n\t"
- "popfl\n\t"
-
- /* ... Get the (hopefully modified) EFLAGS */
- "pushfl\n\t"
- "pop %0\n\t"
- : "=a" (a), "=c" (c)
- :
- : "cc"
- );
-
- if (a == c)
- return 0; /* CPUID not supported */
-#endif
-
- cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
-
- if(max_std_level >= 1){
- cpuid(1, eax, ebx, ecx, std_caps);
- family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
- model = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
- if (std_caps & (1<<23))
- rval |= AV_CPU_FLAG_MMX;
- if (std_caps & (1<<25))
- rval |= AV_CPU_FLAG_MMX2
-#if HAVE_SSE
- | AV_CPU_FLAG_SSE;
- if (std_caps & (1<<26))
- rval |= AV_CPU_FLAG_SSE2;
- if (ecx & 1)
- rval |= AV_CPU_FLAG_SSE3;
- if (ecx & 0x00000200 )
- rval |= AV_CPU_FLAG_SSSE3;
- if (ecx & 0x00080000 )
- rval |= AV_CPU_FLAG_SSE4;
- if (ecx & 0x00100000 )
- rval |= AV_CPU_FLAG_SSE42;
-#endif
- ;
- }
-
- cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
-
- if(max_ext_level >= 0x80000001){
- cpuid(0x80000001, eax, ebx, ecx, ext_caps);
- if (ext_caps & (1<<31))
- rval |= AV_CPU_FLAG_3DNOW;
- if (ext_caps & (1<<30))
- rval |= AV_CPU_FLAG_3DNOWEXT;
- if (ext_caps & (1<<23))
- rval |= AV_CPU_FLAG_MMX;
- if (ext_caps & (1<<22))
- rval |= AV_CPU_FLAG_MMX2;
- }
-
- if (!strncmp(vendor.c, "GenuineIntel", 12) &&
- family == 6 && (model == 9 || model == 13 || model == 14)) {
- /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
- * theoretically support sse2, but it's usually slower than mmx,
- * so let's just pretend they don't. */
- if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2;
- if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3;
- }
-
- return rval;
-}
-
-#endif /* AVUTIL_X86_CPU_H2 */
More information about the ffmpeg-devel
mailing list