[MPlayer-dev-eng] [PATCH] fixed point faad, gnu assembler etc.
Reimar Döffinger
Reimar.Doeffinger at stud.uni-karlsruhe.de
Sat Apr 22 19:01:51 CEST 2006
Hi,
On Thu, Apr 20, 2006 at 09:04:58AM +0200, Michael Niedermayer wrote:
> btw, someone should replace %%cl with a constant, as it's faster (assuming
> it's constant)
Like the attached patch? At least for me it produces good code
(gcc-4.1.0).
I also replaced the "r" constraints with "mr", but had to replace imul with
imull for that, because with a memory operand the assembler can no longer
infer the operand size from a register name. I hope this is correct (tested
on x86_64, but not on 32-bit).
Greetings,
Reimar Döffinger
-------------- next part --------------
Index: libfaad2/Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/libfaad2/Makefile,v
retrieving revision 1.10
diff -u -r1.10 Makefile
--- libfaad2/Makefile 18 Apr 2006 19:39:29 -0000 1.10
+++ libfaad2/Makefile 19 Apr 2006 09:46:21 -0000
@@ -48,7 +48,7 @@
# Uncomment this to use the FIXED_POINT implementation of FAAD2.
# This should improve performance, especially for SBR files.
-#CFLAGS = -I. $(OPTFLAGS) -DFIXED_POINT
+CFLAGS = -I. $(OPTFLAGS) -DFIXED_POINT
.SUFFIXES: .c .o
Index: libfaad2/fixed.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfaad2/fixed.h,v
retrieving revision 1.7
diff -u -r1.7 fixed.h
--- libfaad2/fixed.h 18 Apr 2006 19:39:30 -0000 1.7
+++ libfaad2/fixed.h 19 Apr 2006 09:46:29 -0000
@@ -226,12 +226,62 @@
*y2 = yt2 << (FRAC_SIZE-FRAC_BITS);
}
+#elif defined(__GNUC__) && (defined (ARCH_X86) || defined(ARCH_X86_64))
+#define MUL_S(A,B,S) \
+ __asm__ __volatile__ (\
+ "imull %1 \n\t"\
+ "shrd %2, %%edx, %%eax \n\t"\
+ : "+a" (A) : "mr" (B), "i" (S) : "%edx");
+
+ static INLINE real_t MUL_R(real_t A, real_t B) {
+ MUL_S(A, B, REAL_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_C(real_t A, real_t B) {
+ MUL_S(A, B, COEF_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_F(real_t A, real_t B) {
+ MUL_S(A, B, FRAC_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_Q2(real_t A, real_t B) {
+ MUL_S(A, B, Q2_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_SHIFT6(real_t A, real_t B) {
+ MUL_S(A, B, 6);
+ return A;
+ }
+
+ static INLINE real_t MUL_SHIFT23(real_t A, real_t B) {
+ MUL_S(A, B, 23);
+ return A;
+ }
+
+ static INLINE real_t _MulHigh(real_t A, real_t B) {
+ __asm__ __volatile__ (\
+ "imull %1 \n\t"\
+ "mov %%edx, %%eax \n\t"\
+ : "+a" (A) : "mr" (B) : "%edx");
+ return A;
+ }
+
+ static INLINE void ComplexMult(real_t *y1, real_t *y2, real_t x1, real_t x2,
+ real_t c1, real_t c2) {
+ *y1 = (_MulHigh(x1, c1) + _MulHigh(x2, c2))<<(FRAC_SIZE-FRAC_BITS);
+ *y2 = (_MulHigh(x2, c1) - _MulHigh(x1, c2))<<(FRAC_SIZE-FRAC_BITS);
+ }
#else
/* multiply with real shift */
- #define MUL_R(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (REAL_BITS-1))) >> REAL_BITS)
+ #define MUL_R(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> REAL_BITS)
/* multiply with coef shift */
- #define MUL_C(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (COEF_BITS-1))) >> COEF_BITS)
+ #define MUL_C(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> COEF_BITS)
/* multiply with fractional shift */
#if defined(_WIN32_WCE) && defined(_ARM_)
/* eVC for PocketPC has an intrinsic function that returns only the high 32 bits of a 32x32 bit multiply */
@@ -240,12 +290,12 @@
return _MulHigh(A,B) << (32-FRAC_BITS);
}
#else
- #define _MulHigh(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (FRAC_SIZE-1))) >> FRAC_SIZE)
- #define MUL_F(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (FRAC_BITS-1))) >> FRAC_BITS)
+ #define _MulHigh(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> FRAC_SIZE)
+ #define MUL_F(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> FRAC_BITS)
#endif
- #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS)
- #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6)
- #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23)
+ #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> Q2_BITS)
+ #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> 6)
+ #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> 23)
/* Complex multiplication */
static INLINE void ComplexMult(real_t *y1, real_t *y2,
More information about the MPlayer-dev-eng
mailing list