[Ffmpeg-devel] [RFC] port cabac asm to AMD64
Reimar Döffinger
Reimar.Doeffinger
Fri Oct 20 23:08:46 CEST 2006
Hello,
the attached (admittedly a bit hackish but minimal) patch allows the
cabac asm optimizations to be used on AMD64.
It may be a bit early to do this, but I wondered if maybe someone would
be less lazy than me and do some benchmarking.
Also, I wondered if there is a reason to use the "l" prefix in the asm,
because I have to remove it in many places to make it compile, and all
the code ported to AMD64 AFAICT had it removed. Would a patch to remove
them despite the early state of the optimizations be accepted?
Greetings,
Reimar Döffinger
-------------- next part --------------
Index: libavcodec/cabac.h
===================================================================
--- libavcodec/cabac.h (revision 6749)
+++ libavcodec/cabac.h (working copy)
@@ -28,6 +28,12 @@
//#undef NDEBUG
#include <assert.h>
+#ifdef ARCH_X86_64
+#define ARCH_X86
+#endif
+#ifdef ARCH_X86
+#include "x86_cpu.h"
+#endif
#define CABAC_BITS 16
#define CABAC_MASK ((1<<CABAC_BITS)-1)
@@ -364,10 +370,16 @@
//FIXME gcc generates duplicate load/stores for c->low and c->range
#define LOW "0"
#define RANGE "4"
+#ifdef ARCH_X86_64
+#define BYTESTART "16"
+#define BYTE "24"
+#define BYTEEND "32"
+#else
#define BYTESTART "12"
#define BYTE "16"
#define BYTEEND "20"
-#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
+#endif
+#if defined(ARCH_X86)&& !(defined(PIC) && defined(__GNUC__))
int bit;
#ifndef BRANCHLESS_CABAC_DECODER
@@ -403,14 +415,14 @@
//eax:state ebx:low, edx:range, esi:RangeLPS
"test %%bx, %%bx \n\t"
" jnz 2f \n\t"
- "movl "BYTE "(%2), %%esi \n\t"
+ "mov "BYTE "(%2), %%"REG_S" \n\t"
"subl $0xFFFF, %%ebx \n\t"
- "movzwl (%%esi), %%ecx \n\t"
+ "movzwl (%%"REG_S"), %%ecx \n\t"
"bswap %%ecx \n\t"
"shrl $15, %%ecx \n\t"
- "addl $2, %%esi \n\t"
+ "add $2, %%"REG_S" \n\t"
"addl %%ecx, %%ebx \n\t"
- "movl %%esi, "BYTE "(%2) \n\t"
+ "mov %%"REG_S", "BYTE "(%2) \n\t"
"jmp 2f \n\t"
"1: \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
@@ -421,17 +433,17 @@
"shll %%cl, %%edx \n\t"
"movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
"movb %%cl, (%1) \n\t"
- "addl $1, %0 \n\t"
+ "add $1, %0 \n\t"
"test %%bx, %%bx \n\t"
" jnz 2f \n\t"
- "movl "BYTE "(%2), %%ecx \n\t"
- "movzwl (%%ecx), %%esi \n\t"
+ "mov "BYTE "(%2), %%"REG_c" \n\t"
+ "movzwl (%%"REG_c"), %%esi \n\t"
"bswap %%esi \n\t"
"shrl $15, %%esi \n\t"
"subl $0xFFFF, %%esi \n\t"
- "addl $2, %%ecx \n\t"
- "movl %%ecx, "BYTE "(%2) \n\t"
+ "add $2, %%"REG_c" \n\t"
+ "mov %%i"REG_c", "BYTE "(%2) \n\t"
"leal -1(%%ebx), %%ecx \n\t"
"xorl %%ebx, %%ecx \n\t"
@@ -447,7 +459,7 @@
"movl %%ebx, "LOW "(%2) \n\t"
:"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
:"r"(state), "r"(c)
- : "%ecx", "%ebx", "%edx", "%esi", "memory"
+ : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
);
bit&=1;
#else /* BRANCHLESS_CABAC_DECODER */
@@ -493,13 +505,13 @@
"shl %%cl , "low" \n\t"\
"test "lowword" , "lowword" \n\t"\
" jnz 1f \n\t"\
- "mov "BYTE"("cabac"), %%ecx \n\t"\
- "movzwl (%%ecx) , "tmp" \n\t"\
+ "mov "BYTE"("cabac"), %%"REG_c" \n\t"\
+ "movzwl (%%"REG_c") , "tmp" \n\t"\
"bswap "tmp" \n\t"\
"shr $15 , "tmp" \n\t"\
"sub $0xFFFF , "tmp" \n\t"\
- "add $2 , %%ecx \n\t"\
- "mov %%ecx , "BYTE "("cabac") \n\t"\
+ "add $2 , %%"REG_c" \n\t"\
+ "mov %%"REG_c" , "BYTE "("cabac") \n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\
@@ -519,7 +531,7 @@
:"=&a"(bit)
:"r"(state), "r"(c)
- : "%ecx", "%ebx", "%edx", "%esi", "memory"
+ : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
);
bit&=1;
#endif /* BRANCHLESS_CABAC_DECODER */
@@ -637,20 +649,20 @@
"sub %%edx, %%ecx \n\t"
"test %%ax, %%ax \n\t"
" jnz 1f \n\t"
- "movl "BYTE "(%1), %%ebx \n\t"
+ "mov "BYTE "(%1), %%"REG_b" \n\t"
"subl $0xFFFF, %%eax \n\t"
- "movzwl (%%ebx), %%edx \n\t"
+ "movzwl (%%"REG_b"), %%edx \n\t"
"bswap %%edx \n\t"
"shrl $15, %%edx \n\t"
- "addl $2, %%ebx \n\t"
+ "add $2, %%"REG_b" \n\t"
"addl %%edx, %%eax \n\t"
- "movl %%ebx, "BYTE "(%1) \n\t"
+ "mov %%"REG_b", "BYTE "(%1) \n\t"
"1: \n\t"
"movl %%eax, "LOW "(%1) \n\t"
:"+c"(val)
:"r"(c)
- : "%eax", "%ebx", "%edx", "memory"
+ : "%eax", "%"REG_b, "%edx", "memory"
);
return val;
#else
@@ -690,27 +702,27 @@
BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
- "movl %2, %%eax \n\t"
+ "mov %2, %%"REG_a" \n\t"
"movl %4, %%ecx \n\t"
- "addl %1, %%ecx \n\t"
- "movl %%ecx, (%%eax) \n\t"
+ "add %1, %%"REG_c" \n\t"
+ "movl %%ecx, (%%"REG_a") \n\t"
"test $1, %%edx \n\t"
" jnz 4f \n\t"
- "addl $4, %%eax \n\t"
- "movl %%eax, %2 \n\t"
+ "add $4, %%"REG_a" \n\t"
+ "mov %%"REG_a", %2 \n\t"
"3: \n\t"
- "addl $1, %1 \n\t"
- "cmpl %5, %1 \n\t"
+ "add $1, %1 \n\t"
+ "cmp %5, %1 \n\t"
" jb 2b \n\t"
- "movl %2, %%eax \n\t"
+ "mov %2, %%"REG_a" \n\t"
"movl %4, %%ecx \n\t"
- "addl %1, %%ecx \n\t"
- "movl %%ecx, (%%eax) \n\t"
+ "add %1, %%"REG_c" \n\t"
+ "movl %%ecx, (%%"REG_a") \n\t"
"4: \n\t"
- "addl %6, %%eax \n\t"
+ "add %6, %%eax \n\t"
"shr $2, %%eax \n\t"
"movl %%esi, "RANGE "(%3) \n\t"
More information about the ffmpeg-devel
mailing list