[Ffmpeg-cvslog] r6573 - trunk/libavcodec/cabac.h
michael
subversion
Sat Oct 7 13:15:11 CEST 2006
Author: michael
Date: Sat Oct 7 13:15:10 2006
New Revision: 6573
Modified:
trunk/libavcodec/cabac.h
Log:
several x86 renorm_cabac_decoder_once optimizations
START/STOP_TIMER benchmarking code for them
Please benchmark on P4 & Athlon.
(I'll remove the benchmarking code and the always-slower variants as soon as P4/Athlon benchmarks have been posted or committed.)
Modified: trunk/libavcodec/cabac.h
==============================================================================
--- trunk/libavcodec/cabac.h (original)
+++ trunk/libavcodec/cabac.h Sat Oct 7 13:15:10 2006
@@ -295,15 +295,77 @@
}
static inline void renorm_cabac_decoder_once(CABACContext *c){ /* One renormalization step: shift c->range and c->low left by one bit when range is below the threshold, then call refill(c) if (c->low & CABAC_MASK) is zero. */
+#ifdef ARCH_X86 // x86 builds use one of the hand-written asm variants below
+ int temp; // scratch register operand; NOTE(review): bound with a read-write ("+") constraint below although it is never initialized -- an output-only constraint may suffice, confirm
+#if 0 // variant 1 (disabled): derive a 0/1 shift count, then shift
+ //P3:683 -- presumably a START/STOP_TIMER figure measured on a Pentium III; confirm
+ asm(
+ "lea -0x20000(%0), %2 \n\t" // %2 (ecx) = range - 0x20000
+ "shr $31, %2 \n\t" //FIXME 31->63 for x86-64 -- keeps only the sign bit: 1 if the difference was negative, else 0
+ "shl %%cl, %0 \n\t" // range <<= cl
+ "shl %%cl, %1 \n\t" // low <<= cl
+ : "+r"(c->range), "+r"(c->low), "+c"(temp) // %0 = c->range, %1 = c->low, %2 = temp pinned to ecx
+ );
+#elif 0 // variant 2 (disabled): shift count from the carry flag
+ //P3:680 -- presumably a benchmark figure, see above; confirm
+ asm(
+ "cmp $0x20000, %0 \n\t" // flags from range - 0x20000
+ "setb %%cl \n\t" //FIXME 31->63 for x86-64 -- cl = 1 if range is (unsigned) below 0x20000, else 0; NOTE(review): this FIXME looks copied from the shr variant and does not seem to apply to setb -- confirm
+ "shl %%cl, %0 \n\t" // range <<= cl
+ "shl %%cl, %1 \n\t" // low <<= cl
+ : "+r"(c->range), "+r"(c->low), "+c"(temp) // %0 = c->range, %1 = c->low, %2 = temp pinned to ecx
+ );
+#elif 1 // variant 3: the one currently compiled on x86; branch-free doubling via an all-ones/zero mask
+ int temp2; // second scratch operand (edx); same uninitialized "+" constraint caveat as temp
+ //P3:665 -- presumably a benchmark figure, see above; confirm
+ asm(
+ "lea -0x20000(%0), %%eax \n\t" // eax = range - 0x20000
+ "cdq \n\t" // edx = sign extension of eax: all ones if the difference is negative, else 0
+ "mov %0, %%eax \n\t" // eax = original range
+ "and %%edx, %0 \n\t" // range &= mask (range or 0)
+ "and %1, %%edx \n\t" // edx = low & mask (low or 0)
+ "add %%eax, %0 \n\t" // range = original range + (range or 0): doubled or unchanged
+ "add %%edx, %1 \n\t" // low += (low or 0): doubled or unchanged
+ : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) // %0 = c->range, %1 = c->low, temp in eax, temp2 in edx
+ );
+#elif 0 // variant 4 (disabled): like variant 3 but builds the mask with cmp/sbb
+ int temp2; // second scratch operand (edx)
+ //P3:673 -- presumably a benchmark figure, see above; confirm
+ asm(
+ "cmp $0x20000, %0 \n\t" // carry set if range is (unsigned) below 0x20000
+ "sbb %%edx, %%edx \n\t" // edx = all ones if carry was set, else 0
+ "mov %0, %%eax \n\t" // eax = original range
+ "and %%edx, %0 \n\t" // range &= mask
+ "and %1, %%edx \n\t" // edx = low & mask
+ "add %%eax, %0 \n\t" // range doubled or unchanged
+ "add %%edx, %1 \n\t" // low doubled or unchanged
+ : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) // %0 = c->range, %1 = c->low, temp in eax, temp2 in edx
+ );
+#else // variant 5 (not reached while the "#elif 1" above is active): conditional moves
+ int temp2; // second scratch operand (edx)
+ //P3:677 -- presumably a benchmark figure, see above; confirm
+ asm(
+ "cmp $0x20000, %0 \n\t" // flags from range - 0x20000; the lea instructions below do not modify them
+ "lea (%0, %0), %%eax \n\t" // eax = 2 * range
+ "lea (%1, %1), %%edx \n\t" // edx = 2 * low
+ "cmovb %%eax, %0 \n\t" // range = 2 * range only if range was below 0x20000
+ "cmovb %%edx, %1 \n\t" // low = 2 * low under the same condition
+ : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) // %0 = c->range, %1 = c->low, temp in eax, temp2 in edx
+ );
+#endif // end of x86 variant selection
+#else // non-x86: plain C version
+ //P3:675 -- presumably a benchmark figure, see above; confirm
int shift= (uint32_t)(c->range - (0x200 << CABAC_BITS))>>31; // sign bit of (range - threshold) as a 32-bit value, used as a 0/1 shift count; NOTE(review): the asm variants hard-code 0x20000, which equals 0x200 << CABAC_BITS only if CABAC_BITS is 8 -- confirm
c->range<<= shift; // doubled when shift is 1, unchanged when 0
c->low <<= shift; // low is scaled by the same amount as range
+#endif // ARCH_X86
if(!(c->low & CABAC_MASK)) // all CABAC_MASK bits of low are clear; presumably means more input bits are needed -- confirm against refill()
refill(c); // refill() is defined outside this hunk
}
static int get_cabac(CABACContext *c, uint8_t * const state){
//FIXME gcc generates duplicate load/stores for c->low and c->range
+START_TIMER
int s = *state;
int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
int bit, lps_mask attribute_unused;
@@ -342,7 +404,7 @@
if(!(c->low & CABAC_MASK))
refill2(c);
#endif
-
+STOP_TIMER("get_cabac")
return bit;
}
More information about the ffmpeg-cvslog
mailing list