[FFmpeg-devel] [PATCH] Lagarith range decoder.
Loren Merritt
lorenm
Sat Sep 5 17:51:45 CEST 2009
On Sat, 5 Sep 2009, Nathan Caldwell wrote:
> Here's the patch for just the lagarith range decoder. Hopefully I've
> taken care of anyone's concerns.
Here is a division-free version. It makes lag_decode_line 1.5x faster on Core2.
--Loren Merritt
-------------- next part --------------
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 758f613..48676e2 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -111,7 +111,7 @@ const uint8_t ff_alternate_vertical_scan[64] = {
};
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
-const uint32_t ff_inverse[256]={
+const uint32_t ff_inverse[257]={
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
@@ -144,6 +144,7 @@ const uint32_t ff_inverse[256]={
18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
+ 16777216
};
/* Input permutation for the simple_idct_mmx */
diff --git a/libavcodec/lagarithrac.h b/libavcodec/lagarithrac.h
index 861e91f..b4321b1 100644
--- a/libavcodec/lagarithrac.h
+++ b/libavcodec/lagarithrac.h
@@ -62,27 +62,39 @@ static inline void lag_rac_refill(lag_rac *l)
}
}
-/* TODO: Optimize */
// decodes a byte
static inline int lag_get_rac(lag_rac *l)
{
- unsigned range_scaled, low_scaled;
- int val = 0;
+ unsigned range_scaled, low_scaled, div;
+ int val;
+ uint8_t shift;
lag_rac_refill(l);
range_scaled = l->range >> l->scale;
- low_scaled = l->low / range_scaled;
-
- if (low_scaled < l->prob[255]) {
- val = l->range_hash[low_scaled >> l->hash_shift];
- while (l->prob[val + 1] <= low_scaled)
- val++;
- l->range = range_scaled * (l->prob[val + 1] - l->prob[val]);
- } else {
+ if (l->low >= l->prob[255]*range_scaled) {
val = 255;
l->range -= range_scaled * l->prob[255];
+ } else {
+ // val = 0 is frequent enough to deserve a shortcut
+ if (l->low < l->prob[1]*range_scaled) {
+ val = 0;
+ } else {
+ // FIXME make av_log2 fast so it's usable here
+ shift = __builtin_clz(range_scaled) - 1;
+ div = ((range_scaled << shift) + (1<<23)-1) >> 23;
+ low_scaled = FASTDIV(l->low, div);
+ shift -= 23 + l->hash_shift;
+ shift &= 31;
+ low_scaled = (low_scaled<<shift) | (low_scaled>>(32-shift));
+ // low_scaled is now a lower bound of low/range_scaled
+ val = l->range_hash[(uint8_t)low_scaled];
+ while (l->low >= l->prob[val+1]*range_scaled)
+ val++;
+ }
+
+ l->range = range_scaled * (l->prob[val+1] - l->prob[val]);
}
l->low -= range_scaled * l->prob[val];
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 38bca42..0e37a94 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -124,7 +124,7 @@
/* math */
-extern const uint32_t ff_inverse[256];
+extern const uint32_t ff_inverse[257];
#if ARCH_X86
# define FASTDIV(a,b) \
More information about the ffmpeg-devel mailing list