[FFmpeg-devel] [PATCH] FFV1 speed tweaks
Jason Garrett-Glaser
darkshikari
Fri Aug 8 23:29:51 CEST 2008
This patch makes FFV1 encoding and decoding on the order of 7-10 clock
cycles faster per pixel, out of 120-160 clock cycles total.
Dark Shikari
Index: libavcodec/ffv1.c
===================================================================
--- libavcodec/ffv1.c (revision 14661)
+++ libavcodec/ffv1.c (working copy)
@@ -354,10 +354,10 @@
static inline int encode_line(FFV1Context *s, int w, int_fast16_t
*sample[2], int plane_index, int bits){
PlaneContext * const p= &s->plane[plane_index];
RangeCoder * const c= &s->c;
- int x;
int run_index= s->run_index;
int run_count=0;
int run_mode=0;
+ int_fast16_t *cur_sample[3] = {sample[0],sample[1],sample[2]};
if(s->ac){
if(c->bytestream_end - c->bytestream < w*20){
@@ -370,18 +370,17 @@
return -1;
}
}
+ for(; w>0; w--){
+ int diff, context, sign;
- for(x=0; x<w; x++){
- int diff, context;
+ context= get_context(s, cur_sample[0], cur_sample[1], cur_sample[2]);
+ diff= cur_sample[0][0] - predict(cur_sample[0], cur_sample[1]);
+
+ /* Negate context and diff if context is negative */
+ sign = context >> 31;
+ context = (sign ^ context) - sign;
+ diff = (sign ^ diff) - sign;
- context= get_context(s, sample[0]+x, sample[1]+x, sample[2]+x);
- diff= sample[0][x] - predict(sample[0]+x, sample[1]+x);
-
- if(context < 0){
- context = -context;
- diff= -diff;
- }
-
diff= fold(diff, bits);
if(s->ac){
@@ -413,6 +412,9 @@
if(run_mode == 0)
put_vlc_symbol(&s->pb, &p->vlc_state[context], diff, bits);
}
+ cur_sample[0]++;
+ cur_sample[1]++;
+ cur_sample[2]++;
}
if(run_mode){
while(run_count >= 1<<ff_log2_run[run_index]){
@@ -707,22 +709,17 @@
static inline void decode_line(FFV1Context *s, int w, int_fast16_t
*sample[2], int plane_index, int bits){
PlaneContext * const p= &s->plane[plane_index];
RangeCoder * const c= &s->c;
- int x;
int run_count=0;
int run_mode=0;
int run_index= s->run_index;
-
- for(x=0; x<w; x++){
+ int_fast16_t *cur_sample[2] = {sample[0],sample[1]};
+//1334
+ for(; w>0; w--){START_TIMER;
int diff, context, sign;
+ context= get_context(s, cur_sample[1], cur_sample[0], cur_sample[1]);
+ sign = context >> 31;
+ context = (sign ^ context) - sign;
- context= get_context(s, sample[1] + x, sample[0] + x, sample[1] + x);
- if(context < 0){
- context= -context;
- sign=1;
- }else
- sign=0;
-
-
if(s->ac){
diff= get_symbol(c, p->state[context], 1);
}else{
@@ -732,31 +729,34 @@
if(run_count==0 && run_mode==1){
if(get_bits1(&s->gb)){
run_count = 1<<ff_log2_run[run_index];
- if(x + run_count <= w) run_index++;
+ if(run_count <= w) run_index++;
}else{
- if(ff_log2_run[run_index]) run_count =
get_bits(&s->gb, ff_log2_run[run_index]);
+ /* equivalent to if(ff_log2_run[run_index]) */
+ if(run_index&0x1C) run_count =
get_bits(&s->gb, ff_log2_run[run_index]);
else run_count=0;
if(run_index) run_index--;
run_mode=2;
}
}
- run_count--;
- if(run_count < 0){
+ if(run_count == 0){
run_mode=0;
- run_count=0;
diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
if(diff>=0) diff++;
- }else
+ }else{
diff=0;
+ run_count--;
+ }
}else
diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
// printf("count:%d index:%d, mode:%d, x:%d y:%d
pos:%d\n", run_count, run_index, run_mode, x, y,
get_bits_count(&s->gb));
}
- if(sign) diff= -diff;
+ diff = (sign ^ diff) - sign;
- sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff)
& ((1<<bits)-1);
+ cur_sample[1][0]= (predict(cur_sample[1], cur_sample[0]) +
diff) & ((1<<bits)-1);
+ cur_sample[0]++;
+ cur_sample[1]++;STOP_TIMER("decode line");
}
s->run_index= run_index;
}
More information about the ffmpeg-devel
mailing list