[FFmpeg-devel] MPEG-2 Acceleration Refactor
Greg Hulands
ghulands
Mon Jun 18 01:47:58 CEST 2007
> [...]
> Wow! You've reduced the patch to nothing, and it still gives a
> speedup. That's impressive work. I can't wait to test it.
>
Ah crap. Sorry.
Index: mpeg12.c
===================================================================
--- mpeg12.c (revision 9339)
+++ mpeg12.c (working copy)
@@ -53,19 +53,19 @@
#endif //CONFIG_ENCODERS
static inline int mpeg1_decode_block_inter(MpegEncContext *s,
DCTELEM *block,
- int n);
+ int n,
+ int fast);
static inline int mpeg1_decode_block_intra(MpegEncContext *s,
DCTELEM *block,
int n);
-static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n);
static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
DCTELEM *block,
- int n);
+ int n,
+ int fast);
static inline int mpeg2_decode_block_intra(MpegEncContext *s,
DCTELEM *block,
- int n);
-static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n);
-static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
+ int n,
+ int fast);
static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred);
static void exchange_uv(MpegEncContext *s);
@@ -1233,11 +1233,11 @@
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
if(s->flags2 & CODEC_FLAG2_FAST){
for(i=0;i<6;i++) {
- mpeg2_fast_decode_block_intra(s, s->pblocks[i], i);
+ mpeg2_decode_block_intra(s, s->pblocks[i], i, 1);
}
}else{
for(i=0;i<mb_block_count;i++) {
- if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0)
+ if (mpeg2_decode_block_intra(s, s->pblocks[i], i, 0) < 0)
return -1;
}
}
@@ -1445,7 +1445,7 @@
if(s->flags2 & CODEC_FLAG2_FAST){
for(i=0;i<6;i++) {
if(cbp & 32) {
- mpeg2_fast_decode_block_non_intra(s, s->pblocks[i], i);
+ mpeg2_decode_block_non_intra(s, s->pblocks[i], i, 1);
} else {
s->block_last_index[i] = -1;
}
@@ -1456,7 +1456,7 @@
for(i=0;i<mb_block_count;i++) {
if ( cbp & (1<<11) ) {
- if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0)
+ if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i, 0) < 0)
return -1;
} else {
s->block_last_index[i] = -1;
@@ -1468,7 +1468,7 @@
if(s->flags2 & CODEC_FLAG2_FAST){
for(i=0;i<6;i++) {
if (cbp & 32) {
- mpeg1_fast_decode_block_inter(s, s->pblocks[i], i);
+ mpeg1_decode_block_inter(s, s->pblocks[i], i, 1);
} else {
s->block_last_index[i] = -1;
}
@@ -1477,7 +1477,7 @@
}else{
for(i=0;i<6;i++) {
if (cbp & 32) {
- if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0)
+ if (mpeg1_decode_block_inter(s, s->pblocks[i], i, 0) < 0)
return -1;
} else {
s->block_last_index[i] = -1;
@@ -1622,15 +1622,15 @@
return 0;
}
-static inline int mpeg1_decode_block_inter(MpegEncContext *s,
- DCTELEM *block,
- int n)
+static inline int mpeg1_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n, int fast)
{
int level, i, j, run;
RLTable *rl = &rl_mpeg1;
uint8_t * const scantable= s->intra_scantable.permutated;
- const uint16_t *quant_matrix= s->inter_matrix;
const int qscale= s->qscale;
+ const uint16_t *quant_matrix;
+
+ if (!fast) quant_matrix= s->inter_matrix;
{
OPEN_READER(re, &s->gb);
@@ -1638,7 +1638,10 @@
/* special case for the first coef. no need to add a second
vlc table */
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
- level= (3*qscale*quant_matrix[0])>>5;
+ if (fast)
+ level= (3*qscale)>>1;
+ else
+ level= (3*qscale*quant_matrix[0])>>5;
level= (level-1)|1;
if(GET_CACHE(re, &s->gb)&0x40000000)
level= -level;
@@ -1656,7 +1659,10 @@
if(level != 0) {
i += run;
j = scantable[i];
- level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+ if (fast)
+ level= ((level*2+1)*qscale)>>1;
+ else
+ level= ((level*2+1)*qscale*quant_matrix[j])>>5;
level= (level-1)|1;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
SHOW_SBITS(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
@@ -1674,17 +1680,25 @@
j = scantable[i];
if(level<0){
level= -level;
- level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+ if (fast)
+ level= ((level*2+1)*qscale)>>1;
+ else
+ level= ((level*2+1)*qscale*quant_matrix[j])>>5;
level= (level-1)|1;
level= -level;
}else{
- level= ((level*2+1)*qscale*quant_matrix[j])>>5;
+ if (fast)
+ level= ((level*2+1)*qscale)>>1;
+ else
+ level= ((level*2+1)*qscale*quant_matrix[j])>>5;
level= (level-1)|1;
}
}
- if (i > 63){
- av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
- return -1;
+ if (!fast) {
+ if (i > 63){
+ av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+ return -1;
+ }
}
block[j] = level;
@@ -1700,177 +1714,43 @@
return 0;
}
-static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s,
DCTELEM *block, int n)
-{
- int level, i, j, run;
- RLTable *rl = &rl_mpeg1;
- uint8_t * const scantable= s->intra_scantable.permutated;
- const int qscale= s->qscale;
- {
- OPEN_READER(re, &s->gb);
- i = -1;
- /* special case for the first coef. no need to add a second
vlc table */
- UPDATE_CACHE(re, &s->gb);
- if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
- level= (3*qscale)>>1;
- level= (level-1)|1;
- if(GET_CACHE(re, &s->gb)&0x40000000)
- level= -level;
- block[0] = level;
- i++;
- SKIP_BITS(re, &s->gb, 2);
- if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
- goto end;
- }
-
- /* now quantify & encode AC coefs */
- for(;;) {
- GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0],
TEX_VLC_BITS, 2, 0);
-
- if(level != 0) {
- i += run;
- j = scantable[i];
- level= ((level*2+1)*qscale)>>1;
- level= (level-1)|1;
- level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
SHOW_SBITS(re, &s->gb, 1);
- SKIP_BITS(re, &s->gb, 1);
- } else {
- /* escape */
- run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS
(re, &s->gb, 6);
- UPDATE_CACHE(re, &s->gb);
- level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s-
>gb, 8);
- if (level == -128) {
- level = SHOW_UBITS(re, &s->gb, 8) - 256;
SKIP_BITS(re, &s->gb, 8);
- } else if (level == 0) {
- level = SHOW_UBITS(re, &s->gb, 8) ;
SKIP_BITS(re, &s->gb, 8);
- }
- i += run;
- j = scantable[i];
- if(level<0){
- level= -level;
- level= ((level*2+1)*qscale)>>1;
- level= (level-1)|1;
- level= -level;
- }else{
- level= ((level*2+1)*qscale)>>1;
- level= (level-1)|1;
- }
- }
-
- block[j] = level;
- if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
- break;
- UPDATE_CACHE(re, &s->gb);
- }
-end:
- LAST_SKIP_BITS(re, &s->gb, 2);
- CLOSE_READER(re, &s->gb);
- }
- s->block_last_index[n] = i;
- return 0;
-}
-
-
static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
- DCTELEM *block,
- int n)
+ DCTELEM *block,
+ int n,
+ int fast)
{
int level, i, j, run;
RLTable *rl = &rl_mpeg1;
uint8_t * const scantable= s->intra_scantable.permutated;
- const uint16_t *quant_matrix;
const int qscale= s->qscale;
- int mismatch;
+ const uint16_t *quant_matrix; // !fast
+ int mismatch;
- mismatch = 1;
-
- {
- OPEN_READER(re, &s->gb);
- i = -1;
- if (n < 4)
- quant_matrix = s->inter_matrix;
- else
- quant_matrix = s->chroma_inter_matrix;
-
- /* special case for the first coef. no need to add a second
vlc table */
- UPDATE_CACHE(re, &s->gb);
- if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
- level= (3*qscale*quant_matrix[0])>>5;
- if(GET_CACHE(re, &s->gb)&0x40000000)
- level= -level;
- block[0] = level;
- mismatch ^= level;
- i++;
- SKIP_BITS(re, &s->gb, 2);
- if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
- goto end;
- }
-
- /* now quantify & encode AC coefs */
- for(;;) {
- GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0],
TEX_VLC_BITS, 2, 0);
-
- if(level != 0) {
- i += run;
- j = scantable[i];
- level= ((level*2+1)*qscale*quant_matrix[j])>>5;
- level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
SHOW_SBITS(re, &s->gb, 1);
- SKIP_BITS(re, &s->gb, 1);
- } else {
- /* escape */
- run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS
(re, &s->gb, 6);
- UPDATE_CACHE(re, &s->gb);
- level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s-
>gb, 12);
-
- i += run;
- j = scantable[i];
- if(level<0){
- level= ((-level*2+1)*qscale*quant_matrix[j])>>5;
- level= -level;
- }else{
- level= ((level*2+1)*qscale*quant_matrix[j])>>5;
- }
- }
- if (i > 63){
- av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d
%d\n", s->mb_x, s->mb_y);
- return -1;
- }
-
- mismatch ^= level;
- block[j] = level;
- if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
- break;
- UPDATE_CACHE(re, &s->gb);
- }
-end:
- LAST_SKIP_BITS(re, &s->gb, 2);
- CLOSE_READER(re, &s->gb);
- }
- block[63] ^= (mismatch & 1);
-
- s->block_last_index[n] = i;
- return 0;
-}
-
-static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
- DCTELEM *block,
- int n)
-{
- int level, i, j, run;
- RLTable *rl = &rl_mpeg1;
- uint8_t * const scantable= s->intra_scantable.permutated;
- const int qscale= s->qscale;
OPEN_READER(re, &s->gb);
i = -1;
+ if (!fast)
+ {
+ mismatch = 1;
+
+ if (n < 4)
+ quant_matrix = s->inter_matrix;
+ else
+ quant_matrix = s->chroma_inter_matrix;
+ }
+
/* special case for the first coef. no need to add a second vlc
table */
UPDATE_CACHE(re, &s->gb);
if (((int32_t)GET_CACHE(re, &s->gb)) < 0) {
- level= (3*qscale)>>1;
+ if (fast)
+ level= (3*qscale)>>1;
+ else
+ level= (3*qscale*quant_matrix[0])>>5;
if(GET_CACHE(re, &s->gb)&0x40000000)
level= -level;
block[0] = level;
+ if (!fast) mismatch ^= level;
i++;
SKIP_BITS(re, &s->gb, 2);
if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
@@ -1884,7 +1764,10 @@
if(level != 0) {
i += run;
j = scantable[i];
- level= ((level*2+1)*qscale)>>1;
+ if (fast)
+ level= ((level*2+1)*qscale)>>1;
+ else
+ level= ((level*2+1)*qscale*quant_matrix[j])>>5;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS
(re, &s->gb, 1);
SKIP_BITS(re, &s->gb, 1);
} else {
@@ -1896,13 +1779,26 @@
i += run;
j = scantable[i];
if(level<0){
- level= ((-level*2+1)*qscale)>>1;
+ if (fast)
+ level= ((-level*2+1)*qscale)>>1;
+ else
+ level= ((-level*2+1)*qscale*quant_matrix[j])>>5;
level= -level;
}else{
- level= ((level*2+1)*qscale)>>1;
+ if (fast)
+ level= ((level*2+1)*qscale)>>1;
+ else
+ level= ((level*2+1)*qscale*quant_matrix[j])>>5;
}
}
+ if (!fast) {
+ if (i > 63){
+ av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+ return -1;
+ }
+ mismatch ^= level;
+ }
block[j] = level;
if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
break;
@@ -1911,19 +1807,21 @@
end:
LAST_SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
+ if (!fast) block[63] ^= (mismatch & 1);
s->block_last_index[n] = i;
return 0;
}
static inline int mpeg2_decode_block_intra(MpegEncContext *s,
- DCTELEM *block,
- int n)
+ DCTELEM *block,
+ int n,
+ int fast)
{
int level, dc, diff, i, j, run;
int component;
RLTable *rl;
- uint8_t * const scantable= s->intra_scantable.permutated;
+ uint8_t * scantable= s->intra_scantable.permutated; // could be const if (!fast)
const uint16_t *quant_matrix;
const int qscale= s->qscale;
int mismatch;
@@ -1943,9 +1841,11 @@
dc += diff;
s->last_dc[component] = dc;
block[0] = dc << (3 - s->intra_dc_precision);
- dprintf(s->avctx, "dc=%d\n", block[0]);
- mismatch = block[0] ^ 1;
- i = 0;
+ if (!fast) {
+ dprintf("dc=%d\n", block[0]);
+ mismatch = block[0] ^ 1;
+ i = 0;
+ }
if (s->intra_vlc_format)
rl = &rl_mpeg2;
else
@@ -1961,8 +1861,13 @@
if(level == 127){
break;
} else if(level != 0) {
- i += run;
- j = scantable[i];
+ if (fast) {
+ scantable += run;
+ j = *scantable;
+ } else {
+ i += run;
+ j = scantable[i];
+ }
level= (level*qscale*quant_matrix[j])>>4;
level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
SHOW_SBITS(re, &s->gb, 1);
LAST_SKIP_BITS(re, &s->gb, 1);
@@ -1971,8 +1876,13 @@
run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS
(re, &s->gb, 6);
UPDATE_CACHE(re, &s->gb);
level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re,
&s->gb, 12);
- i += run;
- j = scantable[i];
+ if (fast) {
+ scantable += run;
+ j = *scantable;
+ } else {
+ i += run;
+ j = scantable[i];
+ }
if(level<0){
level= (-level*qscale*quant_matrix[j])>>4;
level= -level;
@@ -1980,92 +1890,23 @@
level= (level*qscale*quant_matrix[j])>>4;
}
}
- if (i > 63){
- av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
- return -1;
+ if (!fast) {
+ if (i > 63){
+ av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+ return -1;
+ }
+ mismatch^= level;
}
-
- mismatch^= level;
block[j] = level;
}
CLOSE_READER(re, &s->gb);
}
- block[63]^= mismatch&1;
+ if (!fast) block[63]^= mismatch&1;
- s->block_last_index[n] = i;
+ s->block_last_index[n] = (fast ? scantable - s->intra_scantable.permutated : i);
return 0;
}
-static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
- DCTELEM *block,
- int n)
-{
- int level, dc, diff, j, run;
- int component;
- RLTable *rl;
- uint8_t * scantable= s->intra_scantable.permutated;
- const uint16_t *quant_matrix;
- const int qscale= s->qscale;
-
- /* DC coef */
- if (n < 4){
- quant_matrix = s->intra_matrix;
- component = 0;
- }else{
- quant_matrix = s->chroma_intra_matrix;
- component = (n&1) + 1;
- }
- diff = decode_dc(&s->gb, component);
- if (diff >= 0xffff)
- return -1;
- dc = s->last_dc[component];
- dc += diff;
- s->last_dc[component] = dc;
- block[0] = dc << (3 - s->intra_dc_precision);
- if (s->intra_vlc_format)
- rl = &rl_mpeg2;
- else
- rl = &rl_mpeg1;
-
- {
- OPEN_READER(re, &s->gb);
- /* now quantify & encode AC coefs */
- for(;;) {
- UPDATE_CACHE(re, &s->gb);
- GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0],
TEX_VLC_BITS, 2, 0);
-
- if(level == 127){
- break;
- } else if(level != 0) {
- scantable += run;
- j = *scantable;
- level= (level*qscale*quant_matrix[j])>>4;
- level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
SHOW_SBITS(re, &s->gb, 1);
- LAST_SKIP_BITS(re, &s->gb, 1);
- } else {
- /* escape */
- run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS
(re, &s->gb, 6);
- UPDATE_CACHE(re, &s->gb);
- level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s-
>gb, 12);
- scantable += run;
- j = *scantable;
- if(level<0){
- level= (-level*qscale*quant_matrix[j])>>4;
- level= -level;
- }else{
- level= (level*qscale*quant_matrix[j])>>4;
- }
- }
-
- block[j] = level;
- }
- CLOSE_READER(re, &s->gb);
- }
-
- s->block_last_index[n] = scantable - s->intra_scantable.permutated;
- return 0;
-}
-
typedef struct Mpeg1Context {
MpegEncContext mpeg_enc_ctx;
int mpeg_enc_ctx_allocated; /* true if decoding context
allocated */
More information about the ffmpeg-devel
mailing list