[FFmpeg-devel] [PATCH] Electronic Arts TGQ decoder
Michael Niedermayer
michaelni
Sat Sep 27 12:51:29 CEST 2008
On Sat, Sep 27, 2008 at 08:07:05PM +1000, Peter Ross wrote:
> On Sat, Sep 27, 2008 at 10:12:34AM +1000, Peter Ross wrote:
> > Patches enclosed.
> >
> > Info: http://wiki.multimedia.cx/index.php?title=Electronic_Arts_TGQ
> > Samples: http://samples.mplayerhq.hu/game-formats/ea-tgq-uv/
>
> Thanks for the prompt feedback. Round two enclosed.
>
> -- Peter
> (A907 E02F A6E5 0CD2 34CD 20D2 6760 79C5 AC40 DD6B)
> Index: libavcodec/mpegvideo_enc.c
> ===================================================================
> --- libavcodec/mpegvideo_enc.c (revision 15434)
> +++ libavcodec/mpegvideo_enc.c (working copy)
> @@ -61,7 +61,7 @@
> 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247
> };
>
> -static const uint16_t inv_aanscales[64] = {
> +const uint16_t ff_inv_aanscales[64] = {
> 4096, 2953, 3135, 3483, 4096, 5213, 7568, 14846,
> 2953, 2129, 2260, 2511, 2953, 3759, 5457, 10703,
> 3135, 2260, 2399, 2666, 3135, 3990, 5793, 11363,
> @@ -3089,7 +3089,7 @@
> || s->dsp.fdct == ff_faandct
> #endif
> )
> - dct_coeff= (dct_coeff*inv_aanscales[ scantable[i] ]) >> 12;
> + dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
> zero_distortion= dct_coeff*dct_coeff;
>
> for(level_index=0; level_index < coeff_count[i]; level_index++){
ok
[...]
> Index: libavcodec/dsputil.c
> ===================================================================
> --- libavcodec/dsputil.c (revision 15434)
> +++ libavcodec/dsputil.c (working copy)
> @@ -4137,6 +4137,72 @@
> dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
> }
>
> +/* Electronic Arts TGQ/TQI/MAD IDCT algorithm */
> +#define A4 1.3065630f
> +#define A2 0.5411961f
> +#define A5 0.3826834f
> +#define SQRT2 1.41421356237309514547
> +#define IDCT_TRANSFORM(dest,d0,d1,d2,d3,d4,d5,d6,d7,munge,src) {\
> + const int src7add1 = (src)[7] + (src)[1]; \
> + const int src3add5 = (src)[3] + (src)[5]; \
> + const int value1 = (src7add1 - src3add5)/SQRT2; \
> + const int value2 = ((src)[2] - (src)[6])/SQRT2; \
These could be done as multiplications by (1/SQRT2); multiplies are faster than divides.
Also, they might be faster with lrintf().
> + const int src1sub7 = (src)[1] - (src)[7]; \
> + const int src5sub3 = (src)[5] - (src)[3]; \
> + const int result0 = floor( src5sub3*A2 + (src5sub3+src1sub7)*A5 ); \
> + const int result2 = floor( src1sub7*A4 - (src5sub3+src1sub7)*A5 ); \
I would s/floor/lrintf/, as that should be faster on x86.
> + const int b0 = result2 + src3add5 + src7add1; \
> + const int b1 = result2 + value1; \
> + const int b2 = result0 + value1; \
> + const int b3 = result0; \
> + const int src0add4 = (src)[0] + (src)[4]; \
> + const int src0sub4 = (src)[0] - (src)[4]; \
> + const int src26value2 = (src)[2] + (src)[6] + value2; \
> + const int a0 = src0add4 + src26value2; \
> + const int a1 = src0sub4 + value2; \
> + const int a2 = src0sub4 - value2; \
> + const int a3 = src0add4 - src26value2; \
> + (dest)[d0] = munge(a0 + b0); \
> + (dest)[d1] = munge(a1 + b1); \
> + (dest)[d2] = munge(a2 + b2); \
> + (dest)[d3] = munge(a3 + b3); \
> + (dest)[d4] = munge(a3 - b3); \
> + (dest)[d5] = munge(a2 - b2); \
> + (dest)[d6] = munge(a1 - b1); \
> + (dest)[d7] = munge(a0 - b0); \
Adding 8 to src[0][0] at the start should improve the correctness
of the >>4 that munge performs in the second pass (8 is half of 1<<4, so the
shift then rounds to nearest instead of always rounding down).
> +}
> +/* end IDCT_TRANSFORM macro */
> +
> +#define MUNGE_NONE(x) (x)
> +#define IDCT_ROW(dest,src) IDCT_TRANSFORM(dest,0,8,16,24,32,40,48,56,MUNGE_NONE,src)
> +
> +#define MUNGE_8BIT(x) av_clip_uint8((x)>>4)
> +#define IDCT_COL(dest,src) IDCT_TRANSFORM(dest,0,1, 2, 3, 4, 5, 6, 7,MUNGE_8BIT,src)
It seems this does a transpose during IDCT_ROW; if that wasn't done,
ff_zigzag_direct_transposed would be unneeded and the normal zigzag
should be usable.
[...]
> Index: libavcodec/dsputil.h
> ===================================================================
> --- libavcodec/dsputil.h (revision 15434)
> +++ libavcodec/dsputil.h (working copy)
> @@ -86,6 +86,9 @@
> void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
> void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
>
> +/* EA DSP function */
> +void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
it seems this is unneeded and the function could be static
[...]
> +static void tgq_decode_block(TgqContext *s, DCTELEM block[64], GetBitContext *gb){
> + int i,j,value;
> + block[0] = get_sbits(gb,8) * s->qtable[0];
> + for(i=1; i<64; ) {
> + switch(show_bits(gb,3)) {
> + case 4:
> + block[ff_zigzag_direct_transposed[i++]] = 0;
> + case 0:
> + block[ff_zigzag_direct_transposed[i++]] = 0;
> + skip_bits(gb,3);
> + break;
> + case 5:
> + block[ff_zigzag_direct_transposed[i++]] = 0;
> + case 1:
> + skip_bits(gb,3);
> + value = 2*get_bits(gb,5);
> + for(j=0; j<value; j++)
> + block[ff_zigzag_direct_transposed[i++]] = 0;
> + break;
> + case 6:
> + skip_bits(gb,3);
> + block[ff_zigzag_direct_transposed[i]] = -(s->qtable[ff_zigzag_direct_transposed[i]]);
> + i++;
> + break;
> + case 2:
> + skip_bits(gb,3);
> + block[ff_zigzag_direct_transposed[i]] = s->qtable[ff_zigzag_direct_transposed[i]];
> + i++;
> + break;
> + case 7: // 111b
> + case 3: // 011b
> + skip_bits(gb,2);
> + if (show_bits(gb,6)==0x3F) {
> + skip_bits(gb, 6);
> + block[ff_zigzag_direct_transposed[i]] = get_sbits(gb,8) * s->qtable[ff_zigzag_direct_transposed[i]];
> + }else{
> + block[ff_zigzag_direct_transposed[i]] = get_sbits(gb,6) * s->qtable[ff_zigzag_direct_transposed[i]];
If you want the codec to work with other idcts, then the permutated scantable
has to be used instead of the hardcoded ff_zigzag_direct_transposed
> + }
> + i++;
> + break;
> + }
> + }
> + block[0] += 128<<4;
> +}
> +
> +static void tgq_idct_put_mb(TgqContext *s, DCTELEM (*block)[64], int mb_x, int mb_y){
> + int linesize= s->frame.linesize[0];
> + uint8_t *dest_y = s->frame.data[0] + (mb_y * 16* linesize ) + mb_x * 16;
> + uint8_t *dest_cb = s->frame.data[1] + (mb_y * 8 * s->frame.linesize[1]) + mb_x * 8;
> + uint8_t *dest_cr = s->frame.data[2] + (mb_y * 8 * s->frame.linesize[2]) + mb_x * 8;
> +
> + s->dsp.idct_put(dest_y , linesize, block[0]);
> + s->dsp.idct_put(dest_y + 8, linesize, block[1]);
> + s->dsp.idct_put(dest_y + 8*linesize , linesize, block[2]);
> + s->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]);
> + if(!(s->avctx->flags&CODEC_FLAG_GRAY)){
> + s->dsp.idct_put(dest_cb, s->frame.linesize[1], block[4]);
> + s->dsp.idct_put(dest_cr, s->frame.linesize[2], block[5]);
> + }
> +}
> +
> +static inline void tgq_dconly(TgqContext *s, unsigned char *dst, int dst_stride, int dc){
> + int j;
> + for(j=0;j<8;j++)
> + memset(dst+j*dst_stride, dc, 8);
> +}
> +
> +static inline void tgq_dconly_block(TgqContext *s, int mb_x, int mb_y, int i, int dc_level){
> + int linesize= s->frame.linesize[0];
> + uint8_t *dest_y = s->frame.data[0] + (mb_y * 16* linesize ) + mb_x * 16;
> + uint8_t *dest_cb = s->frame.data[1] + (mb_y * 8 * s->frame.linesize[1]) + mb_x * 8;
> + uint8_t *dest_cr = s->frame.data[2] + (mb_y * 8 * s->frame.linesize[2]) + mb_x * 8;
> + int dc = av_clip_uint8(128 + ((dc_level*s->qtable[0])>>4));
> +
> + switch(i) {
> + case 0: tgq_dconly(s,dest_y , linesize, dc); break;
> + case 1: tgq_dconly(s,dest_y + 8, linesize, dc); break;
> + case 2: tgq_dconly(s,dest_y + 8*linesize , linesize, dc); break;
> + case 3: tgq_dconly(s,dest_y + 8*linesize + 8, linesize, dc); break;
> + case 4: if(!(s->avctx->flags&CODEC_FLAG_GRAY))
> + tgq_dconly(s,dest_cb, s->frame.linesize[1], dc);
> + break;
> + case 5: if(!(s->avctx->flags&CODEC_FLAG_GRAY))
> + tgq_dconly(s,dest_cr, s->frame.linesize[2], dc);
> + break;
> + }
> +}
> +
> +static void tgq_decode_mb(TgqContext *s, int mb_y, int mb_x, const int8_t **bs, const int8_t *buf_end){
> + int mode;
> + int i; // block counter
> + int8_t dc[6];
> + DCTELEM block[6][64];
> +
> + mode = bytestream_get_byte((const uint8_t**)bs);
> + if (mode>buf_end-*bs) {
> + av_log(s->avctx, AV_LOG_ERROR, "truncated macroblock\n");
> + return;
> + }
> +
> + if (mode==3||mode==6||mode==12) {
> + if (mode==3) {
> + memset(dc, (*bs)[0], 4);
> + dc[4] = (*bs)[1];
> + dc[5] = (*bs)[2];
> + }else if (mode==6) {
> + memcpy(dc, *bs, 6);
> + }else if (mode==12) {
> + for(i=0; i<6; i++)
> + dc[i] = (*bs)[i*2];
> + }
> + for(i=0; i<6;i++)
> + tgq_dconly_block(s, mb_x, mb_y, i, dc[i]);
> + }else if (mode>12) {
> + GetBitContext gb;
> + init_get_bits(&gb, *bs, mode*8);
> + for(i=0; i<6; i++)
> + tgq_decode_block(s, block[i], &gb);
> + tgq_idct_put_mb(s, block, mb_x, mb_y);
> + }else {
> + av_log(s->avctx, AV_LOG_ERROR, "unsupported mb mode %i\n", mode);
> + }
if(mode>12) {
...
tgq_idct_put_mb(s, block, mb_x, mb_y);
}else{
if (mode==3) {
memset(dc, (*bs)[0], 4);
dc[4] = (*bs)[1];
dc[5] = (*bs)[2];
}else if (mode==6) {
memcpy(dc, *bs, 6);
}else if (mode==12) {
for(i=0; i<6; i++)
dc[i] = (*bs)[i*2];
}else{
av_log(s->avctx, AV_LOG_ERROR, "unsupported mb mode %i\n", mode);
}
tgq_idct_put_mb_dconly()
}
tgq_idct_put_mb_dconly(){
tgq_dconly(s,dest_y , linesize, dc[0]);
tgq_dconly(s,dest_y + 8, linesize, dc[1]);
tgq_dconly(s,dest_y + 8*linesize , linesize, dc[2]);
tgq_dconly(s,dest_y + 8*linesize + 8, linesize, dc[3]);
if(!(s->avctx->flags&CODEC_FLAG_GRAY)){
tgq_dconly(s,dest_cb, s->frame.linesize[1], dc[4]);
tgq_dconly(s,dest_cr, s->frame.linesize[2], dc[5]);
}
}
> + *bs += mode;
> +}
> +
> +static void tgq_calculate_qtable(TgqContext *s, int quant){
> + int i,j;
> + const int a = (14*(100-quant))/100 + 1;
> + const int b = (11*(100-quant))/100 + 4;
> + for(j=0;j<8;j++)
> + for(i=0;i<8;i++)
> + s->qtable[j*8+i] =((a*(j+i)/(7+7) + b)*ff_inv_aanscales[j*8+i])>>(14-4);
> +}
Similarly, if you want the codec to work with the other idcts,
ff_inv_aanscales would have to be conditional, and only used for the AAN
idct (= only the new EA one).
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
No human being will ever know the Truth, for even if they happen to say it
by chance, they would not even know they had done so. -- Xenophanes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080927/5da532e6/attachment.pgp>
More information about the ffmpeg-devel
mailing list