[FFmpeg-devel] [PATCH 1/2] VC1: transpose IDCT 8x8 coeffs while reading.
Kostya
kostya.shishkov
Wed Feb 16 20:27:21 CET 2011
On Wed, Feb 16, 2011 at 02:21:49PM -0500, Ronald S. Bultje wrote:
> ---
> libavcodec/ppc/vc1dsp_altivec.c | 1 -
> libavcodec/vc1.h | 1 +
> libavcodec/vc1dec.c | 95 ++++++++++++++++++++------------------
> libavcodec/vc1dsp.c | 24 +++++-----
> 4 files changed, 63 insertions(+), 58 deletions(-)
>
> diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
> index a2f55f2d..5ffe9a5 100644
> --- a/libavcodec/ppc/vc1dsp_altivec.c
> +++ b/libavcodec/ppc/vc1dsp_altivec.c
> @@ -154,7 +154,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
> src6 = vec_ld( 96, block);
> src7 = vec_ld(112, block);
>
> - TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
> s0 = vec_unpackl(src0);
> s1 = vec_unpackl(src1);
> s2 = vec_unpackl(src2);
> diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
> index e5a9cba..da0b6c1 100644
> --- a/libavcodec/vc1.h
> +++ b/libavcodec/vc1.h
> @@ -215,6 +215,7 @@ typedef struct VC1Context{
> int k_y; ///< Number of bits for MVs (depends on MV range)
> int range_x, range_y; ///< MV range
> uint8_t pq, altpq; ///< Current/alternate frame quantizer scale
> + uint8_t zz_8x8[4][64];///< Zigzag table for TT_8x8, permuted for IDCT
> const uint8_t* zz_8x4;///< Zigzag scan table for TT_8x4 coding mode
> const uint8_t* zz_4x8;///< Zigzag scan table for TT_4x8 coding mode
> /** pquant parameters */
> diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
> index 3bd2cce..169797c 100644
> --- a/libavcodec/vc1dec.c
> +++ b/libavcodec/vc1dec.c
> @@ -1499,11 +1499,11 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded
>
> if(v->s.ac_pred) {
> if(!dc_pred_dir)
> - zz_table = wmv1_scantable[2];
> + zz_table = v->zz_8x8[2];
> else
> - zz_table = wmv1_scantable[3];
> + zz_table = v->zz_8x8[3];
> } else
> - zz_table = wmv1_scantable[1];
> + zz_table = v->zz_8x8[1];
>
> ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
> ac_val2 = ac_val;
> @@ -1524,16 +1524,16 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded
> if(s->ac_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k << 3] += ac_val[k];
> + block[k] += ac_val[k];
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k] += ac_val[k + 8];
> + block[k << 3] += ac_val[k + 8];
> }
> }
> /* save AC coeffs for further prediction */
> for(k = 1; k < 8; k++) {
> - ac_val2[k] = block[k << 3];
> - ac_val2[k + 8] = block[k];
> + ac_val2[k] = block[k];
> + ac_val2[k + 8] = block[k << 3];
> }
>
> /* scale AC coeffs */
> @@ -1570,15 +1570,15 @@ not_coded:
> if(s->ac_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++) {
> - block[k << 3] = ac_val[k] * scale;
> - if(!v->pquantizer && block[k << 3])
> - block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq;
> + block[k] = ac_val[k] * scale;
> + if(!v->pquantizer && block[k])
> + block[k] += (block[k] < 0) ? -v->pq : v->pq;
> }
> } else { //top
> for(k = 1; k < 8; k++) {
> - block[k] = ac_val[k + 8] * scale;
> - if(!v->pquantizer && block[k])
> - block[k] += (block[k] < 0) ? -v->pq : v->pq;
> + block[k << 3] = ac_val[k + 8] * scale;
> + if(!v->pquantizer && block[k << 3])
> + block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq;
> }
> }
> i = 63;
> @@ -1682,11 +1682,11 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
>
> if(v->s.ac_pred) {
> if(!dc_pred_dir)
> - zz_table = wmv1_scantable[2];
> + zz_table = v->zz_8x8[2];
> else
> - zz_table = wmv1_scantable[3];
> + zz_table = v->zz_8x8[3];
> } else
> - zz_table = wmv1_scantable[1];
> + zz_table = v->zz_8x8[1];
>
> while (!last) {
> vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
> @@ -1705,25 +1705,25 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
>
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> + block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> + block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> }
> } else {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k << 3] += ac_val[k];
> + block[k] += ac_val[k];
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k] += ac_val[k + 8];
> + block[k << 3] += ac_val[k + 8];
> }
> }
> }
> /* save AC coeffs for further prediction */
> for(k = 1; k < 8; k++) {
> - ac_val2[k] = block[k << 3];
> - ac_val2[k + 8] = block[k];
> + ac_val2[k] = block[k];
> + ac_val2[k + 8] = block[k << 3];
> }
>
> /* scale AC coeffs */
> @@ -1765,15 +1765,15 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
> if(use_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++) {
> - block[k << 3] = ac_val2[k] * scale;
> - if(!v->pquantizer && block[k << 3])
> - block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
> + block[k] = ac_val2[k] * scale;
> + if(!v->pquantizer && block[k])
> + block[k] += (block[k] < 0) ? -mquant : mquant;
> }
> } else { //top
> for(k = 1; k < 8; k++) {
> - block[k] = ac_val2[k + 8] * scale;
> - if(!v->pquantizer && block[k])
> - block[k] += (block[k] < 0) ? -mquant : mquant;
> + block[k << 3] = ac_val2[k + 8] * scale;
> + if(!v->pquantizer && block[k << 3])
> + block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
> }
> }
> i = 63;
> @@ -1884,17 +1884,14 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
>
> if(coded) {
> int last = 0, skip, value;
> - const uint8_t *zz_table;
> int k;
>
> - zz_table = wmv1_scantable[0];
> -
> while (!last) {
> vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
> i += skip;
> if(i > 63)
> break;
> - block[zz_table[i++]] = value;
> + block[v->zz_8x8[0][i++]] = value;
> }
>
> /* apply AC prediction if needed */
> @@ -1906,25 +1903,25 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
>
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> + block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> + block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> }
> } else {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k << 3] += ac_val[k];
> + block[k] += ac_val[k];
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k] += ac_val[k + 8];
> + block[k << 3] += ac_val[k + 8];
> }
> }
> }
> /* save AC coeffs for further prediction */
> for(k = 1; k < 8; k++) {
> - ac_val2[k] = block[k << 3];
> - ac_val2[k + 8] = block[k];
> + ac_val2[k] = block[k];
> + ac_val2[k + 8] = block[k << 3];
> }
>
> /* scale AC coeffs */
> @@ -1966,15 +1963,15 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
> if(use_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++) {
> - block[k << 3] = ac_val2[k] * scale;
> - if(!v->pquantizer && block[k << 3])
> - block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
> + block[k] = ac_val2[k] * scale;
> + if(!v->pquantizer && block[k])
> + block[k] += (block[k] < 0) ? -mquant : mquant;
> }
> } else { //top
> for(k = 1; k < 8; k++) {
> - block[k] = ac_val2[k + 8] * scale;
> - if(!v->pquantizer && block[k])
> - block[k] += (block[k] < 0) ? -mquant : mquant;
> + block[k << 3] = ac_val2[k + 8] * scale;
> + if(!v->pquantizer && block[k << 3])
> + block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
> }
> }
> i = 63;
> @@ -2035,7 +2032,7 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
> i += skip;
> if(i > 63)
> break;
> - idx = wmv1_scantable[0][i++];
> + idx = v->zz_8x8[0][i++];
> block[idx] = value * scale;
> if(!v->pquantizer)
> block[idx] += (block[idx] < 0) ? -mquant : mquant;
> @@ -3007,6 +3004,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
> VC1Context *v = avctx->priv_data;
> MpegEncContext *s = &v->s;
> GetBitContext gb;
> + int i;
>
> if (!avctx->extradata_size || !avctx->extradata) return -1;
> if (!(avctx->flags & CODEC_FLAG_GRAY))
> @@ -3025,6 +3023,13 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
> if(ff_msmpeg4_decode_init(avctx) < 0)
> return -1;
> if (vc1_init_common(v) < 0) return -1;
> + for (i = 0; i < 64; i++) {
> +#define transpose(x) ((x>>3) | ((x&7)<<3))
> + v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]);
> + v->zz_8x8[1][i] = transpose(wmv1_scantable[1][i]);
> + v->zz_8x8[2][i] = transpose(wmv1_scantable[2][i]);
> + v->zz_8x8[3][i] = transpose(wmv1_scantable[3][i]);
> + }
>
> avctx->coded_width = avctx->width;
> avctx->coded_height = avctx->height;
> diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
> index aab1694..fd740e1 100644
> --- a/libavcodec/vc1dsp.c
> +++ b/libavcodec/vc1dsp.c
> @@ -203,25 +203,25 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
> {
> int i;
> register int t1,t2,t3,t4,t5,t6,t7,t8;
> - DCTELEM *src, *dst;
> + DCTELEM *src, *dst, temp[64];
>
> src = block;
> - dst = block;
> + dst = temp;
> for(i = 0; i < 8; i++){
> - t1 = 12 * (src[0] + src[4]) + 4;
> - t2 = 12 * (src[0] - src[4]) + 4;
> - t3 = 16 * src[2] + 6 * src[6];
> - t4 = 6 * src[2] - 16 * src[6];
> + t1 = 12 * (src[ 0] + src[32]) + 4;
> + t2 = 12 * (src[ 0] - src[32]) + 4;
> + t3 = 16 * src[16] + 6 * src[48];
> + t4 = 6 * src[16] - 16 * src[48];
>
> t5 = t1 + t3;
> t6 = t2 + t4;
> t7 = t2 - t4;
> t8 = t1 - t3;
>
> - t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
> - t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
> - t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
> - t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
> + t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
> + t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
> + t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
> + t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
>
> dst[0] = (t5 + t1) >> 3;
> dst[1] = (t6 + t2) >> 3;
> @@ -232,11 +232,11 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
> dst[6] = (t6 - t2) >> 3;
> dst[7] = (t5 - t1) >> 3;
>
> - src += 8;
> + src += 1;
> dst += 8;
> }
>
> - src = block;
> + src = temp;
> dst = block;
> for(i = 0; i < 8; i++){
> t1 = 12 * (src[ 0] + src[32]) + 64;
> --
> 1.7.2.1
Looks OK.
More information about the ffmpeg-devel mailing list