[FFmpeg-devel] Awakening of FFH264
Jason Garrett-Glaser
darkshikari
Mon Mar 22 19:33:32 CET 2010
> +inline void ff_h264_deblock_line_luma_c(int p[4], int q[4], int QP, int bS)
Pure code duplication.
> +void ff_h264_deblock_macroblock_c(MacroBlock *mb, int filter_left_edge, int
> filter_top_edge, int isIDR, int QPYav, int QPCav)
This is about 5 times longer than it needs to be.
> +
> +void ff_h264_inv_quant_dct_add_c(DCTELEM block[4][4], int QP, int
> dontscaleDC, uint8_t *dst, int stride) // y,x indexing
I don't see the reason for this unrolling. This is premature
optimization at its worst; write an asm function if you want it
faster. This is just code bloat.
> + /* Not implemented yet */
> + //if (ARCH_ARM) ff_h264dspenc_init_arm(c);
> + //if (ARCH_PPC) ff_h264dspenc_init_ppc(c);
> + //if (HAVE_MMX) ff_h264dspenc_init_x86(c);
I don't see why this is here.
> + void (*h264_hadamard_mult_2x2)(DCTELEM Y[2][2]);
2x2 is faster if inlined. It probably shouldn't be a DSP function.
> +/* Deblocking filter (p153) */
> +static const uint8_t alpha_table[52*3] = {
> +static const uint8_t beta_table[52*3] = {
> +static const uint8_t tc0_table[52*3][4] = {
Pure duplication. These tables are already in libavcodec.
> +#define DEFAULT_QP 30
Use the ffmpeg parameter system, don't define your own internal
reimplementations of it.
> +#define NUMBER_OF_FRAMES 2
Why not make this map to the reference frames parameter?
> +#define RATECONTROLINTERVAL 0.5
Why not use the existing mpegvideo ratecontrol instead of
reimplementing your own badly?
> +#define CHROMA_QP_INDEX_OFFSET_MAX 12
> +#define CHROMA_QP_INDEX_OFFSET_MIN -12
These values are not likely to ever change in the future, so a define
seems needless.
> +#define RTP_H264_FUA 28
> +#define H264_NAL_NRI_MASK 0x60
> +#define H264_NAL_TYPE_MASK 0x1f
These look like they should be defined closer to their use.
> +#define H264_COPY_4X4BLOCK_TRANSPOSED_PART(A, xoffset, yoffset, dest, src1,
> src2) \
> +dest[0][A] = src1[yoffset+A][xoffset+0]-src2[yoffset+A][xoffset+0]; \
> +dest[1][A] = src1[yoffset+A][xoffset+1]-src2[yoffset+A][xoffset+1]; \
> +dest[2][A] = src1[yoffset+A][xoffset+2]-src2[yoffset+A][xoffset+2]; \
> +dest[3][A] = src1[yoffset+A][xoffset+3]-src2[yoffset+A][xoffset+3];
> +
> +#define H264_COPY_4X4BLOCK_TRANSPOSED(xoffset,yoffset,dest,src1,src2) \
> +{ \
> +H264_COPY_4X4BLOCK_TRANSPOSED_PART(0, xoffset, yoffset, dest, src1, src2);
> \
> +H264_COPY_4X4BLOCK_TRANSPOSED_PART(1, xoffset, yoffset, dest, src1, src2);
> \
> +H264_COPY_4X4BLOCK_TRANSPOSED_PART(2, xoffset, yoffset, dest, src1, src2);
> \
> +H264_COPY_4X4BLOCK_TRANSPOSED_PART(3, xoffset, yoffset, dest, src1, src2);
> \
> +}
> +
> +#define H264_COPY_16X16BLOCK(dest,src1,src2) \
> +{ \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 0, 0,dest[0][0],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 4, 0,dest[0][1],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 8, 0,dest[0][2],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED(12, 0,dest[0][3],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 0, 4,dest[1][0],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 4, 4,dest[1][1],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 8, 4,dest[1][2],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED(12, 4,dest[1][3],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 0, 8,dest[2][0],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 4, 8,dest[2][1],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 8, 8,dest[2][2],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED(12, 8,dest[2][3],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 0,12,dest[3][0],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 4,12,dest[3][1],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED( 8,12,dest[3][2],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED(12,12,dest[3][3],src1,src2); \
> +}
> +
> +#define H264_COPY_8X8BLOCK(dest,src1,src2) \
> +{ \
> +H264_COPY_4X4BLOCK_TRANSPOSED(0,0,dest[0][0],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED(4,0,dest[0][1],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED(0,4,dest[1][0],src1,src2); \
> +H264_COPY_4X4BLOCK_TRANSPOSED(4,4,dest[1][1],src1,src2); \
> +}
This is really really ugly. I don't like these stupid nested macros,
and not only are they stupid, they're slow (no write-combining).
Use DSP functions for the larger ones.
> +#define SAR_NOVAL -1
> +
> +static const struct{
> + int width;
> + int height;
> + int sar;
> +} sar_table[] = {
> + { 1, 1, 1},
> + { 12, 11, 2},
> + { 10, 11, 3},
> + { 16, 11, 4},
> + { 40, 33, 5},
> + { 24, 11, 6},
> + { 20, 11, 7},
> + { 32, 11, 8},
> + { 80, 33, 9},
> + { 18, 11, 10},
> + { 15, 11, 11},
> + { 64, 33, 12},
> + {160, 99, 13},
> + { 0, 0, SAR_NOVAL}
> +};
These values can be uint8_t. Define things closer to where they are used.
> +static uint8_t *rtp_buffer;
Static buffers are not allowed.
> +static void h264_rtp_packetize(AVCodecContext *avctx, uint8_t *src, int
> srcsize)
> +{
> + H264EncContext *t = (H264EncContext *)avctx->priv_data;
> +
> + if (srcsize < avctx->rtp_payload_size)
> + {
> + // NAL
> + uint8_t *nalu_payload = src;
> + int tmpsize = srcsize;
> +
> + /* skip any data before the start code prefix */
> + while (tmpsize > 4 && (nalu_payload[0] != 0x00 || nalu_payload[1]
> != 0x00 || nalu_payload[2] != 0x01))
> + {
> + nalu_payload++;
> + tmpsize--;
> + av_log(avctx, AV_LOG_ERROR, "Skipping byte before
> startcode...\n");
> + }
> +
> + if (tmpsize < 4)
> + return;
> +
> + /* skip the start code prefix */
> + nalu_payload += 3;
> + tmpsize -= 3;
> +
> + emms_c();
> + avctx->rtp_callback(avctx, nalu_payload, tmpsize, t->mb_width *
> t->mb_height);
> + }
> + else
> + {
> + // FU-A
> + uint8_t *nalu_payload = src;
> + uint8_t nalu_header;
> + int tmpsize = srcsize;
> + int start=1, end=0, first=0;
> +
> + /* skip any data before the start code prefix */
> + while (tmpsize > 4 && (nalu_payload[0] != 0x00 || nalu_payload[1]
> != 0x00 || nalu_payload[2] != 0x01))
> + {
> + nalu_payload++;
> + tmpsize--;
> + av_log(avctx, AV_LOG_ERROR, "Skipping byte before startcode...
> (FU-A)\n");
> + }
> +
> + if (tmpsize < 4)
> + return;
> +
> + /* skip the start code prefix */
> + nalu_payload += 3;
> + tmpsize -= 3;
> +
> + nalu_header = *nalu_payload;
> +
> + nalu_payload++; /* skip the NALU header */
> + tmpsize--;
> +
> + while (!end)
> + {
> + int payload_size;
> +
> + payload_size = FFMIN (avctx->rtp_payload_size - 2, tmpsize);
> + if (tmpsize==payload_size)
> + end = 1;
> +
> + memset (rtp_buffer, 0, avctx->rtp_payload_size);
> +
> + rtp_buffer[0] = (nalu_header & H264_NAL_NRI_MASK) |
> RTP_H264_FUA; // FU indicator
> + rtp_buffer[1] = (start<<7)|(end<<6)|(nalu_header &
> H264_NAL_TYPE_MASK); // FU header
> +
> + memcpy (rtp_buffer + 2, nalu_payload + first, payload_size);
> +
> + emms_c();
> + avctx->rtp_callback(avctx, rtp_buffer, payload_size + 2, end ?
> t->mb_width * t->mb_height : 0);
> +
> + tmpsize -= payload_size;
> + first += payload_size;
> + start=0;
> + }
> + }
> +}
Why is this even in libavcodec?
> /**
> * Write out the provided data into a NAL unit.
> * @param nal_ref_idc NAL reference IDC
> @@ -33,36 +223,36 @@
> * @param b2 the data which should be escaped
> * @returns pointer to current position in the output buffer or NULL if an
> error occurred
> */
> -static uint8_t *h264_write_nal_unit(int nal_ref_idc, int nal_unit_type,
> uint8_t *dest, int *destsize,
> - PutBitContext *b2)
> +static uint8_t *h264_write_nal_unit(AVCodecContext *avctx, int nal_ref_idc,
> int nal_unit_type, uint8_t *dest, int *destsize,
> + PutBitContext *b2)
> {
> PutBitContext b;
> int i, destpos, rbsplen, escape_count;
> uint8_t *rbsp;
> -
> +
> if (nal_unit_type != NAL_END_STREAM)
> put_bits(b2,1,1); // rbsp_stop_bit
> -
> +
> // Align b2 on a byte boundary
> align_put_bits(b2);
> rbsplen = put_bits_count(b2)/8;
> flush_put_bits(b2);
> rbsp = b2->buf;
> -
> +
> init_put_bits(&b,dest,*destsize);
> -
> +
> put_bits(&b,16,0);
> put_bits(&b,16,0x01);
> -
> +
> put_bits(&b,1,0); // forbidden zero bit
> - put_bits(&b,2,nal_ref_idc); // nal_ref_idc
> - put_bits(&b,5,nal_unit_type); // nal_unit_type
> -
> + put_bits(&b,2,nal_ref_idc);
> + put_bits(&b,5,nal_unit_type);
> +
> flush_put_bits(&b);
> -
> +
> destpos = 5;
> escape_count= 0;
> -
> +
What's with all these false deltas?
> -static const uint8_t pict_type_to_golomb[7] = {-1, 2, 0, 1, -1, 4, 3};
Why do we have negative values in a uint8_t array?
> +static void h264_assign_macroblocks(AVPicture *p, MacroBlock **mb_map, int
> mb_width, int mb_height, int setneighbours)
> +{
> + int y, x, i;
> + int Ylinesize = p->linesize[0];
> + int Ulinesize = p->linesize[1];
> + int Vlinesize = p->linesize[2];
> +
> + if (!setneighbours)
> + {
> + for (y = 0 ; y < mb_height ; y++)
> + {
> + int y16 = y << 4;
> + int y8 = y << 3;
> +
> + for (x = 0 ; x < mb_width ; x++)
> + {
> + int x16 = x << 4;
> + int x8 = x << 3;
> +
> + for (i = 0 ; i < 8 ; i++)
> + {
> + int ypos = y8 + i;
> + mb_map[y][x].U[i] = p->data[1] + (x8+ypos*Ulinesize);
> + mb_map[y][x].V[i] = p->data[2] + (x8+ypos*Vlinesize);
> + }
> + for (i = 0 ; i < 16 ; i++)
> + mb_map[y][x].Y[i] = p->data[0] +
> (x16+(y16+i)*Ylinesize);
> +
> + mb_map[y][x].topblock = NULL;
> + mb_map[y][x].leftblock = NULL;
> + mb_map[y][x].rightblock = NULL;
> + mb_map[y][x].available = 0;
> + }
> + }
> + }
> + else
> + {
> + y = 0;
> + x = 0;
> + for (i = 0 ; i < 8 ; i++)
> + {
> + mb_map[y][x].U[i] = p->data[1] + ((x<<3) + ((y<<3) + i) *
> Ulinesize);
> + mb_map[y][x].V[i] = p->data[2] + ((x<<3) + ((y<<3) + i) *
> Vlinesize);
> + }
> + for (i = 0 ; i < 16 ; i++)
> + mb_map[y][x].Y[i] = p->data[0] + ((x<<4) + ((y<<4) + i) *
> Ylinesize);
> +
> + mb_map[y][x].topblock = NULL;
> + mb_map[y][x].leftblock = NULL;
> +
> + if (x < mb_width-1)
> + mb_map[y][x].rightblock = &(mb_map[y][x+1]);
> + else
> + mb_map[y][x].rightblock = NULL;
> + mb_map[y][x].available = 0;
> +
> + y = 0;
> + for (x = 1 ; x < mb_width ; x++)
> + {
> + for (i = 0 ; i < 8 ; i++)
> + {
> + mb_map[y][x].U[i] = p->data[1] + ((x<<3) + ((y<<3) + i) *
> Ulinesize);
> + mb_map[y][x].V[i] = p->data[2] + ((x<<3) + ((y<<3) + i) *
> Vlinesize);
> + }
> + for (i = 0 ; i < 16 ; i++)
> + mb_map[y][x].Y[i] = p->data[0] + ((x<<4) + ((y<<4) + i) *
> Ylinesize);
> +
> + mb_map[y][x].topblock = NULL;
> + mb_map[y][x].leftblock = &(mb_map[y][x-1]);
> + if (x < mb_width - 1)
> + mb_map[y][x].rightblock = &(mb_map[y][x+1]);
> + else
> + mb_map[y][x].rightblock = NULL;
> + mb_map[y][x].available = 0;
> + }
> +
> + x = 0;
> + for (y = 1 ; y < mb_height ; y++)
> + {
> + for (i = 0 ; i < 8 ; i++)
> + {
> + mb_map[y][x].U[i] = p->data[1] + ((x<<3) + ((y<<3) + i) *
> Ulinesize);
> + mb_map[y][x].V[i] = p->data[2] + ((x<<3) + ((y<<3) + i) *
> Vlinesize);
> + }
> + for (i = 0 ; i < 16 ; i++)
> + mb_map[y][x].Y[i] = p->data[0] + ((x<<4) + ((y<<4) + i) *
> Ylinesize);
> +
> + mb_map[y][x].topblock = &(mb_map[y-1][x]);
> + mb_map[y][x].leftblock = NULL;
> + if (x < mb_width - 1)
> + mb_map[y][x].rightblock = &(mb_map[y][x+1]);
> + else
> + mb_map[y][x].rightblock = NULL;
> + mb_map[y][x].available = 0;
> + }
> +
> + for (y = 1 ; y < mb_height ; y++)
> + {
> + for (x = 1 ; x < mb_width ; x++)
> + {
> + for (i = 0 ; i < 8 ; i++)
> + {
> + mb_map[y][x].U[i] = p->data[1] + ((x<<3) + ((y<<3) + i)
> * Ulinesize);
> + mb_map[y][x].V[i] = p->data[2] + ((x<<3) + ((y<<3) + i)
> * Vlinesize);
> + }
> + for (i = 0 ; i < 16 ; i++)
> + mb_map[y][x].Y[i] = p->data[0] + ((x<<4) + ((y<<4) + i)
> * Ylinesize);
> +
> + mb_map[y][x].topblock = &(mb_map[y-1][x]);
> + mb_map[y][x].leftblock = &(mb_map[y][x-1]);
> + if (x < mb_width - 1)
> + mb_map[y][x].rightblock = &(mb_map[y][x+1]);
> + else
> + mb_map[y][x].rightblock = NULL;
> + mb_map[y][x].available = 0;
> + }
> + }
> + }
> +}
This seems entirely unnecessary. It isn't as if we will ever support
useless features like FMO, so I don't see why we need to waste memory
on a fancy map of macroblock positions. This isn't Theora.
> +static void h264_clear_nonzero_markers(MacroBlock **mb_map, int mb_width,
> int mb_height)
This function should not exist. There is no reason that it wouldl be
needed in any H.264 encoder.
> + put_bits(&b, 1, 0); // constraint_set1_flag
This should be set to 1.
> + set_ue_golomb(&b, 2); // pic_order_cnt
Why in the world are you using poc type 2?
> + set_ue_golomb(&b, 1); // num_ref_frames [0, 16]
Why is this hardcoded when the number of frames is a define?
> + for (i = 0; sar_table[i].sar != -1; i++)
> + {
> + if (sar_table[i].width == t->vui.sar_width &&
> + sar_table[i].height == t->vui.sar_height )
> + break;
There are two spacing inconsistencies here. Also, your syntax doesn't
match the standard K&R used by ffmpeg.
> + put_bits(&b, 1, 0); // bitstream_restriction_flag
You should absolutely write this flag and the relevant information,
especially num_reorder_frames. We don't need more encoders that don't
do this.
> + set_ue_golomb(&b, 0); // num_ref_idx_l0_active_minus1 Using at most
> the previous frame for prediction
Same as what I noted above (it's a define...).
> + /* @FIXME: Is this the correct way to determine the DPB? Or should the
> + * reference frames be taken into account? */
> + dpb = t->refframe_width * t->refframe_height * 1.5; /* YUV420 */
Yes, yes, they should.
> + av_log(avctx, AV_LOG_ERROR, "No maximum bitrate specified,
> bitstream may exceed maximum bitrate level constraints\n");
> + max_br = avctx->bit_rate;
Why is this an error? And how are you going to enforce max bitrates
given that the level table doesn't have CPB sizes in it?
> + switch(avctx->pix_fmt)
> + {
> + case PIX_FMT_YUV420P:
> + break;
> + default:
> + av_log(avctx, AV_LOG_ERROR, "format not supported\n");
> + return -1;
> + }
Why do we need a switch statement for this?
This is needlessly large.
> + /* If the width is not a multiple of 16, enabling cropping */
> + if (( width % 16) !=0 )
> + {
> + t->frame_cropping_flag = 1;
> + t->frame_crop_left_offset = 0;
> + t->frame_crop_right_offset = (width%16)/2;
> + t->mb_width++;
> + }
> +
> + /* If the height is not a multiple of 16, enabling cropping */
> + if (( height % 16) !=0 )
> + {
> + t->frame_cropping_flag = 1;
> + t->frame_crop_top_offset = 0;
> + t->frame_crop_bottom_offset = (height%16)/2;
> + t->mb_height++;
> + }
This is incorrect. It should be obvious why.
> + t->mb_map = av_malloc(sizeof(MacroBlock*) * t->mb_height);
> + for (y = 0 ; y < t->mb_height ; y++)
> + {
> + t->mb_map[y] = av_malloc(sizeof(MacroBlock) * t->mb_width);
> + for (x = 0 ; x < t->mb_width ; x++)
> + {
> + t->mb_map[y][x].Y_width = 16;
> + t->mb_map[y][x].Y_height = 16;
> + }
> + }
Nevermind the fact that the MB map is unnecessary; why is it an array
of pointers instead of just a flat array?
> +
> t->reconstructed_frames[i]->reconstructed_mb_map[y][x].Y_width = 16;
> +
> t->reconstructed_frames[i]->reconstructed_mb_map[y][x].Y_height = 16;
This is retarded. We store the (constant) size of each macroblock
thousands of times per frame because...?
> + av_log(avctx, AV_LOG_ERROR, "Level %d not sufficient for this
> encoding.\n", avctx->level);
Errors should be fatal. If it's not fatal, it's not an error.
> t->reconstructed_frames[0]->reconstructed_picture.linesize[0];
> + t->U_stride =
> t->reconstructed_frames[0]->reconstructed_picture.linesize[1];
> + t->V_stride =
> t->reconstructed_frames[0]->reconstructed_picture.linesize[2];
You should really use shorter names for these data structures.
> +#if 0 // Only used for dev & debug
Why? In CAVLC it's easy to seek back and rewrite a block if it went
over the max MB size (and write PCM instead).
> +static inline void h264_neighbour_count_nonzero(MacroBlock *mb, int type,
> int x, int y, int *nA, int *nB)
> +{
> + if (type == NEIGHBOUR_SUBTYPE_Y)
> + H264_NEIGHBOUR_COUNT_NONZERO_PLANE(Y_nonzero, 3)
> + else if (type == NEIGHBOUR_SUBTYPE_U)
> + H264_NEIGHBOUR_COUNT_NONZERO_PLANE(U_nonzero, 1)
> + else
> + H264_NEIGHBOUR_COUNT_NONZERO_PLANE(V_nonzero, 1)
> +}
This should be done in a fill_caches-like function.
> +static const int8_t zigzagx[16] = { 0,1,0,0,1,2,3,2,1,0,1,2,3,3,2,3 };
> +static const int8_t zigzagy[16] = { 0,0,1,2,1,0,0,1,2,3,3,2,1,2,3,3 };
> +
> +#define H264_ENCODE_INTRA16X16_RESIDUAL_COEFFICIENTS(PLANE) \
> +coefficients[0] = PLANE[0][0]; \
> +coefficients[1] = PLANE[0][1]; \
> +coefficients[2] = PLANE[1][0]; \
> +coefficients[3] = PLANE[1][1]; \
> +h264cavlc_encode(b, coefficients, 4, -1, -1, 1); // nA and nB are not used
> in this case
Instead of making everything a define why not--god forbid--make a function?
> +static void h264_encode_intra16x16_residual(PutBitContext *b, DCTELEM
> YD[4][4], DCTELEM UD[2][2], DCTELEM VD[2][2],
> + Residual *residual, int
> lumamode, int chromamode, MacroBlock *mb)
> +{
> + int lumaACcount = 0;
> + int chromaDCcount = 0;
> + int chromaACcount = 0;
> + int CodedBlockPatternChroma = 0;
> + int CodedBlockPatternLuma = 0;
> + int x, y, i, j;
> + DCTELEM coefficients[256];
> + int nA, nB;
> +
> + for (y = 0 ; y < 4 ; y++)
> + for (x = 0 ; x < 4 ; x++)
> + H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4Y[y][x],
> lumaACcount);
> +
> + for (y = 0 ; y < 2 ; y++)
> + {
> + for (x = 0 ; x < 2 ; x++)
> + {
> + H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4U[y][x],
> chromaACcount);
> + H264_COUNT_AND_CLIP_SUBBLOCK(residual->part4x4V[y][x],
> chromaACcount);
> + }
> + }
> +
> + for (y = 0 ; y < 2 ; y++)
> + {
> + for (x = 0 ; x < 2 ; x++)
> + {
> + H264_COUNT_AND_CLIP(UD[y][x], chromaDCcount);
> + H264_COUNT_AND_CLIP(VD[y][x], chromaDCcount);
> + }
> + }
> +
> + for (y = 0 ; y < 4 ; y++)
> + for (x = 0 ; x < 4 ; x++)
> + clip_inplace(YD[y][x], -2047, 2047);
None of this code should even exist. You don't need to count the
number of coefficients if you're doing things right; it gets counted
inside the entropy coder.
> + set_se_golomb(b, 0); // mb_qp_delta
Why hardcode the most useful syntax element in H.264 to zero?
> + for (i = 0 ; i < 16 ; i++)
> + coefficients[i] = YD[zigzagy[i]][zigzagx[i]];
Make this a function.
> + { { 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243}, {
> 13107, 8066, 13107, 8066}, { 8066, 5243, 8066, 5243} },
> + { { 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660}, {
> 11916, 7490, 11916, 7490}, { 7490, 4660, 7490, 4660} },
> + { { 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194}, {
> 10082, 6554, 10082, 6554}, { 6554, 4194, 6554, 4194} },
> + { { 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647}, {
> 9362, 5825, 9362, 5825}, { 5825, 3647, 5825, 3647} },
> + { { 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355}, {
> 8192, 5243, 8192, 5243}, { 5243, 3355, 5243, 3355} },
> + { { 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893}, {
> 7282, 4559, 7282, 4559}, { 4559, 2893, 4559, 2893} }
Didn't you already declare this earlier?
> +static inline void ff_h264_hadamard_inv_quant_2x2(DCTELEM Y[2][2], int QP)
> +{
> + int32_t V = h264_V00[QP%6];
> + int div = QP/6;
> +
> + V <<= div;
> + Y[0][0] = (Y[0][0]*V) >> 5;
> + Y[0][1] = (Y[0][1]*V) >> 5;
> + Y[1][0] = (Y[1][0]*V) >> 5;
> + Y[1][1] = (Y[1][1]*V) >> 5;
> +}
Why do you combine some dequant functions with inverse transform, but
others you don't?
> +/*
> + * Only if qpprime_y_zero_transform_bypass_flag == 0
> + */
Whaaaaat? Everything above assumes baseline-only, and now we have
something restricted to High 4:4:4 Predictive?
> + int leftavail = 0;
> + int topavail = 0;
> + int topleftavail = 0;
This stuff should have been calculated in fill_caches or similar.
> + enum {
> + H264_INTRA_DC = 0,
> + H264_INTRA_VERTICAL,
> + H264_INTRA_HORIZONTAL,
> + H264_INTRA_PLANE
> + };
These are already defined in ffmpeg.
> + if (available_prediction_strategies & (1 << H264_INTRA_PLANE))
> + {
> + t->hpc.pred16x16[PLANE_PRED8x8](destmb->Y[0], t->refframe_width);
> + /* Calculate the SAD using this prediction */
> + sad = t->dspcontext.pix_abs[0][0](0, targetmb->Y[0], destmb->Y[0],
> t->Y_stride, 16);
> + if (sad < lowest_sad)
> + {
> + lowest_sad = sad;
> + prediction_strategy = H264_INTRA_PLANE;
> + }
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "plane sad: %d\n", sad);
> + }
> +
> + if (available_prediction_strategies & (1 << H264_INTRA_HORIZONTAL))
> + {
> + t->hpc.pred16x16[HOR_PRED8x8](destmb->Y[0], t->refframe_width);
> + /* Calculate the SAD using this prediction */
> + sad = t->dspcontext.pix_abs[0][0](0, targetmb->Y[0], destmb->Y[0],
> t->Y_stride, 16);
> + if (sad < lowest_sad)
> + {
> + lowest_sad = sad;
> + prediction_strategy = H264_INTRA_HORIZONTAL;
> + }
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "horizontal sad: %d\n", sad);
> + }
> +
> + if (available_prediction_strategies & (1 << H264_INTRA_VERTICAL))
> + {
> + t->hpc.pred16x16[VERT_PRED8x8](destmb->Y[0], t->refframe_width);
> + /* Calculate the SAD using this prediction */
> + sad = t->dspcontext.pix_abs[0][0](0, targetmb->Y[0], destmb->Y[0],
> t->Y_stride, 16);
> + if (sad < lowest_sad)
> + {
> + lowest_sad = sad;
> + prediction_strategy = H264_INTRA_VERTICAL;
> + }
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "vertical sad: %d\n", sad);
> + }
Holy code duplication batman.
> + if (available_prediction_strategies & (1 << H264_INTRA_DC))
> + {
> + if(topavail)
> + {
> + if(leftavail)
> + {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "all avail, using
> average\n");
> + t->hpc.pred16x16[DC_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + }
> + else
> + {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "top avail, using average
> of top\n");
> + t->hpc.pred16x16[TOP_DC_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + }
> + }
> + else
> + {
> + if(leftavail)
> + {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "left avail, using average
> of left\n");
> + t->hpc.pred16x16[LEFT_DC_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + }
> + else {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "nothing avail, using
> 128\n");
> + t->hpc.pred16x16[DC_128_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + }
> + }
Wayyyy too complicated.
> + /* Calculate the SAD using this prediction */
> + sad = t->dspcontext.pix_abs[0][0](0, targetmb->Y[0], destmb->Y[0],
> t->Y_stride, 16);
> + if (sad < lowest_sad)
> + {
> + lowest_sad = sad;
> + prediction_strategy = H264_INTRA_DC;
> + }
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "DC sad: %d\n", sad);
> + }
Why are we using SAD instead of the user-selected mbcmp?
> + /* Use the prediction strategy which resulted in the lowest SAD */
> + switch(prediction_strategy)
> + {
> + case H264_INTRA_PLANE:
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "INTRA_PLANE\n");
> + t->hpc.pred16x16[PLANE_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + t->hpc.pred8x8[PLANE_PRED8x8](destmb->U[0],
> t->refframe_width>>1);
> + t->hpc.pred8x8[PLANE_PRED8x8](destmb->V[0],
> t->refframe_width>>1);
> + lumapredmode = PLANE_PRED8x8;
> + chromapredmode = PLANE_PRED8x8;
> + break;
> +
> + case H264_INTRA_HORIZONTAL:
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "INTRA_HOR\n");
> + // Horizontal prediction
> + t->hpc.pred16x16[HOR_PRED8x8](destmb->Y[0], t->refframe_width);
> + t->hpc.pred8x8[HOR_PRED8x8](destmb->U[0],
> t->refframe_width>>1);
> + t->hpc.pred8x8[HOR_PRED8x8](destmb->V[0],
> t->refframe_width>>1);
> + lumapredmode = HOR_PRED8x8;
> + chromapredmode = HOR_PRED8x8;
> + break;
> +
> + case H264_INTRA_VERTICAL:
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "INTRA_VERTICAL\n");
> + // Vertical prediction
> + t->hpc.pred16x16[VERT_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + t->hpc.pred8x8[VERT_PRED8x8](destmb->U[0],
> t->refframe_width>>1);
> + t->hpc.pred8x8[VERT_PRED8x8](destmb->V[0],
> t->refframe_width>>1);
> + lumapredmode = VERT_PRED;
> + chromapredmode = VERT_PRED8x8;
> + break;
> +
> +
> + case H264_INTRA_DC:
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "INTRA_DC\n");
> + if ((!topavail) && (!leftavail))
> + //if (1) // XXX: Illegal! Decoder will not know that 128
> was used,
> + //but will use prediction based on previous blocks.
> + {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "nothing avail, using
> 128\n");
> + t->hpc.pred16x16[DC_128_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + t->hpc.pred8x8[DC_128_PRED8x8](destmb->U[0],
> t->refframe_width>>1);
> + t->hpc.pred8x8[DC_128_PRED8x8](destmb->V[0],
> t->refframe_width>>1);
> + }
Why do we assume the optimal chroma pred mode is the same as luma?
> + else
> + {
> + if(topavail)
> + {
> + if(leftavail)
> + {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "all avail, using
> average\n");
> + t->hpc.pred16x16[DC_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + t->hpc.pred8x8[DC_PRED8x8](destmb->U[0],
> t->refframe_width>>1);
> + t->hpc.pred8x8[DC_PRED8x8](destmb->V[0],
> t->refframe_width>>1);
> + }
> + else
> + {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "top avail, using
> average of top\n");
> + t->hpc.pred16x16[TOP_DC_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + t->hpc.pred8x8[TOP_DC_PRED8x8](destmb->U[0],
> t->refframe_width>>1);
> + t->hpc.pred8x8[TOP_DC_PRED8x8](destmb->V[0],
> t->refframe_width>>1);
> + }
> }
> -// last_non_zero = i;
> - } else {
> - block[0] = 0;
> + else
> + {
> + if(leftavail)
> + {
> + if (DEBUG_INTRA_PREDICTION)
> + av_log(avctx, AV_LOG_DEBUG, "left avail, using
> average of left\n");
> + t->hpc.pred16x16[LEFT_DC_PRED8x8](destmb->Y[0],
> t->refframe_width);
> + t->hpc.pred8x8[LEFT_DC_PRED8x8](destmb->U[0],
> t->refframe_width>>1);
> + t->hpc.pred8x8[LEFT_DC_PRED8x8](destmb->V[0],
> t->refframe_width>>1);
> + }
> + else
> + {
> + av_log(avctx, AV_LOG_ERROR, "error! we should not
> get here!\n");
> +
> + }
> + }
> }
> + lumapredmode = DC_PRED;
> + chromapredmode = DC_PRED8x8;
> + break;
> + }
Holy crap, you just basically copy-pasted this entire code block. Bad kitty.
> +static void h264_encode_inter16x16_residual(H264EncContext *t,
Aaaannd the patch is cut off.
Summary so far: What is this shit?
Dark Shikari
More information about the ffmpeg-devel
mailing list