[FFmpeg-devel] [PATCH] Patch cleanup for MPEG 1 & 2 optimizations
Jeff Downs
heydowns
Mon Apr 14 16:53:42 CEST 2008
On Sun, 13 Apr 2008, Jeff Downs wrote:
> MPV_motion_internal is no longer inlined (max-inline-insns-single limit
> reached). Trying to increase that limit to double the gcc man page stated
> default did nothing to help. Limit was still reached.
>
> Adding av_always_inline to it makes it be inlined. MPV_motion is still not
> (though I don't know if it is in current svn either).
> This (and fixing the aforementioned typo) gets performance closer to what
> I posted for the hardcoded version.
>
OP's patches, with av_always_inline added to MPV_motion_internal and the
typo in the calls to MPV_decode_mb_internal fixed, are attached.
Here are benchmarks for these three patches (applied together), 10 runs each
on a Core 2 Duo:
MPEG1 Current SVN:
User: avg: 0.201 stddev: 0.003 med: 0.200
Real: avg: 0.203 stddev: 0.003 med: 0.202
MPEG1 w/ Patches:
User: avg: 0.198 stddev: 0.003 med: 0.197
Real: avg: 0.201 stddev: 0.003 med: 0.200
MPEG2 Current SVN:
User: avg: 3.059 stddev: 0.029 med: 3.050
Real: avg: 3.101 stddev: 0.025 med: 3.096
MPEG2 w/ Patches:
User: avg: 3.012 stddev: 0.023 med: 3.005
Real: avg: 3.056 stddev: 0.026 med: 3.049
-Jeff
-------------- next part --------------
Index: libavcodec/mpegvideo_common.h
===================================================================
--- libavcodec/mpegvideo_common.h (revision 12790)
+++ libavcodec/mpegvideo_common.h (working copy)
@@ -617,12 +635,12 @@
* @param pic_op qpel motion compensation function (average or put normally)
* the motion vectors are taken from s->mv and the MV type from s->mv_type
*/
-static inline void MPV_motion(MpegEncContext *s,
+static av_always_inline void MPV_motion_internal(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr, int dir,
uint8_t **ref_picture,
op_pixels_func (*pix_op)[4],
- qpel_mc_func (*qpix_op)[16])
+ qpel_mc_func (*qpix_op)[16], int is_mpeg12)
{
int dxy, mx, my, src_x, src_y, motion_x, motion_y;
int mb_x, mb_y, i;
@@ -633,7 +651,7 @@
prefetch_motion(s, ref_picture, dir);
- if(s->obmc && s->pict_type != FF_B_TYPE){
+ if(!is_mpeg12 && s->obmc && s->pict_type != FF_B_TYPE){
int16_t mv_cache[4][4][2];
const int xy= s->mb_x + s->mb_y*s->mb_stride;
const int mot_stride= s->b8_stride;
@@ -704,12 +722,12 @@
gmc_motion(s, dest_y, dest_cb, dest_cr,
ref_picture);
}
- }else if(s->quarter_sample){
+ }else if(!is_mpeg12 && s->quarter_sample){
qpel_motion(s, dest_y, dest_cb, dest_cr,
0, 0, 0,
ref_picture, pix_op, qpix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16);
- }else if(ENABLE_WMV2 && s->mspel){
+ }else if(!is_mpeg12 && ENABLE_WMV2 && s->mspel){
ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
ref_picture, pix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16);
@@ -722,6 +740,7 @@
}
break;
case MV_TYPE_8X8:
+ if (!is_mpeg12) {
mx = 0;
my = 0;
if(s->quarter_sample){
@@ -775,10 +794,11 @@
if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
+ }
break;
case MV_TYPE_FIELD:
if (s->picture_structure == PICT_FRAME) {
- if(s->quarter_sample){
+ if(!is_mpeg12 && s->quarter_sample){
for(i=0; i<2; i++){
qpel_motion(s, dest_y, dest_cb, dest_cr,
1, i, s->field_select[dir][i],
@@ -862,4 +882,20 @@
}
}
+static inline void MPV_motion(MpegEncContext *s,
+ uint8_t *dest_y, uint8_t *dest_cb,
+ uint8_t *dest_cr, int dir,
+ uint8_t **ref_picture,
+ op_pixels_func (*pix_op)[4],
+ qpel_mc_func (*qpix_op)[16])
+{
+#ifndef CONFIG_SMALL
+ if(s->out_format == FMT_MPEG1)
+ MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
+ ref_picture, pix_op, qpix_op, 1);
+ else
+#endif
+ MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
+ ref_picture, pix_op, qpix_op, 0);
+}
#endif /* FFMPEG_MPEGVIDEO_COMMON_H */
-------------- next part --------------
Index: libavcodec/mpegvideo.c
===================================================================
--- libavcodec/mpegvideo.c (revision 12790)
+++ libavcodec/mpegvideo.c (working copy)
@@ -1737,7 +1737,7 @@
*/
static av_always_inline
void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
- int lowres_flag)
+ int lowres_flag, int is_mpeg12)
{
int mb_x, mb_y;
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
@@ -1764,7 +1764,7 @@
/* update DC predictors for P macroblocks */
if (!s->mb_intra) {
- if (s->h263_pred || s->h263_aic) {
+ if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
if(s->mbintra_table[mb_xy])
ff_clean_intra_table_entries(s);
} else {
@@ -1773,7 +1773,7 @@
s->last_dc[2] = 128 << s->intra_dc_precision;
}
}
- else if (s->h263_pred || s->h263_aic)
+ else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
s->mbintra_table[mb_xy]=1;
if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
@@ -1888,7 +1888,7 @@
add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
}
}
- } else if(s->codec_id != CODEC_ID_WMV2){
+ } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
add_dct(s, block[0], 0, dest_y , dct_linesize);
add_dct(s, block[1], 1, dest_y + block_size, dct_linesize);
add_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize);
@@ -1979,8 +1979,14 @@
}
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
- if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
- else MPV_decode_mb_internal(s, block, 0);
+#ifndef CONFIG_SMALL
+ if(s->out_format == FMT_MPEG1) {
+ if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
+ else MPV_decode_mb_internal(s, block, 0, 1);
+ } else
+#endif
+ if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
+ else MPV_decode_mb_internal(s, block, 0, 0);
}
/**
-------------- next part --------------
Index: libavcodec/mpegvideo_common.h
===================================================================
--- libavcodec/mpegvideo_common.h (revision 12790)
+++ libavcodec/mpegvideo_common.h (working copy)
@@ -237,13 +237,12 @@
return emu;
}
-/* apply one mpeg motion vector to the three components */
static av_always_inline
-void mpeg_motion(MpegEncContext *s,
+void mpeg_motion_internal(MpegEncContext *s,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int field_based, int bottom_field, int field_select,
uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
- int motion_x, int motion_y, int h)
+ int motion_x, int motion_y, int h, int is_mpeg12)
{
uint8_t *ptr_y, *ptr_cb, *ptr_cr;
int dxy, uvdxy, mx, my, src_x, src_y,
@@ -265,7 +264,7 @@
src_x = s->mb_x* 16 + (motion_x >> 1);
src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
- if (s->out_format == FMT_H263) {
+ if (!is_mpeg12 && s->out_format == FMT_H263) {
if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
mx = (motion_x>>1)|(motion_x&1);
my = motion_y >>1;
@@ -277,7 +276,7 @@
uvsrc_x = src_x>>1;
uvsrc_y = src_y>>1;
}
- }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
+ }else if(!is_mpeg12 && s->out_format == FMT_H261){//even chroma mv's are full pel in H261
mx = motion_x / 4;
my = motion_y / 4;
uvdxy = 0;
@@ -312,7 +311,7 @@
if( (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
|| (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
- if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
+ if(is_mpeg12 || s->codec_id == CODEC_ID_MPEG2VIDEO ||
s->codec_id == CODEC_ID_MPEG1VIDEO){
av_log(s->avctx,AV_LOG_DEBUG,
"MPEG motion vector out of boundary\n");
@@ -360,11 +359,30 @@
pix_op[s->chroma_x_shift][uvdxy]
(dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
}
- if((ENABLE_H261_ENCODER || ENABLE_H261_DECODER) &&
+ if(!is_mpeg12 && (ENABLE_H261_ENCODER || ENABLE_H261_DECODER) &&
s->out_format == FMT_H261){
ff_h261_loop_filter(s);
}
}
+/* apply one mpeg motion vector to the three components */
+static av_always_inline
+void mpeg_motion(MpegEncContext *s,
+ uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+ int field_based, int bottom_field, int field_select,
+ uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
+ int motion_x, int motion_y, int h)
+{
+#ifndef CONFIG_SMALL
+ if(s->out_format == FMT_MPEG1)
+ mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, field_based,
+ bottom_field, field_select, ref_picture, pix_op,
+ motion_x, motion_y, h, 1);
+ else
+#endif
+ mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, field_based,
+ bottom_field, field_select, ref_picture, pix_op,
+ motion_x, motion_y, h, 0);
+}
//FIXME move to dsputil, avg variant, 16x16 version
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
More information about the ffmpeg-devel
mailing list