[FFmpeg-devel] [PATCH 01/14] avcodec/vc1: re-implement and expand VC-1 overlap smoothing
Jerome Borsboom
jerome.borsboom at carpalis.nl
Mon Apr 23 21:58:31 EEST 2018
The existing implementation did overlap smoothing for progressive
frames only. This rewritten version implements overlap smoothing
for all applicable frame types for both progressive and
frame/field-interlace.
Signed-off-by: Jerome Borsboom <jerome.borsboom at carpalis.nl>
---
This patch-set improves the VC-1 software decoder to the point where the fate
checksums are equal to checksums from the Intel hardware decoded image on Haswell.
libavcodec/vc1.h | 2 +
libavcodec/vc1_loopfilter.c | 94 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 96 insertions(+)
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index 8fc0729cb8..85504c2f9f 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -425,6 +425,8 @@ void ff_vc1_decode_blocks(VC1Context *v);
void ff_vc1_loop_filter_iblk(VC1Context *v, int pq);
void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq);
void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v);
+void ff_vc1_i_overlap_filter(VC1Context *v);
+void ff_vc1_p_overlap_filter(VC1Context *v);
void ff_vc1_apply_p_loop_filter(VC1Context *v);
void ff_vc1_mc_1mv(VC1Context *v, int dir);
diff --git a/libavcodec/vc1_loopfilter.c b/libavcodec/vc1_loopfilter.c
index 025776bac9..3122b1a258 100644
--- a/libavcodec/vc1_loopfilter.c
+++ b/libavcodec/vc1_loopfilter.c
@@ -208,6 +208,100 @@ void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
}
}
+/**
+ * Run horizontal overlap smoothing across the left edge of one 8x8 block.
+ *
+ * @param v           decoder context; supplies the vc1_h_s_overlap DSP routine
+ * @param left_block  coefficient blocks of the MB that may hold the left neighbour
+ * @param right_block coefficient blocks of the MB that holds block_num
+ * @param block_num   index of the block whose left edge is smoothed
+ *
+ * When both pointers are equal (no distinct left MB), only blocks 1 and 3
+ * are processed: their left neighbour is taken from right_block itself.
+ */
+static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
+                                                  int16_t (*right_block)[64], int block_num)
+{
+    int16_t *left;
+
+    /* Without a distinct left MB only the MB-internal edges can be smoothed. */
+    if (left_block == right_block && (block_num & 5) != 1)
+        return;
+
+    if (block_num > 3)
+        /* blocks 4 and 5: neighbour is the same block index of the left MB */
+        left = left_block[block_num];
+    else if (block_num & 1)
+        /* blocks 1 and 3: neighbour is the preceding block of the same MB */
+        left = right_block[block_num - 1];
+    else
+        /* blocks 0 and 2: neighbour is the following block index of the left MB */
+        left = left_block[block_num + 1];
+
+    v->vc1dsp.vc1_h_s_overlap(left, right_block[block_num]);
+}
+
+/**
+ * Run vertical overlap smoothing across the top edge of one 8x8 block.
+ *
+ * @param v            decoder context; supplies the vc1_v_s_overlap DSP routine
+ * @param top_block    coefficient blocks of the MB that may hold the upper neighbour
+ * @param bottom_block coefficient blocks of the MB that holds block_num
+ * @param block_num    index of the block whose top edge is smoothed
+ *
+ * When both pointers are equal (no distinct upper MB), only blocks with
+ * bit 1 of block_num set are processed: their upper neighbour is taken from
+ * bottom_block itself.
+ */
+static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
+                                                  int16_t (*bottom_block)[64], int block_num)
+{
+    int16_t *top;
+
+    /* Without a distinct upper MB only the MB-internal edges can be smoothed. */
+    if (top_block == bottom_block && !(block_num & 2))
+        return;
+
+    if (block_num > 3)
+        /* blocks 4 and 5: neighbour is the same block index of the upper MB */
+        top = top_block[block_num];
+    else if (block_num & 2)
+        /* blocks 2 and 3: neighbour is two indices back in the same MB */
+        top = bottom_block[block_num - 2];
+    else
+        /* blocks 0 and 1: neighbour is two indices ahead in the upper MB */
+        top = top_block[block_num + 2];
+
+    v->vc1dsp.vc1_v_s_overlap(top, bottom_block[block_num]);
+}
+
+/**
+ * Apply overlap smoothing around the macroblock at (s->mb_x, s->mb_y),
+ * with the decision driven by v->pq, v->condover and v->over_flags_plane.
+ *
+ * An edge is smoothed when v->pq >= 9, when v->condover == CONDOVER_ALL, or
+ * when the v->over_flags_plane entries of the MB(s) on both sides of the
+ * edge are nonzero.
+ *
+ * Edges that have no neighbour (left edge in the first MB column, top edge
+ * on the first slice line) are skipped up front. The overlap helpers ignore
+ * those edges anyway, so skipping them changes no output; it only avoids
+ * evaluating v->over_flags_plane[] at mb_pos - 1 or mb_pos - mb_stride for a
+ * neighbour that does not exist, which can index before the start of the
+ * plane.
+ * NOTE(review): this assumes s->first_slice_line is set whenever
+ * s->mb_y == 0 -- confirm against the slice decoding loop.
+ *
+ * @param v decoder context
+ */
+void ff_vc1_i_overlap_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+    int i;
+
+    topleft_blk = v->block[v->topleft_blk_idx];
+    top_blk     = v->block[v->top_blk_idx];
+    left_blk    = v->block[v->left_blk_idx];
+    cur_blk     = v->block[v->cur_blk_idx];
+
+    /* Within a MB, the horizontal overlap always runs before the vertical.
+     * To accomplish that, we run the H on the left and internal vertical
+     * borders of the currently decoded MB. Then, we wait for the next overlap
+     * iteration to do H overlap on the right edge of this MB, before moving
+     * over and running the V overlap on the top and internal horizontal
+     * borders. Therefore, the H overlap trails by one MB col and the
+     * V overlap trails by one MB row. This is reflected in the time at which
+     * we run the put_pixels loop, i.e. delayed by one row and one column. */
+    for (i = 0; i < block_count; i++) {
+        /* First MB column: only the MB-internal vertical edges (blocks 1
+         * and 3) have a left neighbour. */
+        if (s->mb_x == 0 && (i & 5) != 1)
+            continue;
+
+        if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
+            (v->over_flags_plane[mb_pos] &&
+             ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1])))
+            vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
+    }
+
+    if (v->fcm != ILACE_FRAME)
+        for (i = 0; i < block_count; i++) {
+            /* First slice line: only the MB-internal horizontal edges
+             * (blocks 2 and 3) have an upper neighbour. */
+            if (s->first_slice_line && !(i & 2))
+                continue;
+
+            /* V overlap for the MB to the left, whose H overlap is complete. */
+            if (s->mb_x && (v->pq >= 9 || v->condover == CONDOVER_ALL ||
+                            (v->over_flags_plane[mb_pos - 1] &&
+                             ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))
+                vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
+
+            /* Last MB column: no later call will trail this MB, so finish
+             * its V overlap now. */
+            if (s->mb_x == s->mb_width - 1)
+                if (v->pq >= 9 || v->condover == CONDOVER_ALL ||
+                    (v->over_flags_plane[mb_pos] &&
+                     ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride])))
+                    vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
+        }
+}
+
+/**
+ * Apply overlap smoothing around the macroblock at (s->mb_x, s->mb_y),
+ * with the decision driven by the per-block flags in v->mb_type[0][].
+ *
+ * An edge is smoothed only when the v->mb_type[0][] entries of the blocks
+ * on BOTH sides of it are nonzero.
+ * NOTE(review): presumably a nonzero entry marks an intra-coded block --
+ * confirm against the code that fills v->mb_type[0].
+ *
+ * Differences from a naive per-edge test:
+ *  - Edges without a neighbour (left edge in the first MB column, top edge
+ *    on the first slice line) are skipped explicitly; the overlap helpers
+ *    ignore them anyway.
+ *  - MB-internal edges always check the neighbouring block of the same MB,
+ *    also in the first MB column and on the first slice line.
+ *  - The vertical pass for the left MB inspects block i of that MB and the
+ *    block above it, addressed as block_index[i] - 2 for luma and
+ *    block_index[i] - 1 for chroma.
+ *    NOTE(review): this assumes the luma entries of s->block_index[] advance
+ *    by two per MB and the chroma entries by one -- confirm against the code
+ *    that updates s->block_index[].
+ *
+ * @param v decoder context
+ */
+void ff_vc1_p_overlap_filter(VC1Context *v)
+{
+    MpegEncContext *s = &v->s;
+    int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int i;
+
+    topleft_blk = v->block[v->topleft_blk_idx];
+    top_blk     = v->block[v->top_blk_idx];
+    left_blk    = v->block[v->left_blk_idx];
+    cur_blk     = v->block[v->cur_blk_idx];
+
+    for (i = 0; i < block_count; i++) {
+        /* First MB column: only the MB-internal vertical edges (blocks 1
+         * and 3) have a left neighbour. */
+        if (s->mb_x == 0 && (i & 5) != 1)
+            continue;
+
+        /* block_index[i] - 1 is the block directly left of block i. */
+        if (v->mb_type[0][s->block_index[i]] &&
+            v->mb_type[0][s->block_index[i] - 1])
+            vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i);
+    }
+
+    if (v->fcm != ILACE_FRAME)
+        for (i = 0; i < block_count; i++) {
+            /* Offset from block i of this MB to block i of the left MB. */
+            int left_off = 2 - (i > 3);
+
+            /* First slice line: only the MB-internal horizontal edges
+             * (blocks 2 and 3) have an upper neighbour. */
+            if (s->first_slice_line && !(i & 2))
+                continue;
+
+            /* V overlap for the MB to the left, whose H overlap is complete. */
+            if (s->mb_x &&
+                v->mb_type[0][s->block_index[i] - left_off] &&
+                v->mb_type[0][s->block_index[i] - s->block_wrap[i] - left_off])
+                vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
+
+            /* Last MB column: no later call will trail this MB, so finish
+             * its V overlap now. */
+            if (s->mb_x == s->mb_width - 1)
+                if (v->mb_type[0][s->block_index[i]] &&
+                    v->mb_type[0][s->block_index[i] - s->block_wrap[i]])
+                    vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
+        }
+}
+
static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num)
{
MpegEncContext *s = &v->s;
--
2.13.6
More information about the ffmpeg-devel
mailing list