[FFmpeg-devel] [PATCH/RFC] H.264 FMO+ASO decoding
Michael Niedermayer
michaelni
Sun Sep 26 16:43:45 CEST 2010
On Wed, Jul 21, 2010 at 10:20:36PM +0200, Stefan Gehrer wrote:
> Hi,
>
> attached is a patch that implements the header stuff for FMO decoding
> in H.264 baseline streams. It decodes the slice group map and also
> provides means that at the start of a slice the actual x/y position
> of the first macroblock can be determined from first_mb_in_slice.
> If this is done, slices can be decoded out of order, i.e.
> first_mb_in_slice does not have to increase for slices of the same
> picture (aka ASO).
> Let's take as example a tiny video of 4x3 macroblocks which
> has a slice group map with two slice groups like this:
>
> 1 1 1 1
> 1 0 0 1
> 1 1 1 1
>
> Each slice group itself (0 or 1) is decoded in raster-scan order,
> so that the macroblock addresses are as follows:
>
> 2 3 4 5
> 6 0 1 7
> 8 9 10 11
>
> So if a slice comes along with first_mb_in_slice equal to 7 we need
> to start decoding at MB position x=3 and y=1.
>
> Unfortunately, the real challenge starts here. A lot of neighbor
> context handling (i4x4 modes, non-zero counts, MVs) has to be
> handled differently when the assumption of raster-scan order of
> macroblocks is not true anymore. Also, deblocking can only be done
> after the picture has been fully decoded. This is because deblocking
> goes across slice boundaries and slice group boundaries and the
> neighbor MB might just happen to be the last to be decoded in the
> picture.
> Considering the heavy optimizations that have been done in the
> normal decoding paths I guess there would be some outcry if in
> many places in the MB decoding a conditional like
> if(pps->slice_group_count > 1)
> would appear.
> So my feeling is that if FMO is to be implemented it may be best
> to have a new code path for the slice data decoding, a
> baseline-FMO-specific version of decode_slice() and some of its
> subfunctions maybe?
> Opinions welcome.
my guess is you won't finish FMO
also we need templating for it ...
still, parts of your patch could be useful and move us a tiny step closer to
FMO support
>
> Stefan
> h264.c | 32 +++++++---
> h264.h | 6 +
> h264_ps.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++------------
> 3 files changed, 182 insertions(+), 43 deletions(-)
> 6228649db31722b1b6bbf9ff33a04c52d2baaf14 h264_fmo.diff
> diff --git a/libavcodec/h264.c b/libavcodec/h264.c
> index d1662fc..bfb65d6 100644
> --- a/libavcodec/h264.c
> +++ b/libavcodec/h264.c
> @@ -1968,8 +1968,6 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
> av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
> return -1;
> }
> - s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
> - s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
> if (s->picture_structure == PICT_BOTTOM_FIELD)
> s->resync_mb_y = s->mb_y = s->mb_y + 1;
> assert(s->mb_y < s->mb_height);
> @@ -2153,11 +2151,18 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
> }
> h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
>
> -#if 0 //FMO
> - if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
> - slice_group_change_cycle= get_bits(&s->gb, ?);
> -#endif
> + if(h->pps.slice_group_count > 1){
> + int addr = -1;
>
> + if(h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
> + ff_h264_draw_slice_group(h, &h->pps, s->mb_width, s->mb_height);
> + h->slice_group_current = 0;
> + for(j=0;j<=first_mb_in_slice;j++)
> + addr = ff_h264_fmo_next_mb(h, addr);
this is too slow with many slices and groups
> + first_mb_in_slice = addr;
> + }
> + s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
> + s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
> h0->last_slice_type = slice_type;
> h->slice_num = ++h0->current_slice;
> if(h->slice_num >= MAX_SLICES){
> @@ -2608,6 +2613,14 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
> return -1;
> }
>
> + if(h->pps.slice_group_count > 1){
> + int addr = ff_h264_fmo_next_mb(h, s->mb_y*s->mb_width+s->mb_x);
> +
> + if(addr < 0)
> + goto end;
> + s->mb_x = addr % s->mb_width;
> + s->mb_y = (addr / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
> + } else
> if(++s->mb_x >= s->mb_width){
> s->mb_x=0;
> loop_filter(h);
> @@ -2620,7 +2633,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
> }
> if(s->mb_y >= s->mb_height){
> tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
> -
> + end:
> if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
> ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
>
I am not a fan of this kind of goto spaghetti code
> @@ -3364,8 +3377,11 @@ av_cold void ff_h264_free_context(H264Context *h)
> for(i = 0; i < MAX_SPS_COUNT; i++)
> av_freep(h->sps_buffers + i);
>
> - for(i = 0; i < MAX_PPS_COUNT; i++)
> + for(i = 0; i < MAX_PPS_COUNT; i++){
> + if(h->pps_buffers[i])
> + av_free(h->pps_buffers[i]->slice_group_map);
> av_freep(h->pps_buffers + i);
> + }
> }
>
> av_cold int ff_h264_decode_end(AVCodecContext *avctx)
> diff --git a/libavcodec/h264.h b/libavcodec/h264.h
> index 7158d97..db2ec75 100644
> --- a/libavcodec/h264.h
> +++ b/libavcodec/h264.h
> @@ -220,6 +220,9 @@ typedef struct PPS{
> int pic_order_present; ///< pic_order_present_flag
> int slice_group_count; ///< num_slice_groups_minus1 + 1
> int mb_slice_group_map_type;
> + uint8_t *slice_group_map;
> + int slice_group_dir_flag;
> + int slice_group_change_rate;
> unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
> int weighted_pred; ///< weighted_pred_flag
> int weighted_bipred_idc;
> @@ -589,6 +592,7 @@ typedef struct H264Context{
> int sei_buffering_period_present; ///< Buffering period SEI flag
> int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
>
> + int slice_group_current;
> //SVQ3 specific fields
> int halfpel_flag;
> int thirdpel_flag;
> @@ -619,6 +623,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h);
> */
> int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length);
>
> +int ff_h264_fmo_next_mb(H264Context *h, int addr);
> +int ff_h264_draw_slice_group(H264Context *h, PPS *pps, int width, int height);
> /**
> * Decode a network abstraction layer unit.
> * @param consumed is the number of bytes used as input
> diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
> index 7648e2c..811d81d 100644
> --- a/libavcodec/h264_ps.c
> +++ b/libavcodec/h264_ps.c
> @@ -420,6 +420,143 @@ build_qp_table(PPS *pps, int t, int index)
> pps->chroma_qp_table[t][i] = ff_h264_chroma_qp[av_clip(i + index, 0, 51)];
> }
>
> +static int first_in_group(H264Context *h){
> + int map_units = h->s.mb_width * h->s.mb_height;
> + int i;
> +
> + for(i=0;i<map_units;i++){
> + if(h->pps.slice_group_map[i] == h->slice_group_current)
> + return i;
> + }
> + return -1;
indentation
> +}
> +
> +int ff_h264_fmo_next_mb(H264Context *h, int addr){
> + int map_units = h->s.mb_width * h->s.mb_height;
> +
> + while(h->pps.slice_group_map[++addr] != h->slice_group_current){
> + if(addr == map_units){
> + h->slice_group_current++;
> + if(h->slice_group_current == h->pps.slice_group_count)
> + return -1;
> + addr = first_in_group(h);
> + }
> + }
this is not efficient if there are many groups, if I understand it correctly,
and you call it per MB so it must be efficient
> + return addr;
> +}
> +
> +int ff_h264_draw_slice_group(H264Context *h, PPS *pps, int width, int height){
> + MpegEncContext * const s = &h->s;
> + int map_units = width * height;
> + int group, length, bits, x, y, i = 0;
> + int top, left, bottom, right, xdir, ydir, vacant;
> + int run_length[8];
> + int dir_flag = pps->slice_group_dir_flag;
> +
> + switch(pps->mb_slice_group_map_type) {
> + case 0: /* interleaved slice group map */
> + for(group = 0; group < pps->slice_group_count; group++)
> + run_length[group] = get_ue_golomb(&s->gb) + 1;
> + do {
> + for(group=0; group<pps->slice_group_count && i<map_units;
> + group++) {
> + memset(pps->slice_group_map + i, group,
> + FFMIN(run_length[group], map_units - i));
> + i += run_length[group];
> + }
> + } while(i < map_units);
> + break;
> + case 1: /* dispersed slice group map */
> + for(y=0;y<height; y++)
> + for(x=0;x<width;x++)
> + pps->slice_group_map[i++] =
> + ( x + ((y * pps->slice_group_count)>>1) ) %
> + pps->slice_group_count;
please don't do % per MB
similar comments about efficiency apply to other parts too
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
The misfortune of the wise is better than the prosperity of the fool.
-- Epicurus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100926/5962fd35/attachment.pgp>
More information about the ffmpeg-devel
mailing list