[FFmpeg-devel] [PATCH] H264 DXVA2 implementation

Thu Jan 7 22:05:49 CET 2010

On Thu, Jan 07, 2010 at 08:36:11PM +0100, Laurent Aimar wrote:
> +    if (size <= dxva_size) {
> +        memcpy(dxva_data, data, size);
> +
> +        memset(dsc, 0, sizeof(*dsc));
> +        dsc->CompressedBufferType = type;
> +        dsc->DataSize             = size;
> +        dsc->NumMBsInBuffer       = mb_count;
> +    }
> +    if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx->decoder, type))) {
> +        av_log(avctx, AV_LOG_DEBUG, "Failed to release buffer type %d\n", type);
> +        return -1;
> +    }
> +    if (dxva_size < size) {
> +        av_log(avctx, AV_LOG_DEBUG, "Buffer for type %d was too small\n", type);
> +        return -1;
> +    }

This is the opposite of condition above, so use an else above (yes, I know why
you did it like this, but I am sure this can be done nicer.

> +    pp->residual_colour_transform_flag=
> +        (h->sps.profile_idc >= 100 &&
> +         h->sps.chroma_format_idc == 3) ? h->sps.residual_color_transform_flag : 0;

Any reason why the sps reading code shouldn't just apply this condition?

> +    if (h->sps.profile_idc >= 100) {
> +        pp->bit_depth_luma_minus8     = h->sps.bit_depth_luma - 8;
> +        pp->bit_depth_chroma_minus8   = h->sps.bit_depth_chroma - 8;
> +    } else {
> +        pp->bit_depth_luma_minus8     = 0;
> +        pp->bit_depth_chroma_minus8   = 0;
> +    }

Again, why not in sps reading code (the condition, not the - 8)?

> +    for (i = 0; i < 6; i++)
> +        for (j = 0; j < 16; j++)
> +            qm->bScalingLists4x4[i][j] = h->pps.scaling_matrix4[i][zigzag_scan[j]];
> +
> +    for (i = 0; i < 2; i++)
> +        for (j = 0; j < 64; j++)
> +            qm->bScalingLists8x8[i][j] = h->pps.scaling_matrix8[i][ff_zigzag_direct[j]];

Swap the loop, and in the second case unroll it.
I doubt speed matters, but then you can do the zigzag lookup in the outer loop
which is more readable IMO.

> +    switch (h->slice_type) {
> +    case FF_P_TYPE:  slice->slice_type = 0; break;
> +    case FF_B_TYPE:  slice->slice_type = 1; break;
> +    case FF_I_TYPE:  slice->slice_type = 2; break;
> +    case FF_SP_TYPE: slice->slice_type = 3; break;
> +    case FF_SI_TYPE: slice->slice_type = 4; break;

There are no constants defined for those values?

> +    for (list = 0; list < 2; list++) {
> +        unsigned i;
> +        for (i = 0; i < FF_ARRAY_ELEMS(slice->RefPicList[list]); i++) {
> +            if (list < h->list_count && i < h->ref_count[list]) {
> +                const Picture *r = &h->ref_list[list][i];
> +                unsigned plane;
> +                slice->RefPicList[list][i].Index7Bits     = get_surface_index(ctx, r);
> +                slice->RefPicList[list][i].AssociatedFlag = r->reference == PICT_BOTTOM_FIELD;
> +                if (h->luma_weight_flag[list]) {
> +                    slice->Weights[list][i][0][0] = h->luma_weight[list][i];
> +                    slice->Weights[list][i][0][1] = h->luma_offset[list][i];
> +                } else {
> +                    slice->Weights[list][i][0][0] = 1 << h->luma_log2_weight_denom;
> +                    slice->Weights[list][i][0][1] = 0;
> +                }
> +                for (plane = 1; plane < 3; plane++) {
> +                    int w, o;
> +                    if (h->chroma_weight_flag[list]) {
> +                        w = h->chroma_weight[list][i][plane-1];
> +                        o = h->chroma_offset[list][i][plane-1];
> +                    } else {
> +                        w = 1 << h->chroma_log2_weight_denom;
> +                        o = 0;
> +                    }
> +                    slice->Weights[list][i][plane][0] = w;
> +                    slice->Weights[list][i][plane][1] = o;
> +                }
> +            } else {
> +                unsigned plane;
> +                slice->RefPicList[list][i].bPicEntry = 0xff;
> +                for (plane = 0; plane < 3; plane++) {
> +                    slice->Weights[list][i][plane][0] = 0;
> +                    slice->Weights[list][i][plane][1] = 0;
> +                }
> +            }
> +        }
> +    }

Maybe it would be better to split this and the other one like it into
two parts, one initialization looping from 0 to FF_ARRAY_ELEMS..
and one setup lopping from 0 till h->ref_count

> +    if (h->pps.redundant_pic_cnt_present)
> +        slice->redundant_pic_cnt = h->redundant_pic_count;

Why/how can h->redundant_pic_count have the wrong value if h->pps.redundant_pic_cnt_present is 0?

> +    slice->cabac_init_idc = h->pps.cabac ? h->cabac_init_idc : 0;

Same.

> +    if (h->deblocking_filter < 2)
> +        slice->disable_deblocking_filter_idc = 1 - h->deblocking_filter;

Does this intentionally cover values < 0 or wouldn't maybe !h->deblocking_filter be clearer?

> +/* */
> +static int start_frame(AVCodecContext *avctx,
> +                       av_unused const uint8_t *buffer,
> +                       av_unused uint32_t size)

Not a helpful comment

> +    /* Create an annexe B bitstream buffer with only slice NAL and finalize slice */

A e to much in annex

> +#define OFFSET_OF(t,f) ((intptr_t)&((t*)NULL)->f)
> +            assert(OFFSET_OF(DXVA_Slice_H264_Short, BSNALunitDataLocation) == 
> +                   OFFSET_OF(DXVA_Slice_H264_Long,  BSNALunitDataLocation));
> +            assert(OFFSET_OF(DXVA_Slice_H264_Short, SliceBytesInBuffer) == 
> +                   OFFSET_OF(DXVA_Slice_H264_Long,  SliceBytesInBuffer));
> +#undef OFFSET_OF

There is offsetof which probably works at least as well as this hack which
invokes undefined behaviour.

> +            if (i == ctx_pic->slice_count - 1)
> +                padding = 128 - ((&current[start_code_size + size] - dxva_data) % 128);

Why not & 127?
If you use % I think a lot of people familiar with FFmpeg will think you need it for
its different behaviour with negative values.

> +            if (&current[start_code_size + size] > &dxva_data[dxva_size]) {

Pointlessly using & and [] instead of just adding pointers?
Also, sure this does not invoke undefined behaviour (i.e. is
current + start_code_size + size at most one after the last element of the allocated
memory?)

> +        if (!FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx->decoder,
> +                                                       DXVA2_BitStreamDateBufferType))) {
> +            DXVA2_DecodeBufferDesc *dsc = &buffer[buffer_count++];
> +            memset(dsc, 0, sizeof(*dsc));
> +            dsc->CompressedBufferType = DXVA2_BitStreamDateBufferType;
> +            dsc->DataSize             = current - dxva_data;
> +            dsc->NumMBsInBuffer       = mb_count;
> +        }
> +        else
> +            av_log(avctx, AV_LOG_DEBUG, "Failed to add BS\n");

And I don't think it is a good idea to just silently swallow all those errors.

> +    memset(&exe, 0, sizeof(exe));

exe IMO is bad name.