[FFmpeg-devel] [PATCH][VAAPI][2/6] Add common data structures and helpers (take 5)

Tue Mar 10 22:20:44 CET 2009

On Tue, Mar 10, 2009 at 04:28:20PM +0100, Gwenole Beauchesne wrote:
> On Tue, 10 Mar 2009, Gwenole Beauchesne wrote:
>
>> On Tue, 10 Mar 2009, Gwenole Beauchesne wrote:
>>
>>> On Sat, 7 Mar 2009, Michael Niedermayer wrote:
>>>
>>>>>>> +    }
>>>>>>> +    memcpy(rds->slice_data + rds->slice_data_size, buffer, size);
>>>>>>> +    rds->slice_data_size += size;
>>>>>>
>>>>>> also whats the point in allocating a buffer and memcpy into it that is
>>>>>> compared to using the original?
>>>>>
>>>>> We can be memcpy()'ing into mapped memory from HW accelerator. For the
>>>>
>>>> i dont mind memcpy() into memory from the accelerator, i do mind if this
>>>> could be memcpy into normal memory, it simply seems unneeded in that 
>>>> case
>>>> ...
>>>
>>> Dropped all memcpy() but there is no striking difference in 
>>> performance...
>>>
>>> On a 1080p H.263 sample (Grey.ts):
>>> - before: A:40347.6 V:40348.9 A-V: -1.317 ct:  1.247 904/904 13%  1%  
>>> 3.1% 8
>>> 0
>>> - after : A:40347.6 V:40348.9 A-V: -1.317 ct:  1.247 904/904 13%  1%  
>>> 3.0% 8
>>> 0
>>>
>>> On a 1080p MPEG-2 sample:
>>> - before: A:24956.4 V:24956.5 A-V: -0.047 ct: -0.862 828/828 18%  2%  
>>> 4.7% 5
>>> 0
>>> - after : A:24956.4 V:24956.5 A-V: -0.046 ct: -0.862 828/828 19%  1%  
>>> 4.7% 6
>>> 0
>>
>> Here is another one:
>> - Dropped calls to alloca()
>> - Simplify slice buffer IDs management
>>
>> I don't think I can make it smaller.
>
> It seems I could by forgetting about the attachement. ;-)

[...]
> +/** Initialize VA API render state */
> +static int vaapi_render_state_init(struct vaapi_render_state_private *rds, Picture *pic)
> +{
> +    struct vaapi_render_state * const r = ff_get_vaapi_render_state(pic);
> +
> +    assert(r && r->va_context);
> +    rds->display                = r->va_context->display;
> +    rds->context_id             = r->va_context->context_id;
> +    rds->surface                = r->surface;

> +    rds->pic_param_buf_id       = 0;
> +    rds->iq_matrix_buf_id       = 0;
> +    rds->bitplane_buf_id        = 0;
> +    rds->slice_buf_ids          = NULL;
> +    rds->n_slice_buf_ids        = 0;
> +    rds->n_slice_buf_ids_alloc  = 0;
> +    rds->slice_params           = NULL;
> +    rds->n_slice_params_alloc   = 0;
> +    rds->slice_count            = 0;
> +    rds->slice_data             = NULL;
> +    rds->slice_data_size        = 0;

memset(0) seems simpler if these are not guranteed to be allocated by
av_mallocz() or another zeroing alloc, but guranteeing this seems like
the best choice ...

> +    return 0;
> +}
> +

> +/** Destroy VA API render state buffers */
> +static int vaapi_render_state_fini(struct vaapi_render_state_private *rds)
> +{
> +    unsigned int i;
> +
> +    if (rds->pic_param_buf_id) {
> +        vaDestroyBuffer(rds->display, rds->pic_param_buf_id);
> +        rds->pic_param_buf_id = 0;
> +    }
> +    if (rds->iq_matrix_buf_id) {
> +        vaDestroyBuffer(rds->display, rds->iq_matrix_buf_id);
> +        rds->iq_matrix_buf_id = 0;
> +    }
> +    if (rds->bitplane_buf_id) {
> +        vaDestroyBuffer(rds->display, rds->bitplane_buf_id);
> +        rds->bitplane_buf_id = 0;
> +    }
> +    if (rds->slice_buf_ids) {
> +        for (i = 0; i < rds->n_slice_buf_ids; i++) {
> +            vaDestroyBuffer(rds->display, rds->slice_buf_ids[i]);
> +            rds->slice_buf_ids[i] = 0;
> +        }
> +        av_freep(&rds->slice_buf_ids);
> +    }
> +    av_freep(&rds->slice_params);
> +    return 0;
> +}

static destroy_buffer(VADisplay display, VABufferID *id){
    if(*id){
        vaDestroyBuffer(display, *id);
        *id=0;
    }
}

would greatly simplify above

> +
> +/** Render the picture */
> +static int vaapi_render_picture(struct vaapi_render_state_private *rds)

whats the sense in the vaapi prefix in a static function in vappi.c ?

> +{
> +    VABufferID va_buffers[3];
> +    unsigned int n_va_buffers = 0;
> +

> +    /* Ensure context is sane */
> +    if (rds->pic_param_buf_id == 0)
> +        return -1;
> +    if (rds->n_slice_buf_ids == 0)
> +        return -1;

can be merged

[...]
> +    slice_params = rds->slice_params;
> +    if (rds->slice_count >= rds->n_slice_params_alloc) {
> +        rds->n_slice_params_alloc += 16;
> +        slice_params = av_realloc(rds->slice_params,
> +                                  rds->n_slice_params_alloc * rds->slice_param_size);
> +        if (!slice_params) {
> +            av_free(rds->slice_params);
> +            return NULL;
> +        }
> +        rds->slice_params = slice_params;
> +    }

av_fast_realloc() might be usefull for this

> +    if (!slice_params)
> +        return NULL;
> +    assert(rds->slice_count < rds->n_slice_params_alloc);
> +
> +    slice_param = (VASliceParameterBufferBase *)(slice_params + rds->slice_count * rds->slice_param_size);
> +    slice_param->slice_data_size        = size;
> +    slice_param->slice_data_offset      = rds->slice_data_size;
> +    slice_param->slice_data_flag        = VA_SLICE_DATA_FLAG_ALL;
> +
> +    rds->slice_count++;
> +    rds->slice_data_size += size;
> +    return slice_param;
> +}
> +

> +int ff_vaapi_common_start_frame(AVCodecContext *avctx,
> +                                av_unused const uint8_t *buffer, av_unused uint32_t size)
> +{
> +    MpegEncContext * const s = avctx->priv_data;
> +    struct vaapi_render_state_private *rds;
> +
> +    dprintf(NULL, "ff_vaapi_common_start_frame()\n");

you have the codec context, it could be used instead of NULL

> +
> +    if ((rds = ff_get_vaapi_render_state_private(s->current_picture_ptr)) == NULL)
> +        return -1;

how can this be NULL?
if it cant the check is unneeded

> +
> +    if (vaapi_render_state_init(rds, s->current_picture_ptr) < 0)
> +        return -1;
> +
> +    switch (avctx->codec->id) {
> +    case CODEC_ID_MPEG2VIDEO:
> +        rds->slice_param_size   = sizeof(VASliceParameterBufferMPEG2);
> +        break;
> +    case CODEC_ID_MPEG4:
> +    case CODEC_ID_H263:
> +        rds->slice_param_size   = sizeof(VASliceParameterBufferMPEG4);
> +        break;
> +    case CODEC_ID_H264:
> +        rds->slice_param_size   = sizeof(VASliceParameterBufferH264);
> +        break;
> +    case CODEC_ID_VC1:
> +    case CODEC_ID_WMV3:
> +        rds->slice_param_size   = sizeof(VASliceParameterBufferVC1);
> +        break;
> +    default:
> +        assert(0);
> +        return -1;
> +    }

it might make sense to call

ff_vaapi_common_start_frame()
and set rds->slice_param_size afterwards from each of the start_frame
functions unless ff_vaapi_common_start_frame() alone is enough and there
are no individual start_frame ...

[...]
> +/** This structure holds all temporary data for VA API decoding */
> +struct vaapi_render_state_private {
> +    VADisplay   display;                ///< Window system dependent data (copied from vaapi_render_state->va_context)
> +    VAContextID context_id;             ///< Context ID (video decode pipeline, copied from vaapi_render_state->va_context)
> +    VASurfaceID surface;                ///< Rendering surface (copied from vaapi_render_state->va_context)
> +    VABufferID  pic_param_buf_id;       ///< VAPictureParameterBuffer ID
> +    VABufferID  iq_matrix_buf_id;       ///< VAIQMatrixBuffer ID
> +    VABufferID  bitplane_buf_id;        ///< VABitPlaneBuffer ID (for VC-1 decoding)
> +    VABufferID *slice_buf_ids;          ///< Slice parameter/data buffer IDs
> +    uint32_t    n_slice_buf_ids;        ///< Number of effective slice buffer IDs to send to the HW
> +    uint32_t    n_slice_buf_ids_alloc;  ///< Number of allocated slice buffer IDs
> +    union {
> +        VAPictureParameterBufferMPEG2 mpeg2;
> +        VAPictureParameterBufferMPEG4 mpeg4;
> +        VAPictureParameterBufferH264  h264;
> +        VAPictureParameterBufferVC1   vc1;
> +    } pic_param;                        ///< Plain VAPictureParameterBuffer info
> +    void       *slice_params;           ///< Pointer to VASliceParameterBuffers

> +    uint32_t    slice_param_size;       ///< Size of a VASliceParameterBuffer element
> +    uint32_t    n_slice_params_alloc;   ///< Number of allocated slice parameters
> +    uint32_t    slice_count;            ///< Number of slices currently filled in

unsigned int seems a more natural choice for these ...

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I have never wished to cater to the crowd; for what I know they do not
approve, and what they approve I do not know. -- Epicurus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090310/b9a5e709/attachment.pgp>