[FFmpeg-devel] [PATCH][VAAPI][2/6] Add common data structures and helpers (take 5)
Michael Niedermayer
michaelni
Tue Mar 10 22:20:44 CET 2009
On Tue, Mar 10, 2009 at 04:28:20PM +0100, Gwenole Beauchesne wrote:
> On Tue, 10 Mar 2009, Gwenole Beauchesne wrote:
>
>> On Tue, 10 Mar 2009, Gwenole Beauchesne wrote:
>>
>>> On Sat, 7 Mar 2009, Michael Niedermayer wrote:
>>>
>>>>>>> + }
>>>>>>> + memcpy(rds->slice_data + rds->slice_data_size, buffer, size);
>>>>>>> + rds->slice_data_size += size;
>>>>>>
>>>>>> also whats the point in allocating a buffer and memcpy into it that is
>>>>>> compared to using the original?
>>>>>
>>>>> We can be memcpy()'ing into mapped memory from HW accelerator. For the
>>>>
>>>> i dont mind memcpy() into memory from the accelerator, i do mind if this
>>>> could be memcpy into normal memory, it simply seems unneeded in that
>>>> case
>>>> ...
>>>
>>> Dropped all memcpy() but there is no striking difference in
>>> performance...
>>>
>>> On a 1080p H.263 sample (Grey.ts):
>>> - before: A:40347.6 V:40348.9 A-V: -1.317 ct: 1.247 904/904 13% 1%
>>> 3.1% 8
>>> 0
>>> - after : A:40347.6 V:40348.9 A-V: -1.317 ct: 1.247 904/904 13% 1%
>>> 3.0% 8
>>> 0
>>>
>>> On a 1080p MPEG-2 sample:
>>> - before: A:24956.4 V:24956.5 A-V: -0.047 ct: -0.862 828/828 18% 2%
>>> 4.7% 5
>>> 0
>>> - after : A:24956.4 V:24956.5 A-V: -0.046 ct: -0.862 828/828 19% 1%
>>> 4.7% 6
>>> 0
>>
>> Here is another one:
>> - Dropped calls to alloca()
>> - Simplify slice buffer IDs management
>>
>> I don't think I can make it smaller.
>
> It seems I could by forgetting about the attachement. ;-)
[...]
> +/** Initialize VA API render state */
> +static int vaapi_render_state_init(struct vaapi_render_state_private *rds, Picture *pic)
> +{
> + struct vaapi_render_state * const r = ff_get_vaapi_render_state(pic);
> +
> + assert(r && r->va_context);
> + rds->display = r->va_context->display;
> + rds->context_id = r->va_context->context_id;
> + rds->surface = r->surface;
> + rds->pic_param_buf_id = 0;
> + rds->iq_matrix_buf_id = 0;
> + rds->bitplane_buf_id = 0;
> + rds->slice_buf_ids = NULL;
> + rds->n_slice_buf_ids = 0;
> + rds->n_slice_buf_ids_alloc = 0;
> + rds->slice_params = NULL;
> + rds->n_slice_params_alloc = 0;
> + rds->slice_count = 0;
> + rds->slice_data = NULL;
> + rds->slice_data_size = 0;
memset(0) seems simpler if these are not guranteed to be allocated by
av_mallocz() or another zeroing alloc, but guranteeing this seems like
the best choice ...
> + return 0;
> +}
> +
> +/** Destroy VA API render state buffers */
> +static int vaapi_render_state_fini(struct vaapi_render_state_private *rds)
> +{
> + unsigned int i;
> +
> + if (rds->pic_param_buf_id) {
> + vaDestroyBuffer(rds->display, rds->pic_param_buf_id);
> + rds->pic_param_buf_id = 0;
> + }
> + if (rds->iq_matrix_buf_id) {
> + vaDestroyBuffer(rds->display, rds->iq_matrix_buf_id);
> + rds->iq_matrix_buf_id = 0;
> + }
> + if (rds->bitplane_buf_id) {
> + vaDestroyBuffer(rds->display, rds->bitplane_buf_id);
> + rds->bitplane_buf_id = 0;
> + }
> + if (rds->slice_buf_ids) {
> + for (i = 0; i < rds->n_slice_buf_ids; i++) {
> + vaDestroyBuffer(rds->display, rds->slice_buf_ids[i]);
> + rds->slice_buf_ids[i] = 0;
> + }
> + av_freep(&rds->slice_buf_ids);
> + }
> + av_freep(&rds->slice_params);
> + return 0;
> +}
static destroy_buffer(VADisplay display, VABufferID *id){
if(*id){
vaDestroyBuffer(display, *id);
*id=0;
}
}
would greatly simplify above
> +
> +/** Render the picture */
> +static int vaapi_render_picture(struct vaapi_render_state_private *rds)
whats the sense in the vaapi prefix in a static function in vappi.c ?
> +{
> + VABufferID va_buffers[3];
> + unsigned int n_va_buffers = 0;
> +
> + /* Ensure context is sane */
> + if (rds->pic_param_buf_id == 0)
> + return -1;
> + if (rds->n_slice_buf_ids == 0)
> + return -1;
can be merged
[...]
> + slice_params = rds->slice_params;
> + if (rds->slice_count >= rds->n_slice_params_alloc) {
> + rds->n_slice_params_alloc += 16;
> + slice_params = av_realloc(rds->slice_params,
> + rds->n_slice_params_alloc * rds->slice_param_size);
> + if (!slice_params) {
> + av_free(rds->slice_params);
> + return NULL;
> + }
> + rds->slice_params = slice_params;
> + }
av_fast_realloc() might be usefull for this
> + if (!slice_params)
> + return NULL;
> + assert(rds->slice_count < rds->n_slice_params_alloc);
> +
> + slice_param = (VASliceParameterBufferBase *)(slice_params + rds->slice_count * rds->slice_param_size);
> + slice_param->slice_data_size = size;
> + slice_param->slice_data_offset = rds->slice_data_size;
> + slice_param->slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
> +
> + rds->slice_count++;
> + rds->slice_data_size += size;
> + return slice_param;
> +}
> +
> +int ff_vaapi_common_start_frame(AVCodecContext *avctx,
> + av_unused const uint8_t *buffer, av_unused uint32_t size)
> +{
> + MpegEncContext * const s = avctx->priv_data;
> + struct vaapi_render_state_private *rds;
> +
> + dprintf(NULL, "ff_vaapi_common_start_frame()\n");
you have the codec context, it could be used instead of NULL
> +
> + if ((rds = ff_get_vaapi_render_state_private(s->current_picture_ptr)) == NULL)
> + return -1;
how can this be NULL?
if it cant the check is unneeded
> +
> + if (vaapi_render_state_init(rds, s->current_picture_ptr) < 0)
> + return -1;
> +
> + switch (avctx->codec->id) {
> + case CODEC_ID_MPEG2VIDEO:
> + rds->slice_param_size = sizeof(VASliceParameterBufferMPEG2);
> + break;
> + case CODEC_ID_MPEG4:
> + case CODEC_ID_H263:
> + rds->slice_param_size = sizeof(VASliceParameterBufferMPEG4);
> + break;
> + case CODEC_ID_H264:
> + rds->slice_param_size = sizeof(VASliceParameterBufferH264);
> + break;
> + case CODEC_ID_VC1:
> + case CODEC_ID_WMV3:
> + rds->slice_param_size = sizeof(VASliceParameterBufferVC1);
> + break;
> + default:
> + assert(0);
> + return -1;
> + }
it might make sense to call
ff_vaapi_common_start_frame()
and set rds->slice_param_size afterwards from each of the start_frame
functions unless ff_vaapi_common_start_frame() alone is enough and there
are no individual start_frame ...
[...]
> +/** This structure holds all temporary data for VA API decoding */
> +struct vaapi_render_state_private {
> + VADisplay display; ///< Window system dependent data (copied from vaapi_render_state->va_context)
> + VAContextID context_id; ///< Context ID (video decode pipeline, copied from vaapi_render_state->va_context)
> + VASurfaceID surface; ///< Rendering surface (copied from vaapi_render_state->va_context)
> + VABufferID pic_param_buf_id; ///< VAPictureParameterBuffer ID
> + VABufferID iq_matrix_buf_id; ///< VAIQMatrixBuffer ID
> + VABufferID bitplane_buf_id; ///< VABitPlaneBuffer ID (for VC-1 decoding)
> + VABufferID *slice_buf_ids; ///< Slice parameter/data buffer IDs
> + uint32_t n_slice_buf_ids; ///< Number of effective slice buffer IDs to send to the HW
> + uint32_t n_slice_buf_ids_alloc; ///< Number of allocated slice buffer IDs
> + union {
> + VAPictureParameterBufferMPEG2 mpeg2;
> + VAPictureParameterBufferMPEG4 mpeg4;
> + VAPictureParameterBufferH264 h264;
> + VAPictureParameterBufferVC1 vc1;
> + } pic_param; ///< Plain VAPictureParameterBuffer info
> + void *slice_params; ///< Pointer to VASliceParameterBuffers
> + uint32_t slice_param_size; ///< Size of a VASliceParameterBuffer element
> + uint32_t n_slice_params_alloc; ///< Number of allocated slice parameters
> + uint32_t slice_count; ///< Number of slices currently filled in
unsigned int seems a more natural choice for these ...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
I have never wished to cater to the crowd; for what I know they do not
approve, and what they approve I do not know. -- Epicurus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090310/b9a5e709/attachment.pgp>
More information about the ffmpeg-devel
mailing list