[FFmpeg-devel] [PATCH] lavfi: Add VAAPI deinterlacer

Thu Jan 12 12:03:07 EET 2017

On 12/01/17 08:01, wm4 wrote:
> On Sun, 8 Jan 2017 19:12:47 +0000
> Mark Thompson <sw at jkqxz.net> wrote:
> 
>> (cherry picked from commit ade370a4d7eab1866b6023c91c135d27c77ca465)
>> ---
>> One minor fixup for allocation due to differences in the lavfis, otherwise unchanged.
>>
>>  configure                          |   1 +
>>  libavfilter/Makefile               |   1 +
>>  libavfilter/allfilters.c           |   1 +
>>  libavfilter/version.h              |   2 +-
>>  libavfilter/vf_deinterlace_vaapi.c | 630 +++++++++++++++++++++++++++++++++++++
>>  5 files changed, 634 insertions(+), 1 deletion(-)
>>  create mode 100644 libavfilter/vf_deinterlace_vaapi.c
>> ...
>> +
>> +static int deint_vaapi_config_output(AVFilterLink *outlink)
>> +{
>> +    AVFilterContext    *avctx = outlink->src;
>> +    DeintVAAPIContext    *ctx = avctx->priv;
>> +    AVVAAPIHWConfig *hwconfig = NULL;
>> +    AVHWFramesConstraints *constraints = NULL;
>> +    AVVAAPIFramesContext *va_frames;
>> +    VAStatus vas;
>> +    int err;
>> +
>> +    deint_vaapi_pipeline_uninit(avctx);
>> +
>> +    av_assert0(ctx->input_frames);
>> +    ctx->device_ref = av_buffer_ref(ctx->input_frames->device_ref);
>> +    ctx->hwctx = ((AVHWDeviceContext*)ctx->device_ref->data)->hwctx;
>> +
>> +    ctx->output_width  = ctx->input_frames->width;
>> +    ctx->output_height = ctx->input_frames->height;
>> +
>> +    av_assert0(ctx->va_config == VA_INVALID_ID);
>> +    vas = vaCreateConfig(ctx->hwctx->display, VAProfileNone,
>> +                         VAEntrypointVideoProc, 0, 0, &ctx->va_config);
>> +    if (vas != VA_STATUS_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to create processing pipeline "
>> +               "config: %d (%s).\n", vas, vaErrorStr(vas));
>> +        err = AVERROR(EIO);
>> +        goto fail;
>> +    }
>> +
>> +    hwconfig = av_hwdevice_hwconfig_alloc(ctx->device_ref);
>> +    if (!hwconfig) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +    hwconfig->config_id = ctx->va_config;
>> +
>> +    constraints = av_hwdevice_get_hwframe_constraints(ctx->device_ref,
>> +                                                      hwconfig);
>> +    if (!constraints) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    if (ctx->output_width  < constraints->min_width  ||
>> +        ctx->output_height < constraints->min_height ||
>> +        ctx->output_width  > constraints->max_width  ||
>> +        ctx->output_height > constraints->max_height) {
>> +        av_log(avctx, AV_LOG_ERROR, "Hardware does not support "
>> +               "deinterlacing to size %dx%d "
>> +               "(constraints: width %d-%d height %d-%d).\n",
>> +               ctx->output_width, ctx->output_height,
>> +               constraints->min_width,  constraints->max_width,
>> +               constraints->min_height, constraints->max_height);
>> +        err = AVERROR(EINVAL);
>> +        goto fail;
>> +    }
>> +
>> +    err = deint_vaapi_build_filter_params(avctx);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    ctx->output_frames_ref = av_hwframe_ctx_alloc(ctx->device_ref);
>> +    if (!ctx->output_frames_ref) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to create HW frame context "
>> +               "for output.\n");
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    ctx->output_frames = (AVHWFramesContext*)ctx->output_frames_ref->data;
>> +
>> +    ctx->output_frames->format    = AV_PIX_FMT_VAAPI;
>> +    ctx->output_frames->sw_format = ctx->input_frames->sw_format;
>> +    ctx->output_frames->width     = ctx->output_width;
>> +    ctx->output_frames->height    = ctx->output_height;
>> +
>> +    // The number of output frames we need is determined by what follows
>> +    // the filter.  If it's an encoder with complex frame reference
>> +    // structures then this could be very high.
>> +    ctx->output_frames->initial_pool_size = 10;
> 
> This seems less than ideal. We should probably have some concept to
> handle this issue. Until then, this should probably be a user
> configurable option. It looks like this could waste GPU memory, so it
> seems important enough.

Yes, we need a generic option somewhere - most hardware filters and decoders want it (not just VAAPI - QSV really needs it too, especially with the look-ahead options which can buffer arbitrarily many frames in the encoder).  libav have talked about this recently, we should probably follow it up there.

On the specific option, I don't much like the idea of adding that now because we would have to continue to support it later?

> But is there really a need to allocate all the surfaces upfront? I
> hoped this to be an issue with decoders only.
> 
> (In my experience, I never needed this for VPP.)

The API says it's required, so we do it.  Not sure we can sanely do anything else?

>> +    err = av_hwframe_ctx_init(ctx->output_frames_ref);
>> +    if (err < 0) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to initialise VAAPI frame "
>> +               "context for output: %d\n", err);
>> +        goto fail;
>> +    }
>> +
>> +    va_frames = ctx->output_frames->hwctx;
>> +
>> +    av_assert0(ctx->va_context == VA_INVALID_ID);
>> +    vas = vaCreateContext(ctx->hwctx->display, ctx->va_config,
>> +                          ctx->output_width, ctx->output_height, 0,
>> +                          va_frames->surface_ids, va_frames->nb_surfaces,
>> +                          &ctx->va_context);
>> +    if (vas != VA_STATUS_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to create processing pipeline "
>> +               "context: %d (%s).\n", vas, vaErrorStr(vas));
>> +        return AVERROR(EIO);
>> +    }
>> +
>> +    outlink->w = ctx->output_width;
>> +    outlink->h = ctx->output_height;
>> +
>> +    outlink->hw_frames_ctx = av_buffer_ref(ctx->output_frames_ref);
>> +    if (!outlink->hw_frames_ctx) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    av_freep(&hwconfig);
>> +    av_hwframe_constraints_free(&constraints);
>> +    return 0;
>> +
>> +fail:
>> +    av_buffer_unref(&ctx->output_frames_ref);
>> +    av_freep(&hwconfig);
>> +    av_hwframe_constraints_free(&constraints);
>> +    return err;
>> +}
>> +
>> +static int vaapi_proc_colour_standard(enum AVColorSpace av_cs)
>> +{
>> +    switch(av_cs) {
>> +#define CS(av, va) case AVCOL_SPC_ ## av: return VAProcColorStandard ## va;
>> +        CS(BT709,     BT709);
>> +        CS(BT470BG,   BT470BG);
>> +        CS(SMPTE170M, SMPTE170M);
>> +        CS(SMPTE240M, SMPTE240M);
>> +#undef CS
>> +    default:
>> +        return VAProcColorStandardNone;
>> +    }
>> +}
> 
> I bet there's something like this in the encoder code too?

The encoder doesn't deal with it, because we write the headers ourselves.  It is identical to a fragment in vf_scale_vaapi, though, yes.

>> +
>> +static int deint_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
>> +{
>> +    AVFilterContext   *avctx = inlink->dst;
>> +    AVFilterLink    *outlink = avctx->outputs[0];
>> +    DeintVAAPIContext *ctx = avctx->priv;
>> +    AVFrame *output_frame = NULL;
>> +    VASurfaceID input_surface, output_surface;
>> +    VASurfaceID backward_references[MAX_REFERENCES];
>> +    VASurfaceID forward_references[MAX_REFERENCES];
>> +    VAProcPipelineParameterBuffer params;
>> +    VAProcFilterParameterBufferDeinterlacing *filter_params;
>> +    VARectangle input_region;
>> +    VABufferID params_id;
>> +    VAStatus vas;
>> +    void *filter_params_addr = NULL;
>> +    int err, i;
>> +
>> +    av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
>> +           av_get_pix_fmt_name(input_frame->format),
>> +           input_frame->width, input_frame->height, input_frame->pts);
>> +
>> +    if (ctx->queue_count < ctx->queue_depth) {
>> +        ctx->frame_queue[ctx->queue_count++] = input_frame;
>> +        if (ctx->queue_count < ctx->queue_depth) {
>> +            // Need more reference surfaces before we can continue.
>> +            return 0;
> 
> Does this handle close-to-EOF situations?

The filter produces queue_depth fewer output frames than input, so yes.  (If you only have a small number of frames and need more references than that, you get no output - sucks to be you.)

>> +        }
>> +    } else {
>> +        av_frame_free(&ctx->frame_queue[0]);
>> +        for (i = 0; i + 1 < ctx->queue_count; i++)
>> +            ctx->frame_queue[i] = ctx->frame_queue[i + 1];
>> +        ctx->frame_queue[i] = input_frame;
>> +    }
>> +
>> +    input_frame =
>> +        ctx->frame_queue[ctx->pipeline_caps.num_backward_references];
>> +    input_surface = (VASurfaceID)(uintptr_t)input_frame->data[3];
>> +    for (i = 0; i < ctx->pipeline_caps.num_backward_references; i++)
>> +        backward_references[i] = (VASurfaceID)(uintptr_t)
>> +            ctx->frame_queue[ctx->pipeline_caps.num_backward_references -
>> +                             i - 1]->data[3];
>> +    for (i = 0; i < ctx->pipeline_caps.num_forward_references; i++)
>> +        forward_references[i] = (VASurfaceID)(uintptr_t)
>> +            ctx->frame_queue[ctx->pipeline_caps.num_backward_references +
>> +                             i + 1]->data[3];
>> +
>> +    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for "
>> +           "deinterlace input.\n", input_surface);
>> +    av_log(avctx, AV_LOG_DEBUG, "Backward references:");
>> +    for (i = 0; i < ctx->pipeline_caps.num_backward_references; i++)
>> +        av_log(avctx, AV_LOG_DEBUG, " %#x", backward_references[i]);
>> +    av_log(avctx, AV_LOG_DEBUG, "\n");
>> +    av_log(avctx, AV_LOG_DEBUG, "Forward  references:");
>> +    for (i = 0; i < ctx->pipeline_caps.num_forward_references; i++)
>> +        av_log(avctx, AV_LOG_DEBUG, " %#x", forward_references[i]);
>> +    av_log(avctx, AV_LOG_DEBUG, "\n");
>> +
>> +    output_frame = av_frame_alloc();
>> +    if (!output_frame) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    err = av_hwframe_get_buffer(ctx->output_frames_ref,
>> +                                output_frame, 0);
>> +    if (err < 0) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    output_surface = (VASurfaceID)(uintptr_t)output_frame->data[3];
>> +    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for "
>> +           "deinterlace output.\n", output_surface);
>> +
>> +    memset(&params, 0, sizeof(params));
>> +
>> +    input_region = (VARectangle) {
>> +        .x      = 0,
>> +        .y      = 0,
>> +        .width  = input_frame->width,
>> +        .height = input_frame->height,
>> +    };
>> +
>> +    params.surface = input_surface;
>> +    params.surface_region = &input_region;
>> +    params.surface_color_standard =
>> +        vaapi_proc_colour_standard(input_frame->colorspace);
>> +
>> +    params.output_region = NULL;
>> +    params.output_background_color = 0xff000000;
>> +    params.output_color_standard = params.surface_color_standard;
>> +
>> +    params.pipeline_flags = 0;
>> +    params.filter_flags   = VA_FRAME_PICTURE;
>> +
>> +    vas = vaMapBuffer(ctx->hwctx->display, ctx->filter_buffer,
>> +                      &filter_params_addr);
>> +    if (vas != VA_STATUS_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to map filter parameter "
>> +               "buffer: %d (%s).\n", vas, vaErrorStr(vas));
>> +        err = AVERROR(EIO);
>> +        goto fail;
>> +    }
>> +    filter_params = filter_params_addr;
>> +    filter_params->flags = 0;
>> +    if (input_frame->interlaced_frame && !input_frame->top_field_first)
>> +        filter_params->flags |= VA_DEINTERLACING_BOTTOM_FIELD_FIRST;
>> +    filter_params_addr = NULL;
>> +    vas = vaUnmapBuffer(ctx->hwctx->display, ctx->filter_buffer);
>> +    if (vas != VA_STATUS_SUCCESS)
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to unmap filter parameter "
>> +               "buffer: %d (%s).\n", vas, vaErrorStr(vas));
>> +
>> +    params.filters     = &ctx->filter_buffer;
>> +    params.num_filters = 1;
>> +
>> +    params.forward_references = forward_references;
>> +    params.num_forward_references =
>> +        ctx->pipeline_caps.num_forward_references;
>> +    params.backward_references = backward_references;
>> +    params.num_backward_references =
>> +        ctx->pipeline_caps.num_backward_references;
>> +
>> +    vas = vaBeginPicture(ctx->hwctx->display,
>> +                         ctx->va_context, output_surface);
>> +    if (vas != VA_STATUS_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to attach new picture: "
>> +               "%d (%s).\n", vas, vaErrorStr(vas));
>> +        err = AVERROR(EIO);
>> +        goto fail;
>> +    }
>> +
>> +    vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>> +                         VAProcPipelineParameterBufferType,
>> +                         sizeof(params), 1, &params, &params_id);
>> +    if (vas != VA_STATUS_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to create parameter buffer: "
>> +               "%d (%s).\n", vas, vaErrorStr(vas));
>> +        err = AVERROR(EIO);
>> +        goto fail_after_begin;
>> +    }
>> +    av_log(avctx, AV_LOG_DEBUG, "Pipeline parameter buffer is %#x.\n",
>> +           params_id);
>> +
>> +    vas = vaRenderPicture(ctx->hwctx->display, ctx->va_context,
>> +                          &params_id, 1);
>> +    if (vas != VA_STATUS_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to render parameter buffer: "
>> +               "%d (%s).\n", vas, vaErrorStr(vas));
>> +        err = AVERROR(EIO);
>> +        goto fail_after_begin;
>> +    }
>> +
>> +    vas = vaEndPicture(ctx->hwctx->display, ctx->va_context);
>> +    if (vas != VA_STATUS_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to start picture processing: "
>> +               "%d (%s).\n", vas, vaErrorStr(vas));
>> +        err = AVERROR(EIO);
>> +        goto fail_after_render;
>> +    }
>> +
>> +    if (ctx->hwctx->driver_quirks &
>> +        AV_VAAPI_DRIVER_QUIRK_RENDER_PARAM_BUFFERS) {
>> +        vas = vaDestroyBuffer(ctx->hwctx->display, params_id);
>> +        if (vas != VA_STATUS_SUCCESS) {
>> +            av_log(avctx, AV_LOG_ERROR, "Failed to free parameter buffer: "
>> +                   "%d (%s).\n", vas, vaErrorStr(vas));
>> +            // And ignore.
>> +        }
>> +    }
>> +
>> +    err = av_frame_copy_props(output_frame, input_frame);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
>> +           av_get_pix_fmt_name(output_frame->format),
>> +           output_frame->width, output_frame->height, output_frame->pts);
>> +
>> +    return ff_filter_frame(outlink, output_frame);
>> +
>> +fail_after_begin:
>> +    vaRenderPicture(ctx->hwctx->display, ctx->va_context, &params_id, 1);
>> +fail_after_render:
>> +    vaEndPicture(ctx->hwctx->display, ctx->va_context);
>> +fail:
>> +    if (filter_params_addr)
>> +        vaUnmapBuffer(ctx->hwctx->display, ctx->filter_buffer);
>> +    av_frame_free(&output_frame);
>> +    return err;
>> +}
>> +
>> ...
>> +
>> +AVFilter ff_vf_deinterlace_vaapi = {
>> +    .name           = "deinterlace_vaapi",
>> +    .description    = NULL_IF_CONFIG_SMALL("Deinterlacing of VAAPI surfaces"),
>> +    .priv_size      = sizeof(DeintVAAPIContext),
>> +    .init           = &deint_vaapi_init,
>> +    .uninit         = &deint_vaapi_uninit,
>> +    .query_formats  = &deint_vaapi_query_formats,
>> +    .inputs         = deint_vaapi_inputs,
>> +    .outputs        = deint_vaapi_outputs,
>> +    .priv_class     = &deint_vaapi_class,
>> +};
> 
> Would there be any benefit in conflating this filter with the scale
> filter, or is that strictly unnecessary? I know vdpau's API conflates
> them, but maybe that was done for simplicity.

There is some common code, but I don't think they should be the same filter - the structure and options are sufficiently different that it would be kindof annoying.  I could see making libavfilter/vaapi.[ch] files with common stuff in, though.  (Also for the "miscellaneous other processing" filter (denoise, sharpen, etc.), if anyone wanted to chase that up.)

Thanks,

- Mark