[FFmpeg-devel] [PATCH v1 3/4] lavc/hevc_vaapi: enable sub frame support
Fei Wang
fei.w.wang at intel.com
Fri Apr 29 10:59:40 EEST 2022
Intel HW provides a feature that allows the decoder to output an
additional scaled frame alongside the original frame. The scaled
frame is attached to the main frame as sub frame side data.

The use case is mainly video analysis: the scaled-down frame can be
used for the analysis, and the result can then be applied back to
the main frame.
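
On the application side, the scaled frame can be read back from the
decoded frame's side data, roughly as in the sketch below (assuming
the AV_FRAME_DATA_SUB_FRAME side data type added earlier in this
series):

    #include <libavutil/frame.h>

    /* Return the scaled sub frame attached to a decoded frame, or
     * NULL if the decoder did not produce one. The side data payload
     * is itself an AVFrame referencing the scaled surface. */
    static AVFrame *get_sub_frame(const AVFrame *frame)
    {
        AVFrameSideData *sd =
            av_frame_get_side_data(frame, AV_FRAME_DATA_SUB_FRAME);
        return sd ? (AVFrame *)sd->data : NULL;
    }
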
Normally, scale_vaapi is used in a VAAPI transcode pipeline when a
lower resolution frame is wanted; a sub frame can now be used
instead. On some platforms, sub frame scaling is much faster than
scale_vaapi. For example, the decode + sub frame command below gives
~50% better performance than decode + scaling on my DG2 i3-11100B
at 3.6GHz.
decode + sub frame cmd:
ffmpeg -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 \
-hwaccel_output_format vaapi -export_side_data sub_frame \
-sub_frame_opts "width=300:height=300:format=nv12" \
-i 1920x1080.h265 -f null - &
decode + scaling cmd:
ffmpeg -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 \
-hwaccel_output_format vaapi -i 1920x1080.h265 \
-vf 'scale_vaapi=w=300:h=300:format=nv12' -f null - &
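
For API users, the decode + sub frame command above corresponds
roughly to the following sketch (assuming the
AV_CODEC_EXPORT_DATA_SUB_FRAME flag and the
AVCodecContext.sub_frame_opts dictionary introduced earlier in this
series):

    #include <libavcodec/avcodec.h>
    #include <libavutil/dict.h>

    /* Request a 300x300 NV12 sub frame from a decoder using the
     * VAAPI hwaccel; the keys match what
     * vaapi_decode_make_config() parses. */
    static void request_sub_frame(AVCodecContext *avctx)
    {
        avctx->export_side_data |= AV_CODEC_EXPORT_DATA_SUB_FRAME;
        av_dict_set(&avctx->sub_frame_opts, "width",  "300",  0);
        av_dict_set(&avctx->sub_frame_opts, "height", "300",  0);
        av_dict_set(&avctx->sub_frame_opts, "format", "nv12", 0);
    }
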
Signed-off-by: Fei Wang <fei.w.wang at intel.com>
---
libavcodec/vaapi_decode.c | 46 ++++++++++++++++++++++++++-
libavcodec/vaapi_decode.h | 4 +++
libavcodec/vaapi_hevc.c | 32 ++++++++++++++++++-
libavutil/hwcontext_vaapi.c | 62 +++++++++++++++++++++++++++++++++++--
libavutil/hwcontext_vaapi.h | 15 ++++++++-
5 files changed, 153 insertions(+), 6 deletions(-)
diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index a7abddb06b..920bab1ef4 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c
@@ -160,6 +160,10 @@ int ff_vaapi_decode_issue(AVCodecContext *avctx,
av_log(avctx, AV_LOG_DEBUG, "Decode to surface %#x.\n",
pic->output_surface);
+ if (ctx->hwfc->enable_sub_frame)
+ av_log(avctx, AV_LOG_DEBUG, "Decode sub frame to surface %#x.\n",
+ pic->sub_frame_surface);
+
vas = vaBeginPicture(ctx->hwctx->display, ctx->va_context,
pic->output_surface);
if (vas != VA_STATUS_SUCCESS) {
@@ -440,6 +444,9 @@ static int vaapi_decode_make_config(AVCodecContext *avctx,
AVHWDeviceContext *device = (AVHWDeviceContext*)device_ref->data;
AVVAAPIDeviceContext *hwctx = device->hwctx;
+ VAConfigAttrib attr;
+ int attr_num = 0, support_dec_processing = 0;
+
codec_desc = avcodec_descriptor_get(avctx->codec_id);
if (!codec_desc) {
err = AVERROR(EINVAL);
@@ -518,8 +525,23 @@ static int vaapi_decode_make_config(AVCodecContext *avctx,
}
}
+ if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_SUB_FRAME) {
+ attr.type = VAConfigAttribDecProcessing;
+ vas = vaGetConfigAttributes(hwctx->display, matched_va_profile,
+ VAEntrypointVLD, &attr, 1);
+ if (vas != VA_STATUS_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to query decode process "
+ "attributes: %d (%s).\n", vas, vaErrorStr(vas));
+ return AVERROR_EXTERNAL;
+ } else if (attr.value & VA_DEC_PROCESSING) {
+ support_dec_processing = 1;
+ attr_num++;
+ } else
+ av_log(avctx, AV_LOG_WARNING, "Hardware doesn't support decode processing.\n");
+ }
+
vas = vaCreateConfig(hwctx->display, matched_va_profile,
- VAEntrypointVLD, NULL, 0,
+ VAEntrypointVLD, &attr, attr_num,
va_config);
if (vas != VA_STATUS_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Failed to create decode "
@@ -564,10 +586,32 @@ static int vaapi_decode_make_config(AVCodecContext *avctx,
if (frames_ref) {
AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
+ AVVAAPIFramesContext *avfc = frames->hwctx;
frames->format = AV_PIX_FMT_VAAPI;
frames->width = avctx->coded_width;
frames->height = avctx->coded_height;
+ avfc->enable_sub_frame = support_dec_processing;
+
+ if (avfc->enable_sub_frame) {
+ avfc->sub_frame_width = avctx->coded_width;
+ avfc->sub_frame_height = avctx->coded_height;
+ avfc->sub_frame_sw_format = AV_PIX_FMT_NV12;
+ if (avctx->sub_frame_opts) {
+ AVDictionaryEntry *e = NULL;
+ while ((e = av_dict_get(avctx->sub_frame_opts, "", e, AV_DICT_IGNORE_SUFFIX))) {
+ if (!strcmp(e->key, "width"))
+ avfc->sub_frame_width = atoi(e->value);
+ else if (!strcmp(e->key, "height"))
+ avfc->sub_frame_height = atoi(e->value);
+ else if (!strcmp(e->key, "format"))
+ avfc->sub_frame_sw_format = av_get_pix_fmt(e->value);
+ }
+ }
+ av_log(avctx, AV_LOG_DEBUG, "Sub frame set with width:%d, height:%d, "
+ "format:%s.\n", avfc->sub_frame_width, avfc->sub_frame_height,
+ av_get_pix_fmt_name(avfc->sub_frame_sw_format));
+ }
err = vaapi_decode_find_best_format(avctx, device,
*va_config, frames);
diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h
index 6beda14e52..fbac7e7a8e 100644
--- a/libavcodec/vaapi_decode.h
+++ b/libavcodec/vaapi_decode.h
@@ -45,6 +45,10 @@ typedef struct VAAPIDecodePicture {
int nb_slices;
VABufferID *slice_buffers;
int slices_allocated;
+
+ VASurfaceID sub_frame_surface;
+ VARectangle sub_frame_src;
+ VARectangle sub_frame_dst;
} VAAPIDecodePicture;
typedef struct VAAPIDecodeContext {
diff --git a/libavcodec/vaapi_hevc.c b/libavcodec/vaapi_hevc.c
index 9083331c45..209a302a2c 100644
--- a/libavcodec/vaapi_hevc.c
+++ b/libavcodec/vaapi_hevc.c
@@ -38,6 +38,7 @@ typedef struct VAAPIDecodePictureHEVC {
VAPictureParameterBufferHEVC pic_param;
VASliceParameterBufferHEVC last_slice_param;
#endif
+ VAProcPipelineParameterBuffer proc_param;
const uint8_t *last_buffer;
size_t last_size;
@@ -122,8 +123,8 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx,
VAAPIDecodePictureHEVC *pic = h->ref->hwaccel_picture_private;
const HEVCSPS *sps = h->ps.sps;
const HEVCPPS *pps = h->ps.pps;
-
const ScalingList *scaling_list = NULL;
+ AVFrameSideData *sd;
int pic_param_size, err, i;
VAPictureParameterBufferHEVC *pic_param = (VAPictureParameterBufferHEVC *)&pic->pic_param;
@@ -285,6 +286,35 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx,
goto fail;
}
+ sd = av_frame_get_side_data(h->ref->frame, AV_FRAME_DATA_SUB_FRAME);
+ if (sd) {
+ VAProcPipelineParameterBuffer *proc_param = &pic->proc_param;
+ AVFrame *sub_frame = (AVFrame *)sd->data;
+
+ memset(proc_param, 0, sizeof(VAProcPipelineParameterBuffer));
+
+ pic->pic.sub_frame_src.x = pic->pic.sub_frame_src.y = 0;
+ pic->pic.sub_frame_src.width = sps->width;
+ pic->pic.sub_frame_src.height = sps->height;
+
+ pic->pic.sub_frame_dst.x = pic->pic.sub_frame_dst.y = 0;
+ pic->pic.sub_frame_dst.width = sub_frame->width;
+ pic->pic.sub_frame_dst.height = sub_frame->height;
+
+ pic->pic.sub_frame_surface = ff_vaapi_get_surface_id(sub_frame);
+ proc_param->surface = pic->pic.output_surface;
+ proc_param->surface_region = &pic->pic.sub_frame_src;
+ proc_param->output_region = &pic->pic.sub_frame_dst;
+ proc_param->additional_outputs = &pic->pic.sub_frame_surface;
+ proc_param->num_additional_outputs = 1;
+
+ err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+ VAProcPipelineParameterBufferType,
+ &pic->proc_param, sizeof(VAProcPipelineParameterBuffer));
+ if (err < 0)
+ goto fail;
+ }
+
return 0;
fail:
diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
index c3a98bc4b1..1b3b487738 100644
--- a/libavutil/hwcontext_vaapi.c
+++ b/libavutil/hwcontext_vaapi.c
@@ -49,8 +49,7 @@
#include "hwcontext_vaapi.h"
#include "mem.h"
#include "pixdesc.h"
-#include "pixfmt.h"
-
+#include "sub_frame_metadata.h"
typedef struct VAAPIDevicePriv {
#if HAVE_VAAPI_X11
@@ -82,6 +81,8 @@ typedef struct VAAPIFramesContext {
// Caches whether VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2 is unsupported for
// surface imports.
int prime_2_import_unsupported;
+
+ AVBufferRef *sub_frames_ref;
} VAAPIFramesContext;
typedef struct VAAPIMapping {
@@ -511,7 +512,7 @@ static AVBufferRef *vaapi_pool_alloc(void *opaque, size_t size)
return ref;
}
-static int vaapi_frames_init(AVHWFramesContext *hwfc)
+static int vaapi_hw_frames_init(AVHWFramesContext *hwfc)
{
AVVAAPIFramesContext *avfc = hwfc->hwctx;
VAAPIFramesContext *ctx = hwfc->internal->priv;
@@ -663,17 +664,57 @@ fail:
return err;
}
+static int vaapi_frames_init(AVHWFramesContext *hwfc)
+{
+ VAAPIFramesContext *ctx = hwfc->internal->priv;
+ AVVAAPIFramesContext *avfc = hwfc->hwctx;
+ AVHWFramesContext *sub_frames_ctx;
+ int ret;
+
+ ret = vaapi_hw_frames_init(hwfc);
+ if (ret < 0)
+ return ret;
+
+ if (avfc->enable_sub_frame) {
+ ctx->sub_frames_ref = av_hwframe_ctx_alloc(hwfc->device_ref);
+ if (!ctx->sub_frames_ref) {
+ return AVERROR(ENOMEM);
+ }
+ sub_frames_ctx = (AVHWFramesContext*)ctx->sub_frames_ref->data;
+
+ sub_frames_ctx->width = avfc->sub_frame_width;
+ sub_frames_ctx->height = avfc->sub_frame_height;
+ sub_frames_ctx->format = AV_PIX_FMT_VAAPI;
+ sub_frames_ctx->sw_format = avfc->sub_frame_sw_format;
+
+ ret = av_hwframe_ctx_init(ctx->sub_frames_ref);
+ if (ret < 0) {
+ av_buffer_unref(&ctx->sub_frames_ref);
+ av_log(hwfc, AV_LOG_ERROR, "Failed to init sub frame hw context.\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static void vaapi_frames_uninit(AVHWFramesContext *hwfc)
{
AVVAAPIFramesContext *avfc = hwfc->hwctx;
VAAPIFramesContext *ctx = hwfc->internal->priv;
+ av_buffer_unref(&ctx->sub_frames_ref);
av_freep(&avfc->surface_ids);
av_freep(&ctx->attributes);
}
static int vaapi_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
{
+ VAAPIFramesContext *ctx = hwfc->internal->priv;
+ AVVAAPIFramesContext *avfc = hwfc->hwctx;
+ AVFrame *sub_frame;
+ int ret;
+
frame->buf[0] = av_buffer_pool_get(hwfc->pool);
if (!frame->buf[0])
return AVERROR(ENOMEM);
@@ -683,6 +724,21 @@ static int vaapi_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
frame->width = hwfc->width;
frame->height = hwfc->height;
+ if (avfc->enable_sub_frame) {
+ if (!ctx->sub_frames_ref)
+ return AVERROR(ENOSYS);
+
+ sub_frame = av_sub_frame_create_side_data(frame);
+ if (!sub_frame)
+ return AVERROR(ENOMEM);
+
+ ret = av_hwframe_get_buffer(ctx->sub_frames_ref, sub_frame, 0);
+ if (ret < 0) {
+ av_log(hwfc, AV_LOG_ERROR, "Can't get sub frame.\n");
+ return ret;
+ }
+ }
+
return 0;
}
diff --git a/libavutil/hwcontext_vaapi.h b/libavutil/hwcontext_vaapi.h
index 0b2e071cb3..aea0ec9263 100644
--- a/libavutil/hwcontext_vaapi.h
+++ b/libavutil/hwcontext_vaapi.h
@@ -19,6 +19,7 @@
#ifndef AVUTIL_HWCONTEXT_VAAPI_H
#define AVUTIL_HWCONTEXT_VAAPI_H
+#include "pixfmt.h"
#include <va/va.h>
/**
@@ -81,7 +82,7 @@ typedef struct AVVAAPIDeviceContext {
} AVVAAPIDeviceContext;
/**
- * VAAPI-specific data associated with a frame pool.
+ * VAAPI-specific data associated with a frame pool and sub frame.
*
* Allocated as AVHWFramesContext.hwctx.
*/
@@ -100,6 +101,18 @@ typedef struct AVVAAPIFramesContext {
*/
VASurfaceID *surface_ids;
int nb_surfaces;
+
+ /**
+ * Set by the user to indicate whether sub frame support should be enabled.
+ */
+ int enable_sub_frame;
+
+ /**
+ * Sub frame width/height/format. Only available if enable_sub_frame
+ * is true.
+ */
+ int sub_frame_width, sub_frame_height;
+ enum AVPixelFormat sub_frame_sw_format;
} AVVAAPIFramesContext;
/**
--
2.25.1