[FFmpeg-devel] [PATCH 1/3] lavc/qsvdec: add support for gpu_copy
Fu, Linjie
linjie.fu at intel.com
Mon Apr 8 09:14:18 EEST 2019
> -----Original Message-----
> From: Fu, Linjie
> Sent: Tuesday, March 26, 2019 13:38
> To: ffmpeg-devel at ffmpeg.org
> Cc: Fu, Linjie <linjie.fu at intel.com>; ChaoX A Liu <chaox.a.liu at intel.com>
> Subject: [PATCH 1/3] lavc/qsvdec: add support for gpu_copy
>
> Add support for GPU copy when QSV decoders work in system memory
> mode.
> However, memory must be contiguous and aligned to 128x64 to enable this
> feature. (first introduced in FFmpeg 3.3.1)
>
> Setting GPUCopy = MFX_GPUCOPY_ON leads to a performance improvement of up to
> 10x.
>
> CMD:
> ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv
> -gpu_copy on -i input.h264 -pix_fmt yuv420p out.yuv
>
>
> Signed-off-by: Linjie Fu <linjie.fu at intel.com>
> Signed-off-by: ChaoX A Liu <chaox.a.liu at intel.com>
> ---
> libavcodec/qsv.c | 27 +++++++++++++-------
> libavcodec/qsv_internal.h | 6 ++---
> libavcodec/qsvdec.c | 53 ++++++++++++++++++++++++++++++++++----
> -
> libavcodec/qsvdec.h | 2 ++
> libavcodec/qsvdec_h2645.c | 10 ++++++++
> libavcodec/qsvdec_other.c | 5 ++++
> libavcodec/qsvenc.c | 7 +++---
> 7 files changed, 89 insertions(+), 21 deletions(-)
>
> diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c
> index bb0d79588c..40e6c677cb 100644
> --- a/libavcodec/qsv.c
> +++ b/libavcodec/qsv.c
> @@ -277,15 +277,19 @@ load_plugin_fail:
> }
>
> int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession
> *session,
> - const char *load_plugins)
> + const char *load_plugins, int gpu_copy)
> {
> - mfxIMPL impl = MFX_IMPL_AUTO_ANY;
> - mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
> + mfxIMPL impl = MFX_IMPL_AUTO_ANY;
> + mfxVersion ver = { { QSV_VERSION_MINOR,
> QSV_VERSION_MAJOR } };
> + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };
>
> const char *desc;
> int ret;
>
> - ret = MFXInit(impl, &ver, session);
> + init_par.GPUCopy = gpu_copy;
> + init_par.Implementation = impl;
> + init_par.Version = ver;
> + ret = MFXInitEx(init_par, session);
> if (ret < 0)
> return ff_qsv_print_error(avctx, ret,
> "Error initializing an internal MFX session");
> @@ -571,7 +575,8 @@ static mfxStatus qsv_frame_get_hdl(mfxHDL pthis,
> mfxMemId mid, mfxHDL *hdl)
> }
>
> int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession
> *psession,
> - AVBufferRef *device_ref, const char *load_plugins)
> + AVBufferRef *device_ref, const char *load_plugins,
> + int gpu_copy)
> {
> static const mfxHandleType handle_types[] = {
> MFX_HANDLE_VA_DISPLAY,
> @@ -581,11 +586,12 @@ int ff_qsv_init_session_device(AVCodecContext
> *avctx, mfxSession *psession,
> AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)device_ref-
> >data;
> AVQSVDeviceContext *device_hwctx = device_ctx->hwctx;
> mfxSession parent_session = device_hwctx->session;
> + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };
> + mfxHDL handle = NULL;
>
> mfxSession session;
> mfxVersion ver;
> mfxIMPL impl;
> - mfxHDL handle = NULL;
> mfxHandleType handle_type;
> mfxStatus err;
>
> @@ -611,7 +617,10 @@ int ff_qsv_init_session_device(AVCodecContext
> *avctx, mfxSession *psession,
> "from the session\n");
> }
>
> - err = MFXInit(impl, &ver, &session);
> + init_par.GPUCopy = gpu_copy;
> + init_par.Implementation = impl;
> + init_par.Version = ver;
> + err = MFXInitEx(init_par, &session);
> if (err != MFX_ERR_NONE)
> return ff_qsv_print_error(avctx, err,
> "Error initializing a child MFX session");
> @@ -642,7 +651,7 @@ int ff_qsv_init_session_device(AVCodecContext
> *avctx, mfxSession *psession,
>
> int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession
> *psession,
> QSVFramesContext *qsv_frames_ctx,
> - const char *load_plugins, int opaque)
> + const char *load_plugins, int opaque, int gpu_copy)
> {
> mfxFrameAllocator frame_allocator = {
> .pthis = qsv_frames_ctx,
> @@ -662,7 +671,7 @@ int ff_qsv_init_session_frames(AVCodecContext
> *avctx, mfxSession *psession,
> int ret;
>
> ret = ff_qsv_init_session_device(avctx, &session,
> - frames_ctx->device_ref, load_plugins);
> + frames_ctx->device_ref, load_plugins, gpu_copy);
> if (ret < 0)
> return ret;
>
> diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h
> index 394c558883..8be6c3757c 100644
> --- a/libavcodec/qsv_internal.h
> +++ b/libavcodec/qsv_internal.h
> @@ -95,14 +95,14 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat format,
> uint32_t *fourcc);
> enum AVPictureType ff_qsv_map_pictype(int mfx_pic_type);
>
> int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession
> *session,
> - const char *load_plugins);
> + const char *load_plugins, int gpu_copy);
>
> int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession
> *psession,
> - AVBufferRef *device_ref, const char *load_plugins);
> + AVBufferRef *device_ref, const char *load_plugins, int
> gpu_copy);
>
> int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession
> *session,
> QSVFramesContext *qsv_frames_ctx,
> - const char *load_plugins, int opaque);
> + const char *load_plugins, int opaque, int gpu_copy);
>
> int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame);
>
> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
> index 4a0be811fb..5dd2b3834b 100644
> --- a/libavcodec/qsvdec.c
> +++ b/libavcodec/qsvdec.c
> @@ -34,9 +34,11 @@
> #include "libavutil/pixdesc.h"
> #include "libavutil/pixfmt.h"
> #include "libavutil/time.h"
> +#include "libavutil/imgutils.h"
>
> #include "avcodec.h"
> #include "internal.h"
> +#include "decode.h"
> #include "qsv.h"
> #include "qsv_internal.h"
> #include "qsvdec.h"
> @@ -54,6 +56,31 @@ const AVCodecHWConfigInternal *ff_qsv_hw_configs[]
> = {
> NULL
> };
>
> +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame
> *frame, AVBufferPool *pool)
> +{
> + int ret = 0;
> +
> + ff_decode_frame_props(avctx, frame);
> +
> + frame->width = avctx->width;
> + frame->height = avctx->height;
> + frame->linesize[0] = FFALIGN(avctx->width, 128);
> + frame->linesize[1] = frame->linesize[0];
> + frame->buf[0] = av_buffer_pool_get(pool);
> + if (!frame->buf[0])
> + return AVERROR(ENOMEM);
> +
> + frame->data[0] = frame->buf[0]->data;
> + frame->data[1] = frame->data[0] +
> + frame->linesize[0] * FFALIGN(avctx->height, 64);
> +
> + ret = ff_attach_decode_data(frame);
> + if (ret < 0)
> + return ret;
> +
> + return 0;
> +}
> +
> static int qsv_init_session(AVCodecContext *avctx, QSVContext *q,
> mfxSession session,
> AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref)
> {
> @@ -74,7 +101,8 @@ static int qsv_init_session(AVCodecContext *avctx,
> QSVContext *q, mfxSession ses
>
> ret = ff_qsv_init_session_frames(avctx, &q->internal_session,
> &q->frames_ctx, q->load_plugins,
> - q->iopattern ==
> MFX_IOPATTERN_OUT_OPAQUE_MEMORY);
> + q->iopattern ==
> MFX_IOPATTERN_OUT_OPAQUE_MEMORY,
> + q->gpu_copy);
> if (ret < 0) {
> av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
> return ret;
> @@ -88,7 +116,7 @@ static int qsv_init_session(AVCodecContext *avctx,
> QSVContext *q, mfxSession ses
> }
>
> ret = ff_qsv_init_session_device(avctx, &q->internal_session,
> - hw_device_ref, q->load_plugins);
> + hw_device_ref, q->load_plugins, q->gpu_copy);
> if (ret < 0)
> return ret;
>
> @@ -96,7 +124,7 @@ static int qsv_init_session(AVCodecContext *avctx,
> QSVContext *q, mfxSession ses
> } else {
> if (!q->internal_session) {
> ret = ff_qsv_init_internal_session(avctx, &q->internal_session,
> - q->load_plugins);
> + q->load_plugins, q->gpu_copy);
> if (ret < 0)
> return ret;
> }
> @@ -213,6 +241,12 @@ static int qsv_decode_init(AVCodecContext *avctx,
> QSVContext *q)
>
> q->frame_info = param.mfx.FrameInfo;
>
> + if (avctx->pix_fmt != AV_PIX_FMT_QSV)
> + q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx-
> >pix_fmt,
> + FFALIGN(avctx->width, 128),
> + FFALIGN(avctx->height, 64), 1),
> + av_buffer_allocz);
> +
> return 0;
> }
>
> @@ -220,9 +254,15 @@ static int alloc_frame(AVCodecContext *avctx,
> QSVContext *q, QSVFrame *frame)
> {
> int ret;
>
> - ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);
> - if (ret < 0)
> - return ret;
> + if (!q->pool) {
> + ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);
> + if (ret < 0)
> + return ret;
> + } else {
> + ret = ff_qsv_get_continuous_buffer(avctx, frame->frame, q->pool);
> + if (ret < 0)
> + return ret;
> + }
>
> if (frame->frame->format == AV_PIX_FMT_QSV) {
> frame->surface = *(mfxFrameSurface1*)frame->frame->data[3];
> @@ -484,6 +524,7 @@ int ff_qsv_decode_close(QSVContext *q)
>
> av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
> av_buffer_unref(&q->frames_ctx.mids_buf);
> + av_buffer_pool_uninit(&q->pool);
>
> return 0;
> }
> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
> index 111536caba..43ea03867e 100644
> --- a/libavcodec/qsvdec.h
> +++ b/libavcodec/qsvdec.h
> @@ -62,10 +62,12 @@ typedef struct QSVContext {
> enum AVPixelFormat orig_pix_fmt;
> uint32_t fourcc;
> mfxFrameInfo frame_info;
> + AVBufferPool *pool;
>
> // options set by the caller
> int async_depth;
> int iopattern;
> + int gpu_copy;
>
> char *load_plugins;
>
> diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
> index 9b49f5506e..3d1f1cbfac 100644
> --- a/libavcodec/qsvdec_h2645.c
> +++ b/libavcodec/qsvdec_h2645.c
> @@ -192,6 +192,11 @@ static const AVOption hevc_options[] = {
>
> { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an
> internal session",
> OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD },
> +
> + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines",
> OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 =
> MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT,
> MFX_GPUCOPY_OFF, VD, "gpu_copy"},
> + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
> + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },
> 0, 0, VD, "gpu_copy"},
> + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },
> 0, 0, VD, "gpu_copy"},
> { NULL },
> };
>
> @@ -227,6 +232,11 @@ AVCodec ff_hevc_qsv_decoder = {
> #if CONFIG_H264_QSV_DECODER
> static const AVOption options[] = {
> { "async_depth", "Internal parallelization depth, the higher the value the
> higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 =
> ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
> +
> + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines",
> OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 =
> MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT,
> MFX_GPUCOPY_OFF, VD, "gpu_copy"},
> + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
> + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },
> 0, 0, VD, "gpu_copy"},
> + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },
> 0, 0, VD, "gpu_copy"},
> { NULL },
> };
>
> diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
> index 03251d2c85..37237180fb 100644
> --- a/libavcodec/qsvdec_other.c
> +++ b/libavcodec/qsvdec_other.c
> @@ -169,6 +169,11 @@ static void qsv_decode_flush(AVCodecContext
> *avctx)
> #define VD AV_OPT_FLAG_VIDEO_PARAM |
> AV_OPT_FLAG_DECODING_PARAM
> static const AVOption options[] = {
> { "async_depth", "Internal parallelization depth, the higher the value the
> higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 =
> ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
> +
> + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines",
> OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 =
> MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT,
> MFX_GPUCOPY_OFF, VD, "gpu_copy"},
> + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
> + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },
> 0, 0, VD, "gpu_copy"},
> + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },
> 0, 0, VD, "gpu_copy"},
> { NULL },
> };
>
> diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
> index 5aa020d47b..3d008ed527 100644
> --- a/libavcodec/qsvenc.c
> +++ b/libavcodec/qsvenc.c
> @@ -909,7 +909,8 @@ static int qsvenc_init_session(AVCodecContext
> *avctx, QSVEncContext *q)
>
> ret = ff_qsv_init_session_frames(avctx, &q->internal_session,
> &q->frames_ctx, q->load_plugins,
> - q->param.IOPattern ==
> MFX_IOPATTERN_IN_OPAQUE_MEMORY);
> + q->param.IOPattern ==
> MFX_IOPATTERN_IN_OPAQUE_MEMORY,
> + MFX_GPUCOPY_OFF);
> if (ret < 0) {
> av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
> return ret;
> @@ -918,14 +919,14 @@ static int qsvenc_init_session(AVCodecContext
> *avctx, QSVEncContext *q)
> q->session = q->internal_session;
> } else if (avctx->hw_device_ctx) {
> ret = ff_qsv_init_session_device(avctx, &q->internal_session,
> - avctx->hw_device_ctx, q->load_plugins);
> + avctx->hw_device_ctx, q->load_plugins,
> MFX_GPUCOPY_OFF);
> if (ret < 0)
> return ret;
>
> q->session = q->internal_session;
> } else {
> ret = ff_qsv_init_internal_session(avctx, &q->internal_session,
> - q->load_plugins);
> + q->load_plugins, MFX_GPUCOPY_OFF);
> if (ret < 0)
> return ret;
>
> --
> 2.17.1
Ping?
Any comments on this patch set?
Decode performance improves noticeably on some platforms (6x, for example).
More information about the ffmpeg-devel
mailing list