[FFmpeg-devel] [PATCH 3/6] avutil/hwcontext_cuda: add CUstream in cuda hwctx
wm4
nfxjfg at googlemail.com
Tue May 8 18:55:41 EEST 2018
On Tue, 8 May 2018 17:48:21 +0200
Timo Rothenpieler <timo at rothenpieler.org> wrote:
> Am 08.05.2018 um 17:26 schrieb wm4:
> > On Tue, 8 May 2018 15:31:29 +0200
> > Timo Rothenpieler <timo at rothenpieler.org> wrote:
> >
> >> ---
> >> configure | 6 ++++--
> >> doc/APIchanges | 3 ++-
> >> libavutil/hwcontext_cuda.c | 3 +++
> >> libavutil/hwcontext_cuda.h | 1 +
> >> libavutil/version.h | 2 +-
> >> 5 files changed, 11 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/configure b/configure
> >> index 7c143238a8..cae8a235a4 100755
> >> --- a/configure
> >> +++ b/configure
> >> @@ -5887,8 +5887,10 @@ check_type "va/va.h va/va_enc_vp9.h" "VAEncPictureParameterBufferVP9"
> >> check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC"
> >>
> >> if ! disabled ffnvcodec; then
> >> - check_pkg_config ffnvcodec "ffnvcodec >= 8.0.14.1" \
> >> - "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" ""
> >> + check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.2" \
> >> + "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" "" || \
> >> + { test_pkg_config ffnvcodec_tmp "ffnvcodec < 8.1" "" "" && check_pkg_config ffnvcodec "ffnvcodec >= 8.0.14.2" \
> >> + "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" ""; }
> >> fi
> >>
> >> check_cpp_condition winrt windows.h "!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)"
> >> diff --git a/doc/APIchanges b/doc/APIchanges
> >> index f8ae6b0433..7a0a8522f9 100644
> >> --- a/doc/APIchanges
> >> +++ b/doc/APIchanges
> >> @@ -15,8 +15,9 @@ libavutil: 2017-10-21
> >>
> >> API changes, most recent first:
> >>
> >> -2018-05-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext_cuda.h
> >> +2018-05-xx - xxxxxxxxxx, xxxxxxxxxx - lavu 56.19.100/101 - hwcontext_cuda.h
> >> Add AVCUDAFramesContext and AVCUDAFramesContext.flags.
> >> + Add AVCUDADeviceContext.stream.
> >>
> >> 2018-04-xx - xxxxxxxxxx - lavu 56.18.100 - pixdesc.h
> >> Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8.
> >> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> >> index b0b4bf24ae..8024eec79d 100644
> >> --- a/libavutil/hwcontext_cuda.c
> >> +++ b/libavutil/hwcontext_cuda.c
> >> @@ -395,6 +395,9 @@ static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
> >> goto error;
> >> }
> >>
> >> + // Setting stream to NULL will make functions automatically use the default CUstream
> >> + hwctx->stream = NULL;
> >> +
> >> cu->cuCtxPopCurrent(&dummy);
> >>
> >> hwctx->internal->is_allocated = 1;
> >> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> >> index 19accbd9be..cd797ae920 100644
> >> --- a/libavutil/hwcontext_cuda.h
> >> +++ b/libavutil/hwcontext_cuda.h
> >> @@ -41,6 +41,7 @@ typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal;
> >> */
> >> typedef struct AVCUDADeviceContext {
> >> CUcontext cuda_ctx;
> >> + CUstream stream;
> >> AVCUDADeviceContextInternal *internal;
> >> } AVCUDADeviceContext;
> >>
> >> diff --git a/libavutil/version.h b/libavutil/version.h
> >> index 84409b1d69..f84ec89154 100644
> >> --- a/libavutil/version.h
> >> +++ b/libavutil/version.h
> >> @@ -80,7 +80,7 @@
> >>
> >> #define LIBAVUTIL_VERSION_MAJOR 56
> >> #define LIBAVUTIL_VERSION_MINOR 19
> >> -#define LIBAVUTIL_VERSION_MICRO 100
> >> +#define LIBAVUTIL_VERSION_MICRO 101
> >>
> >> #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
> >> LIBAVUTIL_VERSION_MINOR, \
> >
> > What is this?
>
> https://docs.nvidia.com/cuda/cuda-driver-api/stream-sync-behavior.html
>
> It allows asynchronous processing of CUDA workloads. The next couple of
> patches make use of it.
> There's no change in behaviour if it remains unset/NULL, but if you set
> one, the workload won't block the main CUDA stream, so you can run
> multiple transcode sessions in the same application without them
> blocking one another.
>
This could probably be documented.
It seems a bit strange that the stream is per device. Wouldn't it be per
operation?
More information about the ffmpeg-devel mailing list