[FFmpeg-devel] [PATCH 1/2] avcodec/cuvid: add cuvid decoder
Hendrik Leppkes
h.leppkes at gmail.com
Mon Jun 6 11:50:02 CEST 2016
On Sun, Jun 5, 2016 at 8:58 PM, Timo Rothenpieler <timo at rothenpieler.org> wrote:
> ---
> Changelog | 2 +
> MAINTAINERS | 1 +
> configure | 20 ++
> libavcodec/Makefile | 2 +
> libavcodec/allcodecs.c | 4 +
> libavcodec/cuvid.c | 550 +++++++++++++++++++++++++++++++++++++++++++++++++
> libavcodec/version.h | 4 +-
> 7 files changed, 581 insertions(+), 2 deletions(-)
> create mode 100644 libavcodec/cuvid.c
>
> diff --git a/Changelog b/Changelog
> index d5228b2..35e17e5 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -38,6 +38,8 @@ version <next>:
> - loudnorm filter
> - MTAF demuxer and decoder
> - MagicYUV decoder
> +- CUDA CUVID H264/HEVC decoder
> +
>
> version 3.0:
> - Common Encryption (CENC) MP4 encoding and decoding support
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 9ce2524..bf99d0c 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -163,6 +163,7 @@ Codecs:
> cpia.c Stephan Hilb
> crystalhd.c Philip Langdale
> cscd.c Reimar Doeffinger
> + cuvid.c Timo Rothenpieler
> dca.c Kostya Shishkov, Benjamin Larsson
> dirac* Rostislav Pehlivanov
> dnxhd* Baptiste Coudurier
> diff --git a/configure b/configure
> index 7c463a5..2b2d5f8 100755
> --- a/configure
> +++ b/configure
> @@ -158,6 +158,7 @@ Hardware accelerators:
>
> Hardware-accelerated decoding/encoding:
> --enable-cuda enable dynamically linked CUDA [no]
> + --enable-cuvid enable CUVID support [autodetect]
> --enable-libmfx enable HW acceleration through libmfx
> --enable-mmal enable decoding via MMAL [no]
> --enable-nvenc enable NVIDIA NVENC support [no]
> @@ -1567,6 +1568,7 @@ FEATURE_LIST="
>
> HW_CODECS_LIST="
> cuda
> + cuvid
> libmfx
> mmal
> nvenc
> @@ -2328,6 +2330,7 @@ comfortnoise_encoder_select="lpc"
> cook_decoder_select="audiodsp mdct sinewin"
> cscd_decoder_select="lzo"
> cscd_decoder_suggest="zlib"
> +cuvid_decoder_deps="cuda cuvid"
> dca_decoder_select="mdct"
> dds_decoder_select="texturedsp"
> dirac_decoder_select="dirac_parse dwt golomb videodsp mpegvideoenc"
> @@ -2522,6 +2525,7 @@ audiotoolbox_extralibs="-framework CoreFoundation -framework AudioToolbox -frame
>
> # hardware accelerators
> crystalhd_deps="libcrystalhd_libcrystalhd_if_h"
> +cuvid_deps="cuda"
> d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext"
> dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode"
> vaapi_deps="va_va_h"
> @@ -2539,6 +2543,7 @@ h263_vaapi_hwaccel_select="h263_decoder"
> h263_videotoolbox_hwaccel_deps="videotoolbox"
> h263_videotoolbox_hwaccel_select="h263_decoder"
> h264_crystalhd_decoder_select="crystalhd h264_mp4toannexb_bsf h264_parser"
> +h264_cuvid_hwaccel_deps="cuda cuvid"
> h264_d3d11va_hwaccel_deps="d3d11va"
> h264_d3d11va_hwaccel_select="h264_decoder"
> h264_dxva2_hwaccel_deps="dxva2"
> @@ -2564,6 +2569,7 @@ h264_vdpau_hwaccel_deps="vdpau"
> h264_vdpau_hwaccel_select="h264_decoder"
> h264_videotoolbox_hwaccel_deps="videotoolbox"
> h264_videotoolbox_hwaccel_select="h264_decoder"
> +hevc_cuvid_hwaccel_deps="cuda cuvid"
> hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
> hevc_d3d11va_hwaccel_select="hevc_decoder"
> hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC"
> @@ -2657,6 +2663,8 @@ hwupload_cuda_filter_deps="cuda"
> scale_npp_filter_deps="cuda libnpp"
>
> nvenc_encoder_deps="nvenc"
> +h264_cuvid_decoder_deps="cuda cuvid"
> +h264_cuvid_decoder_select="h264_mp4toannexb_bsf h264_cuvid_hwaccel"
> h264_qsv_decoder_deps="libmfx"
> h264_qsv_decoder_select="h264_mp4toannexb_bsf h264_parser qsvdec h264_qsv_hwaccel"
> h264_qsv_encoder_deps="libmfx"
> @@ -2664,6 +2672,8 @@ h264_qsv_encoder_select="qsvenc"
> h264_vaapi_encoder_deps="VAEncPictureParameterBufferH264"
> h264_vaapi_encoder_select="vaapi_encode golomb"
>
> +hevc_cuvid_decoder_deps="cuda cuvid"
> +hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf hevc_cuvid_hwaccel"
> hevc_qsv_decoder_deps="libmfx"
> hevc_qsv_decoder_select="hevc_mp4toannexb_bsf hevc_parser qsvdec hevc_qsv_hwaccel"
> hevc_qsv_encoder_deps="libmfx"
> @@ -5002,6 +5012,7 @@ die_license_disabled gpl libxvid
> die_license_disabled gpl x11grab
>
> die_license_disabled nonfree cuda
> +die_license_disabled nonfree cuvid
> die_license_disabled nonfree libfaac
> die_license_disabled nonfree libnpp
> enabled gpl && die_license_disabled_gpl nonfree libfdk_aac
> @@ -5572,6 +5583,11 @@ for func in $COMPLEX_FUNCS; do
> eval check_complexfunc $func \${${func}_args:-1}
> done
>
> +# Enable CUVID by default if CUDA is enabled
> +if enabled cuda && ! disabled cuvid; then
> + enable cuvid
> +fi
> +
> # these are off by default, so fail if requested and not available
> enabled avfoundation_indev && { check_header_objcc AVFoundation/AVFoundation.h || disable avfoundation_indev; }
> enabled avfoundation_indev && { check_lib2 CoreGraphics/CoreGraphics.h CGGetActiveDisplayList -framework CoreGraphics ||
> @@ -5581,6 +5597,10 @@ enabled avisynth && { { check_lib2 "windows.h" LoadLibrary; } ||
> die "ERROR: LoadLibrary/dlopen not found for avisynth"; }
> enabled cuda && { check_lib cuda.h cuInit -lcuda ||
> die "ERROR: CUDA not found"; }
> +enabled cuvid && { check_lib cuviddec.h cuvidCreateDecoder -lnvcuvid ||
> + die "ERROR: CUVID not found"; } &&
> + { enabled cuda ||
> + die "ERROR: CUVID requires CUDA"; }
> enabled chromaprint && require chromaprint chromaprint.h chromaprint_get_version -lchromaprint
> enabled coreimage_filter && { check_header_objcc QuartzCore/CoreImage.h || disable coreimage_filter; }
> enabled coreimagesrc_filter && { check_header_objcc QuartzCore/CoreImage.h || disable coreimagesrc_filter; }
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 6e26aad..2927b84 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -767,12 +767,14 @@ OBJS-$(CONFIG_QSV) += qsv.o
> OBJS-$(CONFIG_QSVDEC) += qsvdec.o
> OBJS-$(CONFIG_QSVENC) += qsvenc.o
>
> +OBJS-$(CONFIG_H264_CUVID_DECODER) += cuvid.o
> OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o
> OBJS-$(CONFIG_H264_VDA_DECODER) += vda_h264_dec.o
> OBJS-$(CONFIG_H264_OMX_ENCODER) += omx.o
> OBJS-$(CONFIG_H264_QSV_DECODER) += qsvdec_h2645.o
> OBJS-$(CONFIG_H264_QSV_ENCODER) += qsvenc_h264.o
> OBJS-$(CONFIG_H264_VAAPI_ENCODER) += vaapi_encode_h264.o vaapi_encode_h26x.o
> +OBJS-$(CONFIG_HEVC_CUVID_DECODER) += cuvid.o
> OBJS-$(CONFIG_HEVC_QSV_DECODER) += qsvdec_h2645.o
> OBJS-$(CONFIG_HEVC_QSV_ENCODER) += qsvenc_hevc.o hevc_ps_enc.o h2645_parse.o
> OBJS-$(CONFIG_HEVC_VAAPI_ENCODER) += vaapi_encode_h265.o vaapi_encode_h26x.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 7aa54ee..9256e99 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -69,6 +69,7 @@ void avcodec_register_all(void)
> /* hardware accelerators */
> REGISTER_HWACCEL(H263_VAAPI, h263_vaapi);
> REGISTER_HWACCEL(H263_VIDEOTOOLBOX, h263_videotoolbox);
> + REGISTER_HWACCEL(H264_CUVID, h264_cuvid);
> REGISTER_HWACCEL(H264_D3D11VA, h264_d3d11va);
> REGISTER_HWACCEL(H264_DXVA2, h264_dxva2);
> REGISTER_HWACCEL(H264_MMAL, h264_mmal);
> @@ -78,6 +79,7 @@ void avcodec_register_all(void)
> REGISTER_HWACCEL(H264_VDA_OLD, h264_vda_old);
> REGISTER_HWACCEL(H264_VDPAU, h264_vdpau);
> REGISTER_HWACCEL(H264_VIDEOTOOLBOX, h264_videotoolbox);
> + REGISTER_HWACCEL(HEVC_CUVID, hevc_cuvid);
> REGISTER_HWACCEL(HEVC_D3D11VA, hevc_d3d11va);
> REGISTER_HWACCEL(HEVC_DXVA2, hevc_dxva2);
> REGISTER_HWACCEL(HEVC_QSV, hevc_qsv);
> @@ -617,6 +619,7 @@ void avcodec_register_all(void)
> /* external libraries, that shouldn't be used by default if one of the
> * above is available */
> REGISTER_ENCODER(LIBOPENH264, libopenh264);
> + REGISTER_DECODER(H264_CUVID, h264_cuvid);
> REGISTER_ENCODER(H264_QSV, h264_qsv);
> REGISTER_ENCODER(H264_VAAPI, h264_vaapi);
> REGISTER_ENCODER(H264_VIDEOTOOLBOX, h264_videotoolbox);
> @@ -624,6 +627,7 @@ void avcodec_register_all(void)
> REGISTER_ENCODER(H264_OMX, h264_omx);
> REGISTER_ENCODER(NVENC_H264, nvenc_h264);
> REGISTER_ENCODER(NVENC_HEVC, nvenc_hevc);
> + REGISTER_DECODER(HEVC_CUVID, hevc_cuvid);
> REGISTER_ENCODER(HEVC_QSV, hevc_qsv);
> REGISTER_ENCODER(HEVC_VAAPI, hevc_vaapi);
> REGISTER_ENCODER(LIBKVAZAAR, libkvazaar);
> diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
> new file mode 100644
> index 0000000..6cadadf
> --- /dev/null
> +++ b/libavcodec/cuvid.c
> @@ -0,0 +1,550 @@
> +/*
> + * Nvidia CUVID decoder
> + * Copyright (c) 2016 Timo Rothenpieler <timo at rothenpieler.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/buffer.h"
> +#include "libavutil/mathematics.h"
> +#include "libavutil/hwcontext.h"
> +#include "libavutil/hwcontext_cuda.h"
> +#include "libavutil/fifo.h"
> +#include "libavutil/log.h"
> +
> +#include "avcodec.h"
> +#include "internal.h"
> +
> +#include <nvcuvid.h>
> +
> +#define MAX_FRAME_COUNT 20
> +#define FRAME_DELAY 4
> +
> +typedef struct CuvidContext
> +{
> + CUvideodecoder cudecoder;
> + CUvideoparser cuparser;
> +
> + AVBufferRef *hwdevice;
> + AVBufferRef *hwframe;
> +
> + AVBSFContext *bsf;
> +
> + AVFifoBuffer *frame_queue;
> +
> + int internal_error;
> +} CuvidContext;
> +
> +static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
> +{
> + AVCodecContext *avctx = opaque;
> + CuvidContext *ctx = avctx->priv_data;
> + AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
> + CUVIDDECODECREATEINFO cuinfo;
> + CUresult err;
> +
> + ctx->internal_error = 0;
> +
> + if (ctx->cudecoder) {
> + av_log(avctx, AV_LOG_ERROR, "re-initializing decoder is not supported\n");
> + ctx->internal_error = AVERROR(EINVAL);
> + return 0;
> + }
cuvid_handle_video_sequence is typically called quite often when
decoding something like an MPEG-TS broadcast stream.
It might be nice to support re-init. Or at least, check whether the key
properties actually changed (like frame w/h and whatever else is
relevant), and don't error out if they did not.
.
> +
> + if (hwframe_ctx->pool) {
> + av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized\n");
> + ctx->internal_error = AVERROR(EINVAL);
> + return 0;
> + }
> +
> + avctx->width = format->coded_width;
> + avctx->height = format->coded_height;
This ignores cropping info, e.g. try to play any ordinary 1080p h264
clip, and you would get 1088 lines of output.
avctx->width/height should be set to format->display_area.right/bottom
respectively, and avctx->coded_width/height to the coded fields from
format.
> +
> + memset(&cuinfo, 0, sizeof(cuinfo));
> +
> + cuinfo.CodecType = format->codec;
> + cuinfo.ChromaFormat = format->chroma_format;
> + cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
> + cuinfo.ulWidth = format->coded_width;
> + cuinfo.ulHeight = format->coded_height;
> + cuinfo.ulTargetWidth = cuinfo.ulWidth;
> + cuinfo.ulTargetHeight = cuinfo.ulHeight;
> + cuinfo.display_area.right = cuinfo.ulWidth;
> + cuinfo.display_area.bottom = cuinfo.ulHeight;
A quick hint: even though it might be tempting to copy display_area
here from the format struct, do not do that, otherwise the decoder
starts to process the video.
So this code is fine, just make use of display_area when filling
avctx, and not here.
> + cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
> + cuinfo.ulNumOutputSurfaces = 1;
> + cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
> +
> + if (format->progressive_sequence)
> + cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
> + else
> + cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
If you want to actually support proper deinterlacing, you would need
to do more in the code that uses cuvidMapVideoFrame.
In the absence of that, and of an option to let the user choose it, I
would recommend to just not deinterlace and output the video as-is,
just like any other decoder we have right now.
> +
> + err = cuvidCreateDecoder(&ctx->cudecoder, &cuinfo);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Error creating a CUVID decoder\n");
> + ctx->internal_error = AVERROR_UNKNOWN;
> + return 0;
> + }
> +
> + hwframe_ctx->format = AV_PIX_FMT_CUDA;
> + hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
> + hwframe_ctx->width = FFALIGN(cuinfo.ulTargetWidth, 16);
> + hwframe_ctx->height = FFALIGN(cuinfo.ulTargetHeight, 16);
> +
> + if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
> + av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
> + return 0;
> + }
> +
> + return 1;
> +}
> +
> +static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
> +{
> + AVCodecContext *avctx = opaque;
> + CuvidContext *ctx = avctx->priv_data;
> + CUresult err;
> +
> + ctx->internal_error = 0;
> +
> + err = cuvidDecodePicture(ctx->cudecoder, picparams);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuvidDecodePicture failed\n");
> + ctx->internal_error = AVERROR_UNKNOWN;
> + return 0;
> + }
> +
> + return 1;
> +}
> +
> +static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
> +{
> + AVCodecContext *avctx = opaque;
> + CuvidContext *ctx = avctx->priv_data;
> +
> + ctx->internal_error = 0;
> +
> + av_fifo_generic_write(ctx->frame_queue, dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL);
> +
> + return 1;
> +}
> +
> +static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
> +{
> + CuvidContext *ctx = avctx->priv_data;
> + AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
> + AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
> + CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
> + AVFrame *frame = data;
> + CUVIDSOURCEDATAPACKET cupkt;
> + CUresult err;
> + AVPacket filter_packet = { 0 };
> + AVPacket filtered_packet = { 0 };
> + int ret = 0;
> +
> + if (avpkt->size) {
> + if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
> + av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
> + return ret;
> + }
> +
> + if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
> + av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
> + av_packet_unref(&filter_packet);
> + return ret;
> + }
> +
> + if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
> + av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
> + return ret;
> + }
> + }
> +
> + err = cuCtxPushCurrent(cuda_ctx);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
> + av_packet_unref(&filtered_packet);
> + return AVERROR_UNKNOWN;
> + }
> +
> + memset(&cupkt, 0, sizeof(cupkt));
> +
> + if (avpkt->size && filtered_packet.size) {
> + cupkt.payload_size = filtered_packet.size;
> + cupkt.payload = filtered_packet.data;
> +
> + if (filtered_packet.pts != AV_NOPTS_VALUE) {
> + cupkt.flags = CUVID_PKT_TIMESTAMP;
> + cupkt.timestamp = av_rescale_q(filtered_packet.pts, avctx->time_base, (AVRational){1, 10000000});
> + }
> + } else {
> + cupkt.flags = CUVID_PKT_ENDOFSTREAM;
> + }
> +
> + err = cuvidParseVideoData(ctx->cuparser, &cupkt);
> +
> + av_packet_unref(&filtered_packet);
> +
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuvidParseVideoData failed\n");
> + if (ctx->internal_error)
> + ret = ctx->internal_error;
> + else
> + ret = AVERROR_UNKNOWN;
> + goto error;
> + }
> +
> + if (av_fifo_size(ctx->frame_queue) >= FRAME_DELAY * sizeof(CUVIDPARSERDISPINFO)) {
> + CUVIDPARSERDISPINFO dispinfo;
> + CUVIDPROCPARAMS params;
> + unsigned int pitch = 0;
> + CUdeviceptr mapped_frame = 0;
> + int offset = 0;
> + int i;
> +
> + av_fifo_generic_read(ctx->frame_queue, &dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL);
> +
> + memset(&params, 0, sizeof(params));
> + params.progressive_frame = dispinfo.progressive_frame;
> + params.second_field = 0;
> + params.top_field_first = dispinfo.top_field_first;
> + params.unpaired_field = (dispinfo.progressive_frame == 1 || dispinfo.repeat_first_field <= 1);
Did you find some docs somewhere on when to set unpaired_field? I've
not been setting it for years and never had any issues.
It definitely sounds odd to set it for progressive, since progressive
is technically two fields (but it's probably ignored if
progressive_frame is 1). Setting it for repeat=1 also sounds wrong,
since telecine still has two fields in it, you just repeat one of them.
For deinterlacing, as hinted above, you would need to run this entire
block twice per frame, once with second_field=0, and once with
second_field=1, but if anything that should be entirely optional.
> +
> + err = cuvidMapVideoFrame(ctx->cudecoder, dispinfo.picture_index, &mapped_frame, &pitch, &params);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuvidMapVideoFrame failed on index %d: 0x%x\n", dispinfo.picture_index, (int)err);
> + ret = AVERROR_EXTERNAL;
> + goto error;
> + }
> +
> + if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
> + ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
> + if (ret < 0) {
> + av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
> + goto error;
> + }
> +
> + for (i = 0; i < 2; i++) {
> + CUDA_MEMCPY2D cpy = {
> + .srcMemoryType = CU_MEMORYTYPE_DEVICE,
> + .dstMemoryType = CU_MEMORYTYPE_DEVICE,
> + .srcDevice = mapped_frame,
> + .dstDevice = (CUdeviceptr)frame->data[i],
> + .srcPitch = pitch,
> + .dstPitch = frame->linesize[i],
> + .srcY = offset,
> + .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
> + .Height = avctx->height >> (i ? 1 : 0),
> + };
> +
> + err = cuMemcpy2D(&cpy);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuMemcpy2D failed\n");
> + ret = AVERROR_EXTERNAL;
> + goto error;
> + }
> +
> + offset += avctx->height;
> + }
> + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
> + AVFrame *tmp_frame = av_frame_alloc();
> + if (!tmp_frame) {
> + av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
> + cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame);
> + ret = AVERROR(ENOMEM);
> + goto error;
> + }
> +
> + tmp_frame->format = AV_PIX_FMT_CUDA;
> + tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
> + tmp_frame->data[0] = (uint8_t*)mapped_frame;
> + tmp_frame->linesize[0] = pitch;
> + tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->height * pitch);
> + tmp_frame->linesize[1] = pitch;
> + tmp_frame->width = avctx->width;
> + tmp_frame->height = avctx->height;
> +
> + ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
> + if (ret) {
> + av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
> + cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame);
> + av_frame_free(&tmp_frame);
> + goto error;
> + }
> +
> + av_frame_free(&tmp_frame);
> + } else {
> + ret = AVERROR_BUG;
> + goto error;
> + }
> +
> + err = cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
> + ret = AVERROR_EXTERNAL;
> + goto error;
> + }
> +
> + frame->width = avctx->width;
> + frame->height = avctx->height;
> + frame->pts = frame->pkt_pts = av_rescale_q(dispinfo.timestamp, (AVRational){1, 10000000}, avctx->time_base);
> +
> + *got_frame = 1;
> + } else {
> + *got_frame = 0;
> + }
> +
> +error:
> + err = cuCtxPopCurrent(&dummy);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuvidCtxUnlock failed\n");
> + ret = AVERROR_EXTERNAL;
> + }
> +
> + return ret;
> +}
> +
> +static av_cold int cuvid_decode_end(AVCodecContext *avctx)
> +{
> + CuvidContext *ctx = avctx->priv_data;
> +
> + av_fifo_freep(&ctx->frame_queue);
> +
> + if (ctx->bsf)
> + av_bsf_free(&ctx->bsf);
> +
> + if (ctx->cuparser)
> + cuvidDestroyVideoParser(ctx->cuparser);
> +
> + if (ctx->cudecoder)
> + cuvidDestroyDecoder(ctx->cudecoder);
> +
> + av_buffer_unref(&ctx->hwframe);
> + av_buffer_unref(&ctx->hwdevice);
> +
> + return 0;
> +}
> +
> +static void cuvid_ctx_free(AVHWDeviceContext *ctx)
> +{
> + AVCUDADeviceContext *hwctx = ctx->hwctx;
> + cuCtxDestroy(hwctx->cuda_ctx);
> +}
> +
> +static av_cold int cuvid_decode_init(AVCodecContext *avctx)
> +{
> + CuvidContext *ctx = avctx->priv_data;
> + AVCUDADeviceContext *device_hwctx;
> + AVHWDeviceContext *device_ctx;
> + AVHWFramesContext *hwframe_ctx;
> + CUVIDPARSERPARAMS cuparseinfo;
> + CUdevice device;
> + CUcontext cuda_ctx = NULL;
> + CUcontext dummy;
> + CUresult err;
> + const AVBitStreamFilter *bsf;
> + int ret = 0;
> +
> + enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
> + AV_PIX_FMT_NV12,
> + AV_PIX_FMT_NONE };
> +
> + ret = ff_get_format(avctx, pix_fmts);
> + if (ret < 0) {
> + av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
> + return ret;
> + }
> +
> + avctx->pix_fmt = ret;
> +
> + if (avctx->hw_frames_ctx) {
> + ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
> + if (!ctx->hwframe) {
> + ret = AVERROR(ENOMEM);
> + goto error;
> + }
> +
> + hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
> +
> + ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
> + if (!ctx->hwdevice) {
> + ret = AVERROR(ENOMEM);
> + goto error;
> + }
> +
> + device_ctx = hwframe_ctx->device_ctx;
> + device_hwctx = device_ctx->hwctx;
> + cuda_ctx = device_hwctx->cuda_ctx;
> + } else {
> + ctx->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
> + if (!ctx->hwdevice) {
> + av_log(avctx, AV_LOG_ERROR, "Error allocating hwdevice\n");
> + ret = AVERROR(ENOMEM);
> + goto error;
> + }
> +
> + err = cuInit(0);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
> + ret = AVERROR_UNKNOWN;
> + goto error;
> + }
> +
> + err = cuDeviceGet(&device, 0); ///TODO: Make device index configurable
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Could not get the device number %d\n", 0);
> + ret = AVERROR_UNKNOWN;
> + goto error;
> + }
> +
> + err = cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, device);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
> + ret = AVERROR_UNKNOWN;
> + goto error;
> + }
> +
> + device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
> + device_ctx->free = cuvid_ctx_free;
> +
> + device_hwctx = device_ctx->hwctx;
> + device_hwctx->cuda_ctx = cuda_ctx;
> +
> + err = cuCtxPopCurrent(&dummy);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
> + ret = AVERROR_UNKNOWN;
> + goto error;
> + }
> +
> + ret = av_hwdevice_ctx_init(ctx->hwdevice);
> + if (ret < 0) {
> + av_log(avctx, AV_LOG_ERROR, "av_hwdevice_ctx_init failed\n");
> + goto error;
> + }
> +
> + ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
> + if (!ctx->hwframe) {
> + av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
> + ret = AVERROR(ENOMEM);
> + goto error;
> + }
> + }
> +
> + memset(&cuparseinfo, 0, sizeof(cuparseinfo));
> +
> + if (avctx->codec->id == AV_CODEC_ID_H264)
> + cuparseinfo.CodecType = cudaVideoCodec_H264;
> + else if (avctx->codec->id == AV_CODEC_ID_HEVC)
> + cuparseinfo.CodecType = cudaVideoCodec_HEVC;
> + else
> + return AVERROR_BUG;
> +
> + cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
> + cuparseinfo.ulMaxDisplayDelay = FRAME_DELAY;
> + cuparseinfo.pUserData = avctx;
> + cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
> + cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
> + cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
> +
> + err = cuvidCreateVideoParser(&ctx->cuparser, &cuparseinfo);
> + if (err != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Error creating a CUVID parser\n");
> + ret = AVERROR_UNKNOWN;
> + goto error;
> + }
> +
> + if (avctx->codec->id == AV_CODEC_ID_H264)
> + bsf = av_bsf_get_by_name("h264_mp4toannexb");
> + else if (avctx->codec->id == AV_CODEC_ID_HEVC)
> + bsf = av_bsf_get_by_name("hevc_mp4toannexb");
> + else
> + return AVERROR_BUG;
> +
> + if (!bsf) {
> + ret = AVERROR_BSF_NOT_FOUND;
> + goto error;
> + }
> + if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
> + goto error;
> + }
> + if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
> + av_bsf_free(&ctx->bsf);
> + goto error;
> + }
> +
> + ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CUVIDPARSERDISPINFO));
> + if (!ctx->frame_queue) {
> + ret = AVERROR(ENOMEM);
> + goto error;
> + }
> +
> + return 0;
> +
> +error:
> + cuvid_decode_end(avctx);
> + return ret;
> +}
> +
> +#if CONFIG_HEVC_CUVID_DECODER
> +AVHWAccel ff_hevc_cuvid_hwaccel = {
> + .name = "hevc_cuvid",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .id = AV_CODEC_ID_HEVC,
> + .pix_fmt = AV_PIX_FMT_CUDA,
> +};
> +
> +AVCodec ff_hevc_cuvid_decoder = {
> + .name = "hevc_cuvid",
> + .long_name = NULL_IF_CONFIG_SMALL("Nvidia CUVID HEVC Decoder"),
> + .type = AVMEDIA_TYPE_VIDEO,
> + .id = AV_CODEC_ID_HEVC,
> + .priv_data_size = sizeof(CuvidContext),
> + .init = cuvid_decode_init,
> + .close = cuvid_decode_end,
> + .decode = cuvid_decode_frame,
> + .capabilities = AV_CODEC_CAP_DELAY,
> + .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA,
> + AV_PIX_FMT_NV12,
> + AV_PIX_FMT_NONE },
> +};
> +#endif
> +
> +#if CONFIG_H264_CUVID_DECODER
> +AVHWAccel ff_h264_cuvid_hwaccel = {
> + .name = "h264_cuvid",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .id = AV_CODEC_ID_H264,
> + .pix_fmt = AV_PIX_FMT_CUDA,
> +};
> +
> +AVCodec ff_h264_cuvid_decoder = {
> + .name = "h264_cuvid",
> + .long_name = NULL_IF_CONFIG_SMALL("Nvidia CUVID H264 Decoder"),
> + .type = AVMEDIA_TYPE_VIDEO,
> + .id = AV_CODEC_ID_H264,
> + .priv_data_size = sizeof(CuvidContext),
> + .init = cuvid_decode_init,
> + .close = cuvid_decode_end,
> + .decode = cuvid_decode_frame,
> + .capabilities = AV_CODEC_CAP_DELAY,
> + .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA,
> + AV_PIX_FMT_NV12,
> + AV_PIX_FMT_NONE },
> +};
> +#endif
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index cf7f231..d30d3e2 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,8 +28,8 @@
> #include "libavutil/version.h"
>
> #define LIBAVCODEC_VERSION_MAJOR 57
> -#define LIBAVCODEC_VERSION_MINOR 44
> -#define LIBAVCODEC_VERSION_MICRO 101
> +#define LIBAVCODEC_VERSION_MINOR 45
> +#define LIBAVCODEC_VERSION_MICRO 100
>
> #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> LIBAVCODEC_VERSION_MINOR, \
> --
> 2.8.3
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
More information about the ffmpeg-devel
mailing list