[FFmpeg-devel] [PATCH 2/5] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content

Sat Oct 20 23:58:34 EEST 2018

On 20.10.2018 22:46, Philip Langdale wrote:
> The latest generation video decoder on the Turing chips supports
> decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
> we need to account for the different chroma format and pick the
> right output and sw formats at the right times.
> 
> There was one bug which was the hard-coded assumption that the
> first chroma plane would be half-height; I fixed this to use the
> actual shift value on the plane.
> 
> The output formats ('2', and '3') are currently undocumented but
> appear to be YUV444P and YUV444P16 based on how they behave.
> 
> Signed-off-by: Philip Langdale <philipl at overt.org>
> ---
>   libavcodec/hevcdec.c       |  3 +++
>   libavcodec/nvdec.c         | 43 +++++++++++++++++++++++++++++++-------
>   libavutil/hwcontext_cuda.c |  2 ++
>   3 files changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index a3b5c8cb71..972f2b56b6 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -409,6 +409,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
>   #endif
>           break;
>       case AV_PIX_FMT_YUV420P12:
> +    case AV_PIX_FMT_YUV444P:
> +    case AV_PIX_FMT_YUV444P10:
> +    case AV_PIX_FMT_YUV444P12:
>   #if CONFIG_HEVC_NVDEC_HWACCEL
>           *fmt++ = AV_PIX_FMT_CUDA;
>   #endif
> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> index e779be3a45..43cc38485a 100644
> --- a/libavcodec/nvdec.c
> +++ b/libavcodec/nvdec.c
> @@ -34,6 +34,9 @@
>   #include "nvdec.h"
>   #include "internal.h"
>   
> +#define NVDEC_FORMAT_YUV444P 2
> +#define NVDEC_FORMAT_YUV444P16 3
> +
>   typedef struct NVDECDecoder {
>       CUvideodecoder decoder;
>   
> @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>   
>       CUVIDDECODECREATEINFO params = { 0 };
>   
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    cudaVideoSurfaceFormat output_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>       int ret = 0;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>           av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
>           return AVERROR(ENOSYS);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       if (!avctx->hw_frames_ctx) {
>           ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
> @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>               return ret;
>       }
>   
> +    switch (sw_desc->comp[0].depth) {
> +    case 8:
> +        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P :
> +                                     cudaVideoSurfaceFormat_NV12;
> +        break;
> +    case 10:
> +    case 12:
> +        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P16 :
> +                                     cudaVideoSurfaceFormat_P016;
> +        break;
> +    default:
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
> +        return AVERROR(ENOSYS);
> +    }
> +
>       frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>   
>       params.ulWidth             = avctx->coded_width;
> @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>       params.ulTargetWidth       = avctx->coded_width;
>       params.ulTargetHeight      = avctx->coded_height;
>       params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
> -    params.OutputFormat        = params.bitDepthMinus8 ?
> -                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
> +    params.OutputFormat        = output_format;
>       params.CodecType           = cuvid_codec_type;
>       params.ChromaFormat        = cuvid_chroma_format;
>       params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
> @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
>       NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
>   
> +    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
> +
>       CUVIDPROCPARAMS vpp = { 0 };
>       NVDECFrame *unmap_data = NULL;
>   
> @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>   
>       unsigned int pitch, i;
>       unsigned int offset = 0;
> +    int shift_h = 0, shift_v = 0;
>       int ret = 0;
>   
>       vpp.progressive_frame = 1;
> @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
>       unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
>   
> +    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
>       for (i = 0; frame->linesize[i]; i++) {
>           frame->data[i] = (uint8_t*)(devptr + offset);
>           frame->linesize[i] = pitch;
> -        offset += pitch * (frame->height >> (i ? 1 : 0));
> +        offset += pitch * (frame->height >> (i ? shift_v : 0));
>       }
>   
>       goto finish;
> @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>   {
>       AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
>       const AVPixFmtDescriptor *sw_desc;
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
>       if (!sw_desc)
> @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>           av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
>           return AVERROR(EINVAL);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       frames_ctx->format            = AV_PIX_FMT_CUDA;
>       frames_ctx->width             = (avctx->coded_width + 1) & ~1;
> @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>       if (!frames_ctx->pool)
>           return AVERROR(ENOMEM);
>   
> +    // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10
> +    // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
> +    // bits go at. P16 is unambiguous and matches.

This comment seems redundant, since AV_PIX_FMT_YUV444P16 isn't even used.

>       switch (sw_desc->comp[0].depth) {
>       case 8:
> -        frames_ctx->sw_format = AV_PIX_FMT_NV12;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
>           break;
>       case 10:
> -        frames_ctx->sw_format = AV_PIX_FMT_P010;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P10_MSB : AV_PIX_FMT_P010;
>           break;
>       case 12:
> -        frames_ctx->sw_format = AV_PIX_FMT_P016;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P12_MSB : AV_PIX_FMT_P016;
>           break;
>       default:
>           return AVERROR(EINVAL);
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 3b1d53e799..43337f14f0 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -38,6 +38,8 @@ static const enum AVPixelFormat supported_formats[] = {
>       AV_PIX_FMT_YUV444P,
>       AV_PIX_FMT_P010,
>       AV_PIX_FMT_P016,
> +    AV_PIX_FMT_YUV444P10_MSB,
> +    AV_PIX_FMT_YUV444P12_MSB,

You are adding these to the supported formats, but are not actually adding 
support for them, so cuda_frames_init and cuda_get_buffer will both run into 
the BUG case. Fixing that should be easy, as they are handled identically to 
YUV444P16.

>       AV_PIX_FMT_YUV444P16,
Technically, this could be removed now, but removing it would be an API 
break, so I guess it has to stay.

>       AV_PIX_FMT_0RGB32,
>       AV_PIX_FMT_0BGR32,
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4538 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20181020/a9747a6d/attachment.bin>