[FFmpeg-devel] [PATCH 2/5] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content
Timo Rothenpieler
timo at rothenpieler.org
Sat Oct 20 23:58:34 EEST 2018
On 20.10.2018 22:46, Philip Langdale wrote:
> The latest generation video decoder on the Turing chips supports
> decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
> we need to account for the different chroma format and pick the
> right output and sw formats at the right times.
>
> There was one bug which was the hard-coded assumption that the
> first chroma plane would be half-height; I fixed this to use the
> actual shift value on the plane.
>
> The output formats ('2', and '3') are currently undocumented but
> appear to be YUV444P and YUV444P16 based on how they behave.
>
> Signed-off-by: Philip Langdale <philipl at overt.org>
> ---
> libavcodec/hevcdec.c | 3 +++
> libavcodec/nvdec.c | 43 +++++++++++++++++++++++++++++++-------
> libavutil/hwcontext_cuda.c | 2 ++
> 3 files changed, 40 insertions(+), 8 deletions(-)
>
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index a3b5c8cb71..972f2b56b6 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -409,6 +409,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
> #endif
> break;
> case AV_PIX_FMT_YUV420P12:
> + case AV_PIX_FMT_YUV444P:
> + case AV_PIX_FMT_YUV444P10:
> + case AV_PIX_FMT_YUV444P12:
> #if CONFIG_HEVC_NVDEC_HWACCEL
> *fmt++ = AV_PIX_FMT_CUDA;
> #endif
> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> index e779be3a45..43cc38485a 100644
> --- a/libavcodec/nvdec.c
> +++ b/libavcodec/nvdec.c
> @@ -34,6 +34,9 @@
> #include "nvdec.h"
> #include "internal.h"
>
> +#define NVDEC_FORMAT_YUV444P 2
> +#define NVDEC_FORMAT_YUV444P16 3
> +
> typedef struct NVDECDecoder {
> CUvideodecoder decoder;
>
> @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>
> CUVIDDECODECREATEINFO params = { 0 };
>
> - int cuvid_codec_type, cuvid_chroma_format;
> + cudaVideoSurfaceFormat output_format;
> + int cuvid_codec_type, cuvid_chroma_format, chroma_444;
> int ret = 0;
>
> sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
> av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
> return AVERROR(ENOSYS);
> }
> + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>
> if (!avctx->hw_frames_ctx) {
> ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
> @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
> return ret;
> }
>
> + switch (sw_desc->comp[0].depth) {
> + case 8:
> + output_format = chroma_444 ? NVDEC_FORMAT_YUV444P :
> + cudaVideoSurfaceFormat_NV12;
> + break;
> + case 10:
> + case 12:
> + output_format = chroma_444 ? NVDEC_FORMAT_YUV444P16 :
> + cudaVideoSurfaceFormat_P016;
> + break;
> + default:
> + av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
> + return AVERROR(ENOSYS);
> + }
> +
> frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>
> params.ulWidth = avctx->coded_width;
> @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
> params.ulTargetWidth = avctx->coded_width;
> params.ulTargetHeight = avctx->coded_height;
> params.bitDepthMinus8 = sw_desc->comp[0].depth - 8;
> - params.OutputFormat = params.bitDepthMinus8 ?
> - cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
> + params.OutputFormat = output_format;
> params.CodecType = cuvid_codec_type;
> params.ChromaFormat = cuvid_chroma_format;
> params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
> @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
> NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
> NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
>
> + AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
> +
> CUVIDPROCPARAMS vpp = { 0 };
> NVDECFrame *unmap_data = NULL;
>
> @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>
> unsigned int pitch, i;
> unsigned int offset = 0;
> + int shift_h = 0, shift_v = 0;
> int ret = 0;
>
> vpp.progressive_frame = 1;
> @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
> unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
> unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
>
> + av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
> for (i = 0; frame->linesize[i]; i++) {
> frame->data[i] = (uint8_t*)(devptr + offset);
> frame->linesize[i] = pitch;
> - offset += pitch * (frame->height >> (i ? 1 : 0));
> + offset += pitch * (frame->height >> (i ? shift_v : 0));
> }
>
> goto finish;
> @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
> {
> AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
> const AVPixFmtDescriptor *sw_desc;
> - int cuvid_codec_type, cuvid_chroma_format;
> + int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>
> sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> if (!sw_desc)
> @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
> av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
> return AVERROR(EINVAL);
> }
> + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>
> frames_ctx->format = AV_PIX_FMT_CUDA;
> frames_ctx->width = (avctx->coded_width + 1) & ~1;
> @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
> if (!frames_ctx->pool)
> return AVERROR(ENOMEM);
>
> + // It it semantically incorrect to use AX_PIX_FMT_YUV444P16 for either the 10
> + // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
> + // bits go at. P16 is unambiguous and matches.
This comment seems redundant, since AX_PIX_FMT_YUV444P16 isn't even used.
> switch (sw_desc->comp[0].depth) {
> case 8:
> - frames_ctx->sw_format = AV_PIX_FMT_NV12;
> + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
> break;
> case 10:
> - frames_ctx->sw_format = AV_PIX_FMT_P010;
> + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P10_MSB : AV_PIX_FMT_P010;
> break;
> case 12:
> - frames_ctx->sw_format = AV_PIX_FMT_P016;
> + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P12_MSB : AV_PIX_FMT_P016;
> break;
> default:
> return AVERROR(EINVAL);
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 3b1d53e799..43337f14f0 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -38,6 +38,8 @@ static const enum AVPixelFormat supported_formats[] = {
> AV_PIX_FMT_YUV444P,
> AV_PIX_FMT_P010,
> AV_PIX_FMT_P016,
> + AV_PIX_FMT_YUV444P10_MSB,
> + AV_PIX_FMT_YUV444P12_MSB,
You are adding these to supported formats, but are not actually adding
support, so cuda_frames_init and cuda_get_buffer will both run into the
BUG case. Should be super easy, as they're identical to 444P16.
> AV_PIX_FMT_YUV444P16,
Technically, this can go now. But I guess removing it is an API break,
so it gotta stay I guess.
> AV_PIX_FMT_0RGB32,
> AV_PIX_FMT_0BGR32,
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4538 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20181020/a9747a6d/attachment.bin>
More information about the ffmpeg-devel
mailing list