[FFmpeg-devel] [PATCH] avcodec/cuvid: Add support for P010 as an output surface format
wm4
nfxjfg at googlemail.com
Mon Nov 21 13:55:36 EET 2016
On Sat, 19 Nov 2016 17:18:08 -0800
Philip Langdale <philipl at overt.org> wrote:
> The nvidia 375.xx driver introduces support for P016 output surfaces,
> for 10bit and 12bit HEVC content (it's also the first driver to support
> hardware decoding of 12bit content).
>
> Technically, we don't support P016, but in practice I don't think we
> zero-out the extra bits in P010 so it can be used to carry the data.
>
> This change introduces cuvid decoder support for P010 output to
> both hardware and system memory surfaces. For simplicity, it does
> not maintain the previous ability to output NV12 for input video
> with more than 8 bits per sample - the user will need to update
> their driver to decode such videos.
>
> After this change, both cuvid and nvenc support P010, but the
> ffmpeg_cuvid transcoding logic will need more work to connect the
> two together. Similarly, the scale_npp filter still only works with
> 8bit surfaces.
>
> Signed-off-by: Philip Langdale <philipl at overt.org>
> ---
> compat/cuda/dynlink_cuviddec.h | 3 ++-
> libavcodec/cuvid.c | 58 +++++++++++++++++++++++++++++++-----------
> libavutil/hwcontext_cuda.c | 11 +++++++-
> 3 files changed, 55 insertions(+), 17 deletions(-)
>
> diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h
> index 17207bc..9ff2741 100644
> --- a/compat/cuda/dynlink_cuviddec.h
> +++ b/compat/cuda/dynlink_cuviddec.h
> @@ -83,7 +83,8 @@ typedef enum cudaVideoCodec_enum {
> * Video Surface Formats Enums
> */
> typedef enum cudaVideoSurfaceFormat_enum {
> - cudaVideoSurfaceFormat_NV12=0 /**< NV12 (currently the only supported output format) */
> + cudaVideoSurfaceFormat_NV12=0, /**< NV12 */
> + cudaVideoSurfaceFormat_P016=1 /**< P016 */
> } cudaVideoSurfaceFormat;
>
> /*!
> diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
> index c3e831a..34b0734 100644
> --- a/libavcodec/cuvid.c
> +++ b/libavcodec/cuvid.c
> @@ -28,6 +28,7 @@
> #include "libavutil/fifo.h"
> #include "libavutil/log.h"
> #include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
>
> #include "avcodec.h"
> #include "internal.h"
> @@ -103,11 +104,35 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
> CuvidContext *ctx = avctx->priv_data;
> AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
> CUVIDDECODECREATEINFO cuinfo;
> + int surface_fmt;
> +
> + enum AVPixelFormat pix_fmts_nv12[3] = { AV_PIX_FMT_CUDA,
> + AV_PIX_FMT_NV12,
> + AV_PIX_FMT_NONE };
> +
> + enum AVPixelFormat pix_fmts_p010[3] = { AV_PIX_FMT_CUDA,
> + AV_PIX_FMT_P010,
> + AV_PIX_FMT_NONE };
>
> av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
>
> ctx->internal_error = 0;
>
> + surface_fmt = ff_get_format(avctx, format->bit_depth_luma_minus8 > 0 ?
> + pix_fmts_p010 : pix_fmts_nv12);
> + if (surface_fmt < 0) {
> + av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
> + ctx->internal_error = AVERROR(EINVAL);
> + return 0;
> + }
> +
> + av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
> + av_get_pix_fmt_name(avctx->pix_fmt),
> + av_get_pix_fmt_name(surface_fmt),
> + av_get_pix_fmt_name(avctx->sw_pix_fmt));
> +
> + avctx->pix_fmt = surface_fmt;
> +
> avctx->width = format->display_area.right;
> avctx->height = format->display_area.bottom;
>
> @@ -156,7 +181,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
> hwframe_ctx->width < avctx->width ||
> hwframe_ctx->height < avctx->height ||
> hwframe_ctx->format != AV_PIX_FMT_CUDA ||
> - hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
> + hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
> av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
> ctx->internal_error = AVERROR(EINVAL);
> return 0;
> @@ -177,7 +202,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
>
> cuinfo.CodecType = ctx->codec_type = format->codec;
> cuinfo.ChromaFormat = format->chroma_format;
> - cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
> +
> + switch (avctx->sw_pix_fmt) {
> + case AV_PIX_FMT_NV12:
> + cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
> + break;
> + case AV_PIX_FMT_P010:
> + cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
> + break;
> + default:
> + av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12 or P010 are not supported\n");
> + ctx->internal_error = AVERROR(EINVAL);
> + return 0;
> + }
>
> cuinfo.ulWidth = avctx->coded_width;
> cuinfo.ulHeight = avctx->coded_height;
> @@ -209,7 +246,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
>
> if (!hwframe_ctx->pool) {
> hwframe_ctx->format = AV_PIX_FMT_CUDA;
> - hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
> + hwframe_ctx->sw_format = avctx->sw_pix_fmt;
> hwframe_ctx->width = avctx->width;
> hwframe_ctx->height = avctx->height;
>
> @@ -417,7 +454,8 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
>
> offset += avctx->coded_height;
> }
> - } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
> + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
> + avctx->pix_fmt == AV_PIX_FMT_P010) {
> AVFrame *tmp_frame = av_frame_alloc();
> if (!tmp_frame) {
> av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
> @@ -615,17 +653,6 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
> const AVBitStreamFilter *bsf;
> int ret = 0;
>
> - enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
> - AV_PIX_FMT_NV12,
> - AV_PIX_FMT_NONE };
> -
> - ret = ff_get_format(avctx, pix_fmts);
> - if (ret < 0) {
> - av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
> - return ret;
> - }
> - avctx->pix_fmt = ret;
> -
> ret = cuvid_load_functions(&ctx->cvdl);
> if (ret < 0) {
> av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
> @@ -899,6 +926,7 @@ static const AVOption options[] = {
> .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
> .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
> AV_PIX_FMT_NV12, \
> + AV_PIX_FMT_P010, \
> AV_PIX_FMT_NONE }, \
> };
>
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 30de299..e413aa8 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -35,6 +35,7 @@ static const enum AVPixelFormat supported_formats[] = {
> AV_PIX_FMT_NV12,
> AV_PIX_FMT_YUV420P,
> AV_PIX_FMT_YUV444P,
> + AV_PIX_FMT_P010,
> };
>
> static void cuda_buffer_free(void *opaque, uint8_t *data)
> @@ -111,6 +112,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
> size = aligned_width * ctx->height * 3 / 2;
> break;
> case AV_PIX_FMT_YUV444P:
> + case AV_PIX_FMT_P010:
> size = aligned_width * ctx->height * 3;
> break;
> }
> @@ -125,7 +127,13 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
>
> static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
> {
> - int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
> + int aligned_width;
> + int width_in_bytes = ctx->width;
> +
> + if (ctx->sw_format == AV_PIX_FMT_P010) {
> + width_in_bytes *= 2;
> + }
> + aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
>
> frame->buf[0] = av_buffer_pool_get(ctx->pool);
> if (!frame->buf[0])
> @@ -133,6 +141,7 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>
> switch (ctx->sw_format) {
> case AV_PIX_FMT_NV12:
> + case AV_PIX_FMT_P010:
> frame->data[0] = frame->buf[0]->data;
> frame->data[1] = frame->data[0] + aligned_width * ctx->height;
> frame->linesize[0] = aligned_width;
I think it would be better to add a P016 pixfmt if the decoder can
output data in which the LSBs that are normally 0 in P010 are
actually set.
More information about the ffmpeg-devel mailing list