[FFmpeg-devel] [PATCH] avcodec: implement vp9 nvdec hwaccel

Sun Nov 12 01:04:07 EET 2017

---
 configure              |   2 +
 libavcodec/Makefile    |   1 +
 libavcodec/allcodecs.c |   1 +
 libavcodec/nvdec.c     |   1 +
 libavcodec/nvdec_vp9.c | 225 +++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/vp9.c       |  11 ++-
 6 files changed, 240 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/nvdec_vp9.c

diff --git a/configure b/configure
index e7b06d4305..34bb4caa57 100755
--- a/configure
+++ b/configure
@@ -2754,6 +2754,8 @@ vp9_d3d11va2_hwaccel_select="vp9_decoder"
 vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
 vp9_dxva2_hwaccel_select="vp9_decoder"
 vp9_mediacodec_hwaccel_deps="mediacodec"
+vp9_nvdec_hwaccel_deps="cuda nvdec"
+vp9_nvdec_hwaccel_select="vp9_decoder"
 vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9_bit_depth"
 vp9_vaapi_hwaccel_select="vp9_decoder"
 wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7ac4e13a06..64cff75539 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -870,6 +870,7 @@ OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
 OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)        += dxva2_vp9.o
 OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)          += dxva2_vp9.o
+OBJS-$(CONFIG_VP9_NVDEC_HWACCEL)          += nvdec_vp9.o
 OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec_other.o
 
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index c817003693..8ac9d9629d 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -123,6 +123,7 @@ static void register_all(void)
     REGISTER_HWACCEL(VP9_D3D11VA2,      vp9_d3d11va2);
     REGISTER_HWACCEL(VP9_DXVA2,         vp9_dxva2);
     REGISTER_HWACCEL(VP9_MEDIACODEC,    vp9_mediacodec);
+    REGISTER_HWACCEL(VP9_NVDEC,         vp9_nvdec);
     REGISTER_HWACCEL(VP9_VAAPI,         vp9_vaapi);
     REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
     REGISTER_HWACCEL(WMV3_D3D11VA2,     wmv3_d3d11va2);
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index f10da670d6..eedf9b2f01 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -54,6 +54,7 @@ static int map_avcodec_id(enum AVCodecID id)
     switch (id) {
     case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
     case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
+    case AV_CODEC_ID_VP9:  return cudaVideoCodec_VP9;
     }
     return -1;
 }
diff --git a/libavcodec/nvdec_vp9.c b/libavcodec/nvdec_vp9.c
new file mode 100644
index 0000000000..04e38be7ba
--- /dev/null
+++ b/libavcodec/nvdec_vp9.c
@@ -0,0 +1,225 @@
+/*
+ * VP9 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2016 Timo Rothenpieler
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/pixdesc.h"
+
+#include "avcodec.h"
+#include "nvdec.h"
+#include "decode.h"
+#include "internal.h"
+#include "vp9shared.h"
+
+static unsigned char get_ref_idx(AVFrame *frame)
+{
+    /* Returns the NVDEC surface index stored for frame, or 255 when none can be found. */
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+
+    if (!frame || !frame->private_ref)
+        return 255;
+
+    fdd = (FrameDecodeData*)frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+    return cf ? cf->idx : 255; /* guard: a frame without hwaccel private data must not be dereferenced */
+}
+
+static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    /* Fill ctx->pic_params (CUVIDPICPARAMS) from the parsed VP9 frame header; buffer and size
+     * are not read here. Returns 0 on success or the error from ff_nvdec_start_frame(). */
+    VP9SharedContext *h = avctx->priv_data;
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
+    CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9;
+    FrameDecodeData *fdd;
+    NVDECFrame *cf;
+    AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;
+    int ret, i;
+
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
+    if (ret < 0)
+        return ret;
+
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+
+    *pp = (CUVIDPICPARAMS) {
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16, /* round up so a trailing partial 16-pixel block is counted */
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
+        .CurrPicIdx        = cf->idx,
+
+        .CodecSpecific.vp9 = {
+            .width = cur_frame->width,
+            .height = cur_frame->height,
+
+            .LastRefIdx = get_ref_idx(h->refs[h->h.refidx[0]].f),
+            .GoldenRefIdx = get_ref_idx(h->refs[h->h.refidx[1]].f),
+            .AltRefIdx = get_ref_idx(h->refs[h->h.refidx[2]].f),
+
+            .profile = h->h.profile,
+            .frameContextIdx = h->h.framectxid,
+            .frameType = !h->h.keyframe, /* 0 for a key frame, 1 otherwise */
+            .showFrame = !h->h.invisible,
+            .errorResilient = h->h.errorres,
+            .frameParallelDecoding = h->h.parallelmode,
+            .subSamplingX = pixdesc->log2_chroma_w,
+            .subSamplingY = pixdesc->log2_chroma_h,
+            .intraOnly = h->h.intraonly,
+            .allow_high_precision_mv = h->h.keyframe ? 0 : h->h.highprecisionmvs, /* forced to 0 on key frames */
+            .refreshEntropyProbs = h->h.refreshctx,
+
+            .refFrameSignBias[0] = 0, /* entries 1..3 are filled from h->h.signbias below */
+
+            .bitDepthMinus8Luma = pixdesc->comp[0].depth - 8,
+            .bitDepthMinus8Chroma = pixdesc->comp[1].depth - 8,
+
+            .loopFilterLevel = h->h.filter.level,
+            .loopFilterSharpness = h->h.filter.sharpness,
+            .modeRefLfEnabled = h->h.lf_delta.enabled,
+
+            .log2_tile_columns = h->h.tiling.log2_tile_cols,
+            .log2_tile_rows = h->h.tiling.log2_tile_rows,
+
+            .segmentEnabled = h->h.segmentation.enabled,
+            .segmentMapUpdate = h->h.segmentation.update_map,
+            .segmentMapTemporalUpdate = h->h.segmentation.temporal,
+            .segmentFeatureMode = h->h.segmentation.absolute_vals,
+
+            .qpYAc = h->h.yac_qi,
+            .qpYDc = h->h.ydc_qdelta,
+            .qpChDc = h->h.uvdc_qdelta,
+            .qpChAc = h->h.uvac_qdelta,
+
+            .resetFrameContext = h->h.resetctx,
+            .mcomp_filter_type = h->h.filtermode ^ (h->h.filtermode <= 1), /* swaps values 0 and 1, leaves others unchanged */
+
+            .frameTagSize = h->h.uncompressed_header_size,
+            .offsetToDctParts = h->h.compressed_header_size,
+        }
+    };
+
+    for (i = 0; i < 2; i++)
+        ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];
+
+    for (i = 0; i < 4; i++)
+        ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];
+
+    for (i = 0; i < 7; i++)
+        ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
+
+    for (i = 0; i < 3; i++) {
+        ppc->activeRefIdx[i] = h->h.refidx[i];
+        ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
+        ppc->refFrameSignBias[i + 1] = h->h.signbias[i];
+    }
+
+    for (i = 0; i < 8; i++) {
+        ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
+        ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
+        ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
+        ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;
+
+        ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
+        ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
+        ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
+        ppc->segmentFeatureData[i][3] = 0; /* the skip feature is enable-only here; no value is stored */
+    }
+
+    switch (avctx->colorspace) { /* any value not listed below maps to 0 via default */
+    default:
+    case AVCOL_SPC_UNSPECIFIED:
+        ppc->colorSpace = 0;
+        break;
+    case AVCOL_SPC_BT470BG:
+        ppc->colorSpace = 1;
+        break;
+    case AVCOL_SPC_BT709:
+        ppc->colorSpace = 2;
+        break;
+    case AVCOL_SPC_SMPTE170M:
+        ppc->colorSpace = 3;
+        break;
+    case AVCOL_SPC_SMPTE240M:
+        ppc->colorSpace = 4;
+        break;
+    case AVCOL_SPC_BT2020_NCL:
+        ppc->colorSpace = 5;
+        break;
+    case AVCOL_SPC_RESERVED:
+        ppc->colorSpace = 6;
+        break;
+    case AVCOL_SPC_RGB:
+        ppc->colorSpace = 7;
+        break;
+    }
+
+    return 0;
+}
+
+static int nvdec_vp9_end_frame(AVCodecContext *avctx)
+{
+    NVDECContext *nvdec = avctx->internal->hwaccel_priv_data;
+    const int status = ff_nvdec_end_frame(avctx); /* common NVDEC end-of-frame handling */
+    nvdec->bitstream = NULL; /* cleared on success and on failure alike */
+    return status;
+}
+
+static int nvdec_vp9_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
+{
+    NVDECContext *nvdec = avctx->internal->hwaccel_priv_data;
+    void *grown = av_fast_realloc(nvdec->slice_offsets, &nvdec->slice_offsets_allocated,
+                                  (nvdec->nb_slices + 1) * sizeof(*nvdec->slice_offsets));
+
+    if (!grown)
+        return AVERROR(ENOMEM);
+    nvdec->slice_offsets = grown;
+
+    /* When no bitstream base is set yet, this buffer becomes the base. */
+    if (!nvdec->bitstream)
+        nvdec->bitstream = (uint8_t*)buffer;
+
+    /* Append this slice: its offset from the base, and its size to the total length. */
+    nvdec->slice_offsets[nvdec->nb_slices] = buffer - nvdec->bitstream;
+    nvdec->bitstream_len += size;
+    nvdec->nb_slices++;
+    return 0;
+}
+
+static int nvdec_vp9_decode_init(AVCodecContext *avctx)
+{
+    enum { VP9_REF_POOL_SIZE = 8 }; /* VP9 uses a fixed size pool of 8 possible reference frames */
+    return ff_nvdec_decode_init(avctx, VP9_REF_POOL_SIZE);
+}
+
+AVHWAccel ff_vp9_nvdec_hwaccel = { /* hwaccel descriptor tying the VP9 codec id to the NVDEC callbacks in this file */
+    .name                 = "vp9_nvdec",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VP9,
+    .pix_fmt              = AV_PIX_FMT_CUDA,
+    .start_frame          = nvdec_vp9_start_frame, /* fills the picture parameters */
+    .end_frame            = nvdec_vp9_end_frame, /* calls ff_nvdec_end_frame() and clears the bitstream pointer */
+    .decode_slice         = nvdec_vp9_decode_slice, /* records slice offsets and accumulates the bitstream length */
+    .init                 = nvdec_vp9_decode_init, /* calls ff_nvdec_decode_init() with a count of 8 */
+    .uninit               = ff_nvdec_decode_uninit, /* shared helper, not defined in this file */
+    .priv_data_size       = sizeof(NVDECContext),
+};
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 6b5de19266..6ce9be61bb 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -169,7 +169,10 @@ fail:
 
 static int update_size(AVCodecContext *avctx, int w, int h)
 {
-#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
+#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
+                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
+                     CONFIG_VP9_NVDEC_HWACCEL + \
+                     CONFIG_VP9_VAAPI_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
     VP9Context *s = avctx->priv_data;
     uint8_t *p;
@@ -191,12 +194,18 @@ static int update_size(AVCodecContext *avctx, int w, int h)
             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
             *fmtp++ = AV_PIX_FMT_D3D11;
 #endif
+#if CONFIG_VP9_NVDEC_HWACCEL
+            *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
 #if CONFIG_VP9_VAAPI_HWACCEL
             *fmtp++ = AV_PIX_FMT_VAAPI;
 #endif
             break;
         case AV_PIX_FMT_YUV420P10:
         case AV_PIX_FMT_YUV420P12:
+#if CONFIG_VP9_NVDEC_HWACCEL
+            *fmtp++ = AV_PIX_FMT_CUDA;
+#endif
 #if CONFIG_VP9_VAAPI_HWACCEL
             *fmtp++ = AV_PIX_FMT_VAAPI;
 #endif
-- 
2.14.2