[FFmpeg-devel] [PATCH] Add NVENC encoder
Nicolas George
george at nsup.org
Wed Nov 26 21:38:04 CET 2014
Le sextidi 6 frimaire, an CCXXIII, Timo Rothenpieler a écrit :
> It uses init_static_data to dynamicaly ask the nvidia driver for the
> supported pixel formats instead.
It means it will try to load and init the library whenever libavcodec is
used, even if this specific encoder is not used. For a library that accesses
hardware devices, that may not be a good idea.
Below, a few quick comments that became a lot of comments; I do not know the
API itself.
> From 793271822a5f52c3aed876fcedc7c6d8edd3c10c Mon Sep 17 00:00:00 2001
> From: Timo Rothenpieler <timo at rothenpieler.org>
> Date: Wed, 26 Nov 2014 11:08:11 +0100
> Subject: [PATCH] Add NVENC encoder
>
> ---
> Changelog | 1 +
> configure | 12 +-
> libavcodec/Makefile | 1 +
> libavcodec/allcodecs.c | 1 +
> libavcodec/nvenc.c | 932 ++++++++++++++++++++++++++++++++++++++++++++++++
> libavcodec/nvenc_api.c | 275 ++++++++++++++
> libavcodec/nvenc_api.h | 35 ++
> libavcodec/nvenc_cuda.h | 62 ++++
Is it necessary to split the _api part in a separate file? The whole code is
a bit large, but still manageable, and merging the files would avoid some
header overhead.
> 8 files changed, 1317 insertions(+), 2 deletions(-)
> create mode 100644 libavcodec/nvenc.c
> create mode 100644 libavcodec/nvenc_api.c
> create mode 100644 libavcodec/nvenc_api.h
> create mode 100644 libavcodec/nvenc_cuda.h
>
> diff --git a/Changelog b/Changelog
> index 7172d0c..d26b7fa 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -17,6 +17,7 @@ version <next>:
> - WebP muxer with animated WebP support
> - zygoaudio decoding support
> - APNG demuxer
> +- nvenc encoder
>
>
> version 2.4:
> diff --git a/configure b/configure
> index 38619c4..05bce5d 100755
> --- a/configure
> +++ b/configure
> @@ -261,6 +261,7 @@ External library support:
> --enable-libzvbi enable teletext support via libzvbi [no]
> --disable-lzma disable lzma [autodetect]
> --enable-decklink enable Blackmagick DeckLink I/O support [no]
> + --enable-nvenc enable NVIDIA NVENC support [no]
> --enable-openal enable OpenAL 1.1 capture support [no]
> --enable-opencl enable OpenCL code
> --enable-opengl enable OpenGL rendering [no]
> @@ -1393,6 +1394,7 @@ EXTERNAL_LIBRARY_LIST="
> libzmq
> libzvbi
> lzma
> + nvenc
> openal
> opencl
> opengl
> @@ -2389,6 +2391,7 @@ libxvid_encoder_deps="libxvid"
> libutvideo_decoder_deps="libutvideo"
> libutvideo_encoder_deps="libutvideo"
> libzvbi_teletext_decoder_deps="libzvbi"
> +nvenc_encoder_deps="nvenc"
>
> # demuxers / muxers
> ac3_demuxer_select="ac3_parser"
> @@ -2569,9 +2572,7 @@ drawtext_filter_deps="libfreetype"
> ebur128_filter_deps="gpl"
> flite_filter_deps="libflite"
> frei0r_filter_deps="frei0r dlopen"
> -frei0r_filter_extralibs='$ldl'
> frei0r_src_filter_deps="frei0r dlopen"
> -frei0r_src_filter_extralibs='$ldl'
> geq_filter_deps="gpl"
> histeq_filter_deps="gpl"
> hqdn3d_filter_deps="gpl"
> @@ -4344,6 +4345,7 @@ die_license_disabled gpl x11grab
>
> die_license_disabled nonfree libaacplus
> die_license_disabled nonfree libfaac
> +die_license_disabled nonfree nvenc
> enabled gpl && die_license_disabled_gpl nonfree libfdk_aac
> enabled gpl && die_license_disabled_gpl nonfree openssl
>
> @@ -4650,6 +4652,11 @@ elif check_func dlopen -ldl; then
> ldl=-ldl
> fi
>
> +# set a few flags which depend on ldl and can't be set earlier
> +nvenc_encoder_extralibs='$ldl'
> +frei0r_filter_extralibs='$ldl'
> +frei0r_src_filter_extralibs='$ldl'
I think moving the frei0r rules belongs in a separate patch.
> +
> if ! disabled network; then
> check_func getaddrinfo $network_extralibs
> check_func getservbyport $network_extralibs
> @@ -4913,6 +4920,7 @@ enabled libxavs && require libxavs xavs.h xavs_encoder_encode -lxavs
> enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore
> enabled libzmq && require_pkg_config libzmq zmq.h zmq_ctx_new
> enabled libzvbi && require libzvbi libzvbi.h vbi_decoder_new -lzvbi
> +enabled nvenc && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; }
> enabled openal && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
> check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } ||
> die "ERROR: openal not found"; } &&
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index fa0f53d..cc41564 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -347,6 +347,7 @@ OBJS-$(CONFIG_MXPEG_DECODER) += mxpegdec.o
> OBJS-$(CONFIG_NELLYMOSER_DECODER) += nellymoserdec.o nellymoser.o
> OBJS-$(CONFIG_NELLYMOSER_ENCODER) += nellymoserenc.o nellymoser.o
> OBJS-$(CONFIG_NUV_DECODER) += nuv.o rtjpeg.o
> +OBJS-$(CONFIG_NVENC_ENCODER) += nvenc.o nvenc_api.o
> OBJS-$(CONFIG_ON2AVC_DECODER) += on2avc.o on2avcdata.o
> OBJS-$(CONFIG_OPUS_DECODER) += opusdec.o opus.o opus_celt.o \
> opus_imdct.o opus_silk.o \
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 0d39d33..8ceee2f 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -223,6 +223,7 @@ void avcodec_register_all(void)
> REGISTER_DECODER(MVC2, mvc2);
> REGISTER_DECODER(MXPEG, mxpeg);
> REGISTER_DECODER(NUV, nuv);
> + REGISTER_ENCODER(NVENC, nvenc);
> REGISTER_DECODER(PAF_VIDEO, paf_video);
> REGISTER_ENCDEC (PAM, pam);
> REGISTER_ENCDEC (PBM, pbm);
> diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
> new file mode 100644
> index 0000000..3cb98d3
> --- /dev/null
> +++ b/libavcodec/nvenc.c
> @@ -0,0 +1,932 @@
> +/*
> + * H.264 hardware encoding using nvidia nvenc
> + * Copyright (c) 2014 Timo Rothenpieler <timo at rothenpieler.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifdef _WIN32
> +#include <windows.h>
> +#endif
> +
> +#include "libavutil/internal.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/mem.h"
> +#include "avcodec.h"
> +#include "internal.h"
> +
> +#include "nvenc_cuda.h"
> +#include "nvenc_api.h"
> +
> +typedef struct NvencInputSurface
> +{
> + NV_ENC_INPUT_PTR inputSurface;
> + int width;
> + int height;
> +
> + int lockCount;
The usual coding style for structure members and variables in ffmpeg is
names_separated_with_underscores, not uglyCamelCase. (But I believe the
person who will end up maintaining the file should have the last word on this.)
> +
> + NV_ENC_BUFFER_FORMAT format;
> +} NvencInputSurface;
> +
> +typedef struct NvencOutputSurface
> +{
> + NV_ENC_OUTPUT_PTR outputSurface;
> + int size;
> +
> + NvencInputSurface *inputSurface;
> +
> + int busy;
> +} NvencOutputSurface;
> +
> +typedef struct NvencOutputSurfaceList
> +{
> + NvencOutputSurface *surface;
> + struct NvencOutputSurfaceList *next;
> +} NvencOutputSurfaceList;
> +
> +typedef struct NvencTimestampList
> +{
> + int64_t timestamp;
> + struct NvencTimestampList *next;
> +} NvencTimestampList;
> +
> +typedef struct NvencContext
> +{
> + AVClass *avclass;
> +
> + NV_ENC_INITIALIZE_PARAMS initEncodeParams;
> + NV_ENC_CONFIG encodeConfig;
> + CUcontext cuContext;
> +
> + int maxSurfaceCount;
> + NvencInputSurface *inputSurfaces;
> + NvencOutputSurface *outputSurfaces;
> +
> + NvencOutputSurfaceList *outputSurfaceQueue;
> + NvencOutputSurfaceList *outputSurfaceReadyQueue;
> + NvencTimestampList *timestampList;
> + int64_t lastDts;
> +
> + void *nvencoder;
> +
> + char *profile;
> + char *preset;
> + int cqp;
> + int cbr;
> + int twopass;
> + int gobpattern;
> +} NvencContext;
> +
> +static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } };
> +
> +static void out_surf_queue_push(NvencOutputSurfaceList** head, NvencOutputSurface *surface)
> +{
> + if (!*head) {
> + *head = av_malloc(sizeof(NvencOutputSurfaceList));
> + (*head)->next = 0;
ffmpeg code usually uses NULL for NULL pointers, not 0; other similar cases
below.
> + (*head)->surface = surface;
> + return;
> + }
> +
> + while ((*head)->next)
> + head = &((*head)->next);
This looks inefficient. Do you have an estimate of the usual size of the
queue?
I suggest you have a look at the dynarray (in libavutil/mem.h and
dynarray.h) API.
If you really need linked lists, you could probably keep the final pointer
to head in the structure to avoid walking the list every time.
> +
> + (*head)->next = av_malloc(sizeof(NvencOutputSurfaceList));
av_malloc() return value needs to be checked. Other similar cases below.
> + (*head)->next->next = 0;
> + (*head)->next->surface = surface;
> +}
> +
> +static NvencOutputSurface *out_surf_queue_pop(NvencOutputSurfaceList** head)
If you call this one pop instead of shift, people used to Perl will be very
confused.
> +{
> + NvencOutputSurfaceList *tmp;
> + NvencOutputSurface *res;
> +
> + if (!*head)
> + return 0;
> +
> + tmp = *head;
> + res = tmp->surface;
> + *head = tmp->next;
> + av_free(tmp);
> +
> + return res;
> +}
> +
> +static void timestamp_list_insert_sorted(NvencTimestampList** head, int64_t timestamp)
Same as before: maybe dynarray would be more efficient, avoiding malloc()
with its huge overhead for every insertion.
Also, if the list is expected to be large, you may consider using a heap
instead of a sorted list.
> +{
> + NvencTimestampList *newelem;
> + NvencTimestampList *prev;
> +
> + if (!*head) {
> + *head = av_malloc(sizeof(NvencTimestampList));
> + (*head)->next = 0;
> + (*head)->timestamp = timestamp;
> + return;
> + }
> +
> + prev = 0;
> + while (*head && timestamp >= (*head)->timestamp) {
> + prev = *head;
> + head = &((*head)->next);
> + }
> +
> + newelem = av_malloc(sizeof(NvencTimestampList));
> + newelem->next = *head;
> + newelem->timestamp = timestamp;
> +
> + if (*head) {
> + *head = newelem;
> + } else {
> + prev->next = newelem;
> + }
> +}
> +
> +static int64_t timestamp_list_get_lowest(NvencTimestampList** head)
> +{
> + NvencTimestampList *tmp;
> + int64_t res;
> +
> + if (!*head)
> + return 0;
> +
> + tmp = *head;
> + res = tmp->timestamp;
> + *head = tmp->next;
> + av_free(tmp);
> +
> + return res;
> +}
> +
> +static av_cold int nvenc_encode_init(AVCodecContext *avctx)
> +{
> + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 };
> + NV_ENC_PRESET_CONFIG presetConfig = { 0 };
> + CUcontext cuContextCurr;
> + GUID encoderPreset = NV_ENC_PRESET_HQ_GUID;
> + GUID license = dummy_license;
> + NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
> + int surfaceCount = 0;
> + int i, numMBs;
> + int isLL = 0;
> +
> + NvencContext *ctx = avctx->priv_data;
> +
> + if (!ff_nvenc_dyload_nvenc(avctx))
> + return AVERROR_EXTERNAL;
> +
> + avctx->coded_frame = av_frame_alloc();
> + if (!avctx->coded_frame)
> + return AVERROR(ENOMEM);
> +
> + memset(&ctx->initEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
> + memset(&ctx->encodeConfig, 0, sizeof(NV_ENC_CONFIG));
Not needed; the whole structure is set to 0 by the library.
> +
> + ctx->outputSurfaceQueue = 0;
> + ctx->outputSurfaceReadyQueue = 0;
> + ctx->timestampList = 0;
> + ctx->lastDts = AV_NOPTS_VALUE;
> + ctx->nvencoder = 0;
> +
> + ctx->encodeConfig.version = NV_ENC_CONFIG_VER;
> + ctx->initEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
> + presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
> + presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
> + stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
> + stEncodeSessionParams.apiVersion = NVENCAPI_VERSION;
> + stEncodeSessionParams.clientKeyPtr = &license;
> +
> + ctx->cuContext = 0;
> + if (ff_cuCtxCreate(&ctx->cuContext, 0, ff_pNvencDevices[ff_iNvencUseDeviceID]) != CUDA_SUCCESS
> + || ff_cuCtxPopCurrent(&cuContextCurr) != CUDA_SUCCESS) {
> + av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC\n");
Is there a chance of getting a more detailed error reason?
> + goto error;
> + }
> +
> + stEncodeSessionParams.device = (void*)ctx->cuContext;
> + stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
> +
> + nvStatus = ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &ctx->nvencoder);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + ctx->nvencoder = 0;
> + av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (int)nvStatus);
> + goto error;
> + }
> +
> + if (ctx->preset) {
> + if (!strcmp(ctx->preset, "hp")) {
> + encoderPreset = NV_ENC_PRESET_HP_GUID;
> + } else if (!strcmp(ctx->preset, "hq")) {
> + encoderPreset = NV_ENC_PRESET_HQ_GUID;
> + } else if (!strcmp(ctx->preset, "bd")) {
> + encoderPreset = NV_ENC_PRESET_BD_GUID;
> + } else if (!strcmp(ctx->preset, "ll")) {
> + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
> + isLL = 1;
> + } else if (!strcmp(ctx->preset, "llhp")) {
> + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
> + isLL = 1;
> + } else if (!strcmp(ctx->preset, "llhq")) {
> + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
> + isLL = 1;
> + } else if (!strcmp(ctx->preset, "default")) {
> + encoderPreset = NV_ENC_PRESET_DEFAULT_GUID;
> + } else {
> + av_log(avctx, AV_LOG_ERROR, "Preset \"%s\" is unknown!\n", ctx->preset);
Should return an error. And if you use a table with the list of presets, you
can dump the list.
> + }
> + }
> +
> + nvStatus = ff_pNvEnc->nvEncGetEncodePresetConfig(ctx->nvencoder, NV_ENC_CODEC_H264_GUID, encoderPreset, &presetConfig);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", (int)nvStatus);
> + goto error;
> + }
> +
> + ctx->initEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
> + ctx->initEncodeParams.encodeHeight = avctx->height;
> + ctx->initEncodeParams.encodeWidth = avctx->width;
> + ctx->initEncodeParams.darHeight = avctx->height;
> + ctx->initEncodeParams.darWidth = avctx->width;
Was this tested with anamorphic videos?
> + ctx->initEncodeParams.frameRateNum = avctx->time_base.den;
> + ctx->initEncodeParams.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
> +
> + numMBs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
> + ctx->maxSurfaceCount = (numMBs >= 8160) ? 16 : 32;
> +
> + ctx->initEncodeParams.enableEncodeAsync = 0;
> + ctx->initEncodeParams.enablePTD = 1;
> +
> + ctx->initEncodeParams.presetGUID = encoderPreset;
> +
> + ctx->initEncodeParams.encodeConfig = &ctx->encodeConfig;
> + memcpy(&ctx->encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
> + ctx->encodeConfig.version = NV_ENC_CONFIG_VER;
> +
> + if (avctx->gop_size >= 0) {
> + ctx->encodeConfig.gopLength = avctx->gop_size;
> + ctx->encodeConfig.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
> + }
> +
> + if (avctx->bit_rate > 0)
> + ctx->encodeConfig.rcParams.averageBitRate = avctx->bit_rate;
> +
> + if (avctx->rc_max_rate > 0)
> + ctx->encodeConfig.rcParams.maxBitRate = avctx->rc_max_rate;
> +
> + if (ctx->cbr) {
> + if (!ctx->twopass) {
> + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
> + } else if (ctx->twopass == 1 || isLL) {
> + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
> +
> + ctx->encodeConfig.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
> + ctx->encodeConfig.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
> +
> + if (!isLL)
> + av_log(avctx, AV_LOG_WARNING, "Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n");
> + } else {
> + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
> + }
> + } else if (ctx->cqp >= 0) {
> + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
> + ctx->encodeConfig.rcParams.constQP.qpInterB = ctx->cqp;
> + ctx->encodeConfig.rcParams.constQP.qpInterP = ctx->cqp;
> + ctx->encodeConfig.rcParams.constQP.qpIntra = ctx->cqp;
> +
> + avctx->qmin = -1;
> + avctx->qmax = -1;
> + } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
> + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
> +
> + ctx->encodeConfig.rcParams.enableMinQP = 1;
> + ctx->encodeConfig.rcParams.enableMaxQP = 1;
> +
> + ctx->encodeConfig.rcParams.minQP.qpInterB = avctx->qmin;
> + ctx->encodeConfig.rcParams.minQP.qpInterP = avctx->qmin;
> + ctx->encodeConfig.rcParams.minQP.qpIntra = avctx->qmin;
> +
> + ctx->encodeConfig.rcParams.maxQP.qpInterB = avctx->qmax;
> + ctx->encodeConfig.rcParams.maxQP.qpInterP = avctx->qmax;
> + ctx->encodeConfig.rcParams.maxQP.qpIntra = avctx->qmax;
> + }
> +
> + if (avctx->rc_buffer_size > 0)
> + ctx->encodeConfig.rcParams.vbvBufferSize = avctx->rc_buffer_size;
> +
> + if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
> + ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
> + } else {
> + ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
> + }
> +
> + if (!ctx->profile) {
> + switch (avctx->profile) {
> + case FF_PROFILE_H264_BASELINE:
case is usually indented the same as switch.
> + ctx->profile = av_strdup("baseline");
Need to check the return value.
But it seems you have the private option "profile" conflicting with the
global option "profile", which is confusing, and possibly problematic, for
users.
> + break;
> + case FF_PROFILE_H264_MAIN:
> + ctx->profile = av_strdup("main");
> + break;
> + default:
> + ctx->profile = av_strdup("high");
> + break;
> + }
> + }
> +
> + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
> +
> + if (!strcmp(ctx->profile, "high")) {
> + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
> + } else if (!strcmp(ctx->profile, "main")) {
> + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
> + } else if (!strcmp(ctx->profile, "baseline")) {
> + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
> + } else {
> + av_log(avctx, AV_LOG_WARNING, "Unknown profile requested: %s\n", ctx->profile);
> + }
> +
> + if (ctx->gobpattern >= 0) {
> + ctx->encodeConfig.frameIntervalP = 1;
> + }
> +
> + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
> + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
> +
> + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace;
> + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries;
> + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc;
> +
> + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
> +
> + ctx->encodeConfig.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
> +
> + nvStatus = ff_pNvEnc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->initEncodeParams);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", (int)nvStatus);
> + goto error;
> + }
> +
> + ctx->inputSurfaces = (NvencInputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencInputSurface));
> + ctx->outputSurfaces = (NvencOutputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencOutputSurface));
The cast is an ugly c++ism, and ffmpeg code recommends sizeof(*variable)
instead of sizeof(Type).
Do you need to use calloc instead of the corresponding av_ function?
Other similar cases below.
> +
> + for (surfaceCount = 0; surfaceCount < ctx->maxSurfaceCount; ++surfaceCount) {
> + NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
> + NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
> + allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
> + allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
> +
> + allocSurf.width = (avctx->width + 31) & ~31;
> + allocSurf.height = (avctx->height + 31) & ~31;
> +
> + allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
> +
> + switch (avctx->pix_fmt) {
> + case AV_PIX_FMT_YUV420P:
> + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
> + break;
> +
> + case AV_PIX_FMT_NV12:
> + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
> + break;
> +
> + case AV_PIX_FMT_YUV444P:
> + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
> + break;
> +
> + default:
> + av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
> + goto error;
> + }
> +
> + nvStatus = ff_pNvEnc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
> + if (nvStatus = NV_ENC_SUCCESS){
> + av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n");
> + goto error;
> + }
> +
> + ctx->inputSurfaces[surfaceCount].lockCount = 0;
> + ctx->inputSurfaces[surfaceCount].inputSurface = allocSurf.inputBuffer;
> + ctx->inputSurfaces[surfaceCount].format = allocSurf.bufferFmt;
> + ctx->inputSurfaces[surfaceCount].width = allocSurf.width;
> + ctx->inputSurfaces[surfaceCount].height = allocSurf.height;
> +
> + allocOut.size = 1024 * 1024;
Maybe a comment to explain where this value comes from?
> + allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
> +
> + nvStatus = ff_pNvEnc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
> + if (nvStatus = NV_ENC_SUCCESS) {
> + av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n");
> + ctx->outputSurfaces[surfaceCount++].outputSurface = 0;
> + goto error;
> + }
> +
> + ctx->outputSurfaces[surfaceCount].outputSurface = allocOut.bitstreamBuffer;
> + ctx->outputSurfaces[surfaceCount].size = allocOut.size;
> + ctx->outputSurfaces[surfaceCount].busy = 0;
> + }
> +
> + if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
> + uint32_t outSize = 0;
> + char tmpHeader[256];
> + NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
> + payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
> +
> + payload.spsppsBuffer = tmpHeader;
> + payload.inBufferSize = 256;
> + payload.outSPSPPSPayloadSize = &outSize;
> +
> + nvStatus = ff_pNvEnc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n");
> + goto error;
> + }
> +
> + avctx->extradata_size = outSize;
> + avctx->extradata = av_mallocz(outSize + FF_INPUT_BUFFER_PADDING_SIZE);
> +
> + memcpy(avctx->extradata, tmpHeader, outSize);
> + } else {
> + avctx->extradata = 0;
> + avctx->extradata_size = 0;
Not needed.
> + }
> +
> + if (ctx->encodeConfig.frameIntervalP > 1)
> + avctx->has_b_frames = 2;
> +
> + if (ctx->encodeConfig.rcParams.averageBitRate > 0)
> + avctx->bit_rate = ctx->encodeConfig.rcParams.averageBitRate;
> +
> + return 0;
> +
> +error:
> +
> + for (i = 0; i < surfaceCount; ++i) {
> + ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface);
> + if (ctx->outputSurfaces[i].outputSurface)
> + ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface);
> + }
> +
> + if (ctx->nvencoder)
> + ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder);
> +
> + if (ctx->cuContext)
> + ff_cuCtxDestroy(ctx->cuContext);
> +
> + ff_nvenc_unload_nvenc(avctx);
> +
> + ctx->nvencoder = 0;
> + ctx->cuContext = 0;
> +
> + return AVERROR_EXTERNAL;
> +}
> +
> +static av_cold int nvenc_encode_close(AVCodecContext *avctx)
> +{
> + NvencContext *ctx = avctx->priv_data;
> + int i;
> +
> + if (ctx->profile)
> + av_freep(&ctx->profile);
Freeing NULL is valid, so you do not need to check beforehand. And in this
case, since ctx->profile is an option, it is automatically freed anyway.
> +
> + if (avctx->extradata)
> + av_freep(&avctx->extradata);
extradata is automatically freed for encoders.
> +
> + while (ctx->timestampList)
> + timestamp_list_get_lowest(&ctx->timestampList);
> +
> + while (ctx->outputSurfaceReadyQueue)
> + out_surf_queue_pop(&ctx->outputSurfaceReadyQueue);
> +
> + while (ctx->outputSurfaceQueue)
> + out_surf_queue_pop(&ctx->outputSurfaceQueue);
> +
> + for (i = 0; i < ctx->maxSurfaceCount; ++i) {
> + ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface);
> + ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface);
> + }
> + ctx->maxSurfaceCount = 0;
> +
> + ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder);
> + ctx->nvencoder = 0;
> +
> + ff_cuCtxDestroy(ctx->cuContext);
> + ctx->cuContext = 0;
> +
> + ff_nvenc_unload_nvenc(avctx);
> +
> + av_frame_free(&avctx->coded_frame);
> +
> + return 0;
> +}
> +
> +static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf)
> +{
> + NvencContext *ctx = avctx->priv_data;
> + uint32_t *sliceOffsets = (uint32_t*)calloc(ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData, sizeof(uint32_t));
> + NV_ENC_LOCK_BITSTREAM lockParams = { 0 };
> + NVENCSTATUS nvStatus;
> +
> + lockParams.version = NV_ENC_LOCK_BITSTREAM_VER;
> +
> + lockParams.doNotWait = 0;
> + lockParams.outputBitstream = tmpoutsurf->outputSurface;
> + lockParams.sliceOffsets = sliceOffsets;
> +
> + nvStatus = ff_pNvEnc->nvEncLockBitstream(ctx->nvencoder, &lockParams);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n");
> + timestamp_list_get_lowest(&ctx->timestampList);
> + return 0;
Looks like it should return an error.
> + }
> +
> + if (ff_alloc_packet2(avctx, pkt, lockParams.bitstreamSizeInBytes) < 0) {
> + ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface);
> + timestamp_list_get_lowest(&ctx->timestampList);
> + return 0;
> + }
Same as above, and ff_alloc_packet2() already returns a proper error code.
> +
> + memcpy(pkt->data, lockParams.bitstreamBufferPtr, lockParams.bitstreamSizeInBytes);
> +
> + nvStatus = ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface);
> + if (nvStatus != NV_ENC_SUCCESS)
> + av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, expect the gates of mordor to open\n");
> +
> + switch (lockParams.pictureType) {
> + case NV_ENC_PIC_TYPE_IDR:
> + pkt->flags |= AV_PKT_FLAG_KEY;
> + case NV_ENC_PIC_TYPE_I:
> + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
> + break;
> +
> + case NV_ENC_PIC_TYPE_P:
> + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
> + break;
> +
> + case NV_ENC_PIC_TYPE_B:
> + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
> + break;
> +
> + case NV_ENC_PIC_TYPE_BI:
> + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI;
> + break;
> +
> + default:
> + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_NONE;
Does this happen normally?
> + break;
> + }
> +
> + pkt->pts = lockParams.outputTimeStamp;
> + pkt->dts = timestamp_list_get_lowest(&ctx->timestampList) - ctx->encodeConfig.frameIntervalP;
> +
> + if (pkt->dts > pkt->pts)
> + pkt->dts = pkt->pts;
> +
> + if (ctx->lastDts != AV_NOPTS_VALUE && pkt->dts <= ctx->lastDts)
> + pkt->dts = ctx->lastDts + 1;
> +
> + ctx->lastDts = pkt->dts;
> +
> + return 1;
> +}
> +
> +static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
> + const AVFrame *frame, int *got_packet)
> +{
> + NVENCSTATUS nvStatus;
> + NvencContext *ctx = avctx->priv_data;
> + NvencOutputSurface *tmpoutsurf;
> + int i = 0;
> +
> + NV_ENC_PIC_PARAMS picParams = { 0 };
> + picParams.version = NV_ENC_PIC_PARAMS_VER;
> +
> + if (frame) {
> + NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
> + NvencInputSurface *inSurf = 0;
> +
> + for (i = 0; i < ctx->maxSurfaceCount; ++i)
> + if (!ctx->inputSurfaces[i].lockCount)
> + inSurf = &ctx->inputSurfaces[i];
Maybe a break here.
> + av_assert0(inSurf);
Are you positively sure that an input surface will always be available?
> +
> + inSurf->lockCount = 1;
> +
> + lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
> + lockBufferParams.inputBuffer = inSurf->inputSurface;
> +
> + nvStatus = ff_pNvEnc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input buffer\n");
> + return 0;
> + }
> +
> + if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
> + uint8_t *buf = lockBufferParams.bufferDataPtr;
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch,
> + frame->data[0], frame->linesize[0],
> + avctx->width, avctx->height);
> +
> + buf += inSurf->height * lockBufferParams.pitch;
Could be factored out, unless I am missing something.
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch >> 1,
> + frame->data[2], frame->linesize[2],
> + avctx->width >> 1, avctx->height >> 1);
> +
> + buf += (inSurf->height * lockBufferParams.pitch) >> 2;
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch >> 1,
> + frame->data[1], frame->linesize[1],
> + avctx->width >> 1, avctx->height >> 1);
> + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
> + uint8_t *buf = lockBufferParams.bufferDataPtr;
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch,
> + frame->data[0], frame->linesize[0],
> + avctx->width, avctx->height);
> +
> + buf += inSurf->height * lockBufferParams.pitch;
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch,
> + frame->data[1], frame->linesize[1],
> + avctx->width, avctx->height >> 1);
> + } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
> + uint8_t *buf = lockBufferParams.bufferDataPtr;
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch,
> + frame->data[0], frame->linesize[0],
> + avctx->width, avctx->height);
> +
> + buf += inSurf->height * lockBufferParams.pitch;
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch,
> + frame->data[1], frame->linesize[1],
> + avctx->width, avctx->height);
> +
> + buf += inSurf->height * lockBufferParams.pitch;
> +
> + av_image_copy_plane(buf, lockBufferParams.pitch,
> + frame->data[2], frame->linesize[2],
> + avctx->width, avctx->height);
> + } else {
> + av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
> + return AVERROR(EINVAL);
> + }
> +
> + nvStatus = ff_pNvEnc->nvEncUnlockInputBuffer(ctx->nvencoder, inSurf->inputSurface);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n");
> + return AVERROR_EXTERNAL;
> + }
> +
> + for (i = 0; i < ctx->maxSurfaceCount; ++i)
> + if (!ctx->outputSurfaces[i].busy)
> + break;
> +
> + if (i == ctx->maxSurfaceCount) {
> + inSurf->lockCount = 0;
> + av_log(avctx, AV_LOG_ERROR, "No free output surface found!\n");
> + return 0;
Proper error code?
> + }
> +
> + ctx->outputSurfaces[i].inputSurface = inSurf;
> +
> + picParams.inputBuffer = inSurf->inputSurface;
> + picParams.bufferFmt = inSurf->format;
> + picParams.inputWidth = avctx->width;
> + picParams.inputHeight = avctx->height;
> + picParams.outputBitstream = ctx->outputSurfaces[i].outputSurface;
> + picParams.completionEvent = 0;
> +
> + if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
> + if (frame->top_field_first) {
> + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
> + } else {
> + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
> + }
> + } else {
> + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
> + }
> +
> + picParams.encodePicFlags = 0;
> + picParams.inputTimeStamp = frame->pts;
> + picParams.inputDuration = 0;
> + picParams.codecPicParams.h264PicParams.sliceMode = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceMode;
> + picParams.codecPicParams.h264PicParams.sliceModeData = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData;
> + memcpy(&picParams.rcParams, &ctx->encodeConfig.rcParams, sizeof(NV_ENC_RC_PARAMS));
> +
> + timestamp_list_insert_sorted(&ctx->timestampList, frame->pts);
> + } else {
> + picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
> + }
> +
> + nvStatus = ff_pNvEnc->nvEncEncodePicture(ctx->nvencoder, &picParams);
> +
> + if (frame && nvStatus == NV_ENC_ERR_NEED_MORE_INPUT) {
> + out_surf_queue_push(&ctx->outputSurfaceQueue, &ctx->outputSurfaces[i]);
> + ctx->outputSurfaces[i].busy = 1;
> + }
> +
> + if (nvStatus != NV_ENC_SUCCESS && nvStatus != NV_ENC_ERR_NEED_MORE_INPUT) {
> + av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n");
> + return AVERROR_EXTERNAL;
> + }
> +
> + if (nvStatus != NV_ENC_ERR_NEED_MORE_INPUT) {
> + while (ctx->outputSurfaceQueue) {
> + tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceQueue);
> + out_surf_queue_push(&ctx->outputSurfaceReadyQueue, tmpoutsurf);
> + }
> +
> + if (frame) {
> + out_surf_queue_push(&ctx->outputSurfaceReadyQueue, &ctx->outputSurfaces[i]);
> + ctx->outputSurfaces[i].busy = 1;
> + }
> + }
> +
> + if (ctx->outputSurfaceReadyQueue) {
> + tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceReadyQueue);
> +
> + *got_packet = process_output_surface(avctx, pkt, avctx->coded_frame, tmpoutsurf);
> +
> + tmpoutsurf->busy = 0;
> + av_assert0(tmpoutsurf->inputSurface->lockCount);
> + tmpoutsurf->inputSurface->lockCount--;
> + }
> +
> + return 0;
> +}
> +
> +static int pix_fmts_nvenc_initialized;
> +
> +static enum AVPixelFormat pix_fmts_nvenc[] = {
> + AV_PIX_FMT_NV12,
> + AV_PIX_FMT_NONE,
> + AV_PIX_FMT_NONE,
> + AV_PIX_FMT_NONE
> +};
> +
> +static av_cold void nvenc_init_static(AVCodec *codec)
> +{
> + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 };
> + CUcontext cuctxcur = 0, cuctx = 0;
> + NVENCSTATUS nvStatus;
> + void *nvencoder = 0;
> + GUID encodeGuid = NV_ENC_CODEC_H264_GUID;
> + GUID license = dummy_license;
> + int i = 0, pos = 0;
> + int gotnv12 = 0, got420 = 0, got444 = 0;
> + uint32_t inputFmtCount = 32;
> + NV_ENC_BUFFER_FORMAT inputFmts[32];
> +
> + for (i = 0; i < 32; ++i)
> + inputFmts[i] = (NV_ENC_BUFFER_FORMAT)0;
> + i = 0;
> +
> + if (pix_fmts_nvenc_initialized) {
> + codec->pix_fmts = pix_fmts_nvenc;
> + return;
> + }
> +
> + if (!ff_nvenc_dyload_nvenc(0)) {
> + pix_fmts_nvenc_initialized = 1;
> + return;
> + }
> +
> + stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
> + stEncodeSessionParams.apiVersion = NVENCAPI_VERSION;
> + stEncodeSessionParams.clientKeyPtr = &license;
> +
> + cuctx = 0;
> + if (ff_cuCtxCreate(&cuctx, 0, ff_pNvencDevices[ff_iNvencUseDeviceID]) != CUDA_SUCCESS) {
It would probably be better to have ff_cuCtxCreate() return an AVERROR code
instead of a CUDA error code. Same for all ff_ helper functions.
> + cuctx = 0;
> + goto error;
> + }
> +
> + if (ff_cuCtxPopCurrent(&cuctxcur) != CUDA_SUCCESS)
> + goto error;
> +
> + stEncodeSessionParams.device = (void*)cuctx;
> + stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
> +
> + nvStatus = ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &nvencoder);
> + if (nvStatus != NV_ENC_SUCCESS) {
> + nvencoder = 0;
> + goto error;
> + }
> +
> + nvStatus = ff_pNvEnc->nvEncGetInputFormats(nvencoder, encodeGuid, inputFmts, 32, &inputFmtCount);
> + if (nvStatus != NV_ENC_SUCCESS)
> + goto error;
> +
> + pos = 0;
> + for (i = 0; i < inputFmtCount && pos < 3; ++i) {
> + if (!gotnv12 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_PL
> + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED16x16
> + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED64x16)) {
> +
> + pix_fmts_nvenc[pos++] = AV_PIX_FMT_NV12;
> + gotnv12 = 1;
> + } else if (!got420 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_PL
> + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED16x16
> + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED64x16)) {
> +
> + pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV420P;
> + got420 = 1;
> + } else if (!got444 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_PL
> + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED16x16
> + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED64x16)) {
> +
> + pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV444P;
> + got444 = 1;
> + }
> + }
> +
> + pix_fmts_nvenc[pos] = AV_PIX_FMT_NONE;
> +
> + pix_fmts_nvenc_initialized = 1;
> + codec->pix_fmts = pix_fmts_nvenc;
> +
> + ff_pNvEnc->nvEncDestroyEncoder(nvencoder);
> + ff_cuCtxDestroy(cuctx);
> +
> + ff_nvenc_unload_nvenc(0);
> +
> + return;
> +
> +error:
> +
> + if (nvencoder)
> + ff_pNvEnc->nvEncDestroyEncoder(nvencoder);
> +
> + if (cuctx)
> + ff_cuCtxDestroy(cuctx);
> +
> + pix_fmts_nvenc_initialized = 1;
> + pix_fmts_nvenc[0] = AV_PIX_FMT_NV12;
> + pix_fmts_nvenc[1] = AV_PIX_FMT_NONE;
> +
> + codec->pix_fmts = pix_fmts_nvenc;
> +
> + ff_nvenc_unload_nvenc(0);
> +}
> +
> +#define OFFSET(x) offsetof(NvencContext, x)
> +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
> +static const AVOption options[] = {
> + { "profile", "Set profile restrictions", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "high" }, 0, 0, VE},
> + { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE },
> + { "cqp", "Constant quantization parameter rate control method", OFFSET(cqp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
> + { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
> + { "2pass", "Use 2pass cbr encoding mode (low latency mode only)", OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
Some of these options are redundant with global ones; "profile" already
cited, "2pass" = -flags +pass1/+pass2; "cqp" = "global_quality".
> + { "goppattern", "Specifies the GOP pattern as follows: 0: I, 1: IPP, 2: IBP, 3: IBBP", OFFSET(gobpattern), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 3, VE },
> + { NULL }
> +};
> +
> +static const AVClass nvenc_class = {
> + .class_name = "nvenc",
> + .item_name = av_default_item_name,
> + .option = options,
> + .version = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const AVCodecDefault nvenc_defaults[] = {
> + { "b", "0" },
> + { "qmin", "-1" },
> + { "qmax", "-1" },
> + { "qdiff", "-1" },
> + { "qblur", "-1" },
> + { "qcomp", "-1" },
> + { NULL },
> +};
> +
> +AVCodec ff_nvenc_encoder = {
> + .name = "nvenc",
> + .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
> + .type = AVMEDIA_TYPE_VIDEO,
> + .id = AV_CODEC_ID_H264,
> + .priv_data_size = sizeof(NvencContext),
> + .init = nvenc_encode_init,
> + .encode2 = nvenc_encode_frame,
> + .close = nvenc_encode_close,
> + .capabilities = CODEC_CAP_DELAY,
> + .priv_class = &nvenc_class,
> + .defaults = nvenc_defaults,
> + .init_static_data = nvenc_init_static
> +};
> diff --git a/libavcodec/nvenc_api.c b/libavcodec/nvenc_api.c
> new file mode 100644
> index 0000000..53d5fa8
> --- /dev/null
> +++ b/libavcodec/nvenc_api.c
> @@ -0,0 +1,275 @@
> +/*
> + * H.264 hardware encoding using nvidia nvenc
> + * Copyright (c) 2014 Timo Rothenpieler <timo at rothenpieler.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifdef _WIN32
> +#include <windows.h>
> +#else
> +#include <dlfcn.h>
> +#endif
> +
> +#include "libavutil/avassert.h"
> +#include "avcodec.h"
> +#include "internal.h"
> +
> +#include "nvenc_cuda.h"
> +#include "nvenc_api.h"
> +
> +PCUINIT ff_cuInit = 0;
> +PCUDEVICEGETCOUNT ff_cuDeviceGetCount = 0;
> +PCUDEVICEGET ff_cuDeviceGet = 0;
> +PCUDEVICEGETNAME ff_cuDeviceGetName = 0;
> +PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability = 0;
> +PCUCTXCREATE ff_cuCtxCreate = 0;
> +PCUCTXPOPCURRENT ff_cuCtxPopCurrent = 0;
> +PCUCTXDESTROY ff_cuCtxDestroy = 0;
> +
> +static int nvenc_init_count;
> +static NV_ENCODE_API_FUNCTION_LIST nvEncFuncs;
> +NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc = 0;
> +int ff_iNvencDeviceCount = 0;
> +CUdevice ff_pNvencDevices[16];
> +unsigned int ff_iNvencUseDeviceID = 0;
> +
> +#ifdef _WIN32
> +#define LOAD_FUNC(l, s) GetProcAddress(l, s)
> +#define DL_CLOSE_FUNC(l) FreeLibrary(l)
> +static HMODULE cudaLib;
> +static HMODULE nvEncLib;
> +#else
> +#define LOAD_FUNC(l, s) dlsym(l, s)
> +#define DL_CLOSE_FUNC(l) dlclose(l)
> +static void *cudaLib;
> +static void *nvEncLib;
> +#endif
> +
> +#define ifav_log(...) if (avctx) { av_log(__VA_ARGS__); }
Looks strange: why no error message when there is no context?
> +
> +#define CHECK_LOAD_FUNC(t, f, s) \
> +{ \
> + f = (t)LOAD_FUNC(cudaLib, s); \
> + if (!f) { \
> + ifav_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
> + goto error; \
> + } \
> +}
Some compilers choke on that because of the semicolon after the block; for
that reason, it is recommended to use do { ... } while (0).
> +
> +static int nvenc_dyload_cuda(AVCodecContext *avctx)
> +{
> + if (cudaLib)
> + return 1;
Thread safe?
> +
> +#if defined(_WIN32)
> + cudaLib = LoadLibrary(TEXT("nvcuda.dll"));
> +#elif defined(__CYGWIN__)
> + cudaLib = dlopen("nvcuda.dll", RTLD_LAZY);
> +#else
> + cudaLib = dlopen("libcuda.so", RTLD_LAZY);
> +#endif
> +
> + if (!cudaLib) {
> + ifav_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
> + goto error;
> + }
> +
> + CHECK_LOAD_FUNC(PCUINIT, ff_cuInit, "cuInit");
> + CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, ff_cuDeviceGetCount, "cuDeviceGetCount");
> + CHECK_LOAD_FUNC(PCUDEVICEGET, ff_cuDeviceGet, "cuDeviceGet");
> + CHECK_LOAD_FUNC(PCUDEVICEGETNAME, ff_cuDeviceGetName, "cuDeviceGetName");
> + CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, ff_cuDeviceComputeCapability, "cuDeviceComputeCapability");
> + CHECK_LOAD_FUNC(PCUCTXCREATE, ff_cuCtxCreate, "cuCtxCreate_v2");
> + CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, ff_cuCtxPopCurrent, "cuCtxPopCurrent_v2");
> + CHECK_LOAD_FUNC(PCUCTXDESTROY, ff_cuCtxDestroy, "cuCtxDestroy_v2");
You could almost use #name and ff_##name to avoid duplicating the parameter.
> +
> + return 1;
> +
> +error:
> +
> + if (cudaLib)
> + DL_CLOSE_FUNC(cudaLib);
> +
> + cudaLib = 0;
> +
> + return 0;
> +}
> +
> +static int checkCudaErrors(AVCodecContext *avctx, CUresult err, const char *func)
> +{
> + if (err != CUDA_SUCCESS) {
> + ifav_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, err);
Does the library not provide an error code -> string utility?
> + return 0;
> + }
> + return 1;
> +}
> +#define checkCudaErrors(f) if (!checkCudaErrors(avctx, f, #f)) goto error
> +
> +static int nvenc_check_cuda(AVCodecContext *avctx)
> +{
> + int deviceCount = 0;
> + CUdevice cuDevice = 0;
> + char gpu_name[128];
> + int SMminor = 0, SMmajor = 0;
> + int i, smver;
> +
> + if (!nvenc_dyload_cuda(avctx))
> + return 0;
> +
> + if (ff_iNvencDeviceCount > 0)
> + return 1;
> +
> + checkCudaErrors(ff_cuInit(0));
> +
> + checkCudaErrors(ff_cuDeviceGetCount(&deviceCount));
> +
> + if (!deviceCount) {
> + ifav_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
> + goto error;
> + }
> +
> + ifav_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", deviceCount);
> +
> + ff_iNvencDeviceCount = 0;
> +
> + for (i = 0; i < deviceCount; ++i) {
> + checkCudaErrors(ff_cuDeviceGet(&cuDevice, i));
> + checkCudaErrors(ff_cuDeviceGetName(gpu_name, 128, cuDevice));
Use sizeof(gpu_name) instead of the literal 128, to avoid desync errors.
> + checkCudaErrors(ff_cuDeviceComputeCapability(&SMmajor, &SMminor, cuDevice));
> +
> + smver = (SMmajor << 4) | SMminor;
> +
> + ifav_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, SMmajor, SMminor, (smver >= 0x30) ? "Available" : "Not Available");
> +
> + if (smver >= 0x30)
> + ff_pNvencDevices[ff_iNvencDeviceCount++] = cuDevice;
> + }
> +
> + if (!ff_iNvencDeviceCount) {
> + ifav_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
> + goto error;
> + }
> +
> + return 1;
> +
> +error:
> +
> + ff_iNvencDeviceCount = 0;
> +
> + return 0;
> +}
> +
> +av_cold int ff_nvenc_dyload_nvenc(AVCodecContext *avctx)
> +{
> + PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0;
> + NVENCSTATUS nvstatus;
> +
> + if (!nvenc_check_cuda(avctx))
> + return 0;
> +
> + if (ff_pNvEnc) {
> + nvenc_init_count++;
> + return 1;
> + }
> +
> +#if defined(_WIN32)
> + if (sizeof(void*) == 8) {
> + nvEncLib = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
> + } else {
> + nvEncLib = LoadLibrary(TEXT("nvEncodeAPI.dll"));
> + }
> +#elif defined(__CYGWIN__)
> + if (sizeof(void*) == 8) {
> + nvEncLib = dlopen("nvEncodeAPI64.dll", RTLD_LAZY);
> + } else {
> + nvEncLib = dlopen("nvEncodeAPI.dll", RTLD_LAZY);
> + }
> +#else
> + nvEncLib = dlopen("libnvidia-encode.so", RTLD_LAZY);
> +#endif
> +
> + if (!nvEncLib) {
> + ifav_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
> + goto error;
> + }
> +
> + nvEncodeAPICreateInstance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(nvEncLib, "NvEncodeAPICreateInstance");
> +
> + if (!nvEncodeAPICreateInstance) {
> + ifav_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
> + goto error;
> + }
> +
> + ff_pNvEnc = &nvEncFuncs;
> + memset(ff_pNvEnc, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
> + ff_pNvEnc->version = NV_ENCODE_API_FUNCTION_LIST_VER;
> +
> + nvstatus = nvEncodeAPICreateInstance(ff_pNvEnc);
> +
> + if (nvstatus != NV_ENC_SUCCESS) {
> + ifav_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n");
> + goto error;
> + }
> +
> + ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
> +
> + nvenc_init_count = 1;
> +
> + return 1;
> +
> +error:
> + if (nvEncLib)
> + DL_CLOSE_FUNC(nvEncLib);
> +
> + nvEncLib = 0;
> + ff_pNvEnc = 0;
> + nvenc_init_count = 0;
> +
> + return 0;
> +}
> +
> +av_cold void ff_nvenc_unload_nvenc(AVCodecContext *avctx)
> +{
> + if (nvenc_init_count <= 0)
> + return;
> +
> + nvenc_init_count--;
This does not look thread-safe.
> +
> + if (nvenc_init_count > 0)
> + return;
> +
> + DL_CLOSE_FUNC(nvEncLib);
> + nvEncLib = 0;
> + ff_pNvEnc = 0;
> +
> + ff_iNvencDeviceCount = 0;
> +
> + DL_CLOSE_FUNC(cudaLib);
> + cudaLib = 0;
> +
> + ff_cuInit = 0;
> + ff_cuDeviceGetCount = 0;
> + ff_cuDeviceGet = 0;
> + ff_cuDeviceGetName = 0;
> + ff_cuDeviceComputeCapability = 0;
> + ff_cuCtxCreate = 0;
> + ff_cuCtxPopCurrent = 0;
> + ff_cuCtxDestroy = 0;
> +
> + ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
> +}
> diff --git a/libavcodec/nvenc_api.h b/libavcodec/nvenc_api.h
> new file mode 100644
> index 0000000..16b1c72
> --- /dev/null
> +++ b/libavcodec/nvenc_api.h
> @@ -0,0 +1,35 @@
> +/*
> + * H.264 hardware encoding using nvidia nvenc
> + * Copyright (c) 2014 Timo Rothenpieler <timo at rothenpieler.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_NVENC_API_H
> +#define AVCODEC_NVENC_API_H
> +
> +#include <nvEncodeAPI.h>
> +
> +
> +typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
> +
> +extern NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc;
> +
> +int ff_nvenc_dyload_nvenc(AVCodecContext *avctx);
> +void ff_nvenc_unload_nvenc(AVCodecContext *avctx);
> +
> +#endif
> diff --git a/libavcodec/nvenc_cuda.h b/libavcodec/nvenc_cuda.h
> new file mode 100644
> index 0000000..ae43a22
> --- /dev/null
> +++ b/libavcodec/nvenc_cuda.h
> @@ -0,0 +1,62 @@
> +/*
> + * H.264 hardware encoding using nvidia nvenc
> + * Copyright (c) 2014 Timo Rothenpieler <timo at rothenpieler.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_NVENC_CUDA_H
> +#define AVCODEC_NVENC_CUDA_H
> +
> +typedef enum cudaError_enum {
> + CUDA_SUCCESS = 0
> +} CUresult;
> +typedef int CUdevice;
> +typedef void* CUcontext;
> +
> +#ifdef _WIN32
> +#define CUDAAPI __stdcall
> +#else
> +#define CUDAAPI
> +#endif
> +
> +typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
> +typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
> +typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
> +typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
> +typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
> +typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
> +typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
> +typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
> +
> +extern PCUINIT ff_cuInit;
> +extern PCUDEVICEGETCOUNT ff_cuDeviceGetCount;
> +extern PCUDEVICEGET ff_cuDeviceGet;
> +extern PCUDEVICEGETNAME ff_cuDeviceGetName;
> +extern PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability;
> +extern PCUCTXCREATE ff_cuCtxCreate;
> +extern PCUCTXPOPCURRENT ff_cuCtxPopCurrent;
> +extern PCUCTXDESTROY ff_cuCtxDestroy;
> +
> +int ff_nvenc_dyload_cuda(AVCodecContext *avctx);
> +int ff_nvenc_check_cuda(AVCodecContext *avctx);
> +
> +extern int ff_iNvencDeviceCount;
> +extern CUdevice ff_pNvencDevices[16];
> +extern unsigned int ff_iNvencUseDeviceID;
> +
> +#endif
Regards,
--
Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141126/254554e3/attachment.asc>
More information about the ffmpeg-devel
mailing list