[FFmpeg-devel] [PATCH 1/8] compat/cuda: add dynamic loader
Hendrik Leppkes
h.leppkes at gmail.com
Wed Oct 19 15:50:44 EEST 2016
On Wed, Oct 19, 2016 at 2:00 PM, Timo Rothenpieler
<timo at rothenpieler.org> wrote:
> ---
> compat/cuda/dynlink_cuda.h | 88 +++++
> compat/cuda/dynlink_cuviddec.h | 808 +++++++++++++++++++++++++++++++++++++++++
> compat/cuda/dynlink_loader.h | 254 +++++++++++++
> compat/cuda/dynlink_nvcuvid.h | 316 ++++++++++++++++
> 4 files changed, 1466 insertions(+)
> create mode 100644 compat/cuda/dynlink_cuda.h
> create mode 100644 compat/cuda/dynlink_cuviddec.h
> create mode 100644 compat/cuda/dynlink_loader.h
> create mode 100644 compat/cuda/dynlink_nvcuvid.h
>
> diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h
> new file mode 100644
> index 0000000..908f12d
> --- /dev/null
> +++ b/compat/cuda/dynlink_cuda.h
> @@ -0,0 +1,88 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
So did you write this without once looking at the NVIDIA header file?
Because if you did even read it, then you can't realistically claim
this is LGPL.
> +
> +#if !defined(AV_COMPAT_DYNLINK_CUDA_H) && !defined(CUDA_VERSION)
> +#define AV_COMPAT_DYNLINK_CUDA_H
> +
> +#include <stddef.h>
> +
> +#define CUDA_VERSION 7050
> +
> +#if defined(_WIN32) || defined(__CYGWIN__)
> +#define CUDAAPI __stdcall
> +#else
> +#define CUDAAPI
> +#endif
> +
> +#define CU_CTX_SCHED_BLOCKING_SYNC 4
> +
> +typedef int CUdevice;
> +typedef void* CUarray;
> +typedef void* CUcontext;
> +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
> +typedef unsigned long long CUdeviceptr;
> +#else
> +typedef unsigned int CUdeviceptr;
> +#endif
> +
> +typedef enum cudaError_enum {
> + CUDA_SUCCESS = 0
> +} CUresult;
> +
> +typedef enum CUmemorytype_enum {
> + CU_MEMORYTYPE_HOST = 1,
> + CU_MEMORYTYPE_DEVICE = 2
> +} CUmemorytype;
> +
> +typedef struct CUDA_MEMCPY2D_st {
> + size_t srcXInBytes;
> + size_t srcY;
> + CUmemorytype srcMemoryType;
> + const void *srcHost;
> + CUdeviceptr srcDevice;
> + CUarray srcArray;
> + size_t srcPitch;
> +
> + size_t dstXInBytes;
> + size_t dstY;
> + CUmemorytype dstMemoryType;
> + void *dstHost;
> + CUdeviceptr dstDevice;
> + CUarray dstArray;
> + size_t dstPitch;
> +
> + size_t WidthInBytes;
> + size_t Height;
> +} CUDA_MEMCPY2D;
> +
> +typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
> +typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
> +typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
> +typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
> +typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
> +typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
> +typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx);
> +typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
> +typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
> +typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
> +typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
> +typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
> +typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
> +typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
> +
> +#endif
> diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h
> new file mode 100644
> index 0000000..17207bc
> --- /dev/null
> +++ b/compat/cuda/dynlink_cuviddec.h
> @@ -0,0 +1,808 @@
> +/*
> + * This copyright notice applies to this header file only:
> + *
> + * Copyright (c) 2010-2016 NVIDIA Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> + * obtaining a copy of this software and associated documentation
> + * files (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use,
> + * copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the software, and to permit persons to whom the
> + * software is furnished to do so, subject to the following
> + * conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> + * included in all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
> + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
> + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file cuviddec.h
> + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
> + * \date 2015-2016
> + * This file contains constants, structure definitions and function prototypes used for decoding.
> + */
> +
> +#if !defined(__CUDA_VIDEO_H__)
> +#define __CUDA_VIDEO_H__
> +
> +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
> +#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
> +#define __CUVID_DEVPTR64
> +#endif
> +#endif
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* __cplusplus */
> +
> +typedef void *CUvideodecoder;
> +typedef struct _CUcontextlock_st *CUvideoctxlock;
> +
> +/**
> + * \addtogroup VIDEO_DECODER Video Decoder
> + * @{
> + */
> +
> +/*!
> + * \enum cudaVideoCodec
> + * Video Codec Enums
> + */
> +typedef enum cudaVideoCodec_enum {
> + cudaVideoCodec_MPEG1=0, /**< MPEG1 */
> + cudaVideoCodec_MPEG2, /**< MPEG2 */
> + cudaVideoCodec_MPEG4, /**< MPEG4 */
> + cudaVideoCodec_VC1, /**< VC1 */
> + cudaVideoCodec_H264, /**< H264 */
> + cudaVideoCodec_JPEG, /**< JPEG */
> + cudaVideoCodec_H264_SVC, /**< H264-SVC */
> + cudaVideoCodec_H264_MVC, /**< H264-MVC */
> + cudaVideoCodec_HEVC, /**< HEVC */
> + cudaVideoCodec_VP8, /**< VP8 */
> + cudaVideoCodec_VP9, /**< VP9 */
> + cudaVideoCodec_NumCodecs, /**< Max Codecs */
> + // Uncompressed YUV
> + cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), /**< Y,U,V (4:2:0) */
> + cudaVideoCodec_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,V,U (4:2:0) */
> + cudaVideoCodec_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,UV (4:2:0) */
> + cudaVideoCodec_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), /**< YUYV/YUY2 (4:2:2) */
> + cudaVideoCodec_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) /**< UYVY (4:2:2) */
> +} cudaVideoCodec;
> +
> +/*!
> + * \enum cudaVideoSurfaceFormat
> + * Video Surface Formats Enums
> + */
> +typedef enum cudaVideoSurfaceFormat_enum {
> + cudaVideoSurfaceFormat_NV12=0 /**< NV12 (currently the only supported output format) */
> +} cudaVideoSurfaceFormat;
> +
> +/*!
> + * \enum cudaVideoDeinterlaceMode
> + * Deinterlacing Modes Enums
> + */
> +typedef enum cudaVideoDeinterlaceMode_enum {
> + cudaVideoDeinterlaceMode_Weave=0, /**< Weave both fields (no deinterlacing) */
> + cudaVideoDeinterlaceMode_Bob, /**< Drop one field */
> + cudaVideoDeinterlaceMode_Adaptive /**< Adaptive deinterlacing */
> +} cudaVideoDeinterlaceMode;
> +
> +/*!
> + * \enum cudaVideoChromaFormat
> + * Chroma Formats Enums
> + */
> +typedef enum cudaVideoChromaFormat_enum {
> + cudaVideoChromaFormat_Monochrome=0, /**< MonoChrome */
> + cudaVideoChromaFormat_420, /**< 4:2:0 */
> + cudaVideoChromaFormat_422, /**< 4:2:2 */
> + cudaVideoChromaFormat_444 /**< 4:4:4 */
> +} cudaVideoChromaFormat;
> +
> +/*!
> + * \enum cudaVideoCreateFlags
> + * Decoder Flags Enums
> + */
> +typedef enum cudaVideoCreateFlags_enum {
> + cudaVideoCreate_Default = 0x00, /**< Default operation mode: use dedicated video engines */
> + cudaVideoCreate_PreferCUDA = 0x01, /**< Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) */
> + cudaVideoCreate_PreferDXVA = 0x02, /**< Go through DXVA internally if possible (requires D3D9 interop) */
> + cudaVideoCreate_PreferCUVID = 0x04 /**< Use dedicated video engines directly */
> +} cudaVideoCreateFlags;
> +
> +/*!
> + * \struct CUVIDDECODECREATEINFO
> + * Struct used in create decoder
> + */
> +typedef struct _CUVIDDECODECREATEINFO
> +{
> + unsigned long ulWidth; /**< Coded Sequence Width */
> + unsigned long ulHeight; /**< Coded Sequence Height */
> + unsigned long ulNumDecodeSurfaces; /**< Maximum number of internal decode surfaces */
> + cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */
> + cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) */
> + unsigned long ulCreationFlags; /**< Decoder creation flags (cudaVideoCreateFlags_XXX) */
> + unsigned long bitDepthMinus8;
> + unsigned long Reserved1[4]; /**< Reserved for future use - set to zero */
> + /**
> + * area of the frame that should be displayed
> + */
> + struct {
> + short left;
> + short top;
> + short right;
> + short bottom;
> + } display_area;
> +
> + cudaVideoSurfaceFormat OutputFormat; /**< cudaVideoSurfaceFormat_XXX */
> + cudaVideoDeinterlaceMode DeinterlaceMode; /**< cudaVideoDeinterlaceMode_XXX */
> + unsigned long ulTargetWidth; /**< Post-processed Output Width (Should be aligned to 2) */
> + unsigned long ulTargetHeight; /**< Post-processed Output Height (Should be aligned to 2) */
> + unsigned long ulNumOutputSurfaces; /**< Maximum number of output surfaces simultaneously mapped */
> + CUvideoctxlock vidLock; /**< If non-NULL, context lock used for synchronizing ownership of the cuda context */
> + /**
> + * target rectangle in the output frame (for aspect ratio conversion)
> + * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
> + */
> + struct {
> + short left;
> + short top;
> + short right;
> + short bottom;
> + } target_rect;
> + unsigned long Reserved2[5]; /**< Reserved for future use - set to zero */
> +} CUVIDDECODECREATEINFO;
> +
> +/*!
> + * \struct CUVIDH264DPBENTRY
> + * H.264 DPB Entry
> + */
> +typedef struct _CUVIDH264DPBENTRY
> +{
> + int PicIdx; /**< picture index of reference frame */
> + int FrameIdx; /**< frame_num(short-term) or LongTermFrameIdx(long-term) */
> + int is_long_term; /**< 0=short term reference, 1=long term reference */
> + int not_existing; /**< non-existing reference frame (corresponding PicIdx should be set to -1) */
> + int used_for_reference; /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */
> + int FieldOrderCnt[2]; /**< field order count of top and bottom fields */
> +} CUVIDH264DPBENTRY;
> +
> +/*!
> + * \struct CUVIDH264MVCEXT
> + * H.264 MVC Picture Parameters Ext
> + */
> +typedef struct _CUVIDH264MVCEXT
> +{
> + int num_views_minus1;
> + int view_id;
> + unsigned char inter_view_flag;
> + unsigned char num_inter_view_refs_l0;
> + unsigned char num_inter_view_refs_l1;
> + unsigned char MVCReserved8Bits;
> + int InterViewRefsL0[16];
> + int InterViewRefsL1[16];
> +} CUVIDH264MVCEXT;
> +
> +/*!
> + * \struct CUVIDH264SVCEXT
> + * H.264 SVC Picture Parameters Ext
> + */
> +typedef struct _CUVIDH264SVCEXT
> +{
> + unsigned char profile_idc;
> + unsigned char level_idc;
> + unsigned char DQId;
> + unsigned char DQIdMax;
> + unsigned char disable_inter_layer_deblocking_filter_idc;
> + unsigned char ref_layer_chroma_phase_y_plus1;
> + signed char inter_layer_slice_alpha_c0_offset_div2;
> + signed char inter_layer_slice_beta_offset_div2;
> +
> + unsigned short DPBEntryValidFlag;
> + unsigned char inter_layer_deblocking_filter_control_present_flag;
> + unsigned char extended_spatial_scalability_idc;
> + unsigned char adaptive_tcoeff_level_prediction_flag;
> + unsigned char slice_header_restriction_flag;
> + unsigned char chroma_phase_x_plus1_flag;
> + unsigned char chroma_phase_y_plus1;
> +
> + unsigned char tcoeff_level_prediction_flag;
> + unsigned char constrained_intra_resampling_flag;
> + unsigned char ref_layer_chroma_phase_x_plus1_flag;
> + unsigned char store_ref_base_pic_flag;
> + unsigned char Reserved8BitsA;
> + unsigned char Reserved8BitsB;
> + // For the 4 scaled_ref_layer_XX fields below,
> + // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_"
> + // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4,
> + short scaled_ref_layer_left_offset;
> + short scaled_ref_layer_top_offset;
> + short scaled_ref_layer_right_offset;
> + short scaled_ref_layer_bottom_offset;
> + unsigned short Reserved16Bits;
> + struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. */
> + int bRefBaseLayer; /**< whether to store ref base pic */
> +} CUVIDH264SVCEXT;
> +
> +/*!
> + * \struct CUVIDH264PICPARAMS
> + * H.264 Picture Parameters
> + */
> +typedef struct _CUVIDH264PICPARAMS
> +{
> + // SPS
> + int log2_max_frame_num_minus4;
> + int pic_order_cnt_type;
> + int log2_max_pic_order_cnt_lsb_minus4;
> + int delta_pic_order_always_zero_flag;
> + int frame_mbs_only_flag;
> + int direct_8x8_inference_flag;
> + int num_ref_frames; // NOTE: shall meet level 4.1 restrictions
> + unsigned char residual_colour_transform_flag;
> + unsigned char bit_depth_luma_minus8; // Must be 0 (only 8-bit supported)
> + unsigned char bit_depth_chroma_minus8; // Must be 0 (only 8-bit supported)
> + unsigned char qpprime_y_zero_transform_bypass_flag;
> + // PPS
> + int entropy_coding_mode_flag;
> + int pic_order_present_flag;
> + int num_ref_idx_l0_active_minus1;
> + int num_ref_idx_l1_active_minus1;
> + int weighted_pred_flag;
> + int weighted_bipred_idc;
> + int pic_init_qp_minus26;
> + int deblocking_filter_control_present_flag;
> + int redundant_pic_cnt_present_flag;
> + int transform_8x8_mode_flag;
> + int MbaffFrameFlag;
> + int constrained_intra_pred_flag;
> + int chroma_qp_index_offset;
> + int second_chroma_qp_index_offset;
> + int ref_pic_flag;
> + int frame_num;
> + int CurrFieldOrderCnt[2];
> + // DPB
> + CUVIDH264DPBENTRY dpb[16]; // List of reference frames within the DPB
> + // Quantization Matrices (raster-order)
> + unsigned char WeightScale4x4[6][16];
> + unsigned char WeightScale8x8[2][64];
> + // FMO/ASO
> + unsigned char fmo_aso_enable;
> + unsigned char num_slice_groups_minus1;
> + unsigned char slice_group_map_type;
> + signed char pic_init_qs_minus26;
> + unsigned int slice_group_change_rate_minus1;
> + union
> + {
> + unsigned long long slice_group_map_addr;
> + const unsigned char *pMb2SliceGroupMap;
> + } fmo;
> + unsigned int Reserved[12];
> + // SVC/MVC
> + union
> + {
> + CUVIDH264MVCEXT mvcext;
> + CUVIDH264SVCEXT svcext;
> + } svcmvc;
> +} CUVIDH264PICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDMPEG2PICPARAMS
> + * MPEG-2 Picture Parameters
> + */
> +typedef struct _CUVIDMPEG2PICPARAMS
> +{
> + int ForwardRefIdx; // Picture index of forward reference (P/B-frames)
> + int BackwardRefIdx; // Picture index of backward reference (B-frames)
> + int picture_coding_type;
> + int full_pel_forward_vector;
> + int full_pel_backward_vector;
> + int f_code[2][2];
> + int intra_dc_precision;
> + int frame_pred_frame_dct;
> + int concealment_motion_vectors;
> + int q_scale_type;
> + int intra_vlc_format;
> + int alternate_scan;
> + int top_field_first;
> + // Quantization matrices (raster order)
> + unsigned char QuantMatrixIntra[64];
> + unsigned char QuantMatrixInter[64];
> +} CUVIDMPEG2PICPARAMS;
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////
> +//
> +// MPEG-4 Picture Parameters
> +//
> +
> +// MPEG-4 has VOP types instead of Picture types
> +#define I_VOP 0
> +#define P_VOP 1
> +#define B_VOP 2
> +#define S_VOP 3
> +
> +/*!
> + * \struct CUVIDMPEG4PICPARAMS
> + * MPEG-4 Picture Parameters
> + */
> +typedef struct _CUVIDMPEG4PICPARAMS
> +{
> + int ForwardRefIdx; // Picture index of forward reference (P/B-frames)
> + int BackwardRefIdx; // Picture index of backward reference (B-frames)
> + // VOL
> + int video_object_layer_width;
> + int video_object_layer_height;
> + int vop_time_increment_bitcount;
> + int top_field_first;
> + int resync_marker_disable;
> + int quant_type;
> + int quarter_sample;
> + int short_video_header;
> + int divx_flags;
> + // VOP
> + int vop_coding_type;
> + int vop_coded;
> + int vop_rounding_type;
> + int alternate_vertical_scan_flag;
> + int interlaced;
> + int vop_fcode_forward;
> + int vop_fcode_backward;
> + int trd[2];
> + int trb[2];
> + // Quantization matrices (raster order)
> + unsigned char QuantMatrixIntra[64];
> + unsigned char QuantMatrixInter[64];
> + int gmc_enabled;
> +} CUVIDMPEG4PICPARAMS;
> +
> +/*!
> + * \struct CUVIDVC1PICPARAMS
> + * VC1 Picture Parameters
> + */
> +typedef struct _CUVIDVC1PICPARAMS
> +{
> + int ForwardRefIdx; /**< Picture index of forward reference (P/B-frames) */
> + int BackwardRefIdx; /**< Picture index of backward reference (B-frames) */
> + int FrameWidth; /**< Actual frame width */
> + int FrameHeight; /**< Actual frame height */
> + // PICTURE
> + int intra_pic_flag; /**< Set to 1 for I,BI frames */
> + int ref_pic_flag; /**< Set to 1 for I,P frames */
> + int progressive_fcm; /**< Progressive frame */
> + // SEQUENCE
> + int profile;
> + int postprocflag;
> + int pulldown;
> + int interlace;
> + int tfcntrflag;
> + int finterpflag;
> + int psf;
> + int multires;
> + int syncmarker;
> + int rangered;
> + int maxbframes;
> + // ENTRYPOINT
> + int panscan_flag;
> + int refdist_flag;
> + int extended_mv;
> + int dquant;
> + int vstransform;
> + int loopfilter;
> + int fastuvmc;
> + int overlap;
> + int quantizer;
> + int extended_dmv;
> + int range_mapy_flag;
> + int range_mapy;
> + int range_mapuv_flag;
> + int range_mapuv;
> + int rangeredfrm; // range reduction state
> +} CUVIDVC1PICPARAMS;
> +
> +/*!
> + * \struct CUVIDJPEGPICPARAMS
> + * JPEG Picture Parameters
> + */
> +typedef struct _CUVIDJPEGPICPARAMS
> +{
> + int Reserved;
> +} CUVIDJPEGPICPARAMS;
> +
> +
> + /*!
> + * \struct CUVIDHEVCPICPARAMS
> + * HEVC Picture Parameters
> + */
> +typedef struct _CUVIDHEVCPICPARAMS
> +{
> + // sps
> + int pic_width_in_luma_samples;
> + int pic_height_in_luma_samples;
> + unsigned char log2_min_luma_coding_block_size_minus3;
> + unsigned char log2_diff_max_min_luma_coding_block_size;
> + unsigned char log2_min_transform_block_size_minus2;
> + unsigned char log2_diff_max_min_transform_block_size;
> + unsigned char pcm_enabled_flag;
> + unsigned char log2_min_pcm_luma_coding_block_size_minus3;
> + unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
> + unsigned char pcm_sample_bit_depth_luma_minus1;
> +
> + unsigned char pcm_sample_bit_depth_chroma_minus1;
> + unsigned char pcm_loop_filter_disabled_flag;
> + unsigned char strong_intra_smoothing_enabled_flag;
> + unsigned char max_transform_hierarchy_depth_intra;
> + unsigned char max_transform_hierarchy_depth_inter;
> + unsigned char amp_enabled_flag;
> + unsigned char separate_colour_plane_flag;
> + unsigned char log2_max_pic_order_cnt_lsb_minus4;
> +
> + unsigned char num_short_term_ref_pic_sets;
> + unsigned char long_term_ref_pics_present_flag;
> + unsigned char num_long_term_ref_pics_sps;
> + unsigned char sps_temporal_mvp_enabled_flag;
> + unsigned char sample_adaptive_offset_enabled_flag;
> + unsigned char scaling_list_enable_flag;
> + unsigned char IrapPicFlag;
> + unsigned char IdrPicFlag;
> +
> + unsigned char bit_depth_luma_minus8;
> + unsigned char bit_depth_chroma_minus8;
> + unsigned char reserved1[14];
> +
> + // pps
> + unsigned char dependent_slice_segments_enabled_flag;
> + unsigned char slice_segment_header_extension_present_flag;
> + unsigned char sign_data_hiding_enabled_flag;
> + unsigned char cu_qp_delta_enabled_flag;
> + unsigned char diff_cu_qp_delta_depth;
> + signed char init_qp_minus26;
> + signed char pps_cb_qp_offset;
> + signed char pps_cr_qp_offset;
> +
> + unsigned char constrained_intra_pred_flag;
> + unsigned char weighted_pred_flag;
> + unsigned char weighted_bipred_flag;
> + unsigned char transform_skip_enabled_flag;
> + unsigned char transquant_bypass_enabled_flag;
> + unsigned char entropy_coding_sync_enabled_flag;
> + unsigned char log2_parallel_merge_level_minus2;
> + unsigned char num_extra_slice_header_bits;
> +
> + unsigned char loop_filter_across_tiles_enabled_flag;
> + unsigned char loop_filter_across_slices_enabled_flag;
> + unsigned char output_flag_present_flag;
> + unsigned char num_ref_idx_l0_default_active_minus1;
> + unsigned char num_ref_idx_l1_default_active_minus1;
> + unsigned char lists_modification_present_flag;
> + unsigned char cabac_init_present_flag;
> + unsigned char pps_slice_chroma_qp_offsets_present_flag;
> +
> + unsigned char deblocking_filter_override_enabled_flag;
> + unsigned char pps_deblocking_filter_disabled_flag;
> + signed char pps_beta_offset_div2;
> + signed char pps_tc_offset_div2;
> + unsigned char tiles_enabled_flag;
> + unsigned char uniform_spacing_flag;
> + unsigned char num_tile_columns_minus1;
> + unsigned char num_tile_rows_minus1;
> +
> + unsigned short column_width_minus1[21];
> + unsigned short row_height_minus1[21];
> + unsigned int reserved3[15];
> +
> + // RefPicSets
> + int NumBitsForShortTermRPSInSlice;
> + int NumDeltaPocsOfRefRpsIdx;
> + int NumPocTotalCurr;
> + int NumPocStCurrBefore;
> + int NumPocStCurrAfter;
> + int NumPocLtCurr;
> + int CurrPicOrderCntVal;
> + int RefPicIdx[16]; // [refpic] Indices of valid reference pictures (-1 if unused for reference)
> + int PicOrderCntVal[16]; // [refpic]
> + unsigned char IsLongTerm[16]; // [refpic] 0=not a long-term reference, 1=long-term reference
> + unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
> + unsigned char RefPicSetStCurrAfter[8]; // [0..NumPocStCurrAfter-1] -> refpic (0..15)
> + unsigned char RefPicSetLtCurr[8]; // [0..NumPocLtCurr-1] -> refpic (0..15)
> + unsigned char RefPicSetInterLayer0[8];
> + unsigned char RefPicSetInterLayer1[8];
> + unsigned int reserved4[12];
> +
> + // scaling lists (diag order)
> + unsigned char ScalingList4x4[6][16]; // [matrixId][i]
> + unsigned char ScalingList8x8[6][64]; // [matrixId][i]
> + unsigned char ScalingList16x16[6][64]; // [matrixId][i]
> + unsigned char ScalingList32x32[2][64]; // [matrixId][i]
> + unsigned char ScalingListDCCoeff16x16[6]; // [matrixId]
> + unsigned char ScalingListDCCoeff32x32[2]; // [matrixId]
> +} CUVIDHEVCPICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDVP8PICPARAMS
> + * VP8 Picture Parameters
> + */
> +typedef struct _CUVIDVP8PICPARAMS
> +{
> + int width;
> + int height;
> + unsigned int first_partition_size;
> + //Frame Indexes
> + unsigned char LastRefIdx;
> + unsigned char GoldenRefIdx;
> + unsigned char AltRefIdx;
> + union {
> + struct {
> + unsigned char frame_type : 1; /**< 0 = KEYFRAME, 1 = INTERFRAME */
> + unsigned char version : 3;
> + unsigned char show_frame : 1;
> + unsigned char update_mb_segmentation_data : 1; /**< Must be 0 if segmentation is not enabled */
> + unsigned char Reserved2Bits : 2;
> + };
> + unsigned char wFrameTagFlags;
> + } tagflags;
> + unsigned char Reserved1[4];
> + unsigned int Reserved2[3];
> +} CUVIDVP8PICPARAMS;
> +
> +/*!
> + * \struct CUVIDVP9PICPARAMS
> + * VP9 Picture Parameters
> + */
> +typedef struct _CUVIDVP9PICPARAMS
> +{
> + unsigned int width;
> + unsigned int height;
> +
> + //Frame Indices
> + unsigned char LastRefIdx;
> + unsigned char GoldenRefIdx;
> + unsigned char AltRefIdx;
> + unsigned char colorSpace;
> +
> + unsigned short profile : 3;
> + unsigned short frameContextIdx : 2;
> + unsigned short frameType : 1;
> + unsigned short showFrame : 1;
> + unsigned short errorResilient : 1;
> + unsigned short frameParallelDecoding : 1;
> + unsigned short subSamplingX : 1;
> + unsigned short subSamplingY : 1;
> + unsigned short intraOnly : 1;
> + unsigned short allow_high_precision_mv : 1;
> + unsigned short refreshEntropyProbs : 1;
> + unsigned short reserved2Bits : 2;
> +
> + unsigned short reserved16Bits;
> +
> + unsigned char refFrameSignBias[4];
> +
> + unsigned char bitDepthMinus8Luma;
> + unsigned char bitDepthMinus8Chroma;
> + unsigned char loopFilterLevel;
> + unsigned char loopFilterSharpness;
> +
> + unsigned char modeRefLfEnabled;
> + unsigned char log2_tile_columns;
> + unsigned char log2_tile_rows;
> +
> + unsigned char segmentEnabled : 1;
> + unsigned char segmentMapUpdate : 1;
> + unsigned char segmentMapTemporalUpdate : 1;
> + unsigned char segmentFeatureMode : 1;
> + unsigned char reserved4Bits : 4;
> +
> +
> + unsigned char segmentFeatureEnable[8][4];
> + short segmentFeatureData[8][4];
> + unsigned char mb_segment_tree_probs[7];
> + unsigned char segment_pred_probs[3];
> + unsigned char reservedSegment16Bits[2];
> +
> + int qpYAc;
> + int qpYDc;
> + int qpChDc;
> + int qpChAc;
> +
> + unsigned int activeRefIdx[3];
> + unsigned int resetFrameContext;
> + unsigned int mcomp_filter_type;
> + unsigned int mbRefLfDelta[4];
> + unsigned int mbModeLfDelta[2];
> + unsigned int frameTagSize;
> + unsigned int offsetToDctParts;
> + unsigned int reserved128Bits[4];
> +
> +} CUVIDVP9PICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDPICPARAMS
> + * Picture Parameters for Decoding
> + */
> +typedef struct _CUVIDPICPARAMS
> +{
> + int PicWidthInMbs; /**< Coded Frame Size */
> + int FrameHeightInMbs; /**< Coded Frame Height */
> + int CurrPicIdx; /**< Output index of the current picture */
> + int field_pic_flag; /**< 0=frame picture, 1=field picture */
> + int bottom_field_flag; /**< 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
> + int second_field; /**< Second field of a complementary field pair */
> + // Bitstream data
> + unsigned int nBitstreamDataLen; /**< Number of bytes in bitstream data buffer */
> + const unsigned char *pBitstreamData; /**< Ptr to bitstream data for this picture (slice-layer) */
> + unsigned int nNumSlices; /**< Number of slices in this picture */
> + const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains offset of each slice within the bitstream data buffer */
> + int ref_pic_flag; /**< This picture is a reference picture */
> + int intra_pic_flag; /**< This picture is entirely intra coded */
> + unsigned int Reserved[30]; /**< Reserved for future use */
> + // Codec-specific data
> + union {
> + CUVIDMPEG2PICPARAMS mpeg2; /**< Also used for MPEG-1 */
> + CUVIDH264PICPARAMS h264;
> + CUVIDVC1PICPARAMS vc1;
> + CUVIDMPEG4PICPARAMS mpeg4;
> + CUVIDJPEGPICPARAMS jpeg;
> + CUVIDHEVCPICPARAMS hevc;
> + CUVIDVP8PICPARAMS vp8;
> + CUVIDVP9PICPARAMS vp9;
> + unsigned int CodecReserved[1024];
> + } CodecSpecific;
> +} CUVIDPICPARAMS;
> +
> +
> +/*!
> + * \struct CUVIDPROCPARAMS
> + * Picture Parameters for Postprocessing
> + */
> +typedef struct _CUVIDPROCPARAMS
> +{
> + int progressive_frame; /**< Input is progressive (deinterlace_mode will be ignored) */
> + int second_field; /**< Output the second field (ignored if deinterlace mode is Weave) */
> + int top_field_first; /**< Input frame is top field first (1st field is top, 2nd field is bottom) */
> + int unpaired_field; /**< Input only contains one field (2nd field is invalid) */
> + // The fields below are used for raw YUV input
> + unsigned int reserved_flags; /**< Reserved for future use (set to zero) */
> + unsigned int reserved_zero; /**< Reserved (set to zero) */
> + unsigned long long raw_input_dptr; /**< Input CUdeviceptr for raw YUV extensions */
> + unsigned int raw_input_pitch; /**< pitch in bytes of raw YUV input (should be aligned appropriately) */
> + unsigned int raw_input_format; /**< Reserved for future use (set to zero) */
> + unsigned long long raw_output_dptr; /**< Reserved for future use (set to zero) */
> + unsigned int raw_output_pitch; /**< Reserved for future use (set to zero) */
> + unsigned int Reserved[48];
> + void *Reserved3[3];
> +} CUVIDPROCPARAMS;
> +
> +
> +/**
> + *
> + * In order to minimize decode latencies, there should always be at least 2 pictures in the decode
> + * queue at any time, in order to make sure that all decode engines are always busy.
> + *
> + * Overall data flow:
> + * - cuvidCreateDecoder(...)
> + * For each picture:
> + * - cuvidDecodePicture(N)
> + * - cuvidMapVideoFrame(N-4)
> + * - do some processing in cuda
> + * - cuvidUnmapVideoFrame(N-4)
> + * - cuvidDecodePicture(N+1)
> + * - cuvidMapVideoFrame(N-3)
> + * ...
> + * - cuvidDestroyDecoder(...)
> + *
> + * NOTE:
> + * - When the cuda context is created from a D3D device, the D3D device must also be created
> + * with the D3DCREATE_MULTITHREADED flag.
> + * - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
> + * - cuvidDecodePicture may block the calling thread if there are too many pictures pending
> + * in the decode queue
> + */
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
> + * Create the decoder object
> + */
> +typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
> + * Destroy the decoder object
> + */
> +typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
> + * Decode a single picture (field or frame)
> + */
> +typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
> +
> +
> +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
> +/**
> + * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
> + * Post-process and map a video frame for use in cuda
> + */
> +typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
> + unsigned int *pDevPtr, unsigned int *pPitch,
> + CUVIDPROCPARAMS *pVPP);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
> + * Unmap a previously mapped video frame
> + */
> +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
> +#endif
> +
> +#if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
> +/**
> + * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
> + * map a video frame
> + */
> +typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
> + unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
> + * Unmap a previously mapped video frame
> + */
> +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
> +
> +#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
> +#define tcuvidMapVideoFrame tcuvidMapVideoFrame64
> +#define tcuvidUnmapVideoFrame tcuvidUnmapVideoFrame64
> +#endif
> +#endif
> +
> +
> +/**
> + *
> + * Context-locking: to facilitate multi-threaded implementations, the following 4 functions
> + * provide a simple mutex-style host synchronization. If a non-NULL context is specified
> + * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
> + * context before making any cuda calls.
> + * A multi-threaded application could create a lock associated with a context handle so that
> + * multiple threads can safely share the same cuda context:
> + * - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
> + * that can be passed to cuvidCtxLockCreate.
> + * - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
> + *
> + * NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
> + * decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
> +*/
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
> + */
> +typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);
> +
> +/** @} */ /* End VIDEO_DECODER */
> +
> +#if defined(__cplusplus)
> +}
> +#endif /* __cplusplus */
> +
> +#endif // __CUDA_VIDEO_H__
> diff --git a/compat/cuda/dynlink_loader.h b/compat/cuda/dynlink_loader.h
> new file mode 100644
> index 0000000..6275664
> --- /dev/null
> +++ b/compat/cuda/dynlink_loader.h
> @@ -0,0 +1,254 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AV_COMPAT_CUDA_DYNLINK_LOADER_H
> +#define AV_COMPAT_CUDA_DYNLINK_LOADER_H
> +
> +#include "compat/cuda/dynlink_cuda.h"
> +#include "compat/cuda/dynlink_nvcuvid.h"
> +#include "compat/nvenc/nvEncodeAPI.h"
> +
> +#include "libavutil/log.h"
> +#include "libavutil/error.h"
> +
> +#if defined(_WIN32)
> +# include <windows.h>
> +# define dlopen(filename, flags) LoadLibrary(TEXT(filename))
> +# define dlsym(handle, symbol) GetProcAddress(handle, symbol)
> +# define dlclose(handle) FreeLibrary(handle)
> +# define LIB_HANDLE HMODULE
> +#else
> +# include <dlfcn.h>
> +# define LIB_HANDLE void*
> +#endif
> +
> +#if defined(_WIN32) || defined(__CYGWIN__)
> +# define CUDA_LIBNAME "nvcuda.dll"
> +# define NVCUVID_LIBNAME "nvcuvid.dll"
> +# if ARCH_X86_64
> +# define NVENC_LIBNAME "nvEncodeAPI64.dll"
> +# else
> +# define NVENC_LIBNAME "nvEncodeAPI.dll"
> +# endif
> +#else
> +# define CUDA_LIBNAME "libcuda.so.1"
> +# define NVCUVID_LIBNAME "libnvcuvid.so.1"
> +# define NVENC_LIBNAME "libnvidia-encode.so.1"
> +#endif
> +
> +#define LOAD_LIBRARY(l, path) \
> + do { \
> + if (!((l) = dlopen(path, RTLD_LAZY))) { \
> + av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", path); \
> + ret = AVERROR_UNKNOWN; \
> + goto error; \
> + } \
> + av_log(NULL, AV_LOG_TRACE, "Loaded lib: %s\n", path); \
> + } while (0)
> +
> +#define LOAD_SYMBOL(fun, symbol) \
> + do { \
> + if (!((f->fun) = dlsym(f->lib, symbol))) { \
> + av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", symbol); \
> + ret = AVERROR_UNKNOWN; \
> + goto error; \
> + } \
> + av_log(NULL, AV_LOG_TRACE, "Loaded sym: %s\n", symbol); \
> + } while (0)
> +
> +#define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N) \
> + T *f; \
> + int ret; \
> + \
> + n##_free_functions(functions); \
> + \
> + f = *functions = av_mallocz(sizeof(*f)); \
> + if (!f) \
> + return AVERROR(ENOMEM); \
> + \
> + LOAD_LIBRARY(f->lib, N);
> +
> +#define GENERIC_LOAD_FUNC_FINALE(n) \
> + return 0; \
> +error: \
> + n##_free_functions(functions); \
> + return ret;
> +
> +#define GENERIC_FREE_FUNC() \
> + if (!functions) \
> + return; \
> + if (*functions && (*functions)->lib) \
> + dlclose((*functions)->lib); \
> + av_freep(functions);
> +
> +#ifdef AV_COMPAT_DYNLINK_CUDA_H
> +typedef struct CudaFunctions {
> + tcuInit *cuInit;
> + tcuDeviceGetCount *cuDeviceGetCount;
> + tcuDeviceGet *cuDeviceGet;
> + tcuDeviceGetName *cuDeviceGetName;
> + tcuDeviceComputeCapability *cuDeviceComputeCapability;
> + tcuCtxCreate_v2 *cuCtxCreate;
> + tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
> + tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
> + tcuCtxDestroy_v2 *cuCtxDestroy;
> + tcuMemAlloc_v2 *cuMemAlloc;
> + tcuMemFree_v2 *cuMemFree;
> + tcuMemcpy2D_v2 *cuMemcpy2D;
> + tcuGetErrorName *cuGetErrorName;
> + tcuGetErrorString *cuGetErrorString;
> +
> + LIB_HANDLE lib;
> +} CudaFunctions;
> +#else
> +typedef struct CudaFunctions CudaFunctions;
> +#endif
> +
> +typedef struct CuvidFunctions {
> + tcuvidCreateDecoder *cuvidCreateDecoder;
> + tcuvidDestroyDecoder *cuvidDestroyDecoder;
> + tcuvidDecodePicture *cuvidDecodePicture;
> + tcuvidMapVideoFrame *cuvidMapVideoFrame;
> + tcuvidUnmapVideoFrame *cuvidUnmapVideoFrame;
> + tcuvidCtxLockCreate *cuvidCtxLockCreate;
> + tcuvidCtxLockDestroy *cuvidCtxLockDestroy;
> + tcuvidCtxLock *cuvidCtxLock;
> + tcuvidCtxUnlock *cuvidCtxUnlock;
> +
> + tcuvidCreateVideoSource *cuvidCreateVideoSource;
> + tcuvidCreateVideoSourceW *cuvidCreateVideoSourceW;
> + tcuvidDestroyVideoSource *cuvidDestroyVideoSource;
> + tcuvidSetVideoSourceState *cuvidSetVideoSourceState;
> + tcuvidGetVideoSourceState *cuvidGetVideoSourceState;
> + tcuvidGetSourceVideoFormat *cuvidGetSourceVideoFormat;
> + tcuvidGetSourceAudioFormat *cuvidGetSourceAudioFormat;
> + tcuvidCreateVideoParser *cuvidCreateVideoParser;
> + tcuvidParseVideoData *cuvidParseVideoData;
> + tcuvidDestroyVideoParser *cuvidDestroyVideoParser;
> +
> + LIB_HANDLE lib;
> +} CuvidFunctions;
> +
> +typedef struct NvencFunctions {
> + NVENCSTATUS (NVENCAPI *NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList);
> + NVENCSTATUS (NVENCAPI *NvEncodeAPIGetMaxSupportedVersion)(uint32_t* version);
> +
> + LIB_HANDLE lib;
> +} NvencFunctions;
> +
> +#ifdef AV_COMPAT_DYNLINK_CUDA_H
> +static inline void cuda_free_functions(CudaFunctions **functions)
> +{
> + GENERIC_FREE_FUNC();
> +}
> +#endif
> +
> +static inline void cuvid_free_functions(CuvidFunctions **functions)
> +{
> + GENERIC_FREE_FUNC();
> +}
> +
> +static inline void nvenc_free_functions(NvencFunctions **functions)
> +{
> + GENERIC_FREE_FUNC();
> +}
> +
> +#ifdef AV_COMPAT_DYNLINK_CUDA_H
> +static inline int cuda_load_functions(CudaFunctions **functions)
> +{
> + GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME);
> +
> + LOAD_SYMBOL(cuInit, "cuInit");
> + LOAD_SYMBOL(cuDeviceGetCount, "cuDeviceGetCount");
> + LOAD_SYMBOL(cuDeviceGet, "cuDeviceGet");
> + LOAD_SYMBOL(cuDeviceGetName, "cuDeviceGetName");
> + LOAD_SYMBOL(cuDeviceComputeCapability, "cuDeviceComputeCapability");
> + LOAD_SYMBOL(cuCtxCreate, "cuCtxCreate_v2");
> + LOAD_SYMBOL(cuCtxPushCurrent, "cuCtxPushCurrent_v2");
> + LOAD_SYMBOL(cuCtxPopCurrent, "cuCtxPopCurrent_v2");
> + LOAD_SYMBOL(cuCtxDestroy, "cuCtxDestroy_v2");
> + LOAD_SYMBOL(cuMemAlloc, "cuMemAlloc_v2");
> + LOAD_SYMBOL(cuMemFree, "cuMemFree_v2");
> + LOAD_SYMBOL(cuMemcpy2D, "cuMemcpy2D_v2");
> + LOAD_SYMBOL(cuGetErrorName, "cuGetErrorName");
> + LOAD_SYMBOL(cuGetErrorString, "cuGetErrorString");
> +
> + GENERIC_LOAD_FUNC_FINALE(cuda);
> +}
> +#endif
> +
> +static inline int cuvid_load_functions(CuvidFunctions **functions)
> +{
> + GENERIC_LOAD_FUNC_PREAMBLE(CuvidFunctions, cuvid, NVCUVID_LIBNAME);
> +
> + LOAD_SYMBOL(cuvidCreateDecoder, "cuvidCreateDecoder");
> + LOAD_SYMBOL(cuvidDestroyDecoder, "cuvidDestroyDecoder");
> + LOAD_SYMBOL(cuvidDecodePicture, "cuvidDecodePicture");
> +#ifdef __CUVID_DEVPTR64
> + LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame64");
> + LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame64");
> +#else
> + LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame");
> + LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame");
> +#endif
> + LOAD_SYMBOL(cuvidCtxLockCreate, "cuvidCtxLockCreate");
> + LOAD_SYMBOL(cuvidCtxLockDestroy, "cuvidCtxLockDestroy");
> + LOAD_SYMBOL(cuvidCtxLock, "cuvidCtxLock");
> + LOAD_SYMBOL(cuvidCtxUnlock, "cuvidCtxUnlock");
> +
> + LOAD_SYMBOL(cuvidCreateVideoSource, "cuvidCreateVideoSource");
> + LOAD_SYMBOL(cuvidCreateVideoSourceW, "cuvidCreateVideoSourceW");
> + LOAD_SYMBOL(cuvidDestroyVideoSource, "cuvidDestroyVideoSource");
> + LOAD_SYMBOL(cuvidSetVideoSourceState, "cuvidSetVideoSourceState");
> + LOAD_SYMBOL(cuvidGetVideoSourceState, "cuvidGetVideoSourceState");
> + LOAD_SYMBOL(cuvidGetSourceVideoFormat, "cuvidGetSourceVideoFormat");
> + LOAD_SYMBOL(cuvidGetSourceAudioFormat, "cuvidGetSourceAudioFormat");
> + LOAD_SYMBOL(cuvidCreateVideoParser, "cuvidCreateVideoParser");
> + LOAD_SYMBOL(cuvidParseVideoData, "cuvidParseVideoData");
> + LOAD_SYMBOL(cuvidDestroyVideoParser, "cuvidDestroyVideoParser");
> +
> + GENERIC_LOAD_FUNC_FINALE(cuvid);
> +}
> +
> +static inline int nvenc_load_functions(NvencFunctions **functions)
> +{
> + GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME);
> +
> + LOAD_SYMBOL(NvEncodeAPICreateInstance, "NvEncodeAPICreateInstance");
> + LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, "NvEncodeAPIGetMaxSupportedVersion");
> +
> + GENERIC_LOAD_FUNC_FINALE(nvenc);
> +}
> +
> +#undef GENERIC_LOAD_FUNC_PREAMBLE
> +#undef LOAD_LIBRARY
> +#undef LOAD_SYMBOL
> +#undef GENERIC_LOAD_FUNC_FINALE
> +#undef GENERIC_FREE_FUNC
> +#undef CUDA_LIBNAME
> +#undef NVCUVID_LIBNAME
> +#undef NVENC_LIBNAME
> +#undef LIB_HANDLE
> +
> +#if defined(_WIN32)
> +#undef dlopen
> +#undef dlsym
> +#undef dlclose
> +#endif
> +
> +#endif
> \ No newline at end of file
> diff --git a/compat/cuda/dynlink_nvcuvid.h b/compat/cuda/dynlink_nvcuvid.h
> new file mode 100644
> index 0000000..6c197e0
> --- /dev/null
> +++ b/compat/cuda/dynlink_nvcuvid.h
> @@ -0,0 +1,316 @@
> +/*
> + * This copyright notice applies to this header file only:
> + *
> + * Copyright (c) 2010-2016 NVIDIA Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> + * obtaining a copy of this software and associated documentation
> + * files (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use,
> + * copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the software, and to permit persons to whom the
> + * software is furnished to do so, subject to the following
> + * conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> + * included in all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
> + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
> + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file nvcuvid.h
> + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices.
> + * \date 2015-2015
> + * This file contains the interface constants, structure definitions and function prototypes.
> + */
> +
> +#if !defined(__NVCUVID_H__)
> +#define __NVCUVID_H__
> +
> +#include "compat/cuda/dynlink_cuviddec.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* __cplusplus */
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////
> +//
> +// High-level helper APIs for video sources
> +//
> +
> +typedef void *CUvideosource;
> +typedef void *CUvideoparser;
> +typedef long long CUvideotimestamp;
> +
> +/**
> + * \addtogroup VIDEO_PARSER Video Parser
> + * @{
> + */
> +
> +/*!
> + * \enum cudaVideoState
> + * Video Source State
> + */
> +typedef enum {
> + cudaVideoState_Error = -1, /**< Error state (invalid source) */
> + cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */
> + cudaVideoState_Started = 1 /**< Source is running and delivering data */
> +} cudaVideoState;
> +
> +/*!
> + * \enum cudaAudioCodec
> + * Audio compression
> + */
> +typedef enum {
> + cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */
> + cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */
> + cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */
> + cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */
> + cudaAudioCodec_LPCM /**< PCM Audio */
> +} cudaAudioCodec;
> +
> +/*!
> + * \struct CUVIDEOFORMAT
> + * Video format
> + */
> +typedef struct
> +{
> + cudaVideoCodec codec; /**< Compression format */
> + /**
> + * frame rate = numerator / denominator (for example: 30000/1001)
> + */
> + struct {
> + unsigned int numerator; /**< frame rate numerator (0 = unspecified or variable frame rate) */
> + unsigned int denominator; /**< frame rate denominator (0 = unspecified or variable frame rate) */
> + } frame_rate;
> + unsigned char progressive_sequence; /**< 0=interlaced, 1=progressive */
> + unsigned char bit_depth_luma_minus8; /**< high bit depth Luma */
> + unsigned char bit_depth_chroma_minus8; /**< high bit depth Chroma */
> + unsigned char reserved1; /**< Reserved for future use */
> + unsigned int coded_width; /**< coded frame width */
> + unsigned int coded_height; /**< coded frame height */
> + /**
> + * area of the frame that should be displayed
> + * typical example:
> + * coded_width = 1920, coded_height = 1088
> + * display_area = { 0,0,1920,1080 }
> + */
> + struct {
> + int left; /**< left position of display rect */
> + int top; /**< top position of display rect */
> + int right; /**< right position of display rect */
> + int bottom; /**< bottom position of display rect */
> + } display_area;
> + cudaVideoChromaFormat chroma_format; /**< Chroma format */
> + unsigned int bitrate; /**< video bitrate (bps, 0=unknown) */
> + /**
> + * Display Aspect Ratio = x:y (4:3, 16:9, etc)
> + */
> + struct {
> + int x;
> + int y;
> + } display_aspect_ratio;
> + /**
> + * Video Signal Description
> + */
> + struct {
> + unsigned char video_format : 3;
> + unsigned char video_full_range_flag : 1;
> + unsigned char reserved_zero_bits : 4;
> + unsigned char color_primaries;
> + unsigned char transfer_characteristics;
> + unsigned char matrix_coefficients;
> + } video_signal_description;
> + unsigned int seqhdr_data_length; /**< Additional bytes following (CUVIDEOFORMATEX) */
> +} CUVIDEOFORMAT;
> +
> +/*!
> + * \struct CUVIDEOFORMATEX
> + * Video format including raw sequence header information
> + */
> +typedef struct
> +{
> + CUVIDEOFORMAT format;
> + unsigned char raw_seqhdr_data[1024];
> +} CUVIDEOFORMATEX;
> +
> +/*!
> + * \struct CUAUDIOFORMAT
> + * Audio Formats
> + */
> +typedef struct
> +{
> + cudaAudioCodec codec; /**< Compression format */
> + unsigned int channels; /**< number of audio channels */
> + unsigned int samplespersec; /**< sampling frequency */
> + unsigned int bitrate; /**< For uncompressed, can also be used to determine bits per sample */
> + unsigned int reserved1; /**< Reserved for future use */
> + unsigned int reserved2; /**< Reserved for future use */
> +} CUAUDIOFORMAT;
> +
> +
> +/*!
> + * \enum CUvideopacketflags
> + * Data packet flags
> + */
> +typedef enum {
> + CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */
> + CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */
> + CUVID_PKT_DISCONTINUITY = 0x04 /**< Set when a discontinuity has to be signalled */
> +} CUvideopacketflags;
> +
> +/*!
> + * \struct CUVIDSOURCEDATAPACKET
> + * Data Packet
> + */
> +typedef struct _CUVIDSOURCEDATAPACKET
> +{
> + unsigned long flags; /**< Combination of CUVID_PKT_XXX flags */
> + unsigned long payload_size; /**< number of bytes in the payload (may be zero if EOS flag is set) */
> + const unsigned char *payload; /**< Pointer to packet payload data (may be NULL if EOS flag is set) */
> + CUvideotimestamp timestamp; /**< Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set */
> +} CUVIDSOURCEDATAPACKET;
> +
> +// Callback for packet delivery
> +typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
> +
> +/*!
> + * \struct CUVIDSOURCEPARAMS
> + * Source Params
> + */
> +typedef struct _CUVIDSOURCEPARAMS
> +{
> + unsigned int ulClockRate; /**< Timestamp units in Hz (0=default=10000000Hz) */
> + unsigned int uReserved1[7]; /**< Reserved for future use - set to zero */
> + void *pUserData; /**< Parameter passed in to the data handlers */
> + PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< Called to deliver video packets */
> + PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< Called to deliver audio packets */
> + void *pvReserved2[8]; /**< Reserved for future use - set to NULL */
> +} CUVIDSOURCEPARAMS;
> +
> +/*!
> + * \enum CUvideosourceformat_flags
> + * CUvideosourceformat_flags
> + */
> +typedef enum {
> + CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */
> +} CUvideosourceformat_flags;
> +
> +#if !defined(__APPLE__)
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
> + * Create Video Source
> + */
> +typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
> + * Create Video Source (file name given as a wide-character string)
> + */
> +typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
> + * Destroy Video Source
> + */
> +typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
> + * Set Video Source state
> + */
> +typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
> +
> +/**
> + * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
> + * Get Video Source state
> + */
> +typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
> + * Get Video Source Format
> + */
> +typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
> + * Get Audio Source Format
> + */
> +typedef CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
> +
> +#endif
> +
> +/**
> + * \struct CUVIDPARSERDISPINFO
> + */
> +typedef struct _CUVIDPARSERDISPINFO
> +{
> + int picture_index; /**< */
> + int progressive_frame; /**< */
> + int top_field_first; /**< */
> + int repeat_first_field; /**< Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field) */
> + CUvideotimestamp timestamp; /**< */
> +} CUVIDPARSERDISPINFO;
> +
> +//
> +// Parser callbacks
> +// The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to
> +// be decoded and/or displayed.
> +//
> +typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
> +typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
> +typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
> +
> +/**
> + * \struct CUVIDPARSERPARAMS
> + */
> +typedef struct _CUVIDPARSERPARAMS
> +{
> + cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */
> + unsigned int ulMaxNumDecodeSurfaces; /**< Max # of decode surfaces (parser will cycle through these) */
> + unsigned int ulClockRate; /**< Timestamp units in Hz (0=default=10000000Hz) */
> + unsigned int ulErrorThreshold; /**< % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) */
> + unsigned int ulMaxDisplayDelay; /**< Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) */
> + unsigned int uReserved1[5]; /**< Reserved for future use - set to 0 */
> + void *pUserData; /**< User data for callbacks */
> + PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding frames and/or whenever there is a format change */
> + PFNVIDDECODECALLBACK pfnDecodePicture; /**< Called when a picture is ready to be decoded (decode order) */
> + PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< Called whenever a picture is ready to be displayed (display order) */
> + void *pvReserved2[7]; /**< Reserved for future use - set to NULL */
> + CUVIDEOFORMATEX *pExtVideoInfo; /**< [Optional] sequence header data from system layer */
> +} CUVIDPARSERPARAMS;
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
> + */
> +typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
> + */
> +typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
> +
> +/**
> + * \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
> + */
> +typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj);
> +
> +/** @} */ /* END VIDEO_PARSER */
> +////////////////////////////////////////////////////////////////////////////////////////////////
> +
> +#if defined(__cplusplus)
> +}
> +#endif /* __cplusplus */
> +
> +#endif // __NVCUVID_H__
> +
> +
> --
> 2.10.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
More information about the ffmpeg-devel
mailing list