[FFmpeg-devel] [PATCH v4 4/5] libavcodec: VAAPI H.265 encoder
Mark Thompson
sw at jkqxz.net
Sat Jan 23 20:16:09 CET 2016
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/vaapi_enc_hevc.c | 1664 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 1668 insertions(+)
create mode 100644 libavcodec/vaapi_enc_hevc.c
diff --git a/configure b/configure
index bf22ae1..f30ddab 100755
--- a/configure
+++ b/configure
@@ -2523,6 +2523,7 @@ hevc_dxva2_hwaccel_select="hevc_decoder"
hevc_qsv_hwaccel_deps="libmfx"
hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
hevc_vaapi_hwaccel_select="hevc_decoder"
+hevc_vaapi_encoder_deps="vaapi_recent VAEncPictureParameterBufferHEVC"
hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
hevc_vdpau_hwaccel_select="hevc_decoder"
mpeg_vdpau_decoder_deps="vdpau"
@@ -5360,6 +5361,7 @@ check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
check_type "va/va.h" "VAPictureParameterBufferHEVC"
check_type "va/va.h" "VADecPictureParameterBufferVP9"
check_type "va/va.h" "VAEncPictureParameterBufferH264"
+check_type "va/va.h" "VAEncPictureParameterBufferHEVC"
check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 06b3c48..a5e1cab 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -311,6 +311,7 @@ OBJS-$(CONFIG_HEVC_DECODER) += hevc.o hevc_mvs.o hevc_ps.o hevc_sei.o
hevcdsp.o hevc_filter.o hevc_parse.o hevc_data.o
OBJS-$(CONFIG_HEVC_QSV_DECODER) += qsvdec_h2645.o
OBJS-$(CONFIG_HEVC_QSV_ENCODER) += qsvenc_hevc.o hevc_ps_enc.o hevc_parse.o
+OBJS-$(CONFIG_HEVC_VAAPI_ENCODER) += vaapi_enc_hevc.o
OBJS-$(CONFIG_HNM4_VIDEO_DECODER) += hnm4video.o
OBJS-$(CONFIG_HQ_HQA_DECODER) += hq_hqa.o hq_hqadata.o hq_hqadsp.o \
canopus.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0d07087..a25da5b 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -203,6 +203,7 @@ void avcodec_register_all(void)
REGISTER_ENCDEC (HAP, hap);
REGISTER_DECODER(HEVC, hevc);
REGISTER_DECODER(HEVC_QSV, hevc_qsv);
+ REGISTER_ENCODER(HEVC_VAAPI, hevc_vaapi);
REGISTER_DECODER(HNM4_VIDEO, hnm4_video);
REGISTER_DECODER(HQ_HQA, hq_hqa);
REGISTER_DECODER(HQX, hqx);
diff --git a/libavcodec/vaapi_enc_hevc.c b/libavcodec/vaapi_enc_hevc.c
new file mode 100644
index 0000000..1dbd303
--- /dev/null
+++ b/libavcodec/vaapi_enc_hevc.c
@@ -0,0 +1,1664 @@
+/*
+ * VAAPI H.265 encoder.
+ *
+ * Copyright (C) 2016 Mark Thompson <mrt at jkqxz.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "golomb.h"
+#include "put_bits.h"
+
+#include "hevc.h"
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/vaapi.h"
+
+#define MAX_DPB_PICS 16 // Size of the dpb[] array and of each per-set delta_poc / used_by_curr_pic array.
+#define INPUT_PICS 2
+// NOTE(review): bool is a plain macro for unsigned char here, not the C99 _Bool type.
+#define bool unsigned char
+#define MAX_ST_REF_PIC_SETS 32 // Size of the st_ref_pic_set[] array.
+#define MAX_LAYERS 1 // Size of the per-layer arrays (ordering info, layer_id_included_flag rows).
+
+
+// Per-sequence H.265 syntax elements needed by the VPS/SPS/PPS/slice header
+// writers in this file which are not already held in the VAAPI structures.
+typedef struct VAAPIHEVCEncodeMiscSequenceParams {
+// Fields are named after the syntax elements they hold; bool is unsigned char in this file.
+ // Parameter set IDs: the VPS ID is written in the VPS and SPS, the SPS ID in the SPS and PPS.
+ unsigned int video_parameter_set_id;
+ unsigned int seq_parameter_set_id;
+
+ // Layering.
+ unsigned int vps_max_layers_minus1;
+ unsigned int vps_max_sub_layers_minus1;
+ bool vps_temporal_id_nesting_flag;
+ unsigned int vps_max_layer_id;
+ unsigned int vps_num_layer_sets_minus1;
+ unsigned int sps_max_sub_layers_minus1;
+ bool sps_temporal_id_nesting_flag;
+ bool layer_id_included_flag[MAX_LAYERS][64]; // Indexed [layer set][layer id] by the VPS writer.
+
+ // Profile/tier/level parameters (the profile, tier and level values themselves come from the VAAPI sequence parameters).
+ bool general_profile_compatibility_flag[32];
+ bool general_progressive_source_flag;
+ bool general_interlaced_source_flag;
+ bool general_non_packed_constraint_flag;
+ bool general_frame_only_constraint_flag;
+ bool general_inbld_flag;
+
+ // Decode/display ordering parameters.
+ unsigned int log2_max_pic_order_cnt_lsb_minus4; // Also sets the bit width of slice_pic_order_cnt_lsb in slice headers.
+ bool vps_sub_layer_ordering_info_present_flag;
+ unsigned int vps_max_dec_pic_buffering_minus1[MAX_LAYERS];
+ unsigned int vps_max_num_reorder_pics[MAX_LAYERS];
+ unsigned int vps_max_latency_increase_plus1[MAX_LAYERS];
+ bool sps_sub_layer_ordering_info_present_flag;
+ unsigned int sps_max_dec_pic_buffering_minus1[MAX_LAYERS];
+ unsigned int sps_max_num_reorder_pics[MAX_LAYERS];
+ unsigned int sps_max_latency_increase_plus1[MAX_LAYERS];
+
+ // Timing information (the VPS writer emits the two 32-bit values as a zero bit plus 31 bits).
+ bool vps_timing_info_present_flag;
+ unsigned int vps_num_units_in_tick;
+ unsigned int vps_time_scale;
+ bool vps_poc_proportional_to_timing_flag;
+ unsigned int vps_num_ticks_poc_diff_minus1;
+
+ // Cropping information (written to the SPS only when conformance_window_flag is set).
+ bool conformance_window_flag;
+ unsigned int conf_win_left_offset;
+ unsigned int conf_win_right_offset;
+ unsigned int conf_win_top_offset;
+ unsigned int conf_win_bottom_offset;
+
+ // Short-term reference picture sets (one SPS entry per set, written without inter-set prediction).
+ unsigned int num_short_term_ref_pic_sets;
+ struct {
+ unsigned int num_negative_pics;
+ unsigned int num_positive_pics;
+
+ unsigned int delta_poc_s0_minus1[MAX_DPB_PICS];
+ bool used_by_curr_pic_s0_flag[MAX_DPB_PICS];
+
+ unsigned int delta_poc_s1_minus1[MAX_DPB_PICS];
+ bool used_by_curr_pic_s1_flag[MAX_DPB_PICS];
+ } st_ref_pic_set[MAX_ST_REF_PIC_SETS];
+
+ // Long-term reference pictures (the SPS writer always emits a count of 0; the slice header writer asserts if the flag is set).
+ bool long_term_ref_pics_present_flag;
+ unsigned int num_long_term_ref_pics_sps;
+ struct {
+ unsigned int lt_ref_pic_poc_lsb_sps;
+ bool used_by_curr_pic_lt_sps_flag;
+ } lt_ref_pic;
+
+ // Deblocking filter control (written to the PPS).
+ bool deblocking_filter_control_present_flag;
+ bool deblocking_filter_override_enabled_flag;
+ bool pps_deblocking_filter_disabled_flag;
+ int pps_beta_offset_div2;
+ int pps_tc_offset_div2;
+
+ // Video Usability Information (only the aspect ratio and video signal type parts are variable; the rest is written as absent).
+ bool vui_parameters_present_flag;
+ bool aspect_ratio_info_present_flag;
+ unsigned int aspect_ratio_idc;
+ unsigned int sar_width;
+ unsigned int sar_height;
+ bool video_signal_type_present_flag;
+ unsigned int video_format;
+ bool video_full_range_flag;
+ bool colour_description_present_flag;
+ unsigned int colour_primaries;
+ unsigned int transfer_characteristics;
+ unsigned int matrix_coeffs;
+
+ // Oddments (PPS-level flags which also control which slice header fields are present).
+ bool uniform_spacing_flag;
+ bool output_flag_present_flag;
+ bool cabac_init_present_flag;
+ unsigned int num_extra_slice_header_bits;
+ bool lists_modification_present_flag;
+ bool pps_slice_chroma_qp_offsets_present_flag;
+ bool pps_slice_chroma_offset_list_enabled_flag;
+
+} VAAPIHEVCEncodeMiscSequenceParams;
+
+// Per-slice H.265 syntax elements needed by the slice header writer which
+// are not already held in the VAAPI picture or slice parameter structures.
+typedef struct {
+ // Slice segments.
+ bool first_slice_segment_in_pic_flag;
+ unsigned int slice_segment_address; // Only written when first_slice_segment_in_pic_flag is 0.
+
+ // Short-term reference picture sets (the slice header writer asserts if the SPS flag is 0).
+ bool short_term_ref_pic_set_sps_flag;
+ unsigned int short_term_ref_pic_idx;
+
+ // Deblocking filter.
+ bool deblocking_filter_override_flag;
+
+ // Oddments.
+ bool slice_reserved_flag[8];
+ bool no_output_of_prior_pics_flag;
+ bool pic_output_flag; // NOTE(review): the slice header writer emits a constant 1 rather than this value.
+
+} VAAPIHEVCEncodeMiscPictureParams;
+
+typedef struct VAAPIHEVCEncodeFrame { // Per-frame state: one entry of the context's dpb[] array.
+ AVFrame *frame;
+ VASurfaceID surface_id;
+
+ int poc; // The low bits are written as slice_pic_order_cnt_lsb in the slice header.
+ enum {
+ FRAME_TYPE_I = I_SLICE,
+ FRAME_TYPE_P = P_SLICE,
+ FRAME_TYPE_B = B_SLICE,
+ } type;
+
+ VAPictureHEVC pic;
+
+ VAEncPictureParameterBufferHEVC pic_params;
+ VABufferID pic_params_id; // NOTE(review): vaapi_hevc_render_picture stores its buffer ID in the context, not here.
+
+ VAEncSliceParameterBufferHEVC slice_params;
+ VABufferID slice_params_id; // Set by vaapi_hevc_render_slice.
+
+ VAAPIHEVCEncodeMiscPictureParams misc_params;
+
+ VABufferID coded_data_id;
+ // refa/refb: presumably the reference frames for this frame; their use is not visible in this part of the file.
+ struct VAAPIHEVCEncodeFrame *refa, *refb;
+} VAAPIHEVCEncodeFrame;
+
+typedef struct VAAPIHEVCEncodeContext { // Encoder state shared by the header writers and render helpers below.
+ const AVClass *class;
+ const AVCodecContext *avctx;
+ // Supplies the VA display used for every vaCreateBuffer / vaRenderPicture call.
+ AVVAAPIHardwareContext *hardware_context;
+
+ AVVAAPIPipelineConfig codec_config;
+ AVVAAPIPipelineContext codec; // codec.context_id is the VA context the buffers are created in.
+
+ AVVAAPISurfaceConfig input_config;
+ AVVAAPISurfacePool input_pool;
+ AVVAAPISurfaceConfig recon_config;
+ AVVAAPISurfacePool recon_pool;
+
+ int input_is_vaapi;
+
+ VAProfile va_profile;
+ int level; // Multiplied by 3 to form general_level_idc.
+ int rc_mode;
+ int fixed_qp;
+
+ int input_width;
+ int input_height;
+ // The aligned size is what gets written as pic_width/height_in_luma_samples.
+ int aligned_width;
+ int aligned_height;
+ int ctu_width; // ctu_width * ctu_height sets the bit width of slice_segment_address.
+ int ctu_height;
+
+ VAEncSequenceParameterBufferHEVC seq_params;
+ VABufferID seq_params_id;
+
+ VAEncMiscParameterRateControl rc_params;
+ VAEncMiscParameterBuffer rc_params_buffer;
+ VABufferID rc_params_id;
+ // These picture parameters are the ones the PPS writer reads; per-frame copies live in VAAPIHEVCEncodeFrame.
+ VAEncPictureParameterBufferHEVC pic_params;
+ VABufferID pic_params_id;
+
+ VAAPIHEVCEncodeMiscSequenceParams misc_params;
+
+ int poc;
+
+ VAAPIHEVCEncodeFrame dpb[MAX_DPB_PICS];
+ int current_frame;
+ int previous_frame;
+ // Presumably filled in from user-supplied AVOptions; the option table is not visible in this part of the file.
+ struct {
+ int64_t hardware_context;
+
+ const char *profile;
+ const char *level;
+ int qp;
+ int idr_interval;
+ } options;
+
+} VAAPIHEVCEncodeContext;
+
+
+// Change the "#if 0" below to "#if 1" to log a full trace of all bitstream output (debugging only).
+#if 0
+static void trace_hevc_write_u(PutBitContext *s, unsigned int width,
+ unsigned int value, const char *name)
+{
+ av_log(0, AV_LOG_DEBUG, "H.265 bitstream [%3d]: %4u u(%u) / %s\n",
+ put_bits_count(s), value, width, name);
+ put_bits(s, width, value);
+}
+static void trace_hevc_write_ue(PutBitContext *s,
+ unsigned int value, const char *name)
+{
+ av_log(0, AV_LOG_DEBUG, "H.265 bitstream [%3d]: %4u ue(v) / %s\n",
+ put_bits_count(s), value, name);
+ set_ue_golomb(s, value);
+}
+static void trace_hevc_write_se(PutBitContext *s,
+ int value, const char *name)
+{
+ av_log(0, AV_LOG_DEBUG, "H.265 bitstream [%3d]: %+4d se(v) / %s\n",
+ put_bits_count(s), value, name);
+ set_se_golomb(s, value);
+}
+// In trace mode the syntax element name is stringified and logged; otherwise it is discarded.
+#define hevc_write_u(pbc, width, value, name) \
+ trace_hevc_write_u(pbc, width, value, #name)
+#define hevc_write_ue(pbc, value, name) \
+ trace_hevc_write_ue(pbc, value, #name)
+#define hevc_write_se(pbc, value, name) \
+ trace_hevc_write_se(pbc, value, #name)
+#else
+#define hevc_write_u(pbc, width, value, name) put_bits(pbc, width, value)
+#define hevc_write_ue(pbc, value, name) set_ue_golomb(pbc, value)
+#define hevc_write_se(pbc, value, name) set_se_golomb(pbc, value)
+#endif
+// Shorthands for the writers: they require a PutBitContext pointer named s to be in scope.
+#define u(width, ...) hevc_write_u(s, width, __VA_ARGS__)
+#define ue(...) hevc_write_ue(s, __VA_ARGS__)
+#define se(...) hevc_write_se(s, __VA_ARGS__)
+// Each macro below expands to two arguments: the value read through the named local pointer, then the bare element name.
+#define seq_var(name) seq->name, name
+#define seq_field(name) seq->seq_fields.bits.name, name
+#define pic_var(name) pic->name, name
+#define pic_field(name) pic->pic_fields.bits.name, name
+#define slice_var(name) slice->name, name
+#define slice_field(name) slice->slice_fields.bits.name, name
+#define misc_var(name) misc->name, name
+#define miscs_var(name) miscs->name, name
+
+// Write the 16-bit NAL unit header for the given NAL unit type.
+// The layer ID is always written as 0 and the temporal ID plus one as 1.
+static void vaapi_hevc_write_nal_unit_header(PutBitContext *s,
+                                             int nal_unit_type)
+{
+    u(1, 0,             forbidden_zero_bit);
+    u(6, nal_unit_type, nal_unit_type);
+    u(6, 0,             nuh_layer_id);
+    u(3, 1,             nuh_temporal_id_plus1);
+}
+
+// Finish an RBSP: write the stop bit (1), then zero bits until the
+// write position is a multiple of eight bits.
+static void vaapi_hevc_write_rbsp_trailing_bits(PutBitContext *s)
+{
+    u(1, 1, rbsp_stop_one_bit);
+
+    for (; put_bits_count(s) % 8 != 0; ) {
+        u(1, 0, rbsp_alignment_zero_bit);
+    }
+}
+
+// Write the profile_tier_level() structure shared by the VPS and SPS.
+// Only the general (non-sub-layer) fields are written: profile space is
+// always 0, the 43 reserved bits are always zero and no sub-layer
+// information follows the level.
+static void vaapi_hevc_write_profile_tier_level(PutBitContext *s,
+                                                VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    VAEncSequenceParameterBufferHEVC  *seq  = &ctx->seq_params;
+    int j;
+
+    u(2, 0, general_profile_space);
+    u(1, seq->general_tier_flag, general_tier_flag);
+    u(5, seq->general_profile_idc, general_profile_idc);
+
+    for (j = 0; j < 32; j++)
+        u(1, misc_var(general_profile_compatibility_flag[j]));
+
+    u(1, misc_var(general_progressive_source_flag));
+    u(1, misc_var(general_interlaced_source_flag));
+    u(1, misc_var(general_non_packed_constraint_flag));
+    u(1, misc_var(general_frame_only_constraint_flag));
+
+    // The 43 reserved bits are split in two because put_bits only
+    // handles up to 31 bits per call.
+    u(23, 0, general_reserved_zero_43bits);
+    u(20, 0, general_reserved_zero_43bits);
+
+    if (seq->general_profile_idc < 1 || seq->general_profile_idc > 5) {
+        u(1, 0, general_reserved_zero_bit);
+    } else {
+        u(1, misc_var(general_inbld_flag));
+    }
+
+    u(8, seq->general_level_idc, general_level_idc);
+
+    // No sublayers.
+}
+
+// Write a complete video parameter set NAL unit (header, payload and
+// RBSP trailing bits) from the context's miscellaneous sequence
+// parameters.  No HRD parameters and no VPS extension are written.
+static void vaapi_hevc_write_vps(PutBitContext *s,
+                                 VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    int i, j;
+
+    vaapi_hevc_write_nal_unit_header(s, NAL_VPS);
+
+    u(4, misc->video_parameter_set_id, vps_video_parameter_set_id);
+
+    u(1, 1, vps_base_layer_internal_flag);
+    u(1, 1, vps_base_layer_available_flag);
+    u(6, misc_var(vps_max_layers_minus1));
+    u(3, misc_var(vps_max_sub_layers_minus1));
+    u(1, misc_var(vps_temporal_id_nesting_flag));
+
+    u(16, 0xffff, vps_reserved_0xffff_16bits);
+
+    vaapi_hevc_write_profile_tier_level(s, ctx);
+
+    // When the ordering info flag is clear only the entry for the
+    // highest sub-layer is written.
+    u(1, misc_var(vps_sub_layer_ordering_info_present_flag));
+    for (i = (misc->vps_sub_layer_ordering_info_present_flag ?
+              0 : misc->vps_max_sub_layers_minus1);
+         i <= misc->vps_max_sub_layers_minus1; i++) {
+        ue(misc_var(vps_max_dec_pic_buffering_minus1[i]));
+        ue(misc_var(vps_max_num_reorder_pics[i]));
+        ue(misc_var(vps_max_latency_increase_plus1[i]));
+    }
+
+    u(6, misc_var(vps_max_layer_id));
+    ue(misc_var(vps_num_layer_sets_minus1));
+    // layer_id_included_flag only has MAX_LAYERS rows, so a larger layer
+    // set count would read past the end of the array.
+    av_assert0(misc->vps_num_layer_sets_minus1 < MAX_LAYERS);
+    for (i = 1; i <= misc->vps_num_layer_sets_minus1; i++) {
+        // The syntax carries one flag for every layer ID from 0 up to
+        // and including vps_max_layer_id.
+        for (j = 0; j <= misc->vps_max_layer_id; j++)
+            u(1, misc_var(layer_id_included_flag[i][j]));
+    }
+
+    u(1, misc_var(vps_timing_info_present_flag));
+    if (misc->vps_timing_info_present_flag) {
+        // The two 32-bit fields are written as a zero bit followed by
+        // 31 bits because put_bits cannot write 32 bits at once.
+        u(1, 0, put_bits_hack_zero_bit);
+        u(31, misc_var(vps_num_units_in_tick));
+        u(1, 0, put_bits_hack_zero_bit);
+        u(31, misc_var(vps_time_scale));
+        u(1, misc_var(vps_poc_proportional_to_timing_flag));
+        if (misc->vps_poc_proportional_to_timing_flag) {
+            ue(misc_var(vps_num_ticks_poc_diff_minus1));
+        }
+        ue(0, vps_num_hrd_parameters);
+    }
+
+    u(1, 0, vps_extension_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+// Write short-term reference picture set number st_rps_idx from the
+// context's miscellaneous sequence parameters.  Inter-set prediction is
+// never used: for every set but the first the prediction flag is
+// written as 0, and the negative and positive pictures are then listed
+// explicitly.
+static void vaapi_hevc_write_st_ref_pic_set(PutBitContext *s,
+                                            VAAPIHEVCEncodeContext *ctx,
+                                            int st_rps_idx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+#define strps_var(name) misc->st_ref_pic_set[st_rps_idx].name, name
+    unsigned int negative_count, positive_count;
+    int i;
+
+    if (st_rps_idx != 0) {
+        u(1, 0, inter_ref_pic_set_prediction_flag);
+    }
+
+    ue(strps_var(num_negative_pics));
+    ue(strps_var(num_positive_pics));
+
+    negative_count = misc->st_ref_pic_set[st_rps_idx].num_negative_pics;
+    positive_count = misc->st_ref_pic_set[st_rps_idx].num_positive_pics;
+
+    i = 0;
+    while (i < negative_count) {
+        ue(strps_var(delta_poc_s0_minus1[i]));
+        u(1, strps_var(used_by_curr_pic_s0_flag[i]));
+        i++;
+    }
+
+    i = 0;
+    while (i < positive_count) {
+        ue(strps_var(delta_poc_s1_minus1[i]));
+        u(1, strps_var(used_by_curr_pic_s1_flag[i]));
+        i++;
+    }
+}
+
+// Write the VUI parameters for the SPS.  Only the aspect ratio and the
+// video signal type sections take values from the context; every other
+// optional section is signalled as absent with a zero flag.
+static void vaapi_hevc_write_vui_parameters(PutBitContext *s,
+                                            VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+
+    // Aspect ratio: explicit width and height follow only for idc 255.
+    u(1, misc_var(aspect_ratio_info_present_flag));
+    if (misc->aspect_ratio_info_present_flag) {
+        u(8, misc_var(aspect_ratio_idc));
+        if (misc->aspect_ratio_idc == 255) {
+            u(16, misc_var(sar_width));
+            u(16, misc_var(sar_height));
+        }
+    }
+
+    u(1, 0, overscan_info_present_flag);
+
+    // Video signal type, with an optional colour description.
+    u(1, misc_var(video_signal_type_present_flag));
+    if (misc->video_signal_type_present_flag) {
+        u(3, misc_var(video_format));
+        u(1, misc_var(video_full_range_flag));
+        u(1, misc_var(colour_description_present_flag));
+        if (misc->colour_description_present_flag) {
+            u(8, misc_var(colour_primaries));
+            u(8, misc_var(transfer_characteristics));
+            u(8, misc_var(matrix_coeffs));
+        }
+    }
+
+    // Everything else is fixed at zero.
+    u(1, 0, chroma_loc_info_present_flag);
+    u(1, 0, neutral_chroma_indication_flag);
+    u(1, 0, field_seq_flag);
+    u(1, 0, frame_field_info_present_flag);
+    u(1, 0, default_display_window_flag);
+    u(1, 0, vui_timing_info_present_flag);
+    u(1, 0, bitstream_restriction_flag_flag);
+}
+
+// Write a complete sequence parameter set NAL unit (header, payload and
+// RBSP trailing bits).  Values come from the VAAPI sequence parameters
+// where they exist there and from the miscellaneous sequence parameters
+// otherwise.  No scaling list data, long-term reference pictures or SPS
+// extensions are written.
+static void vaapi_hevc_write_sps(PutBitContext *s,
+                                 VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    VAEncSequenceParameterBufferHEVC  *seq  = &ctx->seq_params;
+    int i;
+
+    vaapi_hevc_write_nal_unit_header(s, NAL_SPS);
+
+    u(4, misc->video_parameter_set_id, sps_video_parameter_set_id);
+    u(3, misc_var(sps_max_sub_layers_minus1));
+    u(1, misc_var(sps_temporal_id_nesting_flag));
+
+    vaapi_hevc_write_profile_tier_level(s, ctx);
+
+    ue(misc->seq_parameter_set_id, sps_seq_parameter_set_id);
+    ue(seq_field(chroma_format_idc));
+    if (seq->seq_fields.bits.chroma_format_idc == 3) {
+        u(1, 0, separate_colour_plane_flag);
+    }
+
+    ue(seq_var(pic_width_in_luma_samples));
+    ue(seq_var(pic_height_in_luma_samples));
+
+    u(1, misc_var(conformance_window_flag));
+    if (misc->conformance_window_flag) {
+        ue(misc_var(conf_win_left_offset));
+        ue(misc_var(conf_win_right_offset));
+        ue(misc_var(conf_win_top_offset));
+        ue(misc_var(conf_win_bottom_offset));
+    }
+
+    ue(seq_field(bit_depth_luma_minus8));
+    ue(seq_field(bit_depth_chroma_minus8));
+
+    ue(misc_var(log2_max_pic_order_cnt_lsb_minus4));
+
+    // When the ordering info flag is clear only the entry for the
+    // highest sub-layer is written.
+    u(1, misc_var(sps_sub_layer_ordering_info_present_flag));
+    i = misc->sps_sub_layer_ordering_info_present_flag ?
+        0 : misc->sps_max_sub_layers_minus1;
+    while (i <= misc->sps_max_sub_layers_minus1) {
+        ue(misc_var(sps_max_dec_pic_buffering_minus1[i]));
+        ue(misc_var(sps_max_num_reorder_pics[i]));
+        ue(misc_var(sps_max_latency_increase_plus1[i]));
+        i++;
+    }
+
+    // Coding and transform block size limits.
+    ue(seq_var(log2_min_luma_coding_block_size_minus3));
+    ue(seq_var(log2_diff_max_min_luma_coding_block_size));
+    ue(seq_var(log2_min_transform_block_size_minus2));
+    ue(seq_var(log2_diff_max_min_transform_block_size));
+    ue(seq_var(max_transform_hierarchy_depth_inter));
+    ue(seq_var(max_transform_hierarchy_depth_intra));
+
+    u(1, seq_field(scaling_list_enabled_flag));
+    if (seq->seq_fields.bits.scaling_list_enabled_flag) {
+        u(1, 0, sps_scaling_list_data_present_flag);
+    }
+
+    u(1, seq_field(amp_enabled_flag));
+    u(1, seq_field(sample_adaptive_offset_enabled_flag));
+
+    u(1, seq_field(pcm_enabled_flag));
+    if (seq->seq_fields.bits.pcm_enabled_flag) {
+        u(4, seq_var(pcm_sample_bit_depth_luma_minus1));
+        u(4, seq_var(pcm_sample_bit_depth_chroma_minus1));
+        ue(seq_var(log2_min_pcm_luma_coding_block_size_minus3));
+        // The bitstream carries the difference between max and min.
+        ue(seq->log2_max_pcm_luma_coding_block_size_minus3 -
+           seq->log2_min_pcm_luma_coding_block_size_minus3,
+           log2_diff_max_min_pcm_luma_coding_block_size);
+        u(1, seq_field(pcm_loop_filter_disabled_flag));
+    }
+
+    ue(misc_var(num_short_term_ref_pic_sets));
+    i = 0;
+    while (i < misc->num_short_term_ref_pic_sets) {
+        vaapi_hevc_write_st_ref_pic_set(s, ctx, i);
+        i++;
+    }
+
+    // If long-term pictures are flagged, a count of zero is written.
+    u(1, misc_var(long_term_ref_pics_present_flag));
+    if (misc->long_term_ref_pics_present_flag) {
+        ue(0, num_long_term_ref_pics_sps);
+    }
+
+    u(1, seq_field(sps_temporal_mvp_enabled_flag));
+    u(1, seq_field(strong_intra_smoothing_enabled_flag));
+
+    u(1, misc_var(vui_parameters_present_flag));
+    if (misc->vui_parameters_present_flag) {
+        vaapi_hevc_write_vui_parameters(s, ctx);
+    }
+
+    u(1, 0, sps_extension_present_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+// Write a complete picture parameter set NAL unit (header, payload and
+// RBSP trailing bits).  Values come from the context-level VAAPI picture
+// parameters where they exist there and from the miscellaneous sequence
+// parameters otherwise.  No scaling list data, slice header extension or
+// PPS extension is written.
+static void vaapi_hevc_write_pps(PutBitContext *s,
+                                 VAAPIHEVCEncodeContext *ctx)
+{
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    VAEncPictureParameterBufferHEVC   *pic  = &ctx->pic_params;
+    int i;
+
+    vaapi_hevc_write_nal_unit_header(s, NAL_PPS);
+
+    ue(pic->slice_pic_parameter_set_id, pps_pic_parameter_set_id);
+    ue(misc->seq_parameter_set_id, pps_seq_parameter_set_id);
+
+    u(1, pic_field(dependent_slice_segments_enabled_flag));
+    u(1, misc_var(output_flag_present_flag));
+    u(3, misc_var(num_extra_slice_header_bits));
+    u(1, pic_field(sign_data_hiding_enabled_flag));
+    u(1, misc_var(cabac_init_present_flag));
+
+    ue(pic_var(num_ref_idx_l0_default_active_minus1));
+    ue(pic_var(num_ref_idx_l1_default_active_minus1));
+
+    // The bitstream carries the initial QP as an offset from 26.
+    se(pic->pic_init_qp - 26, init_qp_minus26);
+
+    u(1, pic_field(constrained_intra_pred_flag));
+    u(1, pic_field(transform_skip_enabled_flag));
+
+    u(1, pic_field(cu_qp_delta_enabled_flag));
+    if (pic->pic_fields.bits.cu_qp_delta_enabled_flag) {
+        ue(pic_var(diff_cu_qp_delta_depth));
+    }
+
+    se(pic_var(pps_cb_qp_offset));
+    se(pic_var(pps_cr_qp_offset));
+
+    u(1, misc_var(pps_slice_chroma_qp_offsets_present_flag));
+    u(1, pic_field(weighted_pred_flag));
+    u(1, pic_field(weighted_bipred_flag));
+    u(1, pic_field(transquant_bypass_enabled_flag));
+    u(1, pic_field(tiles_enabled_flag));
+    u(1, pic_field(entropy_coding_sync_enabled_flag));
+
+    if (pic->pic_fields.bits.tiles_enabled_flag) {
+        ue(pic_var(num_tile_columns_minus1));
+        ue(pic_var(num_tile_rows_minus1));
+        u(1, misc_var(uniform_spacing_flag));
+        if (!misc->uniform_spacing_flag) {
+            // Explicit sizes are given for all but the last column/row.
+            i = 0;
+            while (i < pic->num_tile_columns_minus1) {
+                ue(pic_var(column_width_minus1[i]));
+                i++;
+            }
+            i = 0;
+            while (i < pic->num_tile_rows_minus1) {
+                ue(pic_var(row_height_minus1[i]));
+                i++;
+            }
+        }
+        u(1, pic_field(loop_filter_across_tiles_enabled_flag));
+    }
+
+    u(1, pic_field(pps_loop_filter_across_slices_enabled_flag));
+    u(1, misc_var(deblocking_filter_control_present_flag));
+    if (misc->deblocking_filter_control_present_flag) {
+        u(1, misc_var(deblocking_filter_override_enabled_flag));
+        u(1, misc_var(pps_deblocking_filter_disabled_flag));
+        if (!misc->pps_deblocking_filter_disabled_flag) {
+            se(misc_var(pps_beta_offset_div2));
+            se(misc_var(pps_tc_offset_div2));
+        }
+    }
+
+    // No scaling list data.
+    u(1, 0, pps_scaling_list_data_present_flag);
+
+    u(1, misc_var(lists_modification_present_flag));
+    ue(pic_var(log2_parallel_merge_level_minus2));
+    u(1, 0, slice_segment_header_extension_present_flag);
+    u(1, 0, pps_extension_present_flag);
+
+    vaapi_hevc_write_rbsp_trailing_bits(s);
+}
+
+// Write the NAL unit header and slice segment header for one frame,
+// finishing with the byte-alignment bits.
+//
+// s:       bit writer to append to.
+// ctx:     supplies the sequence-level parameters.
+// current: the frame being encoded; supplies the picture, slice and
+//          miscellaneous per-picture parameters and the POC.
+//
+// Syntax which this encoder does not implement (explicit per-slice
+// reference picture sets, long-term reference pictures, reference list
+// modification, prediction weight tables) triggers an assertion failure
+// rather than producing an invalid header.
+static void vaapi_hevc_write_slice_header(PutBitContext *s,
+                                          VAAPIHEVCEncodeContext *ctx,
+                                          VAAPIHEVCEncodeFrame *current)
+{
+    VAEncSequenceParameterBufferHEVC  *seq   = &ctx->seq_params;
+    VAEncPictureParameterBufferHEVC   *pic   = &current->pic_params;
+    VAAPIHEVCEncodeMiscSequenceParams *misc  = &ctx->misc_params;
+    VAEncSliceParameterBufferHEVC     *slice = &current->slice_params;
+    VAAPIHEVCEncodeMiscPictureParams  *miscs = &current->misc_params;
+    int i;
+
+    vaapi_hevc_write_nal_unit_header(s, pic->nal_unit_type);
+
+    u(1, miscs_var(first_slice_segment_in_pic_flag));
+    // Only NAL unit types from NAL_BLA_W_LP up to 23 carry this flag.
+    if (pic->nal_unit_type >= NAL_BLA_W_LP &&
+        pic->nal_unit_type <= 23)
+        u(1, miscs_var(no_output_of_prior_pics_flag));
+
+    ue(slice_var(slice_pic_parameter_set_id));
+
+    if (!miscs->first_slice_segment_in_pic_flag) {
+        if (pic->pic_fields.bits.dependent_slice_segments_enabled_flag)
+            u(1, slice_field(dependent_slice_segment_flag));
+        // The address width depends on the number of CTUs in the picture.
+        u(av_log2((ctx->ctu_width * ctx->ctu_height) - 1) + 1,
+          miscs_var(slice_segment_address));
+    }
+    if (!slice->slice_fields.bits.dependent_slice_segment_flag) {
+        for (i = 0; i < misc->num_extra_slice_header_bits; i++)
+            u(1, miscs_var(slice_reserved_flag[i]));
+
+        ue(slice_var(slice_type));
+        if (misc->output_flag_present_flag)
+            u(1, 1, pic_output_flag);
+        if (seq->seq_fields.bits.separate_colour_plane_flag)
+            u(2, slice_field(colour_plane_id));
+
+        // POC and reference picture set information is omitted for IDR
+        // pictures.  Only these elements are conditional on the NAL unit
+        // type; everything after this block is written for IDR slices too.
+        if (pic->nal_unit_type != NAL_IDR_W_RADL &&
+            pic->nal_unit_type != NAL_IDR_N_LP) {
+            u(4 + misc->log2_max_pic_order_cnt_lsb_minus4,
+              current->poc & ((1 << (misc->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1),
+              slice_pic_order_cnt_lsb);
+
+            u(1, miscs_var(short_term_ref_pic_set_sps_flag));
+            if (!miscs->short_term_ref_pic_set_sps_flag) {
+                // Explicit per-slice reference picture sets are not
+                // implemented.
+                av_assert0(0);
+            } else if (misc->num_short_term_ref_pic_sets > 1) {
+                u(av_log2(misc->num_short_term_ref_pic_sets - 1) + 1,
+                  miscs_var(short_term_ref_pic_idx));
+            }
+
+            if (misc->long_term_ref_pics_present_flag) {
+                // Long-term reference pictures are not implemented.
+                av_assert0(0);
+            }
+
+            if (seq->seq_fields.bits.sps_temporal_mvp_enabled_flag) {
+                u(1, slice_field(slice_temporal_mvp_enabled_flag));
+            }
+        }
+
+        if (seq->seq_fields.bits.sample_adaptive_offset_enabled_flag) {
+            u(1, slice_field(slice_sao_luma_flag));
+            if (!seq->seq_fields.bits.separate_colour_plane_flag &&
+                seq->seq_fields.bits.chroma_format_idc != 0) {
+                u(1, slice_field(slice_sao_chroma_flag));
+            }
+        }
+
+        if (slice->slice_type == P_SLICE || slice->slice_type == B_SLICE) {
+            u(1, slice_field(num_ref_idx_active_override_flag));
+            if (slice->slice_fields.bits.num_ref_idx_active_override_flag) {
+                ue(slice_var(num_ref_idx_l0_active_minus1));
+                if (slice->slice_type == B_SLICE) {
+                    ue(slice_var(num_ref_idx_l1_active_minus1));
+                }
+            }
+
+            if (misc->lists_modification_present_flag) {
+                // ref_pic_lists_modification() is not implemented.
+                av_assert0(0);
+            }
+            if (slice->slice_type == B_SLICE) {
+                u(1, slice_field(mvd_l1_zero_flag));
+            }
+            if (misc->cabac_init_present_flag) {
+                u(1, slice_field(cabac_init_flag));
+            }
+            if (slice->slice_fields.bits.slice_temporal_mvp_enabled_flag) {
+                if (slice->slice_type == B_SLICE)
+                    u(1, slice_field(collocated_from_l0_flag));
+                // NOTE(review): the syntax only carries collocated_ref_idx
+                // when the selected list has more than one active entry;
+                // it is written unconditionally here - confirm the
+                // encoder never uses a single-entry list with temporal MVP.
+                ue(pic->collocated_ref_pic_index, collocated_ref_idx);
+            }
+            if ((pic->pic_fields.bits.weighted_pred_flag &&
+                 slice->slice_type == P_SLICE) ||
+                (pic->pic_fields.bits.weighted_bipred_flag &&
+                 slice->slice_type == B_SLICE)) {
+                // pred_weight_table() is not implemented.
+                av_assert0(0);
+            }
+            // Present for every P and B slice, independent of weighted
+            // prediction.
+            ue(5 - slice->max_num_merge_cand, five_minus_max_num_merge_cand);
+        }
+
+        se(slice_var(slice_qp_delta));
+        if (misc->pps_slice_chroma_qp_offsets_present_flag) {
+            se(slice_var(slice_cb_qp_offset));
+            se(slice_var(slice_cr_qp_offset));
+        }
+        if (misc->pps_slice_chroma_offset_list_enabled_flag) {
+            u(1, 0, cu_chroma_qp_offset_enabled_flag);
+        }
+        if (misc->deblocking_filter_override_enabled_flag) {
+            u(1, miscs_var(deblocking_filter_override_flag));
+        }
+        if (miscs->deblocking_filter_override_flag) {
+            u(1, slice_field(slice_deblocking_filter_disabled_flag));
+            if (!slice->slice_fields.bits.slice_deblocking_filter_disabled_flag) {
+                se(slice_var(slice_beta_offset_div2));
+                se(slice_var(slice_tc_offset_div2));
+            }
+        }
+        // The flag is present when any in-loop filter is active for the
+        // slice: SAO on luma or chroma, or deblocking not disabled.
+        if (pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag &&
+            (slice->slice_fields.bits.slice_sao_luma_flag ||
+             slice->slice_fields.bits.slice_sao_chroma_flag ||
+             !slice->slice_fields.bits.slice_deblocking_filter_disabled_flag)) {
+            u(1, slice_field(slice_loop_filter_across_slices_enabled_flag));
+        }
+
+        if (pic->pic_fields.bits.tiles_enabled_flag ||
+            pic->pic_fields.bits.entropy_coding_sync_enabled_flag) {
+            // NOTE(review): num_entry_point_offsets is required here by
+            // the syntax but is not written.
+        }
+
+        // slice_segment_header_extension_length is never written, because
+        // the PPS writer always clears the extension-present flag.
+    }
+
+    u(1, 1, alignment_bit_equal_to_one);
+    while (put_bits_count(s) & 7)
+        u(1, 0, alignment_bit_equal_to_zero);
+}
+
+// Convert a raw NAL unit into byte stream format.
+//
+// Writes a four-byte start code (00 00 00 01) to dst and then copies
+// len bytes from src, inserting an emulation prevention byte (03)
+// before any byte of value 0 to 3 which follows two zero bytes.
+// Returns the number of bytes written to dst.  The size of dst is not
+// checked: the caller must provide room for the start code, the
+// payload and every inserted byte.
+static size_t vaapi_hevc_nal_unit_to_byte_stream(uint8_t *dst, uint8_t *src, size_t len)
+{
+    static const uint8_t start_code[4] = { 0, 0, 0, 1 };
+    size_t out = 0, in;
+    int zeros = 0;
+
+    for (in = 0; in < sizeof(start_code); in++)
+        dst[out++] = start_code[in];
+
+    for (in = 0; in < len; in++) {
+        const uint8_t byte = src[in];
+
+        if (zeros >= 2) {
+            // Two zero bytes precede this one: escape it if needed,
+            // then start counting zeros afresh.
+            if (byte <= 3)
+                dst[out++] = 3;
+            zeros = 0;
+        }
+        zeros = (byte == 0) ? zeros + 1 : 0;
+
+        dst[out++] = byte;
+    }
+
+    return out;
+}
+
+// Submit one packed header to the encoder.
+//
+// ctx:     encoder context supplying the VA display and context ID.
+// type:    packed header type to set in the parameter buffer.
+// data:    the header bytes, which already include emulation
+//          prevention bytes.
+// bit_len: length of the header in bits.
+//
+// Creates a parameter buffer and a data buffer and renders both.
+// Returns 0 on success or AVERROR_EXTERNAL if any VAAPI call fails.
+static int vaapi_hevc_render_packed_header(VAAPIHEVCEncodeContext *ctx, int type,
+                                           char *data, size_t bit_len)
+{
+    VAStatus vas;
+    VABufferID id_list[2];
+    VAEncPackedHeaderParameterBuffer buffer = {
+        .type                = type,
+        .bit_length          = bit_len,
+        .has_emulation_bytes = 1,
+    };
+
+    // The buffer size must be that of the structure itself, not that of
+    // a pointer to it.
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncPackedHeaderParameterBufferType,
+                         sizeof(buffer), 1, &buffer, &id_list[0]);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create parameter buffer for packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncPackedHeaderDataBufferType,
+                         (bit_len + 7) / 8, 1, data, &id_list[1]);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create data buffer for packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        // The parameter buffer was never rendered, so release it here
+        // rather than leaking it.
+        vaDestroyBuffer(ctx->hardware_context->display, id_list[0]);
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(ctx, AV_LOG_DEBUG, "Packed header buffer (%d) is %#x/%#x "
+           "(%zu bits).\n", type, id_list[0], id_list[1], bit_len);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                          id_list, 2);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to render packed "
+               "header (type %d): %d (%s).\n", type, vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+// Build the VPS and SPS NAL units, convert each to byte stream format
+// back to back in one buffer, and submit the result as a single packed
+// sequence header.  Returns the result of submitting the header.
+static int vaapi_hevc_render_packed_vps_sps(VAAPIHEVCEncodeContext *ctx)
+{
+    uint8_t raw_nal[256];
+    uint8_t stream[512];
+    size_t stream_bytes = 0, nal_bits;
+    PutBitContext writer;
+
+    // VPS first.
+    init_put_bits(&writer, raw_nal, sizeof(raw_nal));
+    vaapi_hevc_write_vps(&writer, ctx);
+    nal_bits = put_bits_count(&writer);
+    flush_put_bits(&writer);
+    stream_bytes += vaapi_hevc_nal_unit_to_byte_stream(stream + stream_bytes,
+                                                       raw_nal, nal_bits / 8);
+
+    // Then the SPS, appended directly after it.
+    init_put_bits(&writer, raw_nal, sizeof(raw_nal));
+    vaapi_hevc_write_sps(&writer, ctx);
+    nal_bits = put_bits_count(&writer);
+    flush_put_bits(&writer);
+    stream_bytes += vaapi_hevc_nal_unit_to_byte_stream(stream + stream_bytes,
+                                                       raw_nal, nal_bits / 8);
+
+    return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderSequence,
+                                           stream, stream_bytes * 8);
+}
+
+// Build the PPS NAL unit, convert it to byte stream format and submit
+// it as a packed picture header.  Returns the result of submitting the
+// header.
+static int vaapi_hevc_render_packed_pps(VAAPIHEVCEncodeContext *ctx)
+{
+    uint8_t raw_nal[256];
+    uint8_t stream[512];
+    size_t stream_bytes, nal_bits;
+    PutBitContext writer;
+
+    init_put_bits(&writer, raw_nal, sizeof(raw_nal));
+    vaapi_hevc_write_pps(&writer, ctx);
+    nal_bits = put_bits_count(&writer);
+    flush_put_bits(&writer);
+
+    stream_bytes = vaapi_hevc_nal_unit_to_byte_stream(stream, raw_nal,
+                                                      nal_bits / 8);
+
+    return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderPicture,
+                                           stream, stream_bytes * 8);
+}
+
+// Build the slice header for the given frame, convert it to byte
+// stream format and submit it as a packed slice header.  Returns the
+// result of submitting the header.
+static int vaapi_hevc_render_packed_slice(VAAPIHEVCEncodeContext *ctx,
+                                          VAAPIHEVCEncodeFrame *current)
+{
+    uint8_t raw_nal[256];
+    uint8_t stream[512];
+    size_t stream_bytes, nal_bits;
+    PutBitContext writer;
+
+    init_put_bits(&writer, raw_nal, sizeof(raw_nal));
+    vaapi_hevc_write_slice_header(&writer, ctx, current);
+    nal_bits = put_bits_count(&writer);
+    flush_put_bits(&writer);
+
+    stream_bytes = vaapi_hevc_nal_unit_to_byte_stream(stream, raw_nal,
+                                                      nal_bits / 8);
+
+    return vaapi_hevc_render_packed_header(ctx, VAEncPackedHeaderSlice,
+                                           stream, stream_bytes * 8);
+}
+
+// Create a VA buffer holding the context's sequence parameters, store
+// its ID in ctx->seq_params_id and render it.
+// Returns 0 on success or AVERROR_EXTERNAL if either VAAPI call fails.
+static int vaapi_hevc_render_sequence(VAAPIHEVCEncodeContext *ctx)
+{
+    VAStatus status;
+
+    status = vaCreateBuffer(ctx->hardware_context->display,
+                            ctx->codec.context_id,
+                            VAEncSequenceParameterBufferType,
+                            sizeof(ctx->seq_params), 1, &ctx->seq_params,
+                            &ctx->seq_params_id);
+    if (status != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for sequence "
+               "parameters: %d (%s).\n", status, vaErrorStr(status));
+        return AVERROR_EXTERNAL;
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Sequence parameter buffer is %#x.\n",
+           ctx->seq_params_id);
+
+    status = vaRenderPicture(ctx->hardware_context->display,
+                             ctx->codec.context_id,
+                             &ctx->seq_params_id, 1);
+    if (status != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to send sequence parameters: "
+               "%d (%s).\n", status, vaErrorStr(status));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+// Create a VA buffer holding the given frame's picture parameters and
+// render it.
+// Returns 0 on success or AVERROR_EXTERNAL if either VAAPI call fails.
+//
+// NOTE(review): the buffer ID is stored in ctx->pic_params_id, whereas
+// vaapi_hevc_render_slice stores its ID in the frame; confirm that the
+// per-frame pic_params_id field is not the intended destination.
+static int vaapi_hevc_render_picture(VAAPIHEVCEncodeContext *ctx,
+                                     VAAPIHEVCEncodeFrame *current)
+{
+    VAStatus vas;
+    VAEncPictureParameterBufferHEVC *pic = &current->pic_params;
+
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncPictureParameterBufferType,
+                         sizeof(*pic), 1, pic, &ctx->pic_params_id);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for picture "
+               "parameters: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Picture parameter buffer is %#x.\n",
+           ctx->pic_params_id);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                          &ctx->pic_params_id, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to send picture parameters: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+// Submit the slice parameter buffer for one frame.
+//
+// Creates a VA buffer holding a copy of current->slice_params (the new
+// buffer ID is stored in current->slice_params_id) and queues it on the
+// codec context with vaRenderPicture().
+//
+// Returns 0 on success, AVERROR_EXTERNAL if either libva call fails.
+static int vaapi_hevc_render_slice(VAAPIHEVCEncodeContext *ctx,
+                                   VAAPIHEVCEncodeFrame *current)
+{
+    VAStatus vas;
+    VAEncSliceParameterBufferHEVC *slice = &current->slice_params;
+
+    vas = vaCreateBuffer(ctx->hardware_context->display, ctx->codec.context_id,
+                         VAEncSliceParameterBufferType,
+                         sizeof(*slice), 1, slice, &current->slice_params_id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for slice "
+               "parameters: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Slice buffer is %#x.\n", current->slice_params_id);
+
+    vas = vaRenderPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                          &current->slice_params_id, 1);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to send slice parameters: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+// Fill in the stream-constant parameters.
+//
+// Three structures in the context are populated:
+//  - ctx->seq_params: the VA sequence parameter buffer (zeroed first).
+//  - ctx->pic_params: the picture parameter template (zeroed first); it is
+//    copied into each frame by vaapi_hevc_encode_init_picture().
+//  - ctx->misc_params: additional header fields which are not present in
+//    the VA structures.  This one is not zeroed here, so any field not
+//    assigned below keeps whatever value it already had.
+//
+// Reads ctx->level, ctx->fixed_qp, the input/aligned dimensions and the
+// timing, aspect ratio and colour properties of ctx->avctx.  Always
+// returns 0.
+static av_cold int vaapi_hevc_encode_init_stream(VAAPIHEVCEncodeContext *ctx)
+{
+    VAEncSequenceParameterBufferHEVC *seq = &ctx->seq_params;
+    VAEncPictureParameterBufferHEVC *pic = &ctx->pic_params;
+    VAAPIHEVCEncodeMiscSequenceParams *misc = &ctx->misc_params;
+    int i;
+
+    memset(seq, 0, sizeof(*seq));
+    memset(pic, 0, sizeof(*pic));
+
+    {
+        // general_profile_space == 0.
+        seq->general_profile_idc = 1; // Main profile.
+        seq->general_tier_flag = 0;
+
+        seq->general_level_idc = ctx->level * 3;
+
+        seq->intra_period = 0;
+        seq->intra_idr_period = 0;
+        seq->ip_period = 0;
+
+        seq->pic_width_in_luma_samples = ctx->aligned_width;
+        seq->pic_height_in_luma_samples = ctx->aligned_height;
+
+        seq->seq_fields.bits.chroma_format_idc = 1; // 4:2:0.
+        seq->seq_fields.bits.separate_colour_plane_flag = 0;
+        seq->seq_fields.bits.bit_depth_luma_minus8 = 0; // 8-bit luma.
+        seq->seq_fields.bits.bit_depth_chroma_minus8 = 0; // 8-bit chroma.
+        // Other misc flags all zero.
+
+        // These have to come from the capabilities of the encoder.  We have
+        // no way to query it, so just hardcode ones which worked for me...
+        // CTB size from 8x8 to 32x32.
+        seq->log2_min_luma_coding_block_size_minus3 = 0;
+        seq->log2_diff_max_min_luma_coding_block_size = 2;
+        // Transform size from 4x4 to 32x32.
+        seq->log2_min_transform_block_size_minus2 = 0;
+        seq->log2_diff_max_min_transform_block_size = 3;
+        // Full transform hierarchy allowed (2-5).
+        seq->max_transform_hierarchy_depth_inter = 3;
+        seq->max_transform_hierarchy_depth_intra = 3;
+
+        seq->vui_parameters_present_flag = 0;
+    }
+
+    {
+        // No reference pictures by default; per-frame setup fills in the
+        // entries which are actually used.
+        for(i = 0; i < FF_ARRAY_ELEMS(pic->reference_frames); i++) {
+            pic->reference_frames[i].picture_id = VA_INVALID_ID;
+            pic->reference_frames[i].flags = VA_PICTURE_HEVC_INVALID;
+        }
+
+        pic->collocated_ref_pic_index = 0xff;
+
+        pic->last_picture = 0;
+
+        pic->pic_init_qp = ctx->fixed_qp;
+
+        pic->diff_cu_qp_delta_depth = 0;
+        pic->pps_cb_qp_offset = 0;
+        pic->pps_cr_qp_offset = 0;
+
+        // tiles_enabled_flag == 0, so ignore num_tile_(rows|columns)_minus1.
+
+        pic->log2_parallel_merge_level_minus2 = 0;
+
+        // No limit on size.
+        pic->ctu_max_bitsize_allowed = 0;
+
+        pic->num_ref_idx_l0_default_active_minus1 = 0;
+        pic->num_ref_idx_l1_default_active_minus1 = 0;
+
+        pic->slice_pic_parameter_set_id = 0;
+
+        pic->pic_fields.bits.screen_content_flag = 0;
+        pic->pic_fields.bits.enable_gpu_weighted_prediction = 0;
+
+        //pic->pic_fields.bits.cu_qp_delta_enabled_flag = 1;
+    }
+
+    {
+        misc->video_parameter_set_id = 5;
+        misc->seq_parameter_set_id = 5;
+
+        misc->vps_max_layers_minus1 = 0;
+        misc->vps_max_sub_layers_minus1 = 0;
+        misc->vps_temporal_id_nesting_flag = 1;
+        misc->sps_max_sub_layers_minus1 = 0;
+        misc->sps_temporal_id_nesting_flag = 1;
+
+        // Only the compatibility flag matching our own profile is set.
+        for(i = 0; i < 32; i++) {
+            misc->general_profile_compatibility_flag[i] =
+                (i == seq->general_profile_idc);
+        }
+
+        misc->general_progressive_source_flag = 1;
+        misc->general_interlaced_source_flag = 0;
+        misc->general_non_packed_constraint_flag = 0;
+        misc->general_frame_only_constraint_flag = 1;
+        misc->general_inbld_flag = 0;
+
+        misc->log2_max_pic_order_cnt_lsb_minus4 = 4;
+
+        // The short-term reference picture set below contains one negative
+        // picture, and num_negative_pics must not exceed
+        // sps_max_dec_pic_buffering_minus1, so the signalled DPB has to hold
+        // at least two pictures (the current one plus its reference).
+        misc->vps_sub_layer_ordering_info_present_flag = 0;
+        misc->vps_max_dec_pic_buffering_minus1[0] = 1;
+        misc->vps_max_num_reorder_pics[0] = 0;
+        misc->vps_max_latency_increase_plus1[0] = 0;
+        misc->sps_sub_layer_ordering_info_present_flag = 0;
+        misc->sps_max_dec_pic_buffering_minus1[0] = 1;
+        misc->sps_max_num_reorder_pics[0] = 0;
+        misc->sps_max_latency_increase_plus1[0] = 0;
+
+        misc->vps_timing_info_present_flag = 1;
+        misc->vps_num_units_in_tick = ctx->avctx->time_base.num;
+        misc->vps_time_scale = ctx->avctx->time_base.den;
+        misc->vps_poc_proportional_to_timing_flag = 1;
+        misc->vps_num_ticks_poc_diff_minus1 = 0;
+
+        // Crop the alignment padding off the right and bottom edges.  The
+        // offsets are half the luma padding (4:2:0 chroma units).
+        if(ctx->input_width != ctx->aligned_width ||
+           ctx->input_height != ctx->aligned_height) {
+            misc->conformance_window_flag = 1;
+            misc->conf_win_left_offset = 0;
+            misc->conf_win_right_offset =
+                (ctx->aligned_width - ctx->input_width) / 2;
+            misc->conf_win_top_offset = 0;
+            misc->conf_win_bottom_offset =
+                (ctx->aligned_height - ctx->input_height) / 2;
+        } else {
+            misc->conformance_window_flag = 0;
+        }
+
+        // A single reference picture set: the immediately preceding picture.
+        misc->num_short_term_ref_pic_sets = 1;
+        misc->st_ref_pic_set[0].num_negative_pics = 1;
+        misc->st_ref_pic_set[0].num_positive_pics = 0;
+        misc->st_ref_pic_set[0].delta_poc_s0_minus1[0] = 0;
+        misc->st_ref_pic_set[0].used_by_curr_pic_s0_flag[0] = 1;
+
+        misc->vui_parameters_present_flag = 1;
+        if(ctx->avctx->sample_aspect_ratio.num != 0) {
+            misc->aspect_ratio_info_present_flag = 1;
+            if(ctx->avctx->sample_aspect_ratio.num ==
+               ctx->avctx->sample_aspect_ratio.den) {
+                misc->aspect_ratio_idc = 1;
+            } else {
+                misc->aspect_ratio_idc = 255; // Extended SAR.
+                misc->sar_width = ctx->avctx->sample_aspect_ratio.num;
+                misc->sar_height = ctx->avctx->sample_aspect_ratio.den;
+            }
+        }
+        {
+            // Should this be conditional on some of these being set?
+            misc->video_signal_type_present_flag = 1;
+            misc->video_format = 5; // Unspecified.
+            misc->video_full_range_flag = 0;
+            misc->colour_description_present_flag = 1;
+            misc->colour_primaries = ctx->avctx->color_primaries;
+            misc->transfer_characteristics = ctx->avctx->color_trc;
+            misc->matrix_coeffs = ctx->avctx->colorspace;
+        }
+    }
+
+    return 0;
+}
+
+// Fill in the per-frame parameters for the frame about to be encoded.
+//
+// current->pic_params starts as a copy of the stream template in
+// ctx->pic_params; current->slice_params and current->misc_params are
+// zeroed and then populated.  Reads current->type, current->pic,
+// current->refa / current->refb (for non-intra frames) and
+// current->coded_data_id, so those must already be set by the caller.
+//
+// The whole picture is encoded as a single slice.  Always returns 0.
+static int vaapi_hevc_encode_init_picture(VAAPIHEVCEncodeContext *ctx,
+                                          VAAPIHEVCEncodeFrame *current)
+{
+    VAEncPictureParameterBufferHEVC *pic = &current->pic_params;
+    VAEncSliceParameterBufferHEVC *slice = &current->slice_params;
+    VAAPIHEVCEncodeMiscPictureParams *misc = &current->misc_params;
+    int idr = current->type == FRAME_TYPE_I;
+
+    memcpy(pic, &ctx->pic_params, sizeof(*pic));
+    memset(slice, 0, sizeof(*slice));
+    memset(misc, 0, sizeof(*misc));
+
+    {
+        memcpy(&pic->decoded_curr_pic, &current->pic, sizeof(VAPictureHEVC));
+
+        // Slot 0 holds the first reference (P and B frames), slot 1 the
+        // second one (B frames only).
+        if(current->type != FRAME_TYPE_I) {
+            memcpy(&pic->reference_frames[0],
+                   &current->refa->pic, sizeof(VAPictureHEVC));
+        }
+        if(current->type == FRAME_TYPE_B) {
+            memcpy(&pic->reference_frames[1],
+                   &current->refb->pic, sizeof(VAPictureHEVC));
+        }
+
+        pic->coded_buf = current->coded_data_id;
+
+        // Every intra frame is an IDR frame.
+        pic->nal_unit_type = (idr ? NAL_IDR_W_RADL : NAL_TRAIL_R);
+
+        pic->pic_fields.bits.idr_pic_flag = (idr ? 1 : 0);
+        pic->pic_fields.bits.coding_type = (idr ? 1 : 2);
+
+        pic->pic_fields.bits.reference_pic_flag = 1;
+    }
+
+    {
+        // One slice covering every CTU of the picture.
+        slice->slice_segment_address = 0;
+        slice->num_ctu_in_slice = ctx->ctu_width * ctx->ctu_height;
+
+        slice->slice_type = current->type;
+        slice->slice_pic_parameter_set_id = 0;
+
+        slice->num_ref_idx_l0_active_minus1 = 0;
+        slice->num_ref_idx_l1_active_minus1 = 0;
+        memcpy(slice->ref_pic_list0, pic->reference_frames, sizeof(pic->reference_frames));
+        memcpy(slice->ref_pic_list1, pic->reference_frames, sizeof(pic->reference_frames));
+
+        slice->max_num_merge_cand = 5;
+        slice->slice_qp_delta = 0;
+
+        slice->slice_fields.bits.last_slice_of_pic_flag = 1;
+    }
+
+    {
+        misc->first_slice_segment_in_pic_flag = 1;
+
+        // Use reference picture set 0 from the SPS.
+        misc->short_term_ref_pic_set_sps_flag = 1;
+        misc->short_term_ref_pic_idx = 0;
+    }
+
+    return 0;
+}
+
+// Encode one input frame and return the coded data in pkt.
+//
+// Steps, all performed with the hardware context locked:
+//  1. Obtain an input surface: either the surface carried by a VAAPI frame,
+//     or a surface from ctx->input_pool to which the frame is uploaded.
+//  2. Take a surface from ctx->recon_pool for the reconstructed picture and
+//     store it in the next DPB slot.
+//  3. Pick the frame type (I on the first frame and whenever the POC
+//     reaches the idr_interval option, P otherwise) and update the POC.
+//  4. Send the parameter buffers and packed headers, run the encode and
+//     wait for it to finish.
+//  5. Copy all coded segments into a single packet.
+//
+// On success returns 0; *got_packet is set to 1 if coded data was
+// produced.  On failure returns a negative AVERROR code.  Frames allocated
+// here which have not been handed over to the DPB are released on every
+// path.
+static int vaapi_hevc_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
+                                     const AVFrame *pic, int *got_packet)
+{
+    VAAPIHEVCEncodeContext *ctx = avctx->priv_data;
+    VASurfaceID input_surface, recon_surface;
+    VAAPIHEVCEncodeFrame *current;
+    // Both start as NULL so that the cleanup code can free them
+    // unconditionally.
+    AVFrame *input_image = NULL, *recon_image = NULL;
+    VACodedBufferSegment *buf_list, *buf;
+    VAStatus vas;
+    size_t total_size, offset;
+    int err;
+
+    av_log(ctx, AV_LOG_DEBUG, "New frame: format %s, size %dx%d.\n",
+           av_get_pix_fmt_name(pic->format), pic->width, pic->height);
+
+    av_vaapi_lock_hardware_context(ctx->hardware_context);
+
+    if(pic->format == AV_PIX_FMT_VAAPI) {
+        // The surface ID is carried in the data[3] pointer.
+        input_surface = (VASurfaceID)(uintptr_t)pic->data[3];
+
+    } else {
+        input_image = av_frame_alloc();
+        if(!input_image) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to allocate input frame.\n");
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        err = av_vaapi_surface_pool_get(&ctx->input_pool, input_image);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to allocate input frame "
+                   "from surface pool: %d (%s).\n", err, av_err2str(err));
+            goto fail;
+        }
+
+        input_image->format = AV_PIX_FMT_VAAPI;
+        input_image->width = pic->width;
+        input_image->height = pic->height;
+
+        err = av_vaapi_copy_to_surface(input_image, pic);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to copy to input surface: "
+                   "%d (%s).\n", err, av_err2str(err));
+            goto fail;
+        }
+
+        input_surface = (VASurfaceID)(uintptr_t)input_image->data[3];
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for input image.\n",
+           input_surface);
+
+    recon_image = av_frame_alloc();
+    if(!recon_image) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate reconstructed frame.\n");
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = av_vaapi_surface_pool_get(&ctx->recon_pool, recon_image);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate reconstructed frame "
+               "from surface pool: %d (%s).\n", err, av_err2str(err));
+        goto fail;
+    }
+
+    recon_surface = (VASurfaceID)(uintptr_t)recon_image->data[3];
+    av_log(ctx, AV_LOG_DEBUG, "Using surface %#x for reconstructed image.\n",
+           recon_surface);
+
+    // The reference used by the previous encode is no longer needed.
+    if(ctx->previous_frame != ctx->current_frame) {
+        av_frame_unref(ctx->dpb[ctx->previous_frame].frame);
+    }
+
+    ctx->previous_frame = ctx->current_frame;
+    ctx->current_frame = (ctx->current_frame + 1) % MAX_DPB_PICS;
+    {
+        current = &ctx->dpb[ctx->current_frame];
+
+        if(ctx->poc < 0 ||
+           ctx->poc == ctx->options.idr_interval)
+            current->type = FRAME_TYPE_I;
+        else
+            current->type = FRAME_TYPE_P;
+
+        if(current->type == FRAME_TYPE_I)
+            ctx->poc = 0;
+        else
+            ++ctx->poc;
+        current->poc = ctx->poc;
+
+        if(current->type == FRAME_TYPE_I) {
+            current->refa = 0;
+            current->refb = 0;
+        } else if(current->type == FRAME_TYPE_P) {
+            current->refa = &ctx->dpb[ctx->previous_frame];
+            current->refb = 0;
+        } else {
+            av_assert0(0);
+        }
+
+        memset(&current->pic, 0, sizeof(VAPictureHEVC));
+        current->pic.picture_id = recon_surface;
+        current->pic.pic_order_cnt = current->poc;
+
+        // Release the frame left in this slot by an earlier encode before
+        // reusing the slot, then hand ownership of the new frame to the DPB.
+        av_frame_free(&current->frame);
+        current->frame = recon_image;
+        recon_image = NULL;
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Encoding frame as %s (%d).\n",
+           current->type == FRAME_TYPE_I ? "I" :
+           current->type == FRAME_TYPE_P ? "P" : "B", current->poc);
+
+    vas = vaBeginPicture(ctx->hardware_context->display, ctx->codec.context_id,
+                         input_surface);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to attach new picture: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    err = vaapi_hevc_encode_init_picture(ctx, current);
+    if(err) goto fail;
+
+    // Sequence parameters and parameter set headers go with IDR frames only.
+    if(current->type == FRAME_TYPE_I) {
+        err = vaapi_hevc_render_sequence(ctx);
+        if(err) goto fail;
+    }
+
+    err = vaapi_hevc_render_picture(ctx, current);
+    if(err) goto fail;
+
+    if(current->type == FRAME_TYPE_I) {
+        err = vaapi_hevc_render_packed_vps_sps(ctx);
+        if(err) goto fail;
+
+        err = vaapi_hevc_render_packed_pps(ctx);
+        if(err) goto fail;
+    }
+
+    err = vaapi_hevc_render_packed_slice(ctx, current);
+    if(err) goto fail;
+
+    err = vaapi_hevc_render_slice(ctx, current);
+    if(err) goto fail;
+
+    vas = vaEndPicture(ctx->hardware_context->display, ctx->codec.context_id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to start picture processing: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    vas = vaSyncSurface(ctx->hardware_context->display, input_surface);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to sync to picture completion: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    buf_list = 0;
+    vas = vaMapBuffer(ctx->hardware_context->display, current->coded_data_id,
+                      (void**)&buf_list);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to map output buffers: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    // The coded data may arrive as a list of segments: size the packet for
+    // all of them so that none is lost.
+    total_size = 0;
+    for(buf = buf_list; buf; buf = buf->next) {
+        av_log(ctx, AV_LOG_DEBUG, "Output buffer: %u bytes.\n", buf->size);
+        total_size += buf->size;
+    }
+
+    err = 0;
+    if(buf_list) {
+        if(total_size > INT_MAX)
+            err = AVERROR(EINVAL);
+        else
+            err = av_new_packet(pkt, (int)total_size);
+
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to make output buffer "
+                   "(%zu bytes).\n", total_size);
+        } else {
+            offset = 0;
+            for(buf = buf_list; buf; buf = buf->next) {
+                memcpy(pkt->data + offset, buf->buf, buf->size);
+                offset += buf->size;
+            }
+
+            if(current->type == FRAME_TYPE_I)
+                pkt->flags |= AV_PKT_FLAG_KEY;
+
+            pkt->pts = pic->pts;
+
+            *got_packet = 1;
+        }
+    }
+
+    // Always unmap, including when building the packet failed.
+    vas = vaUnmapBuffer(ctx->hardware_context->display, current->coded_data_id);
+    if(vas != VA_STATUS_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to unmap output buffers: "
+               "%d (%s).\n", vas, vaErrorStr(vas));
+        if(!err)
+            err = AVERROR_EXTERNAL;
+    }
+    if(err) goto fail;
+
+    err = 0;
+ fail:
+    // input_image is NULL when the caller supplied a VAAPI frame, and
+    // recon_image is NULL once the DPB owns it; av_frame_free() accepts both.
+    av_frame_free(&input_image);
+    av_frame_free(&recon_image);
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+    return err;
+}
+
+// Configuration attributes handed to the pipeline when the codec is
+// created: 4:2:0 surfaces, constant-QP rate control, and a packed header
+// attribute value of zero.
+static VAConfigAttrib config_attributes[] = {
+    {
+        .type  = VAConfigAttribRTFormat,
+        .value = VA_RT_FORMAT_YUV420,
+    },
+    {
+        .type  = VAConfigAttribRateControl,
+        .value = VA_RC_CQP,
+    },
+    {
+        .type  = VAConfigAttribEncPackedHeaders,
+        .value = 0,
+    },
+};
+
+// Encoder init callback.
+//
+// Validates the options, derives the aligned picture and CTU dimensions,
+// then (with the hardware context locked) creates:
+//  - the input surface pool, unless frames already arrive as VAAPI surfaces;
+//  - the reconstructed-picture surface pool;
+//  - the codec pipeline;
+//  - one coded data buffer per DPB slot.
+// Finally the stream-constant parameters are filled in.
+//
+// Returns 0 on success or a negative AVERROR code.  Everything created
+// before a failure is destroyed again before returning.
+static av_cold int vaapi_hevc_encode_init(AVCodecContext *avctx)
+{
+    VAAPIHEVCEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    int i, err;
+    // Track what has been created so the failure path can undo exactly that.
+    int input_pool_ready = 0, recon_pool_ready = 0, codec_ready = 0;
+    int coded_buffers = 0;
+
+    if(ctx->options.hardware_context == 0) {
+        av_log(ctx, AV_LOG_ERROR, "VAAPI encode requires hardware context.\n");
+        return AVERROR(EINVAL);
+    }
+    // The context pointer is passed in through an integer option.
+    ctx->hardware_context =
+        (AVVAAPIHardwareContext*)(uintptr_t)ctx->options.hardware_context;
+
+    ctx->avctx = avctx;
+
+    ctx->va_profile = VAProfileHEVCMain;
+    ctx->level = -1;
+    if(!ctx->options.level ||
+       sscanf(ctx->options.level, "%d", &ctx->level) <= 0 ||
+       ctx->level < 0 || ctx->level > 63) {
+        av_log(ctx, AV_LOG_ERROR, "Invalid level '%s'.\n",
+               ctx->options.level ? ctx->options.level : "(null)");
+        return AVERROR(EINVAL);
+    }
+
+    // Only constant-QP mode is implemented; default to QP 26 if unset.
+    ctx->rc_mode = VA_RC_CQP;
+    if(ctx->options.qp < 0)
+        ctx->options.qp = 26;
+    av_log(ctx, AV_LOG_VERBOSE, "Using constant-QP mode at %d.\n",
+           ctx->options.qp);
+
+    ctx->input_width = avctx->width;
+    ctx->input_height = avctx->height;
+
+    ctx->aligned_width = FFALIGN(ctx->input_width, 16);
+    ctx->aligned_height = FFALIGN(ctx->input_height, 16);
+    // Number of 32x32 CTUs needed to cover the picture, rounded up.
+    ctx->ctu_width = FFALIGN(ctx->aligned_width, 32) / 32;
+    ctx->ctu_height = FFALIGN(ctx->aligned_height, 32) / 32;
+
+    av_log(ctx, AV_LOG_VERBOSE, "Input %ux%u -> Aligned %ux%u -> CTU %ux%u.\n",
+           ctx->input_width, ctx->input_height, ctx->aligned_width,
+           ctx->aligned_height, ctx->ctu_width, ctx->ctu_height);
+
+    ctx->fixed_qp = ctx->options.qp;
+
+    // A negative POC makes the first frame an IDR frame.
+    ctx->poc = -1;
+
+    av_vaapi_lock_hardware_context(ctx->hardware_context);
+
+    if(avctx->pix_fmt == AV_PIX_FMT_VAAPI) {
+        // Just use the input surfaces directly.
+        ctx->input_is_vaapi = 1;
+
+    } else {
+        AVVAAPISurfaceConfig *config = &ctx->input_config;
+
+        config->rt_format = VA_RT_FORMAT_YUV420;
+        config->av_format = AV_PIX_FMT_VAAPI;
+
+        config->image_format.fourcc = VA_FOURCC_NV12;
+        config->image_format.bits_per_pixel = 12;
+
+        config->width = ctx->aligned_width;
+        config->height = ctx->aligned_height;
+
+        config->attribute_count = 0;
+
+        ctx->input_is_vaapi = 0;
+
+        err = av_vaapi_surface_pool_init(&ctx->input_pool,
+                                         ctx->hardware_context,
+                                         config, INPUT_PICS);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create input surface pool: "
+                   "%d (%s).\n", err, av_err2str(err));
+            goto fail;
+        }
+        input_pool_ready = 1;
+    }
+
+    {
+        AVVAAPISurfaceConfig *config = &ctx->recon_config;
+
+        config->rt_format = VA_RT_FORMAT_YUV420;
+        config->av_format = AV_PIX_FMT_VAAPI;
+
+        config->image_format.fourcc = VA_FOURCC_NV12;
+        config->image_format.bits_per_pixel = 12;
+
+        config->width = ctx->aligned_width;
+        config->height = ctx->aligned_height;
+
+        config->attribute_count = 0;
+
+        err = av_vaapi_surface_pool_init(&ctx->recon_pool,
+                                         ctx->hardware_context,
+                                         config, MAX_DPB_PICS);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create recon surface pool: "
+                   "%d (%s).\n", err, av_err2str(err));
+            goto fail;
+        }
+        recon_pool_ready = 1;
+    }
+
+    {
+        AVVAAPIPipelineConfig *config = &ctx->codec_config;
+
+        config->profile = ctx->va_profile;
+        config->entrypoint = VAEntrypointEncSlice;
+
+        config->width = ctx->aligned_width;
+        config->height = ctx->aligned_height;
+
+        config->attribute_count = FF_ARRAY_ELEMS(config_attributes);
+        config->attributes = config_attributes;
+    }
+
+    err = av_vaapi_pipeline_init(&ctx->codec, ctx->hardware_context,
+                                 &ctx->codec_config, &ctx->recon_pool);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create codec: %d (%s).\n",
+               err, av_err2str(err));
+        goto fail;
+    }
+    codec_ready = 1;
+
+    // One coded data buffer (1 MiB) per DPB slot.
+    for(i = 0; i < MAX_DPB_PICS; i++) {
+        vas = vaCreateBuffer(ctx->hardware_context->display,
+                             ctx->codec.context_id,
+                             VAEncCodedBufferType,
+                             1048576, 1, 0, &ctx->dpb[i].coded_data_id);
+        if(vas != VA_STATUS_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create buffer for "
+                   "coded data: %d (%s).\n", vas, vaErrorStr(vas));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+        coded_buffers = i + 1;
+        av_log(ctx, AV_LOG_TRACE, "Coded data buffer %d is %#x.\n",
+               i, ctx->dpb[i].coded_data_id);
+    }
+
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+
+    av_log(ctx, AV_LOG_VERBOSE, "Started VAAPI H.265 encoder.\n");
+
+    vaapi_hevc_encode_init_stream(ctx);
+
+    return 0;
+
+ fail:
+    // Undo in reverse order of creation; the close callback is not relied
+    // upon to run after a failed init.
+    for(i = 0; i < coded_buffers; i++) {
+        vaDestroyBuffer(ctx->hardware_context->display,
+                        ctx->dpb[i].coded_data_id);
+    }
+    if(codec_ready)
+        av_vaapi_pipeline_uninit(&ctx->codec);
+    if(recon_pool_ready)
+        av_vaapi_surface_pool_uninit(&ctx->recon_pool);
+    if(input_pool_ready)
+        av_vaapi_surface_pool_uninit(&ctx->input_pool);
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+    return err;
+}
+
+// Encoder close callback.
+//
+// With the hardware context locked, releases everything created by init
+// and by encoding: the per-slot coded data buffers and DPB frames first
+// (the frames hold surfaces from the recon pool), then the codec pipeline,
+// the recon surface pool and, if one was created, the input surface pool.
+//
+// Failures are logged but do not stop the teardown.  Always returns 0.
+static av_cold int vaapi_hevc_encode_close(AVCodecContext *avctx)
+{
+    VAAPIHEVCEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    int i, err;
+
+    av_vaapi_lock_hardware_context(ctx->hardware_context);
+
+    for(i = 0; i < MAX_DPB_PICS; i++) {
+        vas = vaDestroyBuffer(ctx->hardware_context->display,
+                              ctx->dpb[i].coded_data_id);
+        if(vas != VA_STATUS_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to destroy coded data "
+                   "buffer %d: %d (%s).\n", i, vas, vaErrorStr(vas));
+        }
+
+        // Slots which were never used hold NULL, which is accepted here.
+        av_frame_free(&ctx->dpb[i].frame);
+    }
+
+    err = av_vaapi_pipeline_uninit(&ctx->codec);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to destroy codec: %d (%s).\n",
+               err, av_err2str(err));
+    }
+
+    err = av_vaapi_surface_pool_uninit(&ctx->recon_pool);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to uninitialise recon "
+               "surface pool: %d (%s).\n", err, av_err2str(err));
+    }
+
+    if(!ctx->input_is_vaapi) {
+        err = av_vaapi_surface_pool_uninit(&ctx->input_pool);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to uninitialise input "
+                   "surface pool: %d (%s).\n", err, av_err2str(err));
+        }
+    }
+
+    av_vaapi_unlock_hardware_context(ctx->hardware_context);
+
+    return 0;
+}
+
+// Private options of the encoder.  OFFSET() locates a member of the
+// options structure embedded in the private context; FLAGS marks an option
+// as a video encoding parameter.
+#define OFFSET(member) offsetof(VAAPIHEVCEncodeContext, options.member)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+static const AVOption vaapi_hevc_options[] = {
+    {
+        // Required: init fails while this is still zero.
+        .name        = "hardware_context",
+        .help        = "VAAPI hardware context",
+        .offset      = OFFSET(hardware_context),
+        .type        = AV_OPT_TYPE_INT64,
+        .default_val = { .i64 = 0 },
+        .min         = INT64_MIN,
+        .max         = INT64_MAX,
+        .flags       = AV_OPT_FLAG_VIDEO_PARAM,
+    },
+    {
+        // Parsed as a decimal integer at init; must be within 0..63.
+        .name        = "level",
+        .help        = "Set H.265 level",
+        .offset      = OFFSET(level),
+        .type        = AV_OPT_TYPE_STRING,
+        .default_val = { .str = "52" },
+        .min         = 0,
+        .max         = 0,
+        .flags       = FLAGS,
+    },
+    {
+        // A negative value selects the built-in default at init.
+        .name        = "qp",
+        .help        = "Use constant quantisation parameter",
+        .offset      = OFFSET(qp),
+        .type        = AV_OPT_TYPE_INT,
+        .default_val = { .i64 = -1 },
+        .min         = -1,
+        .max         = MAX_QP,
+        .flags       = FLAGS,
+    },
+    {
+        .name        = "idr_interval",
+        .help        = "Number of frames between IDR frames (0 = all intra)",
+        .offset      = OFFSET(idr_interval),
+        .type        = AV_OPT_TYPE_INT,
+        .default_val = { .i64 = -1 },
+        .min         = -1,
+        .max         = INT_MAX,
+        .flags       = FLAGS,
+    },
+    { 0 }
+};
+
+// AVClass of the encoder's private context: names it for logging and
+// attaches the private option table.
+static const AVClass vaapi_hevc_class = {
+    .version    = LIBAVUTIL_VERSION_INT,
+    .class_name = "VAAPI/H.265",
+    .item_name  = av_default_item_name,
+    .option     = vaapi_hevc_options,
+};
+
+// Codec descriptor for the VAAPI H.265 encoder.  Input frames may be VAAPI
+// surfaces or NV12 frames in system memory.
+AVCodec ff_hevc_vaapi_encoder = {
+    .name           = "vaapi_hevc",
+    .long_name      = NULL_IF_CONFIG_SMALL("H.265 (VAAPI)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HEVC,
+    .priv_class     = &vaapi_hevc_class,
+    .priv_data_size = sizeof(VAAPIHEVCEncodeContext),
+    .pix_fmts       = (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_VAAPI,
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_NONE,
+    },
+    .init           = vaapi_hevc_encode_init,
+    .encode2        = vaapi_hevc_encode_picture,
+    .close          = vaapi_hevc_encode_close,
+};
--
2.6.4
More information about the ffmpeg-devel
mailing list