[FFmpeg-devel] [PATCH 34/42] lavc/hevcdec: implement decoding MV-HEVC
Anton Khirnov
anton at khirnov.net
Tue Aug 27 18:05:14 EEST 2024
At most two layers are supported.
---
doc/decoders.texi | 68 +++++++
libavcodec/hevc/hevcdec.c | 375 ++++++++++++++++++++++++++++++++++----
libavcodec/hevc/hevcdec.h | 41 ++++-
libavcodec/hevc/refs.c | 64 +++++--
libavcodec/version.h | 2 +-
5 files changed, 493 insertions(+), 57 deletions(-)
diff --git a/doc/decoders.texi b/doc/decoders.texi
index 2fcc761d2f..6fc585477c 100644
--- a/doc/decoders.texi
+++ b/doc/decoders.texi
@@ -38,6 +38,74 @@ Select an operating point of a scalable AV1 bitstream (0 - 31). Default is 0.
@end table
+@section hevc
+HEVC (AKA ITU-T H.265 or ISO/IEC 23008-2) decoder.
+
+The decoder supports MV-HEVC multiview streams with at most two views. Views to
+be output may be selected either as the index of an output layer set defined in
+the VPS (the @option{output_layer_set} option), or as a list of view IDs (the
+@option{view_ids} option). These options are mutually exclusive - only one may
+be specified at a time. These options may be set either statically before decoder
+init, or from the @code{get_format()} callback - useful for the case when the
+view count or IDs change dynamically during decoding.
+
+Only the base layer is decoded by default.
+
+Note that if you are using the @code{ffmpeg} CLI tool, you should be using view
+specifiers as documented in its manual, rather than the options documented here.
+
+@subsection Options
+
+@table @option
+
+@item output_layer_set (MV-HEVC)
+Select the index of the output layer set that should be output. Available layer
+sets may be read by the user from the @option{output_layer_set_available}
+option.
+
+@item output_layer_set_available (MV-HEVC)
+This option may be read by the caller to retrieve an array of output layer sets
+available in the active VPS. The array is empty for single-layer video.
+
+Each array element is a bitmask defining layers present in the set. I.e. when
+the i-th bit is equal to 1, the layer with view ID @option{view_ids[i]} is present
+i-th bit is equal to 1, the layer with view ID @option{view_ids[i]} is present
+in the set.
+
+The value of this option is guaranteed to be accurate when read from the
+@code{get_format()} callback. It may also be set at other times (e.g. after
+opening the decoder), but the value is informational only and may be incorrect
+(e.g. when the stream contains multiple distinct VPS NALUs).
+
+@item view_ids (MV-HEVC)
+Specify a list of view IDs that should be output. This option can also be set to
+a single '-1', which will cause all views defined in the VPS to be decoded and
+output.
+
+@item view_ids_available (MV-HEVC)
+This option may be read by the caller to retrieve an array of view IDs available
+in the active VPS. The array is empty for single-layer video.
+
+Same validity restrictions as for @option{output_layer_set_available} apply to
+this option.
+
+@item view_pos_available (MV-HEVC)
+This option may be read by the caller to retrieve an array of view positions
+(left, right, or unspecified) available in the active VPS, as AVStereo3DView
+values. When the array is available, its elements describe view IDs from
+@option{view_ids_available}.
+
+Same validity restrictions as for @option{output_layer_set_available} apply to
+this option.
+
+@item view_id_cur (MV-HEVC)
+This option may be read from within the @code{get_buffer2()} callback to obtain
+the view ID of the frame for which the callback is being called.
+
+It should not be accessed at other times and its contents are then not
+specified.
+
+@end table
+
@section rawvideo
Raw video decoder.
diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
index 9a549bb0d2..f0e284bdf6 100644
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -34,6 +34,7 @@
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
+#include "libavutil/stereo3d.h"
#include "libavutil/timecode.h"
#include "aom_film_grain.h"
@@ -54,6 +55,8 @@
#include "refstruct.h"
#include "thread.h"
+#define HEVC_OLS_AUTO INT_MAX
+
static const uint8_t hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
/**
@@ -417,6 +420,137 @@ static int export_stream_params_from_sei(HEVCContext *s)
return 0;
}
+static int export_multilayer(HEVCContext *s, const HEVCVPS *vps)
+{
+ const HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
+
+ av_freep(&s->view_ids_available);
+ s->nb_view_ids_available = 0;
+ av_freep(&s->view_pos_available);
+ s->nb_view_pos_available = 0;
+ av_freep(&s->output_layer_set_available);
+ s->nb_output_layer_set_available = 0;
+
+ // don't export anything in the trivial case (1 layer, view id=0)
+ if (vps->nb_layers < 2 && !vps->view_id[0])
+ return 0;
+
+ s->view_ids_available = av_calloc(vps->nb_layers, sizeof(*s->view_ids_available));
+ if (!s->view_ids_available)
+ return AVERROR(ENOMEM);
+
+ if (tdrdi->num_ref_displays) {
+ s->view_pos_available = av_calloc(vps->nb_layers, sizeof(*s->view_pos_available));
+ if (!s->view_pos_available)
+ return AVERROR(ENOMEM);
+ }
+
+ for (int i = 0; i < vps->nb_layers; i++) {
+ s->view_ids_available[i] = vps->view_id[i];
+
+ if (s->view_pos_available) {
+ s->view_pos_available[i] = vps->view_id[i] == tdrdi->left_view_id[0] ?
+ AV_STEREO3D_VIEW_LEFT :
+ vps->view_id[i] == tdrdi->right_view_id[0] ?
+ AV_STEREO3D_VIEW_RIGHT : AV_STEREO3D_VIEW_UNSPEC;
+ }
+ }
+ s->nb_view_ids_available = vps->nb_layers;
+ s->nb_view_pos_available = s->view_pos_available ? vps->nb_layers : 0;
+
+ if (vps->num_output_layer_sets) {
+ s->output_layer_set_available = av_calloc(vps->num_output_layer_sets,
+ sizeof(*s->output_layer_set_available));
+ if (!s->output_layer_set_available)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < vps->num_output_layer_sets; i++)
+ s->output_layer_set_available[i] = vps->ols[i];
+ s->nb_output_layer_set_available = vps->num_output_layer_sets;
+ }
+
+ return 0;
+}
+
+static int setup_multilayer(HEVCContext *s, const HEVCVPS *vps)
+{
+ unsigned layers_active_output = 0, highest_layer;
+
+ s->layers_active_output = 1;
+ s->layers_active_decode = 1;
+
+ // nothing requested - decode base layer only
+ if (!s->nb_view_ids && s->output_layer_set == HEVC_OLS_AUTO)
+ return 0;
+
+ if (s->nb_view_ids && s->output_layer_set != HEVC_OLS_AUTO) {
+ av_log(s->avctx, AV_LOG_ERROR, "Both output_layer_set and view_ids "
+ "were specified, only one of those may be used.\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (s->nb_view_ids == 1 && s->view_ids[0] == -1) {
+ layers_active_output = (1 << vps->nb_layers) - 1;
+ } else if (s->nb_view_ids) {
+ for (int i = 0; i < s->nb_view_ids; i++) {
+ int view_id = s->view_ids[i];
+ int layer_idx = -1;
+
+ if (view_id < 0) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "Invalid view ID requested: %d\n", view_id);
+ return AVERROR(EINVAL);
+ }
+
+ for (int j = 0; j < vps->nb_layers; j++) {
+ if (vps->view_id[j] == view_id) {
+ layer_idx = j;
+ break;
+ }
+ }
+ if (layer_idx < 0) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "View ID %d not present in VPS\n", view_id);
+ return AVERROR(EINVAL);
+ }
+ layers_active_output |= 1 << layer_idx;
+ }
+ } else {
+ if (s->output_layer_set < 0 ||
+ s->output_layer_set >= vps->num_output_layer_sets) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "Invalid output layer set index requested: %d\n", s->output_layer_set);
+ return AVERROR(EINVAL);
+ }
+
+ layers_active_output = vps->ols[s->output_layer_set];
+ }
+
+ if (!layers_active_output) {
+ av_log(s->avctx, AV_LOG_ERROR, "No layers selected\n");
+ return AVERROR_BUG;
+ }
+
+ highest_layer = ff_log2(layers_active_output);
+ if (highest_layer >= FF_ARRAY_ELEMS(s->layers)) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "Too many layers requested: %u\n", layers_active_output);
+ return AVERROR(EINVAL);
+ }
+
+ /* Assume a higher layer depends on all the lower ones.
+ * This is enforced in VPS parsing currently, this logic will need
+ * to be changed if we want to support more complex dependency structures.
+ */
+ s->layers_active_decode = (1 << (highest_layer + 1)) - 1;
+ s->layers_active_output = layers_active_output;
+
+ av_log(s->avctx, AV_LOG_DEBUG, "decode/output layers: %x/%x\n",
+ s->layers_active_decode, s->layers_active_output);
+
+ return 0;
+}
+
static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
{
#define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
@@ -428,6 +562,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
CONFIG_HEVC_VDPAU_HWACCEL + \
CONFIG_HEVC_VULKAN_HWACCEL)
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
+ int ret;
switch (sps->pix_fmt) {
case AV_PIX_FMT_YUV420P:
@@ -547,7 +682,23 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
*fmt++ = sps->pix_fmt;
*fmt = AV_PIX_FMT_NONE;
- return ff_get_format(s->avctx, pix_fmts);
+ // export multilayer information from active VPS to the caller,
+ // so it is available in get_format()
+ ret = export_multilayer(s, sps->vps);
+ if (ret < 0)
+ return ret;
+
+ ret = ff_get_format(s->avctx, pix_fmts);
+ if (ret < 0)
+ return ret;
+ s->avctx->pix_fmt = ret;
+
+ // set up multilayer decoding, if requested by caller
+ ret = setup_multilayer(s, sps->vps);
+ if (ret < 0)
+ return ret;
+
+ return 0;
}
static int set_sps(HEVCContext *s, HEVCLayerContext *l, const HEVCSPS *sps)
@@ -2944,13 +3095,59 @@ static int set_side_data(HEVCContext *s)
return 0;
}
-static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l)
+static int find_finish_setup_nal(const HEVCContext *s)
+{
+ int nal_idx = 0;
+
+ for (int i = nal_idx; i < s->pkt.nb_nals; i++) {
+ const H2645NAL *nal = &s->pkt.nals[i];
+ const int layer_id = nal->nuh_layer_id;
+ GetBitContext gb = nal->gb;
+
+ if (layer_id > HEVC_MAX_NUH_LAYER_ID || s->vps->layer_idx[layer_id] < 0 ||
+ !(s->layers_active_decode & (1 << s->vps->layer_idx[layer_id])))
+ continue;
+
+ switch (nal->type) {
+ case HEVC_NAL_TRAIL_R:
+ case HEVC_NAL_TRAIL_N:
+ case HEVC_NAL_TSA_N:
+ case HEVC_NAL_TSA_R:
+ case HEVC_NAL_STSA_N:
+ case HEVC_NAL_STSA_R:
+ case HEVC_NAL_BLA_W_LP:
+ case HEVC_NAL_BLA_W_RADL:
+ case HEVC_NAL_BLA_N_LP:
+ case HEVC_NAL_IDR_W_RADL:
+ case HEVC_NAL_IDR_N_LP:
+ case HEVC_NAL_CRA_NUT:
+ case HEVC_NAL_RADL_N:
+ case HEVC_NAL_RADL_R:
+ case HEVC_NAL_RASL_N:
+ case HEVC_NAL_RASL_R:
+ if (!get_bits1(&gb)) // first_slice_segment_in_pic_flag
+ continue;
+ case HEVC_NAL_VPS:
+ case HEVC_NAL_SPS:
+ case HEVC_NAL_PPS:
+ nal_idx = i;
+ break;
+ }
+ }
+
+ return nal_idx;
+}
+
+static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l,
+ unsigned nal_idx)
{
const HEVCPPS *const pps = s->ps.pps_list[s->sh.pps_id];
const HEVCSPS *const sps = pps->sps;
int pic_size_in_ctb = ((sps->width >> sps->log2_min_cb_size) + 1) *
((sps->height >> sps->log2_min_cb_size) + 1);
- int new_sequence = IS_IDR(s) || IS_BLA(s) || s->last_eos;
+ int new_sequence = (l == &s->layers[0]) &&
+ (IS_IDR(s) || IS_BLA(s) || s->last_eos);
+ int prev_layers_active = s->layers_active_decode;
int ret;
if (sps->vps != s->vps && l != &s->layers[0]) {
@@ -2961,7 +3158,32 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l)
ff_refstruct_replace(&s->pps, pps);
if (l->sps != sps) {
- enum AVPixelFormat pix_fmt;
+ const HEVCSPS *sps_base = s->layers[0].sps;
+ enum AVPixelFormat pix_fmt = sps->pix_fmt;
+
+ if (l != &s->layers[0]) {
+ if (!sps_base) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "Access unit starts with a non-base layer frame\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ // Files produced by Vision Pro lack VPS extension VUI,
+ // so the secondary layer has no range information.
+ // This check avoids failing in such a case.
+ if (sps_base->pix_fmt == AV_PIX_FMT_YUVJ420P &&
+ sps->pix_fmt == AV_PIX_FMT_YUV420P &&
+ !sps->vui.common.video_signal_type_present_flag)
+ pix_fmt = sps_base->pix_fmt;
+
+ if (pix_fmt != sps_base->pix_fmt ||
+ sps->width != sps_base->width ||
+ sps->height != sps_base->height) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "Base/non-base layer SPS have unsupported parameter combination\n");
+ return AVERROR(ENOSYS);
+ }
+ }
ff_hevc_clear_refs(l);
@@ -2969,14 +3191,17 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l)
if (ret < 0)
return ret;
- export_stream_params(s, sps);
+ if (l == &s->layers[0]) {
+ export_stream_params(s, sps);
- pix_fmt = get_format(s, sps);
- if (pix_fmt < 0)
- return pix_fmt;
- s->avctx->pix_fmt = pix_fmt;
+ ret = get_format(s, sps);
+ if (ret < 0) {
+ set_sps(s, l, NULL);
+ return ret;
+ }
- new_sequence = 1;
+ new_sequence = 1;
+ }
}
memset(l->horizontal_bs, 0, l->bs_width * l->bs_height);
@@ -3011,7 +3236,8 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l)
s->local_ctx[0].end_of_tiles_x = pps->column_width[0] << sps->log2_ctb_size;
if (new_sequence) {
- ret = ff_hevc_output_frames(s, l, 0, 0, s->sh.no_output_of_prior_pics_flag);
+ ret = ff_hevc_output_frames(s, prev_layers_active,
+ 0, 0, s->sh.no_output_of_prior_pics_flag);
if (ret < 0)
return ret;
}
@@ -3068,7 +3294,8 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l)
s->cur_frame->f->pict_type = 3 - s->sh.slice_type;
- ret = ff_hevc_output_frames(s, l, sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics,
+ ret = ff_hevc_output_frames(s, s->layers_active_decode,
+ sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics,
sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering, 0);
if (ret < 0)
goto fail;
@@ -3079,13 +3306,21 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l)
goto fail;
}
- ff_thread_finish_setup(s->avctx);
+ // after starting the base-layer frame we know which layers will be decoded,
+ // so we can now figure out which NALUs to wait for before we can call
+ // ff_thread_finish_setup()
+ if (l == &s->layers[0])
+ s->finish_setup_nal_idx = find_finish_setup_nal(s);
+
+ if (nal_idx >= s->finish_setup_nal_idx)
+ ff_thread_finish_setup(s->avctx);
return 0;
fail:
- if (s->cur_frame)
- ff_hevc_unref_frame(s->cur_frame, ~0);
+ if (l->cur_frame)
+ ff_hevc_unref_frame(l->cur_frame, ~0);
+ l->cur_frame = NULL;
s->cur_frame = s->collocated_ref = NULL;
s->slice_initialized = 0;
return ret;
@@ -3160,9 +3395,9 @@ static int verify_md5(HEVCContext *s, AVFrame *frame)
return err;
}
-static int hevc_frame_end(HEVCContext *s)
+static int hevc_frame_end(HEVCContext *s, HEVCLayerContext *l)
{
- HEVCFrame *out = s->cur_frame;
+ HEVCFrame *out = l->cur_frame;
const AVFilmGrainParams *fgp;
av_unused int ret;
@@ -3194,23 +3429,32 @@ static int hevc_frame_end(HEVCContext *s)
} else {
if (s->avctx->err_recognition & AV_EF_CRCCHECK &&
s->sei.picture_hash.is_md5) {
- ret = verify_md5(s, s->cur_frame->f);
+ ret = verify_md5(s, out->f);
if (ret < 0 && s->avctx->err_recognition & AV_EF_EXPLODE)
return ret;
}
}
s->sei.picture_hash.is_md5 = 0;
- av_log(s->avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
+ av_log(s->avctx, AV_LOG_DEBUG, "Decoded frame with POC %zu/%d.\n",
+ l - s->layers, s->poc);
return 0;
}
-static int decode_slice(HEVCContext *s, HEVCLayerContext *l,
- const H2645NAL *nal, GetBitContext *gb)
+static int decode_slice(HEVCContext *s, unsigned nal_idx, GetBitContext *gb)
{
+ const int layer_idx = s->vps ? s->vps->layer_idx[s->nuh_layer_id] : 0;
+ HEVCLayerContext *l;
int ret;
+ // skip layers not requested to be decoded
+ // layers_active_decode can only change while decoding a base-layer frame,
+ // so we can check it for non-base layers
+ if (layer_idx < 0 ||
+ (s->nuh_layer_id > 0 && !(s->layers_active_decode & (1 << layer_idx))))
+ return 0;
+
ret = hls_slice_header(&s->sh, s, gb);
if (ret < 0) {
// hls_slice_header() does not cleanup on failure thus the state now is inconsistant so we cannot use it on depandant slices
@@ -3226,16 +3470,25 @@ static int decode_slice(HEVCContext *s, HEVCLayerContext *l,
return 0;
}
+ // switching to a new layer, mark previous layer's frame (if any) as done
+ if (s->cur_layer != layer_idx &&
+ s->layers[s->cur_layer].cur_frame &&
+ s->avctx->active_thread_type == FF_THREAD_FRAME)
+ ff_progress_frame_report(&s->layers[s->cur_layer].cur_frame->tf, INT_MAX);
+
+ s->cur_layer = layer_idx;
+ l = &s->layers[s->cur_layer];
+
if (s->sh.first_slice_in_pic_flag) {
- if (s->cur_frame) {
+ if (l->cur_frame) {
av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
return AVERROR_INVALIDDATA;
}
- ret = hevc_frame_start(s, l);
+ ret = hevc_frame_start(s, l, nal_idx);
if (ret < 0)
return ret;
- } else if (!s->cur_frame) {
+ } else if (!l->cur_frame) {
av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
return AVERROR_INVALIDDATA;
}
@@ -3247,16 +3500,16 @@ static int decode_slice(HEVCContext *s, HEVCLayerContext *l,
return AVERROR_INVALIDDATA;
}
- ret = decode_slice_data(s, l, nal, gb);
+ ret = decode_slice_data(s, l, &s->pkt.nals[nal_idx], gb);
if (ret < 0)
return ret;
return 0;
}
-static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
+static int decode_nal_unit(HEVCContext *s, unsigned nal_idx)
{
- HEVCLayerContext *l = &s->layers[0];
+ H2645NAL *nal = &s->pkt.nals[nal_idx];
GetBitContext gb = nal->gb;
int ret;
@@ -3315,7 +3568,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
case HEVC_NAL_RADL_R:
case HEVC_NAL_RASL_N:
case HEVC_NAL_RASL_R:
- ret = decode_slice(s, l, nal, &gb);
+ ret = decode_slice(s, nal_idx, &gb);
if (ret < 0)
goto fail;
break;
@@ -3416,11 +3669,10 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
H2645NAL *nal = &s->pkt.nals[i];
if (s->avctx->skip_frame >= AVDISCARD_ALL ||
- (s->avctx->skip_frame >= AVDISCARD_NONREF
- && ff_hevc_nal_is_nonref(nal->type)) || nal->nuh_layer_id > 0)
+ (s->avctx->skip_frame >= AVDISCARD_NONREF && ff_hevc_nal_is_nonref(nal->type)))
continue;
- ret = decode_nal_unit(s, nal);
+ ret = decode_nal_unit(s, i);
if (ret < 0) {
av_log(s->avctx, AV_LOG_WARNING,
"Error parsing NAL unit #%d.\n", i);
@@ -3429,12 +3681,17 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
}
fail:
- if (s->cur_frame) {
+ for (int i = 0; i < FF_ARRAY_ELEMS(s->layers); i++) {
+ HEVCLayerContext *l = &s->layers[i];
+
+ if (!l->cur_frame)
+ continue;
+
if (ret >= 0)
- ret = hevc_frame_end(s);
+ ret = hevc_frame_end(s, l);
if (s->avctx->active_thread_type == FF_THREAD_FRAME)
- ff_progress_frame_report(&s->cur_frame->tf, INT_MAX);
+ ff_progress_frame_report(&l->cur_frame->tf, INT_MAX);
}
return ret;
@@ -3455,6 +3712,11 @@ static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int f
if (first && s->ps.sps_list[i]) {
const HEVCSPS *sps = s->ps.sps_list[i];
export_stream_params(s, sps);
+
+ ret = export_multilayer(s, sps->vps);
+ if (ret < 0)
+ return ret;
+
break;
}
}
@@ -3483,7 +3745,7 @@ static int hevc_receive_frame(AVCodecContext *avctx, AVFrame *frame)
av_packet_unref(avpkt);
ret = ff_decode_get_packet(avctx, avpkt);
if (ret == AVERROR_EOF) {
- ret = ff_hevc_output_frames(s, &s->layers[0], 0, 0, 0);
+ ret = ff_hevc_output_frames(s, s->layers_active_decode, 0, 0, 0);
if (ret < 0)
return ret;
goto do_output;
@@ -3547,6 +3809,8 @@ static int hevc_ref_frame(HEVCFrame *dst, const HEVCFrame *src)
dst->ctb_count = src->ctb_count;
dst->flags = src->flags;
+ dst->base_layer_frame = src->base_layer_frame;
+
ff_refstruct_replace(&dst->hwaccel_picture_private,
src->hwaccel_picture_private);
@@ -3682,9 +3946,25 @@ static int hevc_update_thread_context(AVCodecContext *dst,
s->is_nalff = s0->is_nalff;
s->nal_length_size = s0->nal_length_size;
+ s->output_layer_set = s0->output_layer_set;
+ s->layers_active_decode = s0->layers_active_decode;
+ s->layers_active_output = s0->layers_active_output;
s->film_grain_warning_shown = s0->film_grain_warning_shown;
+ if (s->nb_view_ids != s0->nb_view_ids ||
+ memcmp(s->view_ids, s0->view_ids, sizeof(*s->view_ids) * s->nb_view_ids)) {
+ av_freep(&s->view_ids);
+ s->nb_view_ids = 0;
+
+ if (s0->nb_view_ids) {
+ s->view_ids = av_memdup(s0->view_ids, s0->nb_view_ids * sizeof(*s0->view_ids));
+ if (!s->view_ids)
+ return AVERROR(ENOMEM);
+ s->nb_view_ids = s0->nb_view_ids;
+ }
+ }
+
ret = ff_h2645_sei_ctx_replace(&s->sei.common, &s0->sei.common);
if (ret < 0)
return ret;
@@ -3779,6 +4059,31 @@ static const AVOption options[] = {
AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
{ "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
+ { "output_layer_set", "Index of the multilayer output layer set to output",
+ .offset = OFFSET(output_layer_set), .type = AV_OPT_TYPE_INT,
+ .default_val.i64 = HEVC_OLS_AUTO, .max = INT_MAX, .flags = PAR, .unit = "output_layer_set", },
+ { "auto", "select layer set automatically, or use the view_ids value",
+ .type = AV_OPT_TYPE_CONST, { .i64 = HEVC_OLS_AUTO },
+ .flags = PAR, .unit = "output_layer_set" },
+ { "output_layer_set_available", "Array of available output layer sets is exported here",
+ .offset = OFFSET(output_layer_set_available), .type = AV_OPT_TYPE_UINT64 | AV_OPT_TYPE_FLAG_ARRAY,
+ .flags = PAR | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
+ { "view_ids", "Array of view IDs that should be decoded and output; a single -1 to decode all views",
+ .offset = OFFSET(view_ids), .type = AV_OPT_TYPE_INT | AV_OPT_TYPE_FLAG_ARRAY,
+ .min = -1, .max = INT_MAX, .flags = PAR },
+ { "view_ids_available", "Array of available view IDs is exported here",
+ .offset = OFFSET(view_ids_available), .type = AV_OPT_TYPE_UINT | AV_OPT_TYPE_FLAG_ARRAY,
+ .flags = PAR | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
+ { "view_pos_available", "Array of view positions for view_ids_available is exported here, as AVStereo3DView",
+ .offset = OFFSET(view_pos_available), .type = AV_OPT_TYPE_UINT | AV_OPT_TYPE_FLAG_ARRAY,
+ .flags = PAR | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY, .unit = "view_pos" },
+ { "unspecified", .type = AV_OPT_TYPE_CONST, .default_val = { .i64 = AV_STEREO3D_VIEW_UNSPEC }, .unit = "view_pos" },
+ { "left", .type = AV_OPT_TYPE_CONST, .default_val = { .i64 = AV_STEREO3D_VIEW_LEFT }, .unit = "view_pos" },
+ { "right", .type = AV_OPT_TYPE_CONST, .default_val = { .i64 = AV_STEREO3D_VIEW_RIGHT }, .unit = "view_pos" },
+ { "view_id_cur", "View ID of the view currently being decoded. Only to be read from within get_buffer2()",
+ .offset = OFFSET(view_id_cur), .type = AV_OPT_TYPE_UINT,
+ .flags = PAR | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
+
{ NULL },
};
diff --git a/libavcodec/hevc/hevcdec.h b/libavcodec/hevc/hevcdec.h
index eba7cca1a6..bfb50b13ff 100644
--- a/libavcodec/hevc/hevcdec.h
+++ b/libavcodec/hevc/hevcdec.h
@@ -375,6 +375,10 @@ typedef struct HEVCFrame {
void *hwaccel_picture_private; ///< RefStruct reference
+ // for secondary-layer frames, this is the DPB index of the base-layer frame
+ // from the same AU, if it exists, otherwise -1
+ int base_layer_frame;
+
/**
* A combination of HEVC_FRAME_FLAG_*
*/
@@ -487,9 +491,13 @@ typedef struct HEVCContext {
HEVCLocalContext *local_ctx;
unsigned nb_local_ctx;
- HEVCLayerContext layers[1];
- // index in layers of the layer currently being decoded
+ // per-layer decoding state, addressed by VPS layer indices
+ HEVCLayerContext layers[HEVC_VPS_MAX_LAYERS];
+ // VPS index of the layer currently being decoded
unsigned cur_layer;
+ // bitmask of layer indices that are active for decoding/output
+ unsigned layers_active_decode;
+ unsigned layers_active_output;
/** 1 if the independent slice segment header was successfully parsed */
uint8_t slice_initialized;
@@ -539,11 +547,31 @@ typedef struct HEVCContext {
H2645Packet pkt;
// type of the first VCL NAL of the current frame
enum HEVCNALUnitType first_nal_type;
+ // index in pkt.nals of the NAL unit after which we can call
+ // ff_thread_finish_setup()
+ unsigned finish_setup_nal_idx;
int is_nalff; ///< this flag is != 0 if bitstream is encapsulated
///< as a format defined in 14496-15
int apply_defdispwin;
+ // multi-layer AVOptions
+ int output_layer_set;
+
+ uint64_t *output_layer_set_available;
+ unsigned nb_output_layer_set_available;
+
+ int *view_ids;
+ unsigned nb_view_ids;
+
+ unsigned *view_ids_available;
+ unsigned nb_view_ids_available;
+
+ unsigned *view_pos_available;
+ unsigned nb_view_pos_available;
+
+ unsigned view_id_cur;
+
int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
int nuh_layer_id;
@@ -641,12 +669,13 @@ static av_always_inline int ff_hevc_nal_is_nonref(enum HEVCNALUnitType type)
* Find frames in the DPB that are ready for output and either write them to the
* output FIFO or drop their output flag, depending on the value of discard.
*
- * @param max_output maximum number of output-pending frames that can be
- * present in the DPB before output is triggered
+ * @param max_output maximum number of AUs with an output-pending frame in at
+ * least one layer that can be present in the DPB before output
+ * is triggered
* @param max_dpb maximum number of any frames that can be present in the DPB
- * before output is triggered
+ * for any layer before output is triggered
*/
-int ff_hevc_output_frames(HEVCContext *s, HEVCLayerContext *l,
+int ff_hevc_output_frames(HEVCContext *s, unsigned layers_active,
unsigned max_output, unsigned max_dpb, int discard);
void ff_hevc_unref_frame(HEVCFrame *frame, int flags);
diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
index f93c5893c6..dbedd69aa1 100644
--- a/libavcodec/hevc/refs.c
+++ b/libavcodec/hevc/refs.c
@@ -130,6 +130,8 @@ fail:
int ff_hevc_set_new_ref(HEVCContext *s, HEVCLayerContext *l, int poc)
{
+ const HEVCVPS *vps = l->sps->vps;
+ const int view_id = vps->view_id[s->cur_layer];
HEVCFrame *ref;
int i;
@@ -144,6 +146,8 @@ int ff_hevc_set_new_ref(HEVCContext *s, HEVCLayerContext *l, int poc)
}
}
+ s->view_id_cur = view_id;
+
ref = alloc_frame(s, l);
if (!ref)
return AVERROR(ENOMEM);
@@ -152,6 +156,9 @@ int ff_hevc_set_new_ref(HEVCContext *s, HEVCLayerContext *l, int poc)
l->cur_frame = ref;
s->collocated_ref = NULL;
+ ref->base_layer_frame = (l != &s->layers[0] && s->layers[0].cur_frame) ?
+ s->layers[0].cur_frame - s->layers[0].DPB : -1;
+
if (s->sh.pic_output_flag)
ref->flags = HEVC_FRAME_FLAG_OUTPUT | HEVC_FRAME_FLAG_SHORT_REF;
else
@@ -163,6 +170,19 @@ int ff_hevc_set_new_ref(HEVCContext *s, HEVCLayerContext *l, int poc)
ref->f->crop_top = l->sps->output_window.top_offset;
ref->f->crop_bottom = l->sps->output_window.bottom_offset;
+ // add view ID side data if it's nontrivial
+ if (vps->nb_layers > 1 || view_id) {
+ AVFrameSideData *sd = av_frame_side_data_new(&ref->f->side_data, &ref->f->nb_side_data,
+ AV_FRAME_DATA_VIEW_ID, sizeof(int),
+ AV_FRAME_SIDE_DATA_FLAG_REPLACE);
+ if (!sd)
+ return AVERROR(ENOMEM);
+ *(int*)sd->data = view_id;
+ }
+
+ if (!(s->layers_active_output & (1 << s->cur_layer)))
+ ref->f->flags |= AV_FRAME_FLAG_DISCARD;
+
return 0;
}
@@ -176,30 +196,44 @@ static void unref_missing_refs(HEVCLayerContext *l)
}
}
-int ff_hevc_output_frames(HEVCContext *s, HEVCLayerContext *l,
+int ff_hevc_output_frames(HEVCContext *s, unsigned layers_active,
unsigned max_output, unsigned max_dpb, int discard)
{
while (1) {
- int nb_dpb = 0;
+ int nb_dpb[HEVC_VPS_MAX_LAYERS] = { 0 };
int nb_output = 0;
int min_poc = INT_MAX;
- int i, min_idx, ret;
+ int min_layer = -1;
+ int min_idx, ret;
- for (i = 0; i < FF_ARRAY_ELEMS(l->DPB); i++) {
- HEVCFrame *frame = &l->DPB[i];
- if (frame->flags & HEVC_FRAME_FLAG_OUTPUT) {
- nb_output++;
- if (frame->poc < min_poc || nb_output == 1) {
- min_poc = frame->poc;
- min_idx = i;
+ for (int layer = 0; layer < FF_ARRAY_ELEMS(s->layers); layer++) {
+ HEVCLayerContext *l = &s->layers[layer];
+
+ if (!(layers_active & (1 << layer)))
+ continue;
+
+ for (int i = 0; i < FF_ARRAY_ELEMS(l->DPB); i++) {
+ HEVCFrame *frame = &l->DPB[i];
+ if (frame->flags & HEVC_FRAME_FLAG_OUTPUT) {
+ // nb_output counts AUs with an output-pending frame
+ // in at least one layer
+ if (!(frame->base_layer_frame >= 0 &&
+ (s->layers[0].DPB[frame->base_layer_frame].flags & HEVC_FRAME_FLAG_OUTPUT)))
+ nb_output++;
+ if (min_layer < 0 || frame->poc < min_poc) {
+ min_poc = frame->poc;
+ min_idx = i;
+ min_layer = layer;
+ }
}
+ nb_dpb[layer] += !!frame->flags;
}
- nb_dpb += !!frame->flags;
}
if (nb_output > max_output ||
- (nb_output && nb_dpb > max_dpb)) {
- HEVCFrame *frame = &l->DPB[min_idx];
+ (nb_output &&
+ (nb_dpb[0] > max_dpb || nb_dpb[1] > max_dpb))) {
+ HEVCFrame *frame = &s->layers[min_layer].DPB[min_idx];
ret = discard ? 0 :
ff_container_fifo_write(s->output_fifo,
@@ -208,8 +242,8 @@ int ff_hevc_output_frames(HEVCContext *s, HEVCLayerContext *l,
if (ret < 0)
return ret;
- av_log(s->avctx, AV_LOG_DEBUG, "%s frame with POC %d.\n",
- discard ? "Discarded" : "Output", frame->poc);
+ av_log(s->avctx, AV_LOG_DEBUG, "%s frame with POC %d/%d.\n",
+ discard ? "Discarded" : "Output", min_layer, frame->poc);
continue;
}
return 0;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index da2264a097..755c90bbc1 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
#include "version_major.h"
-#define LIBAVCODEC_VERSION_MINOR 12
+#define LIBAVCODEC_VERSION_MINOR 13
#define LIBAVCODEC_VERSION_MICRO 100
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
--
2.43.0
More information about the ffmpeg-devel
mailing list