[FFmpeg-devel] [PATCH v11] lavu/frame: Add Dolby Vision metadata side data type

Tue Jan 4 11:03:17 EET 2022

Niklas Haas:
> From: Niklas Haas <git at haasn.dev>
> 
> Yeah, I think I agree that this is probably the best compromise here.
> 
> Updated documentation (and also changed one unnecessarily-large uint64_t
> to uint16_t)
> 
> ---
> In order to be able to extend this struct later (as the Dolby Vision RPU
> evolves), all of the 'container' structs are considered extensible, and
> the individual constituent fields must instead be accessed via offsets.
> The precedent for this style of access is set in
> <libavutil/detection_bbox.h>
> 
> Signed-off-by: Niklas Haas <git at haasn.dev>
> ---
>  doc/APIchanges        |   3 +
>  libavutil/dovi_meta.c |  25 +++++++
>  libavutil/dovi_meta.h | 166 ++++++++++++++++++++++++++++++++++++++++++
>  libavutil/frame.c     |   1 +
>  libavutil/frame.h     |   9 ++-
>  libavutil/version.h   |   2 +-
>  6 files changed, 204 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/APIchanges b/doc/APIchanges
> index 670a59329e..5721486f09 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -14,6 +14,9 @@ libavutil:     2021-04-27
>  
>  API changes, most recent first:
>  
> +2021-12-xx - xxxxxxxxxx - lavu 57.14.100 - frame.h
> +  Add AV_FRAME_DATA_DOVI_METADATA.
> +
>  2021-12-xx - xxxxxxxxxx - lavu 57.13.100 - hwcontext_videotoolbox.h
>    Add av_vt_pixbuf_set_attachments
>  
> diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
> index 7bd08f6c54..9c50da561e 100644
> --- a/libavutil/dovi_meta.c
> +++ b/libavutil/dovi_meta.c
> @@ -33,3 +33,28 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
>  
>      return dovi;
>  }
> +
> +typedef struct AVDOVIMetadataInternal {
> +    AVDOVIMetadata metadata;
> +    AVDOVIRpuDataHeader header;
> +    AVDOVIDataMapping mapping;
> +    AVDOVIColorMetadata color;
> +} AVDOVIMetadataInternal;
> +
> +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
> +{
> +    AVDOVIMetadataInternal *dovi = av_mallocz(sizeof(AVDOVIMetadataInternal));
> +    if (!dovi)
> +        return NULL;
> +
> +    if (size)
> +        *size = sizeof(*dovi);
> +
> +    dovi->metadata = (struct AVDOVIMetadata) {
> +        .header_offset  = offsetof(AVDOVIMetadataInternal, header),
> +        .mapping_offset = offsetof(AVDOVIMetadataInternal, mapping),
> +        .color_offset   = offsetof(AVDOVIMetadataInternal, color),
> +    };
> +
> +    return &dovi->metadata;
> +}
> diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
> index 299911d434..3d11e02bff 100644
> --- a/libavutil/dovi_meta.h
> +++ b/libavutil/dovi_meta.h
> @@ -29,6 +29,7 @@
>  
>  #include <stdint.h>
>  #include <stddef.h>
> +#include "rational.h"
>  
>  /*
>   * DOVI configuration
> @@ -67,4 +68,169 @@ typedef struct AVDOVIDecoderConfigurationRecord {
>   */
>  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
>  
> +/**
> + * Dolby Vision RPU data header.
> + *
> + * @note sizeof(AVDOVIRpuDataHeader) is not part of the public ABI.
> + */
> +typedef struct AVDOVIRpuDataHeader {
> +    uint8_t rpu_type;
> +    uint16_t rpu_format;
> +    uint8_t vdr_rpu_profile;
> +    uint8_t vdr_rpu_level;
> +    uint8_t chroma_resampling_explicit_filter_flag;
> +    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
> +    uint8_t coef_log2_denom;
> +    uint8_t vdr_rpu_normalized_idc;
> +    uint8_t bl_video_full_range_flag;
> +    uint8_t bl_bit_depth; /* [8, 16] */
> +    uint8_t el_bit_depth; /* [8, 16] */
> +    uint8_t vdr_bit_depth; /* [8, 16] */
> +    uint8_t spatial_resampling_filter_flag;
> +    uint8_t el_spatial_resampling_filter_flag;
> +    uint8_t disable_residual_flag;
> +} AVDOVIRpuDataHeader;
> +
> +enum AVDOVIMappingMethod {
> +    AV_DOVI_MAPPING_POLYNOMIAL = 0,
> +    AV_DOVI_MAPPING_MMR = 1,
> +};
> +
> +/**
> + * Coefficients of a piece-wise function. The pieces of the function span the
> + * value ranges between two adjacent pivot values.
> + */
> +#define AV_DOVI_MAX_PIECES 8
> +typedef struct AVDOVIReshapingCurve {
> +    uint8_t num_pivots;                         /* [2, 9] */
> +    uint16_t pivots[AV_DOVI_MAX_PIECES + 1];    /* sorted ascending */
> +    enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES];
> +    /* AV_DOVI_MAPPING_POLYNOMIAL */
> +    uint8_t poly_order[AV_DOVI_MAX_PIECES];     /* [1, 2] */
> +    int64_t poly_coef[AV_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
> +    /* AV_DOVI_MAPPING_MMR */
> +    uint8_t mmr_order[AV_DOVI_MAX_PIECES];      /* [1, 3] */
> +    int64_t mmr_constant[AV_DOVI_MAX_PIECES];
> +    int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7];
> +} AVDOVIReshapingCurve;
> +
> +enum AVDOVINLQMethod {
> +    AV_DOVI_NLQ_NONE = -1,
> +    AV_DOVI_NLQ_LINEAR_DZ = 0,
> +};
> +
> +/**
> + * Coefficients of the non-linear inverse quantization. For the interpretation
> + * of these, see ETSI GS CCM 001.
> + */
> +typedef struct AVDOVINLQParams {
> +    uint16_t nlq_offset;
> +    uint64_t vdr_in_max;
> +    /* AV_DOVI_NLQ_LINEAR_DZ */
> +    uint64_t linear_deadzone_slope;
> +    uint64_t linear_deadzone_threshold;
> +} AVDOVINLQParams;
> +
> +/**
> + * Dolby Vision RPU data mapping parameters.
> + *
> + * @note sizeof(AVDOVIDataMapping) is not part of the public ABI.
> + */
> +typedef struct AVDOVIDataMapping {
> +    uint8_t vdr_rpu_id;
> +    uint8_t mapping_color_space;
> +    uint8_t mapping_chroma_format_idc;
> +    AVDOVIReshapingCurve curves[3]; /* per component */
> +
> +    /* Non-linear inverse quantization */
> +    enum AVDOVINLQMethod nlq_method_idc;
> +    uint32_t num_x_partitions;
> +    uint32_t num_y_partitions;
> +    AVDOVINLQParams nlq[3]; /* per component */
> +} AVDOVIDataMapping;
> +
> +/**
> + * Dolby Vision RPU colorspace metadata parameters.
> + *
> + * @note sizeof(AVDOVIColorMetadata) is not part of the public ABI.
> + */
> +typedef struct AVDOVIColorMetadata {
> +    uint8_t dm_metadata_id;
> +    uint8_t scene_refresh_flag;
> +
> +    /**
> +     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
> +     * used instead of the matrices indicated by the frame's colorspace tags.
> +     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
> +     * matrix based on a Hunt-Pointer-Estevez transform, but without any
> +     * crosstalk. (See the definition of the ICtCp colorspace for more
> +     * information.)
> +     */
> +    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
> +    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
> +    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
> +
> +    /**
> +     * Extra signal metadata (see Dolby patents for more info).
> +     */
> +    uint16_t signal_eotf;
> +    uint16_t signal_eotf_param0;
> +    uint16_t signal_eotf_param1;
> +    uint32_t signal_eotf_param2;
> +    uint8_t signal_bit_depth;
> +    uint8_t signal_color_space;
> +    uint8_t signal_chroma_format;
> +    uint8_t signal_full_range_flag; /* [0, 3] */
> +    uint16_t source_min_pq;
> +    uint16_t source_max_pq;
> +    uint16_t source_diagonal;
> +} AVDOVIColorMetadata;
> +
> +/**
> + * Struct combining the header, mapping and color metadata, for
> + * attaching to frames as side data.
> + *
> + * @note The struct must be allocated with av_dovi_metadata_alloc() and
> + *       its size is not a part of the public ABI.
> + */
> +
> +typedef struct AVDOVIMetadata {
> +    /**
> +     * Offset in bytes from the beginning of this structure at which the
> +     * respective structs start.
> +     */
> +    size_t header_offset;   /* AVDOVIRpuDataHeader */
> +    size_t mapping_offset;  /* AVDOVIDataMapping */
> +    size_t color_offset;    /* AVDOVIColorMetadata */
> +} AVDOVIMetadata;
> +
> +static av_always_inline AVDOVIRpuDataHeader *
> +av_dovi_get_header(const AVDOVIMetadata *data)
> +{
> +    return (AVDOVIRpuDataHeader *)((uint8_t *) data + data->header_offset);
> +}
> +
> +static av_always_inline AVDOVIDataMapping *
> +av_dovi_get_mapping(const AVDOVIMetadata *data)
> +{
> +    return (AVDOVIDataMapping *)((uint8_t *) data + data->mapping_offset);
> +}
> +
> +static av_always_inline AVDOVIColorMetadata *
> +av_dovi_get_color(const AVDOVIMetadata *data)
> +{
> +    return (AVDOVIColorMetadata *)((uint8_t *) data + data->color_offset);
> +}
> +
> +/**
> + * Allocate an AVDOVIMetadata structure and initialize its
> + * fields to default values.
> + *
> + * @param size If this parameter is non-NULL, the size in bytes of the
> + *             allocated struct will be written here on success
> + *
> + * @return the newly allocated struct or NULL on failure
> + */
> +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size);
> +
>  #endif /* AVUTIL_DOVI_META_H */
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index 0912ad9131..8997c85e35 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -729,6 +729,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
>      case AV_FRAME_DATA_FILM_GRAIN_PARAMS:           return "Film grain parameters";
>      case AV_FRAME_DATA_DETECTION_BBOXES:            return "Bounding boxes for object detection and classification";
>      case AV_FRAME_DATA_DOVI_RPU_BUFFER:             return "Dolby Vision RPU Data";
> +    case AV_FRAME_DATA_DOVI_METADATA:               return "Dolby Vision Metadata";
>      }
>      return NULL;
>  }
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index 3f295f6b9e..18e239f870 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -189,11 +189,18 @@ enum AVFrameSideDataType {
>      AV_FRAME_DATA_DETECTION_BBOXES,
>  
>      /**
> -     * Dolby Vision RPU data, suitable for passing to x265
> +     * Dolby Vision RPU raw data, suitable for passing to x265
>       * or other libraries. Array of uint8_t, with NAL emulation
>       * bytes intact.
>       */
>      AV_FRAME_DATA_DOVI_RPU_BUFFER,
> +
> +    /**
> +     * Parsed Dolby Vision metadata, suitable for passing to a software
> +     * implementation. The payload is the AVDOVIMetadata struct defined in
> +     * libavutil/dovi_meta.h.
> +     */
> +    AV_FRAME_DATA_DOVI_METADATA,
>  };
>  
>  enum AVActiveFormatDescription {
> diff --git a/libavutil/version.h b/libavutil/version.h
> index 3cac09cb96..318045d4c4 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -79,7 +79,7 @@
>   */
>  
>  #define LIBAVUTIL_VERSION_MAJOR  57
> -#define LIBAVUTIL_VERSION_MINOR  13
> +#define LIBAVUTIL_VERSION_MINOR  14
>  #define LIBAVUTIL_VERSION_MICRO 100
>  
>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
> 

No further comment from me.

- Andreas