[FFmpeg-devel] [PATCH v3] libavcodec: implementation of DNxUncompressed decoder
Anton Khirnov
anton at khirnov.net
Mon Sep 9 13:56:53 EEST 2024
Quoting Martin Schitter (2024-09-08 20:41:38)
> diff --git a/libavcodec/dnxucdec.c b/libavcodec/dnxucdec.c
> new file mode 100644
> index 0000000..455c374
> --- /dev/null
> +++ b/libavcodec/dnxucdec.c
> @@ -0,0 +1,495 @@
> +/*
> + * Avid DNxUncomressed / SMPTE RDD 50 demuxer
This says it's a demuxer, while it's implemented as a decoder.
I'm also wondering whether this shouldn't be handled as a demuxer exporting
raw video, at least in some cases if not all.
> + * Copyright (c) 2024 Martin Schitter
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/*
> + This partial implementation of a decoder for DNxUncompressed video data
> + is based on reverse engineering of output generated by DaVinci Resolve 19
> + because the SMPTE RDD 50 specification is unfortunately not freely accessible.
> +
> + It's therefor limited by the present export capabilities of Resolve (YUV444
> + variants, YUV422 16bit, and alpha support is missing). But also some ffmpeg
> + shortcomings are affecting the range of available formats (no YUV half and
> + float pixel formats and filters are provided by ffmpeg until now and RGB half
> + content always requires an alpha plane).
> +
> + A wide range of DNxUncompressed formats are nevertheless already supported:
> +
> + - YUV 4:2:2 8-/10-/12-bit
> + - RGB 8-/10-/12-bit/half/float
> +
> +*/
> +
> +#include "avcodec.h"
> +#include "codec_internal.h"
> +#include "decode.h"
> +#include "libavutil/imgutils.h"
> +#include "thread.h"
> +
> +typedef struct DNxUcParseContext {
> + uint32_t fourcc_tag;
> + uint32_t width;
> + uint32_t height;
> + uint32_t nr_bytes;
> +} DNxUcParseContext;
The parser should be in its own file, as it can be enabled or disabled
independently from the decoder.
> +
> +/*
> +DNxUncompressed frame data comes wrapped in simple metadata
> +and fourcc markers:
> +
> +[0-4] number of raw data (37 bytes header + frame data)
> +[4-7] fourcc 'pack'
> +[8-11] unknown value (allways: 0x15)
> +[12-15] fourcc 'sinf'
> +[16-19] frame width / line packing size
> +[20-23] frame hight / nr of lines
> +[24-27] fourcc pixel format indicator
> +[28] unknown value (alpha?)
> +[29-32] nr of bytes in frame data + 8
> +[33-36] fourcc 'sdat'
> +[37-..] frame data
> +*/
> +
> +static int dnxuc_parse(AVCodecParserContext *s,
> + AVCodecContext *avctx,
> + const uint8_t **poutbuf, int *poutbuf_size,
> + const uint8_t *buf, int buf_size){
Opening brace of a function body should be on its own line.
> +
> + char fourcc_buf[5];
> + const int HEADER_SIZE = 37;
> +
> + DNxUcParseContext *pc;
> + pc = (DNxUcParseContext *) s->priv_data;
> +
> + if (!buf_size) {
> + return 0;
> + } else if ( buf_size < 37 /* check metadata structure expectations */
> + || MKTAG('p','a','c','k') != *(uint32_t*) (buf+4)
> + || MKTAG('s','i','n','f') != *(uint32_t*) (buf+12)
> + || MKTAG('s','d','a','t') != *(uint32_t*) (buf+33)){
> + av_log(0, AV_LOG_ERROR, "can't read DNxUncompressed metadata.\n");
av_log() should always get a proper logging context; that is avctx in this case.
> + *poutbuf_size = 0;
> + return buf_size;
> + }
> +
> + pc->fourcc_tag = *(uint32_t*)(buf+24);
> + pc->width = *(uint32_t*)(buf+16);
> + pc->height = *(uint32_t*)(buf+20);
> + pc->nr_bytes = *(uint32_t*)(buf+29) - 8;
Use AV_RL32()
> +
> + if (!avctx->codec_tag) {
> + av_fourcc_make_string(fourcc_buf, pc->fourcc_tag);
> + av_log(0, AV_LOG_INFO, "dnxuc_parser: '%s' %dx%d %dbpp %d\n",
> + fourcc_buf,
> + pc->width, pc->height,
> + (pc->nr_bytes*8)/(pc->width*pc->height),
> + pc->nr_bytes);
> + avctx->codec_tag = pc->fourcc_tag;
> + }
> +
> + if (pc->nr_bytes != buf_size - HEADER_SIZE){
> + av_log(avctx, AV_LOG_ERROR, "Insufficient size of data.\n");
> + *poutbuf_size = 0;
> + return buf_size;
> + }
> +
> + *poutbuf = buf + HEADER_SIZE;
> + *poutbuf_size = pc->nr_bytes;
> +
> + return buf_size;
> +}
> +
> +static av_cold int dnxuc_decode_init(AVCodecContext *avctx){
> + return 0;
> +}
> +
> +
> +static int pass_though(AVCodecContext *avctx, AVFrame *frame, const AVPacket *avpkt){
> +
> + /* there is no need to copy as the data already match
> + * a known pixel format */
> +
> + frame->buf[0] = av_buffer_ref(avpkt->buf);
> +
> + if (!frame->buf[0]) {
> + return AVERROR(ENOMEM);
> + }
> +
> + return av_image_fill_arrays(frame->data, frame->linesize, avpkt->data,
> + avctx->pix_fmt, avctx->width, avctx->height, 1);
> +}
> +
> +static int float2planes(AVCodecContext *avctx, AVFrame *frame, const AVPacket *pkt){
> +
> + int ret, x, y, lw;
> + const size_t sof = 4;
> +
> + ret = ff_thread_get_buffer(avctx, frame, 0);
> + if (ret < 0)
> + return ret;
> +
> + lw = frame->width;
> +
> + for(y = 0; y < frame->height; y++){
> + for(x = 0; x < frame->width; x++){
> + memcpy(&frame->data[2][sof*(lw*y + x)], &pkt->data[sof* 3*(lw*y + x)], sof);
> + memcpy(&frame->data[0][sof*(lw*y + x)], &pkt->data[sof*(3*(lw*y + x) + 1)], sof);
> + memcpy(&frame->data[1][sof*(lw*y + x)], &pkt->data[sof*(3*(lw*y + x) + 2)], sof);
> + }
> + }
> + return pkt->size;
> +}
> +
> +static int half_add_alpha(AVCodecContext *avctx, AVFrame *frame, const AVPacket *pkt){
> +
> + /* ffmpeg doesn't provide RGB half bit depth without alpha channel right now
> + * we simply add an opaque alpha layer as workaround */
> +
> + int ret, x, y, lw;
> + const size_t soh = 2;
> + const uint16_t opaque = 0x3c00;
> +
> + ret = ff_thread_get_buffer(avctx, frame, 0);
> + if (ret < 0)
> + return ret;
This call should not be duplicated in every handler.
> +
> + lw = frame->width;
> +
> + for(y = 0; y < frame->height; y++){
> + for(x = 0; x < frame->width; x++){
> + memcpy(&frame->data[0][soh*4*(lw*y + x)], &pkt->data[soh*3*(lw*y + x)], soh*3);
> + memcpy(&frame->data[0][soh*(4*(lw*y + x) + 3)], &opaque, soh);
> + }
> + }
> + return pkt->size;
> +}
> +
> +/* DNxUncompressed utilizes a very dense bitpack representation of 10bit and 12bit pixel data.
> +
> +Lines of Image data, which look like in their ordinary 8bit counterpart, contain the most
> +significant upper bits of the pixel data. These sections alternate with shorter segments in
> +which the complementary least significant bits of information get packed in a gapless sequence.
> +
> ++----------------------+ +----------------------+ +------------------------+ +----------~
> +| 8 m.s.bits of R[1] | | 8 m.s.bits of G[1] | | 8 m.s.bits of B[1] | | msb R[2] ... one line
> ++----------------------+ +----------------------+ +------------------------+ +----------~
> +
> ++---------------------------------------------------------------+ +-----------~
> +| +------------+ +------------+ +------------+ +--------------+ | | +--------~
> +| | 2 lsb R[1] | | 2 lsb G[1] | | 2 lsb B[1] | | 2 lsb R[2] | | | | G[2]lsb ... LSB bits for line
> +| +------------+ +------------+ +------------+ +--------------+ | | +--------~
> ++---------------------------- one byte ------------------------ + +-----------~
> +
> +next line of MSB bytes... */
> +
> +static int unpack_rg10(AVCodecContext *avctx, AVFrame *frame, const AVPacket *pkt){
> +
> + int ret, x, y, lw, msp, pack, lsp, p_off;
> + uint16_t r,g,b;
> +
> + if (avctx->width % 4){
> + av_log(0, AV_LOG_ERROR,
> + "Image width has to be dividable by 4 for 10bit RGB DNxUncompressed!\n");
> + return AVERROR_EXIT;
> + }
These checks can be done once during init.
> +
> + ret = ff_thread_get_buffer(avctx, frame, 0);
> + if (ret < 0)
> + return ret;
> +
> + lw = frame->width;
> +
> + for(y = 0; y < frame->height; y++){
> + for(x = 0; x < frame->width; x++){
> + msp = pkt->data[y*3*(lw + lw/4) + 3*x];
Does anything guarantee the packet is large enough?
--
Anton Khirnov
More information about the ffmpeg-devel
mailing list