[FFmpeg-devel] [PATCH v3] libavcodec: implementation of DNxUncompressed decoder

Mon Sep 9 13:56:53 EEST 2024

Quoting Martin Schitter (2024-09-08 20:41:38)
> diff --git a/libavcodec/dnxucdec.c b/libavcodec/dnxucdec.c
> new file mode 100644
> index 0000000..455c374
> --- /dev/null
> +++ b/libavcodec/dnxucdec.c
> @@ -0,0 +1,495 @@
> +/*
> + * Avid DNxUncomressed / SMPTE RDD 50 demuxer

This says it's a demuxer, while it's implemented as a decoder.

I'm also wondering if this shouldn't be handled as a demuxer exporting
raw video, at least in some cases if not all.

> + * Copyright (c) 2024 Martin Schitter
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/*
> + This partial implementation of a decoder for DNxUncompressed video data
> + is based on reverse engineering of output generated by DaVinci Resolve 19
> + because the SMPTE RDD 50 specification is unfortunately not freely accessible.
> +
> + It's therefor limited by the present export capabilities of Resolve (YUV444
> + variants, YUV422 16bit, and alpha support is missing). But also some ffmpeg
> + shortcomings are affecting the range of available formats (no YUV half and
> + float pixel formats and filters are provided by ffmpeg until now and RGB half
> + content always requires an alpha plane).
> +
> + A wide range of DNxUncompressed formats are nevertheless already supported:
> +
> +  - YUV 4:2:2 8-/10-/12-bit
> +  - RGB 8-/10-/12-bit/half/float
> +
> +*/
> +
> +#include "avcodec.h"
> +#include "codec_internal.h"
> +#include "decode.h"
> +#include "libavutil/imgutils.h"
> +#include "thread.h"
> +
> +typedef struct DNxUcParseContext {
> +    uint32_t fourcc_tag;
> +    uint32_t width;
> +    uint32_t height;
> +    uint32_t nr_bytes;
> +} DNxUcParseContext;

The parser should be in its own file, as it can be enabled or disabled
independently of the decoder.
> +
> +/*
> +DNxUncompressed frame data comes wrapped in simple metadata
> +and fourcc markers:
> +
> +[0-4]   number of raw data (37 bytes header + frame data)
> +[4-7]   fourcc 'pack'
> +[8-11]  unknown value (allways: 0x15)
> +[12-15] fourcc 'sinf'
> +[16-19] frame width / line packing size
> +[20-23] frame hight / nr of lines
> +[24-27] fourcc pixel format indicator
> +[28]    unknown value (alpha?)
> +[29-32] nr of bytes in frame data + 8
> +[33-36] fourcc 'sdat'
> +[37-..] frame data
> +*/
> +
> +static int dnxuc_parse(AVCodecParserContext *s,
> +                    AVCodecContext *avctx,
> +                    const uint8_t **poutbuf, int *poutbuf_size,
> +                    const uint8_t *buf, int buf_size){

Opening brace of a function body should be on its own line.
> +
> +    char fourcc_buf[5];
> +    const int HEADER_SIZE = 37;
> +
> +    DNxUcParseContext *pc;
> +    pc = (DNxUcParseContext *) s->priv_data;
> +
> +    if (!buf_size) {
> +        return 0;
> +    } else if ( buf_size < 37 /* check metadata structure expectations */
> +        || MKTAG('p','a','c','k') != *(uint32_t*) (buf+4)
> +        || MKTAG('s','i','n','f') != *(uint32_t*) (buf+12)
> +        || MKTAG('s','d','a','t') != *(uint32_t*) (buf+33)){
> +            av_log(0, AV_LOG_ERROR, "can't read DNxUncompressed metadata.\n");

av_log() should always get a proper logging context (avctx in this case).

> +            *poutbuf_size = 0;
> +            return buf_size;
> +    }
> +
> +    pc->fourcc_tag = *(uint32_t*)(buf+24);
> +    pc->width =  *(uint32_t*)(buf+16);
> +    pc->height =  *(uint32_t*)(buf+20);
> +    pc->nr_bytes = *(uint32_t*)(buf+29) - 8;

Use AV_RL32()
> +
> +    if (!avctx->codec_tag) {
> +        av_fourcc_make_string(fourcc_buf, pc->fourcc_tag);
> +        av_log(0, AV_LOG_INFO, "dnxuc_parser: '%s' %dx%d %dbpp %d\n",
> +            fourcc_buf,
> +            pc->width, pc->height,
> +            (pc->nr_bytes*8)/(pc->width*pc->height),
> +            pc->nr_bytes);
> +        avctx->codec_tag = pc->fourcc_tag;
> +    }
> +
> +    if (pc->nr_bytes != buf_size - HEADER_SIZE){
> +        av_log(avctx, AV_LOG_ERROR, "Insufficient size of data.\n");
> +        *poutbuf_size = 0;
> +        return buf_size;
> +    }
> +
> +    *poutbuf = buf + HEADER_SIZE;
> +    *poutbuf_size = pc->nr_bytes;
> +
> +    return buf_size;
> +}
> +
> +static av_cold int dnxuc_decode_init(AVCodecContext *avctx){
> +    return 0;
> +}
> +
> +
> +static int pass_though(AVCodecContext *avctx, AVFrame *frame, const AVPacket *avpkt){
> +
> +    /* there is no need to copy as the data already match
> +     * a known pixel format */
> +
> +    frame->buf[0] = av_buffer_ref(avpkt->buf);
> +
> +    if (!frame->buf[0]) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    return av_image_fill_arrays(frame->data, frame->linesize, avpkt->data,
> +                               avctx->pix_fmt, avctx->width, avctx->height, 1);
> +}
> +
> +static int float2planes(AVCodecContext *avctx, AVFrame *frame, const AVPacket *pkt){
> +
> +    int ret, x, y, lw;
> +    const size_t sof = 4;
> +
> +    ret = ff_thread_get_buffer(avctx, frame, 0);
> +    if (ret < 0)
> +        return ret;
> +
> +    lw = frame->width;
> +
> +    for(y = 0; y < frame->height; y++){
> +        for(x = 0; x < frame->width; x++){
> +            memcpy(&frame->data[2][sof*(lw*y + x)], &pkt->data[sof* 3*(lw*y + x)], sof);
> +            memcpy(&frame->data[0][sof*(lw*y + x)], &pkt->data[sof*(3*(lw*y + x) + 1)], sof);
> +            memcpy(&frame->data[1][sof*(lw*y + x)], &pkt->data[sof*(3*(lw*y + x) + 2)], sof);
> +        }
> +    }
> +    return pkt->size;
> +}
> +
> +static int half_add_alpha(AVCodecContext *avctx, AVFrame *frame, const AVPacket *pkt){
> +
> +    /* ffmpeg doesn't provide RGB half bit depth without alpha channel right now
> +     * we simply add an opaque alpha layer as workaround */
> +
> +    int ret, x, y, lw;
> +    const size_t soh = 2;
> +    const uint16_t opaque = 0x3c00;
> +
> +    ret = ff_thread_get_buffer(avctx, frame, 0);
> +    if (ret < 0)
> +        return ret;

This call should not be duplicated in every handler.

> +
> +    lw = frame->width;
> +
> +    for(y = 0; y < frame->height; y++){
> +        for(x = 0; x < frame->width; x++){
> +            memcpy(&frame->data[0][soh*4*(lw*y + x)], &pkt->data[soh*3*(lw*y + x)], soh*3);
> +            memcpy(&frame->data[0][soh*(4*(lw*y + x) + 3)], &opaque, soh);
> +        }
> +    }
> +    return pkt->size;
> +}
> +
> +/* DNxUncompressed utilizes a very dense bitpack representation of 10bit and 12bit pixel data.
> +
> +Lines of Image data, which look like in their ordinary 8bit counterpart, contain the most
> +significant upper bits of the pixel data. These sections alternate with shorter segments in
> +which the complementary least significant bits of information get packed in a gapless sequence.
> +
> ++----------------------+ +----------------------+ +------------------------+ +----------~
> +|  8 m.s.bits of R[1]  | |  8 m.s.bits of G[1]  | |  8 m.s.bits of B[1]    | | msb R[2]    ... one line
> ++----------------------+ +----------------------+ +------------------------+ +----------~
> +
> ++---------------------------------------------------------------+  +-----------~
> +| +------------+ +------------+ +------------+ +--------------+ |  | +--------~
> +| | 2 lsb R[1] | | 2 lsb G[1] | | 2 lsb B[1] | | 2 lsb R[2]   | |  | | G[2]lsb    ... LSB bits for line
> +| +------------+ +------------+ +------------+ +--------------+ |  | +--------~
> ++---------------------------- one byte ------------------------ +  +-----------~
> +
> +next line of MSB bytes...   */
> +
> +static int unpack_rg10(AVCodecContext *avctx, AVFrame *frame, const AVPacket *pkt){
> +
> +    int ret, x, y, lw, msp, pack, lsp, p_off;
> +    uint16_t r,g,b;
> +
> +    if (avctx->width % 4){
> +        av_log(0, AV_LOG_ERROR,
> +        "Image width has to be dividable by 4 for 10bit RGB DNxUncompressed!\n");
> +        return AVERROR_EXIT;
> +    }

These checks can be done once during init.
> +
> +    ret = ff_thread_get_buffer(avctx, frame, 0);
> +    if (ret < 0)
> +        return ret;
> +
> +    lw = frame->width;
> +
> +    for(y = 0; y < frame->height; y++){
> +        for(x = 0; x < frame->width; x++){
> +            msp = pkt->data[y*3*(lw + lw/4) + 3*x];

Does anything guarantee the packet is large enough?

-- 
Anton Khirnov