[FFmpeg-devel] [PATCH v3 4/4] lavc, doc: add libuavs3d video decoder wrapper

Wed Aug 12 01:18:46 EEST 2020

On 05/08/2020 17:18, hwrenx at 126.com wrote:
> From: hwren <hwrenx at 126.com>
> 
> Signed-off-by: hbj <hanbj at pku.edu.cn>
> Signed-off-by: hwren <hwrenx at 126.com>
> ---
>   Changelog              |   1 +
>   configure              |   4 +
>   doc/decoders.texi      |  21 +++
>   doc/general.texi       |   8 ++
>   libavcodec/Makefile    |   1 +
>   libavcodec/allcodecs.c |   1 +
>   libavcodec/libuavs3d.c | 283 +++++++++++++++++++++++++++++++++++++++++
>   7 files changed, 319 insertions(+)
>   create mode 100644 libavcodec/libuavs3d.c
> 
> ...
> diff --git a/libavcodec/libuavs3d.c b/libavcodec/libuavs3d.c
> new file mode 100644
> index 0000000000..c0d89cd1ce
> --- /dev/null
> +++ b/libavcodec/libuavs3d.c
> @@ -0,0 +1,283 @@
> +/*
> + * RAW AVS3-P2/IEEE1857.10 video demuxer
> + * Copyright (c) 2020 Zhenyu Wang <wangzhenyu at pkusz.edu.cn>
> + *                    Bingjie Han <hanbj at pkusz.edu.cn>
> + *                    Huiwen Ren  <hwrenx at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/avassert.h"
> +#include "libavutil/avutil.h"
> +#include "libavutil/common.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/opt.h"
> +#include "avcodec.h"
> +#include "internal.h"
> +#include "uavs3d.h"
> +
> +#define UAVS3D_MAX_FRAME_THREADS 48

This restriction seems weird.  Is it reflecting some intrinsic property of the codec, or might it change in future?

> +
> +static const int color_primaries_tab[10] = {
> +    AVCOL_PRI_RESERVED0   ,    // 0
> +    AVCOL_PRI_BT709       ,    // 1
> +    AVCOL_PRI_UNSPECIFIED ,    // 2
> +    AVCOL_PRI_RESERVED    ,    // 3
> +    AVCOL_PRI_BT470M      ,    // 4
> +    AVCOL_PRI_BT470BG      ,    // 5
> +    AVCOL_PRI_SMPTE170M   ,    // 6
> +    AVCOL_PRI_SMPTE240M   ,    // 7
> +    AVCOL_PRI_FILM        ,    // 8
> +    AVCOL_PRI_BT2020           // 9
> +};
> +
> +static const int color_transfer_tab[15] = {
> +    AVCOL_TRC_RESERVED0    , // 0
> +    AVCOL_TRC_BT709        , // 1
> +    AVCOL_TRC_UNSPECIFIED  , // 2
> +    AVCOL_TRC_RESERVED     , // 3
> +    AVCOL_TRC_GAMMA22      , // 4
> +    AVCOL_TRC_GAMMA28      , // 5
> +    AVCOL_TRC_SMPTE170M    , // 6
> +    AVCOL_TRC_SMPTE240M    , // 7
> +    AVCOL_TRC_LINEAR       , // 8
> +    AVCOL_TRC_LOG          , // 9
> +    AVCOL_TRC_LOG_SQRT     , // 10
> +    AVCOL_TRC_BT2020_12    , // 11
> +    AVCOL_TRC_SMPTE2084    , // 12
> +    AVCOL_TRC_UNSPECIFIED  , // 13
> +    AVCOL_TRC_ARIB_STD_B67   // 14
> +};
> +
> +static const int color_matrix_tab[12] = {
> +    AVCOL_SPC_RESERVED     , // 0
> +    AVCOL_SPC_BT709        , // 1
> +    AVCOL_SPC_UNSPECIFIED  , // 2
> +    AVCOL_SPC_RESERVED     , // 3
> +    AVCOL_SPC_FCC          , // 4
> +    AVCOL_SPC_BT470BG      , // 5
> +    AVCOL_SPC_SMPTE170M    , // 6
> +    AVCOL_SPC_SMPTE240M    , // 7
> +    AVCOL_SPC_BT2020_NCL   , // 8
> +    AVCOL_SPC_BT2020_CL    , // 9
> +    AVCOL_SPC_UNSPECIFIED  , // 10
> +    AVCOL_SPC_UNSPECIFIED    // 11
> +};

These tables aren't used anywhere.

> +
> +static const enum AVPictureType IMGTYPE[8] = {

This name is doesn't conform to the usual naming convention.  uavs3_frame_types[], maybe?

> +    AV_PICTURE_TYPE_NONE,
> +    AV_PICTURE_TYPE_I,
> +    AV_PICTURE_TYPE_P,
> +    AV_PICTURE_TYPE_B
> +};

What do the other four values in the table mean?

> +
> +typedef struct UAVS3DContext {
> +    AVCodecContext  *avctx;
> +    void            *dec_handle;
> +    int              frame_threads;
> +    int              got_seqhdr;
> +    uavs3d_io_frm_t  dec_frame;
> +} UAVS3DContext;
> +
> +
> +static int uavs3d_find_next_start_code(const unsigned char *bs_data, int bs_len, int *left)
> +{
> +    const unsigned char *data_ptr = bs_data + 4;
> +    int count = bs_len - 4;
> +
> +    while (count >= 4 &&
> +        ((*(unsigned int *)data_ptr) != 0xB6010000) && /* P/B picture */

This pointer cast will be undefined behaviour, because the data need not be aligned.  Even if it were valid, it's still going to give the wrong answer on a machine with different endianness to the one you tested on.  Use AV_RB32() or AV_RL32().

> +        ((*(unsigned int *)data_ptr) != 0xB3010000) && /* I   picture */
> +        ((*(unsigned int *)data_ptr) != 0xB0010000) && /* sequence header */
> +        ((*(unsigned int *)data_ptr) != 0x00010000) && /* first slice */
> +        ((*(unsigned int *)data_ptr) != 0xB1010000)) { /* sequence end */

Make symbolic constants for the magic numbers here.

> +        data_ptr++;
> +        count--;
> +    }
> +
> +    if (count >= 4) {
> +        *left = count;
> +        return 1;
> +    }
> +
> +    return 0;
> +}
> +
> +static void uavs3d_output_callback(uavs3d_io_frm_t *dec_frame) {
> +    uavs3d_io_frm_t frm_out;
> +    AVFrame *frm = (AVFrame *)dec_frame->priv;
> +    int i;
> +
> +    if (!frm) {
> +        return;
> +    }
> +
> +    frm->pts       = dec_frame->pts;
> +    frm->pkt_dts   = dec_frame->dts;
> +    frm->pict_type = IMGTYPE[dec_frame->type];

Is overflow possible?

> +    frm->key_frame = (frm->pict_type == AV_PICTURE_TYPE_I);
> +
> +    for (i = 0; i < 3; i++) {
> +        frm_out.width [i] = dec_frame->width[i];
> +        frm_out.height[i] = dec_frame->height[i];
> +        frm_out.stride[i] = frm->linesize[i];
> +        frm_out.buffer[i] = frm->data[i];
> +    }
> +
> +    uavs3d_img_cpy_cvt(&frm_out, dec_frame, dec_frame->bit_depth);

Is this copy really required?  Decoders will normally allow you to reference the frame they hold internally.

Can the call ever fail?

> +}
> +
> +static av_cold int libuavs3d_init(AVCodecContext *avctx)
> +{
> +    UAVS3DContext *h = avctx->priv_data;
> +    uavs3d_cfg_t cdsc;
> +
> +    cdsc.frm_threads = FFMIN(h->frame_threads > 0 ? h->frame_threads : av_cpu_count(), UAVS3D_MAX_FRAME_THREADS);
> +    cdsc.check_md5 = 0;
> +    h->dec_handle = uavs3d_create(&cdsc, uavs3d_output_callback, NULL);

Can this call fail?  (For example, if out of memory.)

> +    h->got_seqhdr = 0;
> +
> +    return 0;
> +}
> +
> +static av_cold int libuavs3d_end(AVCodecContext *avctx)
> +{
> +    UAVS3DContext *h = avctx->priv_data;
> +
> +    if (h->dec_handle) {
> +        uavs3d_flush(h->dec_handle, NULL);
> +        uavs3d_delete(h->dec_handle);
> +        h->dec_handle = NULL;
> +    }
> +    h->got_seqhdr = 0;
> +
> +    return 0;
> +}
> +
> +static void libuavs3d_flush(AVCodecContext * avctx)
> +{
> +    UAVS3DContext *h = avctx->priv_data;
> +    uavs3d_cfg_t cdsc;
> +    cdsc.frm_threads = FFMIN(h->frame_threads > 0 ? h->frame_threads : av_cpu_count(), UAVS3D_MAX_FRAME_THREADS);
> +    cdsc.check_md5 = 0;
> +
> +    if (h->dec_handle) {
> +        uavs3d_flush(h->dec_handle, NULL);
> +        uavs3d_delete(h->dec_handle);
> +    }
> +
> +    h->dec_handle = uavs3d_create(&cdsc, uavs3d_output_callback, NULL);
> +    h->got_seqhdr = 0;
> +}
> +
> +static int libuavs3d_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
> +{
> +    UAVS3DContext *h = avctx->priv_data;
> +    const uint8_t *buf = avpkt->data;
> +    int buf_size = avpkt->size;
> +    const uint8_t *buf_end;
> +    const uint8_t *buf_ptr;
> +    AVFrame *frm = (AVFrame*)data;

No need for explicit casts from void*.

> +    int left_bytes;
> +    int ret, finish = 0;
> +
> +    *got_frame = 0;
> +    frm->pts = -1;
> +    frm->pict_type = AV_PICTURE_TYPE_NONE;
> +
> +    if (h->got_seqhdr) {
> +        if (!frm->data[0] && (ret = ff_get_buffer(avctx, frm, 0)) < 0) {
> +            return ret;
> +        }
> +        h->dec_frame.priv = data;   // AVFrame

Always allocating here if you have sequence header looks suspicious.  What if you don't get any frame output?

> +    }
> +
> +    if (!buf_size) {
> +        do {
> +            ret = uavs3d_flush(h->dec_handle, &h->dec_frame);
> +        } while (ret > 0 && !h->dec_frame.got_pic);
> +    } else {
> +        buf_ptr = buf;
> +        buf_end = buf + buf_size;
> +
> +        while (!finish) {
> +            int bs_len;
> +            uavs3d_io_frm_t *frm_dec = &h->dec_frame;
> +
> +            if (uavs3d_find_next_start_code(buf_ptr, buf_end - buf_ptr, &left_bytes)) {
> +                bs_len = buf_end - buf_ptr - left_bytes;
> +            } else {
> +                bs_len = buf_end - buf_ptr;
> +                finish = 1;
> +            }
> +            frm_dec->bs = (unsigned char *)buf_ptr;
> +            frm_dec->bs_len = bs_len;
> +            frm_dec->pts = avpkt->pts;
> +            frm_dec->dts = avpkt->dts;
> +            uavs3d_decode(h->dec_handle, frm_dec);

Does this call return any sort of error?

More generally, what happens if there is an error in decoding?  (For example, because the stream is corrupted.)

> +            buf_ptr += bs_len;
> +
> +            if (frm_dec->nal_type == NAL_SEQ_HEADER) {
> +                static const int avs3_fps_num[9] = {0, 240000, 24, 25, 30000, 30, 50, 60000, 60 };
> +                static const int avs3_fps_den[9] = {1,   1001,  1,  1,  1001,  1,  1,  1001,  1 };
> +                avctx->framerate.num = avs3_fps_num[frm_dec->seqhdr->frame_rate_code];
> +                avctx->framerate.den = avs3_fps_den[frm_dec->seqhdr->frame_rate_code];

Same comment as in 2/4 about ff_mpeg12_frame_rate_tab[].

> +                avctx->has_b_frames = 1;

Always?

> +                avctx->pix_fmt = frm_dec->seqhdr->bit_depth_internal == 8 ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV420P10LE;
> +                ff_set_dimensions(avctx, frm_dec->seqhdr->horizontal_size, frm_dec->seqhdr->vertical_size);
> +                h->got_seqhdr = 1;
> +            }
> +            if (frm_dec->got_pic) {
> +                break;
> +            }
> +        }
> +    }
> +
> +    *got_frame = h->dec_frame.got_pic;
> +
> +    return buf_ptr - buf;
> +}
> +
> +#define UAVS3D_OFFSET(x) offsetof(UAVS3DContext, x)
> +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> +static const AVOption options[] = {
> +    { "frame_threads",      "number of frame-level threads ", UAVS3D_OFFSET(frame_threads),  AV_OPT_TYPE_INT,    {.i64 =  0 }, 0, UAVS3D_MAX_FRAME_THREADS, VE },
> +    { NULL }
> +};
> +static const AVClass libuavs3d_class = {
> +    .class_name = "libuavs3d_class",
> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +AVCodec ff_libuavs3d_decoder = {
> +    .name           = "libuavs3d",
> +    .long_name      = NULL_IF_CONFIG_SMALL("uavs3d AVS3-P2/IEEE1857.10 decoder"),
> +    .type           = AVMEDIA_TYPE_VIDEO,
> +    .id             = AV_CODEC_ID_AVS3,
> +    .priv_data_size = sizeof(UAVS3DContext),
> +    .priv_class     = &libuavs3d_class,
> +    .init           = libuavs3d_init,
> +    .close          = libuavs3d_end,
> +    .decode         = libuavs3d_decode_frame,
> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> +    .flush          = libuavs3d_flush,
> +    .pix_fmts        = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_NONE },
> +
> +    .wrapper_name   = "libuavs3d",
> +};
> 

- Mark