[FFmpeg-devel] [PATCH 2/3] avcodec/rv60: RealVideo 6.0 decoder

Sun Mar 3 09:55:59 EET 2024

Peter Ross:
> Reviewed-by: Anton Khirnov <anton at khirnov.net>
> Reviewed-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
> Signed-off-by: Peter Ross <pross at xvid.org>
> ---
>  libavcodec/Makefile     |    1 +
>  libavcodec/allcodecs.c  |    1 +
>  libavcodec/codec_desc.c |    7 +
>  libavcodec/codec_id.h   |    1 +
>  libavcodec/rv60data.h   |  118 ++
>  libavcodec/rv60dec.c    | 2419 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/rv60dsp.c    |  164 +++
>  libavcodec/rv60dsp.h    |   30 +
>  libavcodec/rv60vlcs.h   | 2315 +++++++++++++++++++++++++++++++++++++
>  libavformat/riff.c      |    1 +
>  libavformat/rm.c        |    1 +
>  11 files changed, 5058 insertions(+)
>  create mode 100644 libavcodec/rv60data.h
>  create mode 100644 libavcodec/rv60dec.c
>  create mode 100644 libavcodec/rv60dsp.c
>  create mode 100644 libavcodec/rv60dsp.h
>  create mode 100644 libavcodec/rv60vlcs.h
> 
> diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
> index 033344304c..2ed6ce0953 100644
> --- a/libavcodec/codec_desc.c
> +++ b/libavcodec/codec_desc.c
> @@ -1967,6 +1967,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
>          .long_name = NULL_IF_CONFIG_SMALL("LEAD MCMP"),
>          .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
>      },
> +    {
> +        .id        = AV_CODEC_ID_RV60,
> +        .type      = AVMEDIA_TYPE_VIDEO,
> +        .name      = "rv60",
> +        .long_name = NULL_IF_CONFIG_SMALL("RealVideo 6.0"),
> +        .props     = AV_CODEC_PROP_LOSSY,

Missing AV_CODEC_PROP_REORDER (the codec has B-frames, so output order
differs from coded order).

> +    },
>  
>      /* various PCM "codecs" */
>      {
> diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
> index d96e49430e..3be0dda20e 100644
> --- a/libavcodec/codec_id.h
> +++ b/libavcodec/codec_id.h
> @@ -325,6 +325,7 @@ enum AVCodecID {
>      AV_CODEC_ID_RTV1,
>      AV_CODEC_ID_VMIX,
>      AV_CODEC_ID_LEAD,
> +    AV_CODEC_ID_RV60,
>  
>      /* various PCM "codecs" */
>      AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
> diff --git a/libavcodec/rv60data.h b/libavcodec/rv60data.h
> new file mode 100644
> index 0000000000..65f9853770
> --- /dev/null
> +++ b/libavcodec/rv60data.h
> @@ -0,0 +1,118 @@
> +/*
> + * RV60 decoder
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_RV60DATA_H
> +#define AVCODEC_RV60DATA_H
> +
> +#include <stdint.h>
> +
> +static const uint8_t rv60_candidate_intra_angles[6] = {
> +    0, 1, 10, 26, 18, 2
> +};
> +
> +static const uint8_t rv60_ipred_angle[9] = {
> +    0, 2, 5, 9, 13, 17, 21, 26, 32
> +};
> +
> +static const uint16_t rv60_ipred_inv_angle[9] = {
> +    0, 4096, 1638, 910, 630, 482, 390, 315, 256
> +};
> +
> +static const uint8_t rv60_avail_mask[64] = {
> +    0, 1, 0, 3, 0, 1, 0, 7, 0, 1, 0, 3, 0, 1, 0, 0xF,
> +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
> +};
> +
> +static const uint8_t rv60_edge1[4] = {
> +    0, 2, 2, 2
> +};
> +
> +static const uint8_t rv60_edge2[4] = {
> +    0, 3, 3, 3
> +};
> +
> +static const uint8_t rv60_qp_to_idx[64] = {
> +    0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3,
> +    3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 0,
> +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
> +    2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0
> +};
> +
> +static const uint16_t rv60_quants_b[32] = {
> +     60,   67,   76,   85,   96,  108,  121,  136,
> +    152,  171,  192,  216,  242,  272,  305,  341,
> +    383,  432,  481,  544,  606,  683,  767,  854,
> +    963, 1074, 1212, 1392, 1566, 1708, 1978, 2211
> +};
> +
> +static const uint16_t rv60_chroma_quant_dc[32] = {
> +     0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,
> +    14, 15, 15, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23
> +};
> +
> +static const uint16_t rv60_chroma_quant_ac[32] = {
> +     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
> +    16, 17, 17, 18, 19, 20, 20, 21, 22, 22, 23, 23, 24, 24, 25, 25
> +};

These two tables can use uint8_t. Or, given that they are only used via
rv60_quants_b[rv60_chroma_quant_[ad]c[qp]], one could replace them with
uint16_t arrays that hold the looked-up rv60_quants_b values directly and
thereby avoid the indirection.

...

> +static VLC cbp8_vlc[7][4];
> +static VLC cbp16_vlc[7][3][4];
> +
> +typedef struct {
> +    VLC l0[2];
> +    VLC l12[2];
> +    VLC l3[2];
> +    VLC esc;
> +} CoeffVLCs;
> +
> +static CoeffVLCs intra_coeff_vlc[5];
> +static CoeffVLCs inter_coeff_vlc[7];
> +

You do not need full VLC structures: only VLC.table is ever used,
because all VLCs here use nine bits. So you can replace e.g. cbp8_vlc with
static const VLCElem *cbp8_vlc[7][4] and switch to ff_vlc_init_tables()
(replacing the int offset with a VLCInitState for this).

> +#define MAX_VLC_SIZE 864
> +static VLCElem table_data[129148];
> +
> +/* 32-bit version of rv34_gen_vlc */
> +static void gen_vlc(const uint8_t *bits, int size, VLC *vlc, int *offset)
> +{
> +    int counts[17] = {0};
> +    uint32_t codes[18];
> +    uint32_t cw[MAX_VLC_SIZE];
> +
> +    for (int i = 0; i < size; i++)
> +        counts[bits[i]]++;
> +
> +    codes[0] = counts[0] = 0;
> +    for (int i = 0; i < 17; i++)
> +        codes[i+1] = (codes[i] + counts[i]) << 1;
> +
> +    for (int i = 0; i < size; i++)
> +        cw[i] = codes[bits[i]]++;
> +
> +    vlc->table           = &table_data[*offset];
> +    vlc->table_allocated = FF_ARRAY_ELEMS(table_data) - *offset;
> +    ff_vlc_init_sparse(vlc,  9, size,
> +                       bits, 1, 1,
> +                       cw,   4, 4,
> +                       NULL, 0, 0, VLC_INIT_STATIC_OVERLONG);
> +    *offset += vlc->table_size;
> +}
> +
> +static void build_coeff_vlc(const CoeffLens * lens, CoeffVLCs * vlc, int count, int * offset)
> +{
> +    for (int i = 0; i < count; i++) {
> +        for (int j = 0; j < 2; j++) {
> +            gen_vlc(lens[i].l0[j], 864, &vlc[i].l0[j], offset);
> +            gen_vlc(lens[i].l12[j], 108, &vlc[i].l12[j], offset);
> +            gen_vlc(lens[i].l3[j], 108, &vlc[i].l3[j], offset);
> +        }
> +        gen_vlc(lens[i].esc, 32, &vlc[i].esc, offset);
> +    }
> +}
> +
> +static av_cold void rv60_init_static_data(void)
> +{
> +    int offset = 0;
> +
> +    for (int i = 0; i < 7; i++)
> +        for (int j = 0; j < 4; j++)
> +            gen_vlc(rv60_cbp8_lens[i][j], 64, &cbp8_vlc[i][j], &offset);
> +
> +    for (int i = 0; i < 7; i++)
> +        for (int j = 0; j < 3; j++)
> +            for (int k = 0; k < 4; k++)
> +                gen_vlc(rv60_cbp16_lens[i][j][k], 64, &cbp16_vlc[i][j][k], &offset);
> +
> +    build_coeff_vlc(rv60_intra_lens, intra_coeff_vlc, 5, &offset);
> +    build_coeff_vlc(rv60_inter_lens, inter_coeff_vlc, 7, &offset);
> +}
> +
> +typedef struct {
> +    int sign;
> +    int size;
> +    const uint8_t * data;
> +    int data_size;
> +} Slice;
> +
> +typedef struct {
> +    int cu_split_pos;
> +    uint8_t cu_split[1+4+16+64];
> +
> +    uint8_t coded_blk[64];
> +
> +    uint8_t avg_buffer[64*64 + 32*32*2];
> +    AVFrame avg_buf;

Don't do this. Just store uint8_t *avg_buf[3] and int
avg_buf_linesize[3] and switch the two functions using avg_buf to accept
the data pointers and linesizes separately (for the output frame, not
for the ref frame).

> +} ThreadContext;
> +
> +typedef struct {
> +    int16_t x;
> +    int16_t y;
> +} MV;
> +
> +typedef struct {
> +    enum MVRefEnum mvref;
> +    MV f_mv;
> +    MV b_mv;
> +} MVInfo;
> +
> +typedef struct {
> +    enum IntraMode imode;
> +    MVInfo mv;
> +} BlockInfo;
> +
> +typedef struct {
> +    enum CUType cu_type;
> +    enum PUType pu_type;
> +} PUInfo;
> +
> +typedef struct RV60Context {
> +    AVCodecContext * avctx;
> +    VideoDSPContext vdsp;
> +
> +#define CUR_PIC 0
> +#define LAST_PIC 1
> +#define NEXT_PIC 2
> +    AVFrame *last_frame[3];
> +
> +    int pict_type;
> +    int qp;
> +    int osvquant;
> +    int ts;
> +    int two_f_refs;
> +    int qp_off_type;
> +    int deblock;
> +    int deblock_chroma;
> +    int awidth;
> +    int aheight;
> +    int cu_width;
> +    int cu_height;
> +
> +    Slice * slice;
> +
> +    int pu_stride;
> +    PUInfo * pu_info;
> +
> +    int blk_stride;
> +    BlockInfo * blk_info;
> +
> +    int dblk_stride;
> +    uint8_t * left_str;
> +    uint8_t * top_str;
> +
> +    uint64_t ref_pts[2], ts_scale;
> +    uint32_t ref_ts[2];
> +} RV60Context;
> +
> +static av_cold int rv60_decode_init(AVCodecContext * avctx)
> +{
> +    static AVOnce init_static_once = AV_ONCE_INIT;
> +    RV60Context *s = avctx->priv_data;
> +    int ret;
> +
> +    s->avctx = avctx;
> +
> +    if (avctx->active_thread_type & FF_THREAD_SLICE) {
> +        ret = ff_slice_thread_init_progress(avctx);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    ff_videodsp_init(&s->vdsp, 8);

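Missing configure dependency for this: ff_videodsp_init() needs the
videodsp component, so the decoder has to select it in configure
(rv60_decoder_select="videodsp").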

> +
> +    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
> +
> +    for (int i = 0; i < 3; i++) {
> +        s->last_frame[i] = av_frame_alloc();
> +        if (!s->last_frame[i])
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    ff_thread_once(&init_static_once, rv60_init_static_data);
> +
> +    return 0;
> +}
> +

...

> +
> +static int rv60_decode_frame(AVCodecContext *avctx, AVFrame * frame,
> +                             int * got_frame, AVPacket * avpkt)
> +{
> +    RV60Context *s = avctx->priv_data;
> +    GetBitContext gb;
> +    int ret, header_size, width, height, ofs;
> +
> +    if (avpkt->size == 0) {
> +        if (s->last_frame[NEXT_PIC]->data[0]) {
> +            av_frame_move_ref(frame, s->last_frame[NEXT_PIC]);
> +            *got_frame = 1;
> +        }
> +        return 0;
> +    }
> +
> +    if (avpkt->size < 9)
> +        return AVERROR_INVALIDDATA;
> +
> +    header_size = avpkt->data[0] * 8 + 9;
> +    if (avpkt->size < header_size)
> +        return AVERROR_INVALIDDATA;
> +
> +    init_get_bits8(&gb, avpkt->data + header_size, avpkt->size - header_size);
> +
> +    if ((ret = read_frame_header(s, &gb, &width, &height)) < 0)
> +        return ret;
> +
> +    if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
> +        avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
> +        avctx->skip_frame >= AVDISCARD_ALL)
> +        return avpkt->size;
> +
> +    if (s->pict_type != AV_PICTURE_TYPE_B)
> +        FFSWAP(AVFrame *, s->last_frame[NEXT_PIC], s->last_frame[LAST_PIC]);
> +
> +    if ((s->pict_type == AV_PICTURE_TYPE_P && !s->last_frame[LAST_PIC]->data[0]) ||
> +        (s->pict_type == AV_PICTURE_TYPE_B && (!s->last_frame[LAST_PIC]->data[0] || !s->last_frame[NEXT_PIC]->data[0]))) {
> +        av_log(s->avctx, AV_LOG_ERROR, "missing reference frame\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    s->last_frame[CUR_PIC]->pict_type = s->pict_type;
> +    if (s->pict_type == AV_PICTURE_TYPE_I)
> +        s->last_frame[CUR_PIC]->flags |= AV_FRAME_FLAG_KEY;
> +
> +    if ((ret = update_dimensions_clear_info(s, width, height)) < 0)
> +        return ret;
> +
> +    if ((ret = ff_reget_buffer(avctx, s->last_frame[CUR_PIC], 0)) < 0)
> +        return ret;

ff_reget_buffer() might have to perform a full-frame copy. Why do you
use it?

> +
> +    if ((ret = read_slice_sizes(s, &gb)) < 0)
> +        return ret;
> +
> +    ofs = get_bits_count(&gb) / 8;
> +
> +    for (int i = 0; i < s->cu_height; i++) {
> +        s->slice[i].data = avpkt->data + header_size + ofs;
> +        s->slice[i].data_size = FFMIN(s->slice[i].size, avpkt->size - header_size - ofs);
> +        ofs += s->slice[i].size;
> +    }
> +
> +    if (ffcodec(avctx->codec)->update_thread_context)
> +        ff_thread_finish_setup(avctx);

This seems to have been copied from VP8, but the check is always false
here and ff_thread_finish_setup() is unnecessary, as this decoder does not
support frame threads.

> +
> +    ret = ff_slice_thread_allocz_entries(s->avctx, s->cu_height);
> +    if (ret < 0)
> +        return ret;
> +
> +    s->avctx->execute2(s->avctx, decode_slice, s->last_frame[CUR_PIC], NULL, s->cu_height);
> +
> +    ret = 0;
> +    if (s->pict_type == AV_PICTURE_TYPE_B)
> +        av_frame_move_ref(frame, s->last_frame[CUR_PIC]);
> +    else if (s->last_frame[LAST_PIC]->data[0])
> +        ret = av_frame_ref(frame, s->last_frame[LAST_PIC]);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (s->last_frame[LAST_PIC]->data[0])

Why not check for frame->data[0]? That seems clearer to me, and it would
also keep working if one implemented low-delay (i.e. where it is presumed
that no B-frames are present).

> +        *got_frame = 1;
> +
> +    if (s->pict_type != AV_PICTURE_TYPE_B)
> +        FFSWAP(AVFrame *, s->last_frame[CUR_PIC], s->last_frame[NEXT_PIC]);
> +
> +    if (s->pict_type != AV_PICTURE_TYPE_B) {
> +        s->ref_pts[0] = s->ref_pts[1];
> +        s->ref_pts[1] = avpkt->pts;
> +
> +        s->ref_ts[0] = s->ref_ts[1];
> +        s->ref_ts[1] = s->ts;
> +
> +        if (s->ref_pts[1] > s->ref_pts[0] && s->ref_ts[1] > s->ref_ts[0])
> +            s->ts_scale = (s->ref_pts[1] - s->ref_pts[0]) / (s->ref_ts[1] - s->ref_ts[0]);
> +    } else {
> +        frame->pts = s->ref_pts[0] + (s->ts - s->ref_ts[0]) * s->ts_scale;
> +    }
> +
> +    return avpkt->size;
> +}
> +
> +static void rv60_flush(AVCodecContext *avctx)
> +{
> +    RV60Context *s = avctx->priv_data;
> +
> +    for (int i = 0; i < 3; i++)
> +        av_frame_unref(s->last_frame[i]);
> +}
> +
> +static av_cold int rv60_decode_end(AVCodecContext * avctx)
> +{
> +    RV60Context *s = avctx->priv_data;
> +
> +    for (int i = 0; i < 3; i++)
> +        av_frame_free(&s->last_frame[i]);
> +
> +    av_freep(&s->slice);
> +    av_freep(&s->pu_info);
> +    av_freep(&s->blk_info);
> +    av_freep(&s->top_str);
> +    av_freep(&s->left_str);
> +
> +    return 0;
> +}
> +
> +const FFCodec ff_rv60_decoder = {
> +    .p.name         = "rv60",
> +    CODEC_LONG_NAME("RealVideo 6.0"),
> +    .p.type         = AVMEDIA_TYPE_VIDEO,
> +    .p.id           = AV_CODEC_ID_RV60,
> +    .priv_data_size = sizeof(RV60Context),
> +    .init           = rv60_decode_init,
> +    .close          = rv60_decode_end,
> +    FF_CODEC_DECODE_CB(rv60_decode_frame),
> +    .flush          = rv60_flush,
> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS,
> +    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
> +};