[FFmpeg-devel] [PATCH] avcodec: add VMX1 decoder

Sat Jun 10 13:24:51 EEST 2023

Paul B Mahol:
> +static av_cold int decode_init(AVCodecContext *avctx)
> +{
> +    VMIXContext *s = avctx->priv_data;
> +
> +    avctx->bits_per_raw_sample = 8;
> +    avctx->pix_fmt = AV_PIX_FMT_YUV422P;
> +
> +    avctx->coded_width = FFALIGN(avctx->width, 16);
> +    avctx->coded_height = FFALIGN(avctx->height, 16);
> +
> +    ff_idctdsp_init(&s->idsp, avctx);

The idctdsp configure dependency is missing: the decoder calls
ff_idctdsp_init(), so it has to select idctdsp in configure.

> +    ff_permute_scantable(s->scan, ff_zigzag_direct,
> +                         s->idsp.idct_permutation);
> +    return 0;
> +}
> +
> +static inline int get_se_golomb_vmix(GetBitContext *gb)
> +{
> +    unsigned int buf = get_ue_golomb_long(gb);
> +    int sign = (buf & 1) - 1;
> +    return ((buf >> 1) ^ (~sign));
> +}
> +
> +static int decode_dcac(AVCodecContext *avctx,
> +                       GetBitContext *dc_gb, GetBitContext *ac_gb,
> +                       unsigned *dcrun, unsigned *acrun,
> +                       AVFrame *frame, int width, int by, int plane)
> +{
> +    const ptrdiff_t linesize = frame->linesize[plane];
> +    uint8_t *dst = frame->data[plane] + by * linesize;
> +    unsigned dc_run = *dcrun, ac_run = *acrun;
> +    LOCAL_ALIGNED_32(int16_t, block, [64]);
> +    VMIXContext *s = avctx->priv_data;
> +    const int16_t *factors = s->factors;
> +    const uint8_t *scan = s->scan;
> +    const int add = plane ? 0 : 1024;
> +    int i, dc_v = 0, ac_v = 0, dc = 0;
> +
> +    for (int y = 0; y < 2; y++) {
> +        for (int x = 0; x < width; x += 8) {
> +            memset(block, 0, sizeof(*block)*64);
> +
> +            if (dc_run > 0) {
> +                dc_run--;
> +            } else {
> +                dc_v = get_se_golomb_vmix(dc_gb);
> +                dc += dc_v;
> +                if (!dc_v)
> +                    dc_run = get_ue_golomb_long(dc_gb);
> +            }
> +
> +            for (int n = 0; n < 64; n++) {
> +                if (ac_run > 0) {
> +                    ac_run--;
> +                    continue;
> +                }
> +
> +                ac_v = get_se_golomb_vmix(ac_gb);
> +                i = scan[n];
> +                block[i] = (ac_v * factors[i]) >> 4;
> +                if (!ac_v)
> +                    ac_run = get_ue_golomb_long(ac_gb);
> +            }
> +
> +            block[0] = ((dc + add) * 16) >> 4;
> +            s->idsp.idct_put(dst + x, linesize, block);
> +        }
> +
> +        dst += 8 * linesize;
> +    }
> +
> +    *dcrun = dc_run;
> +    *acrun = ac_run;
> +
> +    return 0;
> +}
> +
> +static int decode_slice(AVCodecContext *avctx, AVFrame *frame,
> +                        const uint8_t *dc_src, unsigned dc_slice_size,
> +                        const uint8_t *ac_src, unsigned ac_slice_size,
> +                        int by)
> +{
> +    unsigned dc_run = 0, ac_run = 0;
> +    GetBitContext dc_gb, ac_gb;
> +    int ret;
> +
> +    ret = init_get_bits8(&dc_gb, dc_src, dc_slice_size);
> +    if (ret < 0)
> +        return ret;
> +
> +    ret = init_get_bits8(&ac_gb, ac_src, ac_slice_size);
> +    if (ret < 0)
> +        return ret;
> +
> +    for (int p = 0; p < 3; p++) {
> +        const int rshift = !!p;
> +        ret = decode_dcac(avctx, &dc_gb, &ac_gb,
> +                          &dc_run, &ac_run, frame,
> +                          frame->width >> rshift, by, p);
> +        if (ret < 0)
> +            return ret;
> +
> +        if (get_bits_left(&dc_gb) < 0)
> +            return AVERROR_INVALIDDATA;
> +        if (get_bits_left(&ac_gb) < 0)
> +            return AVERROR_INVALIDDATA;
> +
> +        align_get_bits(&dc_gb);
> +        align_get_bits(&ac_gb);
> +    }
> +
> +    if (get_bits_left(&dc_gb) > 0)
> +        return AVERROR_INVALIDDATA;
> +    if (get_bits_left(&ac_gb) > 0)
> +        return AVERROR_INVALIDDATA;
> +
> +    return 0;
> +}
> +
> +static int decode_slices(AVCodecContext *avctx, void *arg,
> +                         int n, int thread_nb)
> +{
> +    VMIXContext *s = avctx->priv_data;
> +    const uint8_t *dc_slice_ptr = s->slices[n].dc_ptr;
> +    const uint8_t *ac_slice_ptr = s->slices[n].ac_ptr;
> +    unsigned dc_slice_size = s->slices[n].dc_size;
> +    unsigned ac_slice_size = s->slices[n].ac_size;
> +    AVFrame *frame = arg;
> +
> +    return decode_slice(avctx, frame, dc_slice_ptr, dc_slice_size,
> +                        ac_slice_ptr, ac_slice_size, n * 16);
> +}
> +
> +static int decode_frame(AVCodecContext *avctx,
> +                        AVFrame *frame, int *got_frame,
> +                        AVPacket *avpkt)
> +{
> +    VMIXContext *s = avctx->priv_data;
> +    unsigned offset = 3, q;
> +    int ret;
> +
> +    if (avpkt->size <= 7)
> +        return AVERROR_INVALIDDATA;
> +
> +    if (avpkt->data[0] != 0x01)
> +        return AVERROR_INVALIDDATA;
> +
> +    q = av_clip(99 - av_clip(avpkt->data[1], 0, 99), 0, FF_ARRAY_ELEMS(quality) - 1);

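The inner av_clip() is redundant: avpkt->data[1] is an unsigned byte, so
99 - avpkt->data[1] can never exceed 99, and any negative result is
already clamped to 0 by the outer av_clip().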

> +    for (int n = 0; n < 64; n++)
> +        s->factors[n] = quant[n] * quality[q];
> +
> +    s->nb_slices = avpkt->data[2];
> +    if (!s->nb_slices || s->nb_slices > (avctx->height + 15) / 16)
> +        return AVERROR_INVALIDDATA;
> +
> +    ret = ff_thread_get_buffer(avctx, frame, 0);

This should be moved after the loops below in order to avoid an
allocation in case of invalid input.

> +    if (ret < 0)
> +        return ret;
> +
> +    for (int n = 0; n < s->nb_slices; n++) {
> +        unsigned slice_size;
> +
> +        if (offset + 4 > avpkt->size)
> +            return AVERROR_INVALIDDATA;
> +
> +        slice_size = AV_RL32(avpkt->data + offset);
> +        if (slice_size > avpkt->size)
> +            return AVERROR_INVALIDDATA;
> +
> +        if (avpkt->size - slice_size - 4LL < offset)
> +            return AVERROR_INVALIDDATA;
> +
> +        s->slices[n].dc_size = slice_size;
> +        s->slices[n].dc_ptr = avpkt->data + offset + 4;
> +        offset += slice_size + 4;
> +    }
> +
> +    for (int n = 0; n < s->nb_slices; n++) {
> +        unsigned slice_size;
> +
> +        if (offset + 4 > avpkt->size)
> +            return AVERROR_INVALIDDATA;
> +
> +        slice_size = AV_RL32(avpkt->data + offset);
> +        if (slice_size > avpkt->size)
> +            return AVERROR_INVALIDDATA;
> +
> +        if (avpkt->size - slice_size - 4LL < offset)
> +            return AVERROR_INVALIDDATA;
> +
> +        s->slices[n].ac_size = slice_size;
> +        s->slices[n].ac_ptr = avpkt->data + offset + 4;
> +        offset += slice_size + 4;
> +    }

These two loops differ only in the fields they write to. They could be
combined into a single loop if you changed the way the data is stored,
putting the dc information into slices[n] and the corresponding ac
information into slices[n + nb_slices]. In that case, decode_slices
would need to unpack both entries.
If you don't do this, then there is no need to keep nb_slices in the
context; it can be a local variable of this function.

> +
> +    avctx->execute2(avctx, decode_slices, frame, NULL, s->nb_slices);
> +
> +    frame->pict_type = AV_PICTURE_TYPE_I;
> +    frame->flags |= AV_FRAME_FLAG_KEY;
> +
> +    *got_frame = 1;
> +
> +    return avpkt->size;
> +}