[FFmpeg-devel] [vaapi-cavs 7/7] cavs: support vaapi hwaccel decoding

Mark Thompson sw at jkqxz.net
Mon Jan 22 23:24:10 EET 2024


On 21/01/2024 14:18, jianfeng.zheng wrote:
> see https://github.com/intel/libva/pull/738
> 
> Signed-off-by: jianfeng.zheng <jianfeng.zheng at mthreads.com>
> ---
>   configure                 |  14 ++++
>   libavcodec/Makefile       |   1 +
>   libavcodec/cavs.h         |   4 +
>   libavcodec/cavsdec.c      | 101 +++++++++++++++++++++--
>   libavcodec/hwaccels.h     |   1 +
>   libavcodec/vaapi_cavs.c   | 164 ++++++++++++++++++++++++++++++++++++++
>   libavcodec/vaapi_decode.c |   4 +
>   7 files changed, 284 insertions(+), 5 deletions(-)
>   create mode 100644 libavcodec/vaapi_cavs.c

I suggest splitting this patch into two: first add the hwaccel hooks to the AVS decoder, then add the VAAPI implementation for it.

> diff --git a/configure b/configure
> index c8ae0a061d..89759eda5d 100755
> --- a/configure
> +++ b/configure
> ...
> @@ -7175,6 +7177,18 @@ if enabled vaapi; then
>       check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
>       check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
>       check_type "va/va.h va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
> +
> +    #
> +    # Using 'VA_CHECK_VERSION' in source codes make things easy. But we have to wait
> +    # until newly added VAProfile being distributed by VAAPI released version.
> +    #
> +    # Before or after that, we can use auto-detection to keep version compatibility.
> +    # It always works.
> +    #
> +    disable va_profile_avs &&
> +        test_code cc va/va.h "VAProfile p1 = VAProfileAVSJizhun, p2 = VAProfileAVSGuangdian;" &&
> +        enable va_profile_avs
> +    enabled va_profile_avs && check_type "va/va.h va/va_dec_avs.h" "VAPictureParameterBufferAVS"
>   fi

Why can't this be done with check_type for the decode structure like the other VAAPI codecs?
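
For example, it looks like the check_type line you already have in this hunk would be sufficient on its own (untested sketch):

    check_type "va/va.h va/va_dec_avs.h" "VAPictureParameterBufferAVS"

placed alongside the other check_type calls, with the hwaccel depending on that type being found - i.e. dropping the separate va_profile_avs probe entirely, as the other VAAPI hwaccels do.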

(I don't think this should be applied to ffmpeg mainline until finalised in libva to avoid any incompatibility, but that doesn't require a released version of libva.)

> ...
> diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
> index 5036ef50f7..5ca021c098 100644
> --- a/libavcodec/cavsdec.c
> +++ b/libavcodec/cavsdec.c
> @@ -25,11 +25,14 @@
>    * @author Stefan Gehrer <stefan.gehrer at gmx.de>
>    */
>   
> +#include "config_components.h"
>   #include "libavutil/avassert.h"
>   #include "libavutil/emms.h"
>   #include "avcodec.h"
>   #include "get_bits.h"
>   #include "golomb.h"
> +#include "hwaccel_internal.h"
> +#include "hwconfig.h"
>   #include "profiles.h"
>   #include "cavs.h"
>   #include "codec_internal.h"
> @@ -1002,9 +1005,9 @@ static inline int decode_slice_header(AVSContext *h, GetBitContext *gb)
>               }
>               h->mb_weight_pred_flag = get_bits1(gb);
>               if (!h->avctx->hwaccel) {
> -            av_log(h->avctx, AV_LOG_ERROR,
> -                   "weighted prediction not yet supported\n");
> -        }
> +                av_log(h->avctx, AV_LOG_ERROR,
> +                    "weighted prediction not yet supported\n");
> +            }

Unrelated whitespace change?

>           }
>       }
>       if (h->aec_flag) {
> @@ -1115,6 +1118,46 @@ static inline int check_for_slice(AVSContext *h)
>    * frame level
>    *
>    ****************************************************************************/
> +static int hwaccel_pic(AVSContext *h)
> +{
> +    int ret = 0;
> +    int stc = -1;
> +    const uint8_t *frm_start = align_get_bits(&h->gb);
> +    const uint8_t *frm_end = h->gb.buffer_end;
> +    const uint8_t *slc_start = frm_start;
> +    const uint8_t *slc_end = frm_end;
> +    GetBitContext gb = h->gb;
> +    const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
> +
> +    ret = hwaccel->start_frame(h->avctx, NULL, 0);
> +    if (ret < 0)
> +        return ret;
> +
> +    for (slc_start = frm_start; slc_start + 4 < frm_end; slc_start = slc_end) {
> +        slc_end = avpriv_find_start_code(slc_start + 4, frm_end, &stc);
> +        if (slc_end < frm_end) {
> +            slc_end -= 4;
> +        }
> +
> +        init_get_bits(&h->gb, slc_start, (slc_end - slc_start) * 8);

init_get_bits8 avoids the unlikely-but-possible overflow case here.
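
i.e. something like:

    ret = init_get_bits8(&h->gb, slc_start, slc_end - slc_start);
    if (ret < 0)
        break;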

> +        if (!check_for_slice(h)) {
> +            break;
> +        }
> +
> +        ret = hwaccel->decode_slice(h->avctx, slc_start, slc_end - slc_start);
> +        if (ret < 0) {
> +            break;
> +        }
> +    }
> +
> +    h->gb = gb;
> +    skip_bits(&h->gb, (slc_start - frm_start) * 8);

In error cases this leaves the bitstream reader at the start of the slice that failed?  Is that intended?

> +
> +    if (ret < 0)
> +        return ret;
> +
> +    return hwaccel->end_frame(h->avctx);

Are there any bad consequences if you call this with zero slices in the frame, since the loop above appears to allow that?  (If yes, maybe only call start_frame once you know you have some slices.)
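
Something along these lines, perhaps (untested sketch, also folding in the init_get_bits8 change suggested above):

    static int hwaccel_pic(AVSContext *h)
    {
        int ret = 0, got_slice = 0;
        uint32_t stc = -1;
        const uint8_t *frm_start = align_get_bits(&h->gb);
        const uint8_t *frm_end   = h->gb.buffer_end;
        const uint8_t *slc_start = frm_start;
        const uint8_t *slc_end   = frm_end;
        GetBitContext gb = h->gb;
        const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);

        for (slc_start = frm_start; slc_start + 4 < frm_end; slc_start = slc_end) {
            slc_end = avpriv_find_start_code(slc_start + 4, frm_end, &stc);
            if (slc_end < frm_end)
                slc_end -= 4;

            ret = init_get_bits8(&h->gb, slc_start, slc_end - slc_start);
            if (ret < 0)
                break;
            if (!check_for_slice(h))
                break;

            /* Defer start_frame until at least one slice has been found. */
            if (!got_slice) {
                ret = hwaccel->start_frame(h->avctx, NULL, 0);
                if (ret < 0)
                    break;
                got_slice = 1;
            }

            ret = hwaccel->decode_slice(h->avctx, slc_start, slc_end - slc_start);
            if (ret < 0)
                break;
        }

        h->gb = gb;
        skip_bits(&h->gb, (slc_start - frm_start) * 8);

        if (ret < 0)
            return ret;
        if (!got_slice)
            return AVERROR_INVALIDDATA;

        return hwaccel->end_frame(h->avctx);
    }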

> +}
>   
>   /**
>    * @brief remove frame out of dpb
> @@ -1125,6 +1168,9 @@ static void cavs_frame_unref(AVSFrame *frame)
>       if (!frame->f || !frame->f->buf[0])
>           return;
>   
> +    av_buffer_unref(&frame->hwaccel_priv_buf);
> +    frame->hwaccel_picture_private = NULL;
> +
>       av_frame_unref(frame->f);
>   }
>   
> @@ -1219,6 +1265,17 @@ static int decode_pic(AVSContext *h)
>       if (ret < 0)
>           return ret;
>   
> +    if (h->avctx->hwaccel) {
> +        const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
> +        av_assert0(!h->cur.hwaccel_picture_private);
> +        if (hwaccel->frame_priv_data_size) {
> +            h->cur.hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
> +            if (!h->cur.hwaccel_priv_buf)
> +                return AVERROR(ENOMEM);
> +            h->cur.hwaccel_picture_private = h->cur.hwaccel_priv_buf->data;
> +        }
> +    }
> +
>       if (!h->edge_emu_buffer) {
>           int alloc_size = FFALIGN(FFABS(h->cur.f->linesize[0]) + 32, 32);
>           h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 24);
> @@ -1247,7 +1304,9 @@ static int decode_pic(AVSContext *h)
>               av_log(h->avctx, AV_LOG_ERROR, "poc=%d/%d/%d, dist=%d/%d\n",
>                       h->DPB[1].poc, h->DPB[0].poc, h->cur.poc, h->dist[0], h->dist[1]);
>               av_log(h->avctx, AV_LOG_ERROR, "sym_factor %d too large\n", h->sym_factor);
> -            return AVERROR_INVALIDDATA;
> +
> +            if (!h->avctx->hwaccel)
> +                return AVERROR_INVALIDDATA;

Why is this not an error in hw cases?

>           }
>       } else {
>           h->direct_den[0] = h->dist[0] ? 16384 / h->dist[0] : 0;
> @@ -1332,7 +1391,9 @@ static int decode_pic(AVSContext *h)
>       }
>   
>       ret = 0;
> -    if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
> +    if (h->avctx->hwaccel) {
> +        ret = hwaccel_pic(h);
> +    } else if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
>           do {
>               check_for_slice(h);
>               ret = decode_mb_i(h, 0);
> @@ -1503,6 +1564,20 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>                   return ret;
>               avctx->profile = h->profile;
>               avctx->level = h->level;
> +            if (!h->got_pix_fmt) {
> +                h->got_pix_fmt = 1;
> +                ret = ff_get_format(avctx, avctx->codec->pix_fmts);
> +                if (ret < 0)
> +                    return ret;
> +
> +                avctx->pix_fmt = ret;
> +
> +                if (h->profile == AV_PROFILE_CAVS_GUANGDIAN && !avctx->hwaccel) {
> +                    av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport hardware"
> +                                    " accelerated for CAVS Guangdian Profile decoding.\n");

This message will also appear in a confusing way in software-only cases with no hardware at all.  Can it be clearer that it's really a software feature that's missing?
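
Maybe something like (wording only a suggestion):

    av_log(avctx, AV_LOG_ERROR,
           "The CAVS Guangdian profile is not supported by the software "
           "decoder; a hwaccel (e.g. VAAPI) is required to decode this "
           "stream.\n");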

> +                    return AVERROR(ENOTSUP);
> +                }
> +            }
>               break;
>           case PIC_I_START_CODE:
>               if (!h->got_keyframe) {
> @@ -1577,6 +1652,14 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>       return (buf_ptr - avpkt->data);
>   }
>   
> +static const enum AVPixelFormat cavs_hwaccel_pixfmt_list_420[] = {
> +#if CONFIG_CAVS_VAAPI_HWACCEL
> +    AV_PIX_FMT_VAAPI,
> +#endif
> +    AV_PIX_FMT_YUV420P,
> +    AV_PIX_FMT_NONE
> +};
> +
>   const FFCodec ff_cavs_decoder = {
>       .p.name         = "cavs",
>       CODEC_LONG_NAME("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun profile)"),

This description probably isn't right any more!
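
Perhaps just extend it, e.g.:

    CODEC_LONG_NAME("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun and Guangdian profiles)"),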

> @@ -1589,4 +1672,12 @@ const FFCodec ff_cavs_decoder = {
>       .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>       .flush          = cavs_flush,
>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
> +    .p.pix_fmts     = cavs_hwaccel_pixfmt_list_420,
> +    .hw_configs     = (const AVCodecHWConfigInternal *const []) {
> +#if CONFIG_CAVS_VAAPI_HWACCEL
> +                        HWACCEL_VAAPI(cavs),
> +#endif
> +                        NULL
> +                    },
> +    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_cavs_profiles),
>   };
> diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
> index 5171e4c7d7..a1a973b460 100644
> --- a/libavcodec/hwaccels.h
> +++ b/libavcodec/hwaccels.h
> @@ -89,5 +89,6 @@ extern const struct FFHWAccel ff_wmv3_dxva2_hwaccel;
>   extern const struct FFHWAccel ff_wmv3_nvdec_hwaccel;
>   extern const struct FFHWAccel ff_wmv3_vaapi_hwaccel;
>   extern const struct FFHWAccel ff_wmv3_vdpau_hwaccel;
> +extern const struct FFHWAccel ff_cavs_vaapi_hwaccel;
>   
>   #endif /* AVCODEC_HWACCELS_H */
> diff --git a/libavcodec/vaapi_cavs.c b/libavcodec/vaapi_cavs.c
> new file mode 100644
> index 0000000000..4a7a9b95ad
> --- /dev/null
> +++ b/libavcodec/vaapi_cavs.c
> @@ -0,0 +1,164 @@
> +/*
> + * AVS (Chinese GY/T 257.1—2012) HW decode acceleration through VA-API
> + * Copyright (c) 2022 JianfengZheng <jianfeng.zheng at mthreads.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "hwconfig.h"
> +#include "hwaccel_internal.h"
> +#include "vaapi_decode.h"
> +#include "cavs.h"
> +
> +/**
> + * @file
> + * This file implements the glue code between FFmpeg's and VA-API's
> + * structures for AVS (Chinese GY/T 257.1—2012) decoding.
> + */
> +
> +static int vaapi_avs_pic_type_cvt(int pict_type)
> +{
> +    switch (pict_type)
> +    {
> +    case AV_PICTURE_TYPE_I: return VA_AVS_I_IMG;
> +    case AV_PICTURE_TYPE_P: return VA_AVS_P_IMG;
> +    case AV_PICTURE_TYPE_B: return VA_AVS_B_IMG;
> +    default:                return VA_AVS_I_IMG;
> +    }
> +}
> +
> +static void vaapi_avs_fill_pic(VAPictureAVS *va_pic, const AVSFrame *frame)
> +{
> +    va_pic->surface_id = ff_vaapi_get_surface_id(frame->f);
> +    va_pic->poc = frame->poc / 2;

This / 2 makes me suspicious.  Does the interface actually support interlacing, and does this code?

> +}
> +
> +/** Initialize and start decoding a frame with VA API. */
> +static int vaapi_avs_start_frame(AVCodecContext         *avctx,
> +                                av_unused const uint8_t *buffer,
> +                                av_unused uint32_t       size)
> +{
> +    int i, err;
> +    AVSContext   *h   = avctx->priv_data;
> +    VAPictureParameterBufferAVS pic_param = {};
> +    VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
> +    vapic->output_surface = ff_vaapi_get_surface_id(h->cur.f);
> +
> +    pic_param = (VAPictureParameterBufferAVS) {
> +        .width = h->width,
> +        .height = h->height,
> +        .picture_type = vaapi_avs_pic_type_cvt(h->cur.f->pict_type),

Going via the pic type with a separate enum seems unnecessarily confusing here?  I think this is just the value read from the bitstream, so could it be stored in the context?

> +        .progressive_seq_flag = h->progressive_seq,
> +        .progressive_frame_flag = h->progressive_frame,
> +        .picture_structure_flag = h->pic_structure,
> +        .fixed_pic_qp_flag = h->qp_fixed,
> +        .picture_qp = h->qp,
> +        .loop_filter_disable_flag = h->loop_filter_disable,
> +        .alpha_c_offset = h->alpha_offset,
> +        .beta_offset = h->beta_offset,
> +        .skip_mode_flag_flag = h->skip_mode_flag,
> +        .picture_reference_flag = h->ref_flag,
> +    };
> +
> +    if (h->profile == 0x48) {

You already added a nice name for this magic number in the first patch!
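
i.e. presumably just:

    if (h->profile == AV_PROFILE_CAVS_GUANGDIAN) {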

> +		pic_param.guangdian_fields.guangdian_flag = 1;
> +        pic_param.guangdian_fields.aec_flag = h->aec_flag;
> +        pic_param.guangdian_fields.weight_quant_flag = h->weight_quant_flag;
> +        pic_param.guangdian_fields.chroma_quant_param_delta_cb = h->chroma_quant_param_delta_cb;
> +        pic_param.guangdian_fields.chroma_quant_param_delta_cr = h->chroma_quant_param_delta_cr;
> +        memcpy(pic_param.guangdian_fields.wqm_8x8, h->wqm_8x8, 64);
> +    }
> +
> +    vaapi_avs_fill_pic(&pic_param.curr_pic, &h->cur);
> +    for (i = 0; i < 2; i++) {
> +        vaapi_avs_fill_pic(&pic_param.ref_list[i], &h->DPB[i]);
> +    }
> +
> +    err = ff_vaapi_decode_make_param_buffer(avctx, vapic,
> +                                            VAPictureParameterBufferType,
> +                                            &pic_param, sizeof(pic_param));
> +    if (err < 0)
> +        goto fail;
> +
> +    return 0;
> +fail:
> +    ff_vaapi_decode_cancel(avctx, vapic);
> +    return err;
> +}
> +
> +/** End a hardware decoding based frame. */
> +static int vaapi_avs_end_frame(AVCodecContext *avctx)
> +{
> +    AVSContext *h = avctx->priv_data;
> +    VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
> +    return ff_vaapi_decode_issue(avctx, vapic);
> +}
> +
> +/** Decode the given H.264 slice with VA API. */

The comment is betraying where the code was copied from...

> +static int vaapi_avs_decode_slice(AVCodecContext *avctx,
> +                                   const uint8_t  *buffer,
> +                                   uint32_t        size)
> +{
> +    int err;
> +    AVSContext *h = avctx->priv_data;
> +    VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
> +    VASliceParameterBufferAVS slice_param;
> +    slice_param = (VASliceParameterBufferAVS) {
> +        .slice_data_size        = size,
> +        .slice_data_offset      = 0,
> +        .slice_data_flag        = VA_SLICE_DATA_FLAG_ALL,
> +        .mb_data_bit_offset     = get_bits_count(&h->gb),
> +        .slice_vertical_pos     = h->stc,
> +        .fixed_slice_qp_flag    = h->qp_fixed,
> +        .slice_qp               = h->qp,
> +        .slice_weight_pred_flag = h->slice_weight_pred_flag,
> +        .mb_weight_pred_flag    = h->mb_weight_pred_flag,
> +    };
> +
> +    *((uint32_t *)slice_param.luma_scale) = *((uint32_t *)h->luma_scale);
> +    *((uint32_t *)slice_param.luma_shift) = *((uint32_t *)h->luma_shift);
> +    *((uint32_t *)slice_param.chroma_scale) = *((uint32_t *)h->chroma_scale);
> +    *((uint32_t *)slice_param.chroma_shift) = *((uint32_t *)h->chroma_shift);

Please don't type-pun like this - just loop to copy each of the four bytes normally.
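
e.g. (assuming these fields are all 4-byte arrays, as the casts imply):

    for (int i = 0; i < 4; i++) {
        slice_param.luma_scale[i]   = h->luma_scale[i];
        slice_param.luma_shift[i]   = h->luma_shift[i];
        slice_param.chroma_scale[i] = h->chroma_scale[i];
        slice_param.chroma_shift[i] = h->chroma_shift[i];
    }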

> +
> +    err = ff_vaapi_decode_make_slice_buffer(avctx, vapic,
> +                                            &slice_param, sizeof(slice_param),
> +                                            buffer, size);
> +    if (err < 0)
> +        goto fail;
> +
> +    return 0;
> +
> +fail:
> +    ff_vaapi_decode_cancel(avctx, vapic);
> +    return err;
> +}
> +
> +const FFHWAccel ff_cavs_vaapi_hwaccel = {
> +    .p.name                 = "cavs_vaapi",
> +    .p.type                 = AVMEDIA_TYPE_VIDEO,
> +    .p.id                   = AV_CODEC_ID_CAVS,
> +    .p.pix_fmt              = AV_PIX_FMT_VAAPI,
> +    .start_frame          = &vaapi_avs_start_frame,
> +    .end_frame            = &vaapi_avs_end_frame,
> +    .decode_slice         = &vaapi_avs_decode_slice,
> +    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
> +    .init                 = &ff_vaapi_decode_init,
> +    .uninit               = &ff_vaapi_decode_uninit,
> +    .frame_params         = &ff_vaapi_common_frame_params,
> +    .priv_data_size       = sizeof(VAAPIDecodeContext),
> +    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
> +};
> diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
> index ceac769c52..13a3f6aa42 100644
> --- a/libavcodec/vaapi_decode.c
> +++ b/libavcodec/vaapi_decode.c
> @@ -408,6 +408,10 @@ static const struct {
>                              H264ConstrainedBaseline),
>       MAP(H264,        H264_MAIN,       H264Main    ),
>       MAP(H264,        H264_HIGH,       H264High    ),
> +#if HAVE_VA_PROFILE_AVS
> +    MAP(CAVS,        CAVS_JIZHUN,     AVSJizhun   ),
> +    MAP(CAVS,        CAVS_GUANGDIAN,  AVSGuangdian),
> +#endif
>   #if VA_CHECK_VERSION(0, 37, 0)
>       MAP(HEVC,        HEVC_MAIN,       HEVCMain    ),
>       MAP(HEVC,        HEVC_MAIN_10,    HEVCMain10  ),
> 

Some questions:
* What hardware do you need to run this?
* I'm guessing you have a matching driver for the other side of the VAAPI interface; can you give a link to that?  (Is it going to be in Mesa, given that your company's products are primarily GPUs?)
* How much effort would it be to add support for the Guangdian profile to the software decoder?

Thanks,

- Mark

