[FFmpeg-devel] [vaapi-cavs 7/7] cavs: support vaapi hwaccel decoding

Sun Jan 21 16:18:46 EET 2024

see https://github.com/intel/libva/pull/738

Signed-off-by: jianfeng.zheng <jianfeng.zheng at mthreads.com>
---
 configure                 |  14 ++++
 libavcodec/Makefile       |   1 +
 libavcodec/cavs.h         |   4 +
 libavcodec/cavsdec.c      | 101 +++++++++++++++++++++--
 libavcodec/hwaccels.h     |   1 +
 libavcodec/vaapi_cavs.c   | 164 ++++++++++++++++++++++++++++++++++++++
 libavcodec/vaapi_decode.c |   4 +
 7 files changed, 284 insertions(+), 5 deletions(-)
 create mode 100644 libavcodec/vaapi_cavs.c

diff --git a/configure b/configure
index c8ae0a061d..89759eda5d 100755
--- a/configure
+++ b/configure
@@ -2463,6 +2463,7 @@ HAVE_LIST="
     xmllint
     zlib_gzip
     openvino2
+    va_profile_avs
 "
 
 # options emitted with CONFIG_ prefix but not available on the command line
@@ -3202,6 +3203,7 @@ wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
 wmv3_nvdec_hwaccel_select="vc1_nvdec_hwaccel"
 wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel"
 wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
+cavs_vaapi_hwaccel_deps="vaapi va_profile_avs VAPictureParameterBufferAVS"
 
 # hardware-accelerated codecs
 mediafoundation_deps="mftransform_h MFCreateAlignedMemoryBuffer"
@@ -7175,6 +7177,18 @@ if enabled vaapi; then
     check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
     check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
     check_type "va/va.h va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
+
+    #
+    # Using 'VA_CHECK_VERSION' in source codes make things easy. But we have to wait
+    # until newly added VAProfile being distributed by VAAPI released version.
+    #
+    # Before or after that, we can use auto-detection to keep version compatibility.
+    # It always works.
+    #
+    disable va_profile_avs &&
+        test_code cc va/va.h "VAProfile p1 = VAProfileAVSJizhun, p2 = VAProfileAVSGuangdian;" &&
+        enable va_profile_avs
+    enabled va_profile_avs && check_type "va/va.h va/va_dec_avs.h" "VAPictureParameterBufferAVS"
 fi
 
 if enabled_all opencl libdrm ; then
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index bb42095165..7d92375fed 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1055,6 +1055,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
 OBJS-$(CONFIG_VP9_VDPAU_HWACCEL)          += vdpau_vp9.o
 OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)   += videotoolbox_vp9.o
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec.o
+OBJS-$(CONFIG_CAVS_VAAPI_HWACCEL)         += vaapi_cavs.o
 
 # Objects duplicated from other libraries for shared builds
 SHLIBOBJS                              += log2_tab.o reverse.o
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
index 33ef10e850..4a0918da5a 100644
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -167,10 +167,14 @@ typedef struct AVSFrame {
     AVFrame *f;
     int poc;
     int outputed;
+
+    AVBufferRef   *hwaccel_priv_buf;
+    void          *hwaccel_picture_private;
 } AVSFrame;
 
 typedef struct AVSContext {
     AVCodecContext *avctx;
+    int got_pix_fmt;
     BlockDSPContext bdsp;
     H264ChromaContext h264chroma;
     VideoDSPContext vdsp;
diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index 5036ef50f7..5ca021c098 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -25,11 +25,14 @@
  * @author Stefan Gehrer <stefan.gehrer at gmx.de>
  */
 
+#include "config_components.h"
 #include "libavutil/avassert.h"
 #include "libavutil/emms.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "golomb.h"
+#include "hwaccel_internal.h"
+#include "hwconfig.h"
 #include "profiles.h"
 #include "cavs.h"
 #include "codec_internal.h"
@@ -1002,9 +1005,9 @@ static inline int decode_slice_header(AVSContext *h, GetBitContext *gb)
             }
             h->mb_weight_pred_flag = get_bits1(gb);
             if (!h->avctx->hwaccel) {
-            av_log(h->avctx, AV_LOG_ERROR,
-                   "weighted prediction not yet supported\n");
-        }
+                av_log(h->avctx, AV_LOG_ERROR,
+                    "weighted prediction not yet supported\n");
+            }
         }
     }
     if (h->aec_flag) {
@@ -1115,6 +1118,46 @@ static inline int check_for_slice(AVSContext *h)
  * frame level
  *
  ****************************************************************************/
+static int hwaccel_pic(AVSContext *h)
+{
+    int ret = 0;
+    int stc = -1;
+    const uint8_t *frm_start = align_get_bits(&h->gb);
+    const uint8_t *frm_end = h->gb.buffer_end;
+    const uint8_t *slc_start = frm_start;
+    const uint8_t *slc_end = frm_end;
+    GetBitContext gb = h->gb;
+    const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
+
+    ret = hwaccel->start_frame(h->avctx, NULL, 0);
+    if (ret < 0)
+        return ret;
+
+    for (slc_start = frm_start; slc_start + 4 < frm_end; slc_start = slc_end) {
+        slc_end = avpriv_find_start_code(slc_start + 4, frm_end, &stc);
+        if (slc_end < frm_end) {
+            slc_end -= 4;
+        }
+
+        init_get_bits(&h->gb, slc_start, (slc_end - slc_start) * 8);
+        if (!check_for_slice(h)) {
+            break;
+        }
+
+        ret = hwaccel->decode_slice(h->avctx, slc_start, slc_end - slc_start);
+        if (ret < 0) {
+            break;
+        }
+    }
+
+    h->gb = gb;
+    skip_bits(&h->gb, (slc_start - frm_start) * 8);
+
+    if (ret < 0)
+        return ret;
+
+    return hwaccel->end_frame(h->avctx);
+}
 
 /**
  * @brief remove frame out of dpb
@@ -1125,6 +1168,9 @@ static void cavs_frame_unref(AVSFrame *frame)
     if (!frame->f || !frame->f->buf[0])
         return;
 
+    av_buffer_unref(&frame->hwaccel_priv_buf);
+    frame->hwaccel_picture_private = NULL;
+
     av_frame_unref(frame->f);
 }
 
@@ -1219,6 +1265,17 @@ static int decode_pic(AVSContext *h)
     if (ret < 0)
         return ret;
 
+    if (h->avctx->hwaccel) {
+        const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
+        av_assert0(!h->cur.hwaccel_picture_private);
+        if (hwaccel->frame_priv_data_size) {
+            h->cur.hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            if (!h->cur.hwaccel_priv_buf)
+                return AVERROR(ENOMEM);
+            h->cur.hwaccel_picture_private = h->cur.hwaccel_priv_buf->data;
+        }
+    }
+
     if (!h->edge_emu_buffer) {
         int alloc_size = FFALIGN(FFABS(h->cur.f->linesize[0]) + 32, 32);
         h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 24);
@@ -1247,7 +1304,9 @@ static int decode_pic(AVSContext *h)
             av_log(h->avctx, AV_LOG_ERROR, "poc=%d/%d/%d, dist=%d/%d\n",
                     h->DPB[1].poc, h->DPB[0].poc, h->cur.poc, h->dist[0], h->dist[1]);
             av_log(h->avctx, AV_LOG_ERROR, "sym_factor %d too large\n", h->sym_factor);
-            return AVERROR_INVALIDDATA;
+
+            if (!h->avctx->hwaccel)
+                return AVERROR_INVALIDDATA;
         }
     } else {
         h->direct_den[0] = h->dist[0] ? 16384 / h->dist[0] : 0;
@@ -1332,7 +1391,9 @@ static int decode_pic(AVSContext *h)
     }
 
     ret = 0;
-    if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
+    if (h->avctx->hwaccel) {
+        ret = hwaccel_pic(h);
+    } else if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
         do {
             check_for_slice(h);
             ret = decode_mb_i(h, 0);
@@ -1503,6 +1564,20 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
                 return ret;
             avctx->profile = h->profile;
             avctx->level = h->level;
+            if (!h->got_pix_fmt) {
+                h->got_pix_fmt = 1;
+                ret = ff_get_format(avctx, avctx->codec->pix_fmts);
+                if (ret < 0)
+                    return ret;
+
+                avctx->pix_fmt = ret;
+
+                if (h->profile == AV_PROFILE_CAVS_GUANGDIAN && !avctx->hwaccel) {
+                    av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport hardware"
+                                    " accelerated for CAVS Guangdian Profile decoding.\n");
+                    return AVERROR(ENOTSUP);
+                }
+            }
             break;
         case PIC_I_START_CODE:
             if (!h->got_keyframe) {
@@ -1577,6 +1652,14 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
     return (buf_ptr - avpkt->data);
 }
 
+static const enum AVPixelFormat cavs_hwaccel_pixfmt_list_420[] = {
+#if CONFIG_CAVS_VAAPI_HWACCEL
+    AV_PIX_FMT_VAAPI,
+#endif
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_NONE
+};
+
 const FFCodec ff_cavs_decoder = {
     .p.name         = "cavs",
     CODEC_LONG_NAME("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun profile)"),
@@ -1589,4 +1672,12 @@ const FFCodec ff_cavs_decoder = {
     .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = cavs_flush,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .p.pix_fmts     = cavs_hwaccel_pixfmt_list_420,
+    .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_CAVS_VAAPI_HWACCEL
+                        HWACCEL_VAAPI(cavs),
+#endif
+                        NULL
+                    },
+    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_cavs_profiles),
 };
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 5171e4c7d7..a1a973b460 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -89,5 +89,6 @@ extern const struct FFHWAccel ff_wmv3_dxva2_hwaccel;
 extern const struct FFHWAccel ff_wmv3_nvdec_hwaccel;
 extern const struct FFHWAccel ff_wmv3_vaapi_hwaccel;
 extern const struct FFHWAccel ff_wmv3_vdpau_hwaccel;
+extern const struct FFHWAccel ff_cavs_vaapi_hwaccel;
 
 #endif /* AVCODEC_HWACCELS_H */
diff --git a/libavcodec/vaapi_cavs.c b/libavcodec/vaapi_cavs.c
new file mode 100644
index 0000000000..4a7a9b95ad
--- /dev/null
+++ b/libavcodec/vaapi_cavs.c
@@ -0,0 +1,164 @@
+/*
+ * AVS (Chinese GY/T 257.1â€”2012) HW decode acceleration through VA-API
+ * Copyright (c) 2022 JianfengZheng <jianfeng.zheng at mthreads.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
+#include "vaapi_decode.h"
+#include "cavs.h"
+
+/**
+ * @file
+ * This file implements the glue code between FFmpeg's and VA-API's
+ * structures for AVS (Chinese GY/T 257.1â€”2012) decoding.
+ */
+
+static int vaapi_avs_pic_type_cvt(int pict_type) 
+{
+    switch (pict_type)
+    {
+    case AV_PICTURE_TYPE_I: return VA_AVS_I_IMG;
+    case AV_PICTURE_TYPE_P: return VA_AVS_P_IMG;
+    case AV_PICTURE_TYPE_B: return VA_AVS_B_IMG;
+    default:                return VA_AVS_I_IMG;
+    }
+}
+
+static void vaapi_avs_fill_pic(VAPictureAVS *va_pic, const AVSFrame *frame)
+{
+    va_pic->surface_id = ff_vaapi_get_surface_id(frame->f);
+    va_pic->poc = frame->poc / 2;
+}
+
+/** Initialize and start decoding a frame with VA API. */
+static int vaapi_avs_start_frame(AVCodecContext         *avctx,
+                                av_unused const uint8_t *buffer,
+                                av_unused uint32_t       size)
+{
+    int i, err;
+    AVSContext   *h   = avctx->priv_data;
+    VAPictureParameterBufferAVS pic_param = {};
+    VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
+    vapic->output_surface = ff_vaapi_get_surface_id(h->cur.f);
+
+    pic_param = (VAPictureParameterBufferAVS) {
+        .width = h->width,
+        .height = h->height,
+        .picture_type = vaapi_avs_pic_type_cvt(h->cur.f->pict_type),
+        .progressive_seq_flag = h->progressive_seq,
+        .progressive_frame_flag = h->progressive_frame,
+        .picture_structure_flag = h->pic_structure,
+        .fixed_pic_qp_flag = h->qp_fixed,
+        .picture_qp = h->qp,
+        .loop_filter_disable_flag = h->loop_filter_disable,
+        .alpha_c_offset = h->alpha_offset,
+        .beta_offset = h->beta_offset,
+        .skip_mode_flag_flag = h->skip_mode_flag,
+        .picture_reference_flag = h->ref_flag,
+    };
+
+    if (h->profile == 0x48) {
+		pic_param.guangdian_fields.guangdian_flag = 1;
+        pic_param.guangdian_fields.aec_flag = h->aec_flag;
+        pic_param.guangdian_fields.weight_quant_flag = h->weight_quant_flag;
+        pic_param.guangdian_fields.chroma_quant_param_delta_cb = h->chroma_quant_param_delta_cb;
+        pic_param.guangdian_fields.chroma_quant_param_delta_cr = h->chroma_quant_param_delta_cr;
+        memcpy(pic_param.guangdian_fields.wqm_8x8, h->wqm_8x8, 64);
+    }
+
+    vaapi_avs_fill_pic(&pic_param.curr_pic, &h->cur);
+    for (i = 0; i < 2; i++) {
+        vaapi_avs_fill_pic(&pic_param.ref_list[i], &h->DPB[i]);
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, vapic,
+                                            VAPictureParameterBufferType,
+                                            &pic_param, sizeof(pic_param));
+    if (err < 0)
+        goto fail;
+
+    return 0;
+fail:
+    ff_vaapi_decode_cancel(avctx, vapic);
+    return err;
+}
+
+/** End a hardware decoding based frame. */
+static int vaapi_avs_end_frame(AVCodecContext *avctx)
+{
+    AVSContext *h = avctx->priv_data;
+    VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
+    return ff_vaapi_decode_issue(avctx, vapic);
+}
+
+/** Decode the given H.264 slice with VA API. */
+static int vaapi_avs_decode_slice(AVCodecContext *avctx,
+                                   const uint8_t  *buffer,
+                                   uint32_t        size)
+{
+    int err;
+    AVSContext *h = avctx->priv_data;
+    VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
+    VASliceParameterBufferAVS slice_param;
+    slice_param = (VASliceParameterBufferAVS) {
+        .slice_data_size        = size,
+        .slice_data_offset      = 0,
+        .slice_data_flag        = VA_SLICE_DATA_FLAG_ALL,
+        .mb_data_bit_offset     = get_bits_count(&h->gb),
+        .slice_vertical_pos     = h->stc,
+        .fixed_slice_qp_flag    = h->qp_fixed,
+        .slice_qp               = h->qp,
+        .slice_weight_pred_flag = h->slice_weight_pred_flag,
+        .mb_weight_pred_flag    = h->mb_weight_pred_flag,
+    };
+
+    *((uint32_t *)slice_param.luma_scale) = *((uint32_t *)h->luma_scale);
+    *((uint32_t *)slice_param.luma_shift) = *((uint32_t *)h->luma_shift);
+    *((uint32_t *)slice_param.chroma_scale) = *((uint32_t *)h->chroma_scale);
+    *((uint32_t *)slice_param.chroma_shift) = *((uint32_t *)h->chroma_shift);
+
+    err = ff_vaapi_decode_make_slice_buffer(avctx, vapic,
+                                            &slice_param, sizeof(slice_param),
+                                            buffer, size);
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, vapic);
+    return err;
+}
+
+const FFHWAccel ff_cavs_vaapi_hwaccel = {
+    .p.name                 = "cavs_vaapi",
+    .p.type                 = AVMEDIA_TYPE_VIDEO,
+    .p.id                   = AV_CODEC_ID_CAVS,
+    .p.pix_fmt              = AV_PIX_FMT_VAAPI,
+    .start_frame          = &vaapi_avs_start_frame,
+    .end_frame            = &vaapi_avs_end_frame,
+    .decode_slice         = &vaapi_avs_decode_slice,
+    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
+    .init                 = &ff_vaapi_decode_init,
+    .uninit               = &ff_vaapi_decode_uninit,
+    .frame_params         = &ff_vaapi_common_frame_params,
+    .priv_data_size       = sizeof(VAAPIDecodeContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
+};
diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index ceac769c52..13a3f6aa42 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c
@@ -408,6 +408,10 @@ static const struct {
                            H264ConstrainedBaseline),
     MAP(H264,        H264_MAIN,       H264Main    ),
     MAP(H264,        H264_HIGH,       H264High    ),
+#if HAVE_VA_PROFILE_AVS
+    MAP(CAVS,        CAVS_JIZHUN,     AVSJizhun   ),
+    MAP(CAVS,        CAVS_GUANGDIAN,  AVSGuangdian),
+#endif
 #if VA_CHECK_VERSION(0, 37, 0)
     MAP(HEVC,        HEVC_MAIN,       HEVCMain    ),
     MAP(HEVC,        HEVC_MAIN_10,    HEVCMain10  ),
-- 
2.25.1