[FFmpeg-devel] [PATCH v3 4/4] lavc, doc: add libuavs3d video decoder wrapper

Sat Aug 8 20:00:12 EEST 2020

At 2020-08-06 05:21:43, "James Almer" <jamrial at gmail.com> wrote:
>On 8/5/2020 1:18 PM, hwrenx at 126.com wrote:
>> From: hwren <hwrenx at 126.com>
>> 
>> Signed-off-by: hbj <hanbj at pku.edu.cn>
>> Signed-off-by: hwren <hwrenx at 126.com>
>> ---
>>  Changelog              |   1 +
>>  configure              |   4 +
>>  doc/decoders.texi      |  21 +++
>>  doc/general.texi       |   8 ++
>>  libavcodec/Makefile    |   1 +
>>  libavcodec/allcodecs.c |   1 +
>>  libavcodec/libuavs3d.c | 283 +++++++++++++++++++++++++++++++++++++++++
>>  7 files changed, 319 insertions(+)
>>  create mode 100644 libavcodec/libuavs3d.c
>> 
>> diff --git a/Changelog b/Changelog
>> index a60e7d2eb8..dfd56b3fc6 100644
>> --- a/Changelog
>> +++ b/Changelog
>> @@ -4,6 +4,7 @@ releases are sorted from youngest to oldest.
>>  version <next>:
>>  - AudioToolbox output device
>>  - MacCaption demuxer
>> +- AVS3 video decoder via libuavs3d
>>  
>>  
>>  version 4.3:
>> diff --git a/configure b/configure
>> index 7495f35faa..7340bc4a35 100755
>> --- a/configure
>> +++ b/configure
>> @@ -274,6 +274,7 @@ External library support:
>>    --enable-libtls          enable LibreSSL (via libtls), needed for https support
>>                             if openssl, gnutls or mbedtls is not used [no]
>>    --enable-libtwolame      enable MP2 encoding via libtwolame [no]
>> +  --enable-libuavs3d       enable AVS3 decoding via libuavs3d [no]
>>    --enable-libv4l2         enable libv4l2/v4l-utils [no]
>>    --enable-libvidstab      enable video stabilization using vid.stab [no]
>>    --enable-libvmaf         enable vmaf filter via libvmaf [no]
>> @@ -1807,6 +1808,7 @@ EXTERNAL_LIBRARY_LIST="
>>      libtesseract
>>      libtheora
>>      libtwolame
>> +    libuavs3d
>>      libv4l2
>>      libvorbis
>>      libvpx
>> @@ -3242,6 +3244,7 @@ libspeex_encoder_deps="libspeex"
>>  libspeex_encoder_select="audio_frame_queue"
>>  libtheora_encoder_deps="libtheora"
>>  libtwolame_encoder_deps="libtwolame"
>> +libuavs3d_decoder_deps="libuavs3d"
>>  libvo_amrwbenc_encoder_deps="libvo_amrwbenc"
>>  libvorbis_decoder_deps="libvorbis"
>>  libvorbis_encoder_deps="libvorbis libvorbisenc"
>> @@ -6379,6 +6382,7 @@ enabled libtls            && require_pkg_config libtls libtls tls.h tls_configur
>>  enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame &&
>>                               { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
>>                                 die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
>> +enabled libuavs3d         && require_pkg_config libuavs3d uavs3d uavs3d.h uavs3d_decode
>>  enabled libv4l2           && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
>>  enabled libvidstab        && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
>>  enabled libvmaf           && require_pkg_config libvmaf "libvmaf >= 1.3.9" libvmaf.h compute_vmaf
>> diff --git a/doc/decoders.texi b/doc/decoders.texi
>> index 9005714e3c..f1a0b3c36e 100644
>> --- a/doc/decoders.texi
>> +++ b/doc/decoders.texi
>> @@ -86,6 +86,27 @@ AVS2-P2/IEEE1857.4 video decoder wrapper.
>>  
>>  This decoder allows libavcodec to decode AVS2 streams with davs2 library.
>>  
>> + at c man end VIDEO DECODERS
>> + 
>> + at section libuavs3d
>> +
>> +AVS3-P2/IEEE1857.10 video decoder.
>> +
>> +libuavs3d allows libavcodec to decode AVS3 streams.
>> +Requires the presence of the libuavs3d headers and library during configuration.
>> +You need to explicitly configure the build with @code{--enable-libuavs3d}.
>> +
>> + at subsection Options
>> +
>> +The following option is supported by the libuavs3d wrapper.
>> +
>> + at table @option
>> +
>> + at item frame_threads
>> +Set amount of frame threads to use during decoding. The default value is 0 (autodetect).
>> +
>> + at end table
>> +
>>  @c man end VIDEO DECODERS
>>  
>>  @chapter Audio Decoders
>> diff --git a/doc/general.texi b/doc/general.texi
>> index 9b0ee96752..6d673b74e1 100644
>> --- a/doc/general.texi
>> +++ b/doc/general.texi
>> @@ -125,6 +125,14 @@ Go to @url{https://github.com/pkuvcl/davs2} and follow the instructions for
>>  installing the library. Then pass @code{--enable-libdavs2} to configure to
>>  enable it.
>>  
>> + at section uavs3d
>> +
>> +FFmpeg can make use of the uavs3d library for AVS3-P2/IEEE1857.10 video decoding.
>> +
>> +Go to @url{https://github.com/uavs3/uavs3d} and follow the instructions for
>> +installing the library. Then pass @code{--enable-libuavs3d} to configure to
>> +enable it.
>> +
>>  @float NOTE
>>  libdavs2 is under the GNU Public License Version 2 or later
>>  (see @url{http://www.gnu.org/licenses/old-licenses/gpl-2.0.html} for
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index f1512779be..491485f3c0 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1026,6 +1026,7 @@ OBJS-$(CONFIG_LIBSPEEX_DECODER)           += libspeexdec.o
>>  OBJS-$(CONFIG_LIBSPEEX_ENCODER)           += libspeexenc.o
>>  OBJS-$(CONFIG_LIBTHEORA_ENCODER)          += libtheoraenc.o
>>  OBJS-$(CONFIG_LIBTWOLAME_ENCODER)         += libtwolame.o
>> +OBJS-$(CONFIG_LIBUAVS3D_DECODER)          += libuavs3d.o
>>  OBJS-$(CONFIG_LIBVO_AMRWBENC_ENCODER)     += libvo-amrwbenc.o
>>  OBJS-$(CONFIG_LIBVORBIS_DECODER)          += libvorbisdec.o
>>  OBJS-$(CONFIG_LIBVORBIS_ENCODER)          += libvorbisenc.o \
>> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
>> index 80f128cade..3d2d0af87a 100644
>> --- a/libavcodec/allcodecs.c
>> +++ b/libavcodec/allcodecs.c
>> @@ -730,6 +730,7 @@ extern AVCodec ff_libspeex_encoder;
>>  extern AVCodec ff_libspeex_decoder;
>>  extern AVCodec ff_libtheora_encoder;
>>  extern AVCodec ff_libtwolame_encoder;
>> +extern AVCodec ff_libuavs3d_decoder;
>>  extern AVCodec ff_libvo_amrwbenc_encoder;
>>  extern AVCodec ff_libvorbis_encoder;
>>  extern AVCodec ff_libvorbis_decoder;
>> diff --git a/libavcodec/libuavs3d.c b/libavcodec/libuavs3d.c
>> new file mode 100644
>> index 0000000000..c0d89cd1ce
>> --- /dev/null
>> +++ b/libavcodec/libuavs3d.c
>> @@ -0,0 +1,283 @@
>> +/*
>> + * RAW AVS3-P2/IEEE1857.10 video demuxer
>> + * Copyright (c) 2020 Zhenyu Wang <wangzhenyu at pkusz.edu.cn>
>> + *                    Bingjie Han <hanbj at pkusz.edu.cn>
>> + *                    Huiwen Ren  <hwrenx at gmail.com>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "libavutil/avassert.h"
>> +#include "libavutil/avutil.h"
>> +#include "libavutil/common.h"
>> +#include "libavutil/imgutils.h"
>> +#include "libavutil/opt.h"
>> +#include "avcodec.h"
>> +#include "internal.h"
>> +#include "uavs3d.h"
>> +
>> +#define UAVS3D_MAX_FRAME_THREADS 48 
>> +
>> +static const int color_primaries_tab[10] = {
>> +    AVCOL_PRI_RESERVED0   ,    // 0
>> +    AVCOL_PRI_BT709       ,    // 1
>> +    AVCOL_PRI_UNSPECIFIED ,    // 2
>> +    AVCOL_PRI_RESERVED    ,    // 3
>> +    AVCOL_PRI_BT470M      ,    // 4
>> +    AVCOL_PRI_BT470BG      ,    // 5
>> +    AVCOL_PRI_SMPTE170M   ,    // 6
>> +    AVCOL_PRI_SMPTE240M   ,    // 7
>> +    AVCOL_PRI_FILM        ,    // 8
>> +    AVCOL_PRI_BT2020           // 9  
>> +};
>> +    
>> +static const int color_transfer_tab[15] = {
>> +    AVCOL_TRC_RESERVED0    , // 0
>> +    AVCOL_TRC_BT709        , // 1
>> +    AVCOL_TRC_UNSPECIFIED  , // 2
>> +    AVCOL_TRC_RESERVED     , // 3
>> +    AVCOL_TRC_GAMMA22      , // 4
>> +    AVCOL_TRC_GAMMA28      , // 5
>> +    AVCOL_TRC_SMPTE170M    , // 6
>> +    AVCOL_TRC_SMPTE240M    , // 7
>> +    AVCOL_TRC_LINEAR       , // 8
>> +    AVCOL_TRC_LOG          , // 9
>> +    AVCOL_TRC_LOG_SQRT     , // 10
>> +    AVCOL_TRC_BT2020_12    , // 11
>> +    AVCOL_TRC_SMPTE2084    , // 12
>> +    AVCOL_TRC_UNSPECIFIED  , // 13
>> +    AVCOL_TRC_ARIB_STD_B67   // 14
>> +};
>> +
>> +static const int color_matrix_tab[12] = {
>> +    AVCOL_SPC_RESERVED     , // 0
>> +    AVCOL_SPC_BT709        , // 1
>> +    AVCOL_SPC_UNSPECIFIED  , // 2
>> +    AVCOL_SPC_RESERVED     , // 3
>> +    AVCOL_SPC_FCC          , // 4
>> +    AVCOL_SPC_BT470BG      , // 5
>> +    AVCOL_SPC_SMPTE170M    , // 6
>> +    AVCOL_SPC_SMPTE240M    , // 7
>> +    AVCOL_SPC_BT2020_NCL   , // 8
>> +    AVCOL_SPC_BT2020_CL    , // 9
>> +    AVCOL_SPC_UNSPECIFIED  , // 10
>> +    AVCOL_SPC_UNSPECIFIED    // 11
>> +};
>> +
>> +static const enum AVPictureType IMGTYPE[8] = {
>> +    AV_PICTURE_TYPE_NONE,
>> +    AV_PICTURE_TYPE_I,
>> +    AV_PICTURE_TYPE_P,
>> +    AV_PICTURE_TYPE_B
>> +};
>> +
>> +typedef struct UAVS3DContext {
>> +    AVCodecContext  *avctx;
>> +    void            *dec_handle;
>> +    int              frame_threads;
>> +    int              got_seqhdr;
>> +    uavs3d_io_frm_t  dec_frame;
>> +} UAVS3DContext;
>> +
>> +
>> +static int uavs3d_find_next_start_code(const unsigned char *bs_data, int bs_len, int *left)
>> +{
>> +    const unsigned char *data_ptr = bs_data + 4;
>> +    int count = bs_len - 4;
>> +
>> +    while (count >= 4 &&
>> +        ((*(unsigned int *)data_ptr) != 0xB6010000) && /* P/B picture */
>> +        ((*(unsigned int *)data_ptr) != 0xB3010000) && /* I   picture */
>> +        ((*(unsigned int *)data_ptr) != 0xB0010000) && /* sequence header */
>> +        ((*(unsigned int *)data_ptr) != 0x00010000) && /* first slice */
>> +        ((*(unsigned int *)data_ptr) != 0xB1010000)) { /* sequence end */
>> +        data_ptr++;
>> +        count--;
>> +    }
>> +
>> +    if (count >= 4) {
>> +        *left = count; 
>> +        return 1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void uavs3d_output_callback(uavs3d_io_frm_t *dec_frame) {
>> +    uavs3d_io_frm_t frm_out;
>> +    AVFrame *frm = (AVFrame *)dec_frame->priv;
>> +    int i;
>> +
>> +    if (!frm) {
>> +        return;
>> +    }
>> +    
>> +    frm->pts       = dec_frame->pts;
>> +    frm->pkt_dts   = dec_frame->dts;
>> +    frm->pict_type = IMGTYPE[dec_frame->type];
>> +    frm->key_frame = (frm->pict_type == AV_PICTURE_TYPE_I);
>> +
>> +    for (i = 0; i < 3; i++) {
>> +        frm_out.width [i] = dec_frame->width[i];
>> +        frm_out.height[i] = dec_frame->height[i];
>> +        frm_out.stride[i] = frm->linesize[i];
>> +        frm_out.buffer[i] = frm->data[i]; 
>> +    }
>> +
>> +    uavs3d_img_cpy_cvt(&frm_out, dec_frame, dec_frame->bit_depth);
>> +}
>> +
>> +static av_cold int libuavs3d_init(AVCodecContext *avctx)
>> +{
>> +    UAVS3DContext *h = avctx->priv_data;
>> +    uavs3d_cfg_t cdsc;
>> +
>> +    cdsc.frm_threads = FFMIN(h->frame_threads > 0 ? h->frame_threads : av_cpu_count(), UAVS3D_MAX_FRAME_THREADS);
>> +    cdsc.check_md5 = 0;
>> +    h->dec_handle = uavs3d_create(&cdsc, uavs3d_output_callback, NULL);
>> +    h->got_seqhdr = 0;
>> + 
>> +    return 0;
>> +}
>> +
>> +static av_cold int libuavs3d_end(AVCodecContext *avctx)
>> +{
>> +    UAVS3DContext *h = avctx->priv_data;
>> +    
>> +    if (h->dec_handle) {
>> +        uavs3d_flush(h->dec_handle, NULL);
>> +        uavs3d_delete(h->dec_handle);
>> +        h->dec_handle = NULL;
>> +    }
>> +    h->got_seqhdr = 0;
>> +    
>> +    return 0;
>> +}
>> +
>> +static void libuavs3d_flush(AVCodecContext * avctx)
>> +{
>> +    UAVS3DContext *h = avctx->priv_data;
>> +    uavs3d_cfg_t cdsc;
>> +    cdsc.frm_threads = FFMIN(h->frame_threads > 0 ? h->frame_threads : av_cpu_count(), UAVS3D_MAX_FRAME_THREADS);
>> +    cdsc.check_md5 = 0;
>> +
>> +    if (h->dec_handle) {
>> +        uavs3d_flush(h->dec_handle, NULL);
>> +        uavs3d_delete(h->dec_handle);
>> +    }
>> +     
>> +    h->dec_handle = uavs3d_create(&cdsc, uavs3d_output_callback, NULL);
>
>Is it really necessary to destroy and create the decoder context on
>every seek? It feels slow and inefficient.

No, it is redundant. I will fix it in the next version. Thanks for pointing it out.

>
>> +    h->got_seqhdr = 0;
>> +}
>> +
>> +static int libuavs3d_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
>> +{
>
>Looking at this function, it seems that this decoder would be better
>implemented using the decoupled input/output AVCodec.receive_frame API
>instead of the AVCodec.decode one like you're doing here, as it lets you
>request for packets as you need them.
>
>See binkaudio and libdav1d decoders for example implementations: You
>fetch one packet, keep it around and return frames until it's fully
>consumed, then attempt to fetch another and repeat the process.
>

I am not sure whether I correctly understand the difference between AVCodec.receive_frame
and AVCodec.decode. It seems that both AVCodec.receive_frame and AVCodec.decode
are used in the function decode_receive_frame_internal in lavc/decode.c.

When we use AVCodec.decode, we have to wait until the frame buffer is filled
(which means we got a frame). If we use AVCodec.receive_frame, we fetch one packet,
send it to the decoder directly, then request output from the decoder. Considering that the
decode_receive_frame function should get one decoded frame in each receive call,
and that not all AVS3 packets contain frame data, these two functions would be equivalent
in some respects.

I mean, both of them fetch and send a packet, and wait for output. Because an AVS3 packet
may contain zero or one frame of data, the libuavs3d_receive_frame function would still need to wait,
and might end up with exactly the same form as decode_simple_receive_frame, which is the upper-level
wrapper around AVCodec.decode in lavc/decode.c.

So... I have not yet understood how to optimize this part, or under which conditions
receive_frame should be used. Please tell me more, and correct me if I have misunderstood anything. Thanks.

>> +    UAVS3DContext *h = avctx->priv_data;
>> +    const uint8_t *buf = avpkt->data;
>> +    int buf_size = avpkt->size;
>> +    const uint8_t *buf_end;
>> +    const uint8_t *buf_ptr;
>> +    AVFrame *frm = (AVFrame*)data;
>> +    int left_bytes;
>> +    int ret, finish = 0;
>> +
>> +    *got_frame = 0;
>> +    frm->pts = -1;
>> +    frm->pict_type = AV_PICTURE_TYPE_NONE;
>> +
>> +    if (h->got_seqhdr) {
>> +        if (!frm->data[0] && (ret = ff_get_buffer(avctx, frm, 0)) < 0) {
>> +            return ret;
>> +        } 
>> +        h->dec_frame.priv = data;   // AVFrame
>> +    }
>> +
>> +    if (!buf_size) {
>> +        do {
>> +            ret = uavs3d_flush(h->dec_handle, &h->dec_frame);
>> +        } while (ret > 0 && !h->dec_frame.got_pic);
>> +    } else {
>> +        buf_ptr = buf;
>> +        buf_end = buf + buf_size;
>> +  
>> +        while (!finish) {
>> +            int bs_len;
>> +            uavs3d_io_frm_t *frm_dec = &h->dec_frame;
>> +
>> +            if (uavs3d_find_next_start_code(buf_ptr, buf_end - buf_ptr, &left_bytes)) {
>> +                bs_len = buf_end - buf_ptr - left_bytes;
>> +            } else { 
>> +                bs_len = buf_end - buf_ptr;
>> +                finish = 1;
>> +            }
>> +            frm_dec->bs = (unsigned char *)buf_ptr;
>> +            frm_dec->bs_len = bs_len;
>> +            frm_dec->pts = avpkt->pts;
>> +            frm_dec->dts = avpkt->dts;
>> +            uavs3d_decode(h->dec_handle, frm_dec);
>> +            buf_ptr += bs_len;
>> +
>> +            if (frm_dec->nal_type == NAL_SEQ_HEADER) {
>> +                static const int avs3_fps_num[9] = {0, 240000, 24, 25, 30000, 30, 50, 60000, 60 };
>> +                static const int avs3_fps_den[9] = {1,   1001,  1,  1,  1001,  1,  1,  1001,  1 };
>> +                avctx->framerate.num = avs3_fps_num[frm_dec->seqhdr->frame_rate_code];
>> +                avctx->framerate.den = avs3_fps_den[frm_dec->seqhdr->frame_rate_code];
>> +                avctx->has_b_frames = 1;
>> +                avctx->pix_fmt = frm_dec->seqhdr->bit_depth_internal == 8 ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV420P10LE;
>> +                ff_set_dimensions(avctx, frm_dec->seqhdr->horizontal_size, frm_dec->seqhdr->vertical_size);
>> +                h->got_seqhdr = 1;
>> +            }
>> +            if (frm_dec->got_pic) {
>> +                break;
>> +            }
>> +        }
>> +    }
>> +
>> +    *got_frame = h->dec_frame.got_pic;
>> +
>> +    return buf_ptr - buf;
>> +}
>> +
>> +#define UAVS3D_OFFSET(x) offsetof(UAVS3DContext, x)
>> +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
>> +static const AVOption options[] = {
>> +    { "frame_threads",      "number of frame-level threads ", UAVS3D_OFFSET(frame_threads),  AV_OPT_TYPE_INT,    {.i64 =  0 }, 0, UAVS3D_MAX_FRAME_THREADS, VE },
>
>No need for a custom option and UAVS3DContext field, you have
>avctx->threads for this.
>Just add the AV_CODEC_CAP_AUTO_THREADS capability to
>ff_libuavs3d_decoder and it will default to 0.

Will be fixed. Thanks.

>
>> +    { NULL }
>> +};
>> +static const AVClass libuavs3d_class = {
>> +    .class_name = "libuavs3d_class",
>> +    .item_name  = av_default_item_name,
>> +    .option     = options,
>> +    .version    = LIBAVUTIL_VERSION_INT,
>> +};
>> +
>> +AVCodec ff_libuavs3d_decoder = {
>> +    .name           = "libuavs3d",
>> +    .long_name      = NULL_IF_CONFIG_SMALL("uavs3d AVS3-P2/IEEE1857.10 decoder"),
>> +    .type           = AVMEDIA_TYPE_VIDEO,
>> +    .id             = AV_CODEC_ID_AVS3,
>> +    .priv_data_size = sizeof(UAVS3DContext),
>> +    .priv_class     = &libuavs3d_class,
>> +    .init           = libuavs3d_init,
>> +    .close          = libuavs3d_end,
>> +    .decode         = libuavs3d_decode_frame,
>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>> +    .flush          = libuavs3d_flush,
>> +    .pix_fmts        = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_NONE },
>> +    
>> +    .wrapper_name   = "libuavs3d",
>> +};
>> 
>
>_______________________________________________
>ffmpeg-devel mailing list
>ffmpeg-devel at ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email
>ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".