[FFmpeg-devel] [PATCH v5] Added Turing codec interface for ffmpeg

Wed Feb 8 15:05:40 EET 2017

On 08/02/17 08:41, Saverio Blasi wrote:
> - This patch contains the changes to interface the Turing codec (http://turingcodec.org/) with ffmpeg. The patch was modified to address the comments in the review as follows:
>   - Added a pkg-config file to list all dependencies required by libturing. This should address the issue pointed out by Hendrik Leppkes on Fri 18/11/2016
>   - As per suggestions of wm4, two functions (add_option and finalise_options) have been created. The former appends new options while the latter sets up the argv array of pointers to char* accordingly. add_option re-allocates the buffer for options using av_realloc
>   - Additionally, both these functions handle the errors in case the memory wasn't allocated correctly
>   - malloc|free|realloc have been substituted with their corresponding av_{malloc|free|realloc} version
>   - Check on bit-depth has been removed since the ffmpeg already casts the right pix_fmt and bit depth
>   - pix_fmts is now set in ff_libturing_encoder as in h264dec.c.
>   - Added av_freep to release the allocated memory
>   - Added brackets to single-line operators
> ---
>  LICENSE.md             |   1 +
>  configure              |   5 +
>  libavcodec/Makefile    |   1 +
>  libavcodec/allcodecs.c |   1 +
>  libavcodec/libturing.c | 320 +++++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 328 insertions(+)
>  create mode 100644 libavcodec/libturing.c
> 
> diff --git a/LICENSE.md b/LICENSE.md
> index 640633c..86e5371 100644
> --- a/LICENSE.md
> +++ b/LICENSE.md
> @@ -85,6 +85,7 @@ The following libraries are under GPL:
>  - frei0r
>  - libcdio
>  - librubberband
> +- libturing
>  - libvidstab
>  - libx264
>  - libx265
> diff --git a/configure b/configure
> index 7154142..a27cb8b 100755
> --- a/configure
> +++ b/configure
> @@ -255,6 +255,7 @@ External library support:
>    --enable-libssh          enable SFTP protocol via libssh [no]
>    --enable-libtesseract    enable Tesseract, needed for ocr filter [no]
>    --enable-libtheora       enable Theora encoding via libtheora [no]
> +  --enable-libturing       enable H.265/HEVC encoding via libturing [no]
>    --enable-libtwolame      enable MP2 encoding via libtwolame [no]
>    --enable-libv4l2         enable libv4l2/v4l-utils [no]
>    --enable-libvidstab      enable video stabilization using vid.stab [no]
> @@ -1562,6 +1563,7 @@ EXTERNAL_LIBRARY_LIST="
>      libssh
>      libtesseract
>      libtheora
> +    libturing
>      libtwolame
>      libv4l2
>      libvidstab
> @@ -2858,6 +2860,7 @@ libspeex_decoder_deps="libspeex"
>  libspeex_encoder_deps="libspeex"
>  libspeex_encoder_select="audio_frame_queue"
>  libtheora_encoder_deps="libtheora"
> +libturing_encoder_deps="libturing"
>  libtwolame_encoder_deps="libtwolame"
>  libvo_amrwbenc_encoder_deps="libvo_amrwbenc"
>  libvorbis_decoder_deps="libvorbis"
> @@ -5131,6 +5134,7 @@ die_license_disabled gpl frei0r
>  die_license_disabled gpl libcdio
>  die_license_disabled gpl librubberband
>  die_license_disabled gpl libsmbclient
> +die_license_disabled gpl libturing
>  die_license_disabled gpl libvidstab
>  die_license_disabled gpl libx264
>  die_license_disabled gpl libx265
> @@ -5789,6 +5793,7 @@ enabled libssh            && require_pkg_config libssh libssh/sftp.h sftp_init
>  enabled libspeex          && require_pkg_config speex speex/speex.h speex_decoder_init -lspeex
>  enabled libtesseract      && require_pkg_config tesseract tesseract/capi.h TessBaseAPICreate
>  enabled libtheora         && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg
> +enabled libturing         && require_pkg_config libturing turing.h turing_version
>  enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame &&
>                               { check_lib twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
>                                 die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 43a6add..51a0662 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -884,6 +884,7 @@ OBJS-$(CONFIG_LIBSPEEX_DECODER)           += libspeexdec.o
>  OBJS-$(CONFIG_LIBSPEEX_ENCODER)           += libspeexenc.o
>  OBJS-$(CONFIG_LIBTHEORA_ENCODER)          += libtheoraenc.o
>  OBJS-$(CONFIG_LIBTWOLAME_ENCODER)         += libtwolame.o
> +OBJS-$(CONFIG_LIBTURING_ENCODER)          += libturing.o

'U' < 'W' (the ones above in configure get the order right).

>  OBJS-$(CONFIG_LIBVO_AMRWBENC_ENCODER)     += libvo-amrwbenc.o
>  OBJS-$(CONFIG_LIBVORBIS_DECODER)          += libvorbisdec.o
>  OBJS-$(CONFIG_LIBVORBIS_ENCODER)          += libvorbisenc.o \
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index f92b2b7..18fc026 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -616,6 +616,7 @@ void avcodec_register_all(void)
>      REGISTER_ENCDEC (LIBSPEEX,          libspeex);
>      REGISTER_ENCODER(LIBTHEORA,         libtheora);
>      REGISTER_ENCODER(LIBTWOLAME,        libtwolame);
> +    REGISTER_ENCODER(LIBTURING,         libturing);

Same.

>      REGISTER_ENCODER(LIBVO_AMRWBENC,    libvo_amrwbenc);
>      REGISTER_ENCDEC (LIBVORBIS,         libvorbis);
>      REGISTER_ENCDEC (LIBVPX_VP8,        libvpx_vp8);
> diff --git a/libavcodec/libturing.c b/libavcodec/libturing.c
> new file mode 100644
> index 0000000..04d9982
> --- /dev/null
> +++ b/libavcodec/libturing.c
> @@ -0,0 +1,320 @@
> +/*
> + * libturing encoder
> + *
> + * Copyright (c) 2016 Turing Codec contributors

It's now 2017.

> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <turing.h>
> +#include <float.h>

What is float.h being included for here?

> +#include "libavutil/internal.h"
> +#include "libavutil/common.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "avcodec.h"
> +#include "internal.h"
> +
> +#define MAX_OPTION_LENGTH 256
> +
> +typedef struct libturingEncodeContext {
> +    const AVClass *class;
> +    turing_encoder *encoder;
> +    const char *options;
> +} libturingEncodeContext;
> +
> +typedef struct optionContext {
> +    char** argv;
> +    char* options;
> +    char* s;
> +    int options_buffer_size;
> +    int buffer_filled;
> +    int options_added;
> +} optionContext;
> +
> +static av_cold int libturing_encode_close(AVCodecContext *avctx)
> +{
> +    libturingEncodeContext *ctx = avctx->priv_data;
> +
> +    if (ctx->encoder) {
> +        turing_destroy_encoder(ctx->encoder);
> +    }
> +    return 0;
> +}
> +
> +static av_cold int add_option(const char* current_option, optionContext* option_ctx)
> +{
> +    int option_length = strlen(current_option);
> +    char *temp_ptr;
> +    if (option_ctx->buffer_filled + option_length + 1 > option_ctx->options_buffer_size) {
> +        option_ctx->options_buffer_size += option_length + 1;
> +        temp_ptr = av_realloc(option_ctx->options, option_ctx->options_buffer_size);
> +        if (temp_ptr == NULL) {
> +            return AVERROR(ENOMEM);
> +        }
> +        option_ctx->options = temp_ptr;
> +        option_ctx->s = option_ctx->options + option_ctx->buffer_filled;
> +    }
> +    strcpy(option_ctx->s, current_option);
> +    option_ctx->s += 1 + option_length;
> +    option_ctx->options_added++;
> +    option_ctx->buffer_filled += option_length + 1;
> +    return 0;
> +}
> +
> +static av_cold int finalise_options(optionContext* option_ctx)
> +{
> +    if (option_ctx->options_added) {
> +        char *p;
> +        option_ctx->argv = av_malloc(option_ctx->options_added * sizeof(char*));
> +        if (option_ctx->argv == NULL) {
> +            return AVERROR(ENOMEM);
> +        }
> +        p = option_ctx->options;
> +        for(int option_idx=0; option_idx<option_ctx->options_added; option_idx++) {
> +            option_ctx->argv[option_idx] = p;
> +            p += strlen(p) + 1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +static av_cold int libturing_encode_init(AVCodecContext *avctx)
> +{
> +    libturingEncodeContext *ctx = avctx->priv_data;
> +    const int bit_depth = av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth;
> +
> +    optionContext encoder_options;
> +    turing_encoder_settings settings;
> +    char option_string[MAX_OPTION_LENGTH];
> +    double frame_rate;
> +
> +    frame_rate = (double)avctx->time_base.den / (avctx->time_base.num * avctx->ticks_per_frame);
> +
> +    encoder_options.buffer_filled = 0;
> +    encoder_options.options_added = 0;
> +    encoder_options.options_buffer_size =  strlen("turing") + 1;
> +    encoder_options.options = av_malloc(encoder_options.options_buffer_size);
> +    if(encoder_options.options == NULL) {
> +        av_log(avctx, AV_LOG_ERROR, "Cannot allocate the memory for command line options\n");
> +        return AVERROR(ENOMEM);
> +    }
> +    encoder_options.s = encoder_options.options;
> +    encoder_options.argv = NULL;
> +
> +    // Add baseline command line options
> +    if (add_option("turing", &encoder_options)) {
> +        goto optionfail;
> +    }
> +
> +    if (add_option("--frames=0", &encoder_options)) {
> +        goto optionfail;
> +    }
> +
> +    snprintf(option_string, MAX_OPTION_LENGTH, "--input-res=%dx%d", avctx->width, avctx->height);
> +    if (add_option(option_string, &encoder_options)) {
> +        goto optionfail;
> +    }
> +
> +    snprintf(option_string, MAX_OPTION_LENGTH, "--frame-rate=%f", frame_rate);
> +    if (add_option(option_string, &encoder_options)) {
> +        goto optionfail;
> +    }
> +
> +    snprintf(option_string, MAX_OPTION_LENGTH, "--bit-depth=%d", bit_depth);
> +    if (add_option(option_string, &encoder_options)) {
> +        goto optionfail;
> +    }
> +
> +    if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
> +        int sar_num, sar_den;
> +
> +        av_reduce(&sar_num, &sar_den,
> +            avctx->sample_aspect_ratio.num,
> +            avctx->sample_aspect_ratio.den, 65535);
> +        snprintf(option_string, MAX_OPTION_LENGTH, "--sar=%d:%d", sar_num, sar_den);
> +        if (add_option(option_string, &encoder_options)) {
> +            goto optionfail;
> +        }
> +    }

Would it be possible to add a few more of the standard options here to avoid some normal cases having to pass special libturing-only options?

I'm thinking of the following AVCodecContext fields:
* gop_size (-g in ffmpeg.c)
* global_quality (-q/-global_quality, for constant-quality mode)
* profile (-profile, but see other encoders because they often make it a private option)
* rc_{min,max}_rate (-{min,max}rate, for VBR modes)
* rc_buffer_size (-bufsize, for the HRD)

(Though do ignore any of these which don't make sense in your library.)

> +
> +    // Parse additional command line options
> +    if (ctx->options) {
> +        AVDictionary *dict = NULL;
> +        AVDictionaryEntry *en = NULL;
> +
> +        if (!av_dict_parse_string(&dict, ctx->options, "=", ":", 0)) {
> +            while ((en = av_dict_get(dict, "", en, AV_DICT_IGNORE_SUFFIX))) {
> +                int const illegal_option =
> +                    !strcmp("input-res", en->key) ||
> +                    !strcmp("frame-rate", en->key) ||
> +                    !strcmp("f", en->key) ||
> +                    !strcmp("frames", en->key) ||
> +                    !strcmp("sar", en->key) ||
> +                    !strcmp("bit-depth", en->key) ||
> +                    !strcmp("internal-bit-depth", en->key);
> +                if (illegal_option) {
> +                    av_log(avctx, AV_LOG_WARNING, "%s=%s ignored.\n", en->key, en->value);
> +                } else {
> +                    if (turing_check_binary_option(en->key)) {
> +                        snprintf(option_string, MAX_OPTION_LENGTH, "--%s", en->key);
> +                    } else {
> +                        snprintf(option_string, MAX_OPTION_LENGTH, "--%s=%s", en->key, en->value);
> +                    }
> +                    if (add_option(option_string, &encoder_options)) {
> +                        goto optionfail;
> +                    }
> +                }
> +            }
> +            av_dict_free(&dict);
> +        }
> +    }
> +
> +    if (add_option("dummy-input-filename", &encoder_options)) {
> +        goto optionfail;
> +    }
> +
> +    if (finalise_options(&encoder_options)) {
> +        goto optionfail;
> +    }
> +
> +    settings.argv = (char const**)encoder_options.argv;
> +    settings.argc = encoder_options.options_added;
> +
> +    for (int i=0; i<settings.argc; ++i) {
> +        av_log(avctx, AV_LOG_INFO, "arg %d: %s\n", i, settings.argv[i]);

Maybe VERBOSE rather than INFO?

> +    }
> +
> +    ctx->encoder = turing_create_encoder(settings);
> +
> +    if (!ctx->encoder) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create libturing encoder.\n");
> +        av_freep(&encoder_options.argv);
> +        av_freep(&encoder_options.options);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
> +        turing_bitstream const *bitstream;
> +        bitstream = turing_encode_headers(ctx->encoder);
> +        if (bitstream->size <= 0) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to encode headers.\n");
> +            av_freep(&encoder_options.argv);
> +            av_freep(&encoder_options.options);
> +            turing_destroy_encoder(ctx->encoder);
> +            return AVERROR_INVALIDDATA;
> +        }
> +
> +        avctx->extradata_size = bitstream->size;
> +
> +        avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
> +        if (!avctx->extradata) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to allocate HEVC extradata %d bytes\n", avctx->extradata_size);
> +            av_freep(&encoder_options.argv);
> +            av_freep(&encoder_options.options);
> +            turing_destroy_encoder(ctx->encoder);
> +            return AVERROR(ENOMEM);

This cleanup code is copied multiple times, maybe replace them all with a single "fail" path at the end and goto it?  (Like optionfail, but without the message and preserving the error code.)

> +        }
> +
> +        memcpy(avctx->extradata, bitstream->p, bitstream->size);
> +    }
> +
> +    av_freep(&encoder_options.argv);
> +    av_freep(&encoder_options.options);
> +    return 0;
> +
> +optionfail:
> +    av_log(avctx, AV_LOG_ERROR, "Error while allocating the memory for command line options\n");
> +    if (encoder_options.argv) {

You don't need this check, av_freep(&null_pointer) is explicitly valid.

> +        av_freep(&encoder_options.argv);
> +    }
> +    av_freep(&encoder_options.options);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static int libturing_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *pic, int *got_packet)
> +{
> +    libturingEncodeContext *ctx = avctx->priv_data;
> +    turing_encoder_output const *output;
> +    int ret = 0;
> +
> +    if (pic) {
> +        turing_picture picture;
> +
> +        picture.image[0].p = pic->data[0];
> +        picture.image[1].p = pic->data[1];
> +        picture.image[2].p = pic->data[2];
> +        picture.image[0].stride = pic->linesize[0];
> +        picture.image[1].stride = pic->linesize[1];
> +        picture.image[2].stride = pic->linesize[2];
> +        picture.pts = pic->pts;
> +        output = turing_encode_picture(ctx->encoder, &picture);
> +    } else {
> +        output = turing_encode_picture(ctx->encoder, 0);
> +    }
> +
> +    if (output->bitstream.size < 0) {
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    if (output->bitstream.size ==0) {
> +        return 0;
> +    }
> +
> +    ret = ff_alloc_packet2(avctx, pkt, output->bitstream.size, 0);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
> +        return ret;
> +    }
> +
> +    memcpy(pkt->data, output->bitstream.p, output->bitstream.size);
> +
> +    pkt->pts = output->pts;
> +    pkt->dts = output->dts;
> +    if (output->keyframe) {
> +        pkt->flags |= AV_PKT_FLAG_KEY;
> +    }
> +
> +    *got_packet = 1;
> +    return 0;
> +}
> +
> +static const AVOption options[] = {
> +    { "turing-params", "configure additional turing encoder parameters", offsetof(libturingEncodeContext, options), AV_OPT_TYPE_STRING,{ 0 }, 0, 0, AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
> +    { NULL }
> +};
> +
> +static const AVClass class = {
> +    .class_name = "libturing",
> +    .item_name = av_default_item_name,
> +    .option = options,
> +    .version = LIBAVUTIL_VERSION_INT,
> +};
> +
> +AVCodec ff_libturing_encoder = {
> +    .name = "libturing",
> +    .long_name = NULL_IF_CONFIG_SMALL("libturing HEVC"),
> +    .type = AVMEDIA_TYPE_VIDEO,
> +    .id = AV_CODEC_ID_HEVC,
> +    .init = libturing_encode_init,
> +    .encode2 = libturing_encode_frame,
> +    .close = libturing_encode_close,
> +    .priv_data_size = sizeof(libturingEncodeContext),
> +    .priv_class = &class,
> +    .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,

Setting AUTO_THREADS here probably isn't meaningful, there is no explicit threading at all.

> +    .pix_fmts = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
> +};
>