[FFmpeg-devel] [PATCH v2 1/1] avcodec/libjxlenc: Add libjxl_animated encoder

Leo Izen leo.izen at gmail.com
Fri Dec 15 01:20:58 EET 2023


On 12/13/23 15:53, Zsolt Vadász via ffmpeg-devel wrote:
> ---
>   configure              |   1 +
>   libavcodec/allcodecs.c |   1 +
>   libavcodec/libjxlenc.c | 214 +++++++++++++++++++++++++++++++++--------
>   3 files changed, 177 insertions(+), 39 deletions(-)
> 
> diff --git a/configure b/configure
> index 7d2ee66000..a334a9b1e0 100755
> --- a/configure
> +++ b/configure
> @@ -3418,6 +3418,7 @@ libilbc_decoder_deps="libilbc"
>   libilbc_encoder_deps="libilbc"
>   libjxl_decoder_deps="libjxl libjxl_threads"
>   libjxl_encoder_deps="libjxl libjxl_threads"
> +libjxl_animated_encoder_deps="libjxl libjxl_threads"
>   libkvazaar_encoder_deps="libkvazaar"
>   libmodplug_demuxer_deps="libmodplug"
>   libmp3lame_encoder_deps="libmp3lame"
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index b0f004e15c..e6733b0d4f 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -784,6 +784,7 @@ extern const FFCodec ff_libilbc_encoder;
>   extern const FFCodec ff_libilbc_decoder;
>   extern const FFCodec ff_libjxl_decoder;
>   extern const FFCodec ff_libjxl_encoder;
> +extern const FFCodec ff_libjxl_animated_encoder;

This should be called libjxl_anim for consistency. Compare with 
libwebp_anim and jpegxl_anim demuxer.


>   extern const FFCodec ff_libmp3lame_encoder;
>   extern const FFCodec ff_libopencore_amrnb_encoder;
>   extern const FFCodec ff_libopencore_amrnb_decoder;
> diff --git a/libavcodec/libjxlenc.c b/libavcodec/libjxlenc.c
> index d707f3a61b..bf44307a34 100644
> --- a/libavcodec/libjxlenc.c
> +++ b/libavcodec/libjxlenc.c
> @@ -39,6 +39,7 @@
>   #include "avcodec.h"
>   #include "encode.h"
>   #include "codec_internal.h"
> +#include "internal.h"
>   
>   #include <jxl/encode.h>
>   #include <jxl/thread_parallel_runner.h>
> @@ -49,11 +50,15 @@ typedef struct LibJxlEncodeContext {
>       void *runner;
>       JxlEncoder *encoder;
>       JxlEncoderFrameSettings *options;
> +    JxlPixelFormat jxl_fmt;
>       int effort;
>       float distance;
>       int modular;
>       uint8_t *buffer;
>       size_t buffer_size;
> +    /* Only used by libjxl_animated */
> +    int animated;

It's not actually true that ctx->animated is only used by libjxl_anim 
because it's checked by the standard encoder as well.

> +    AVFrame *last;

I don't see the purpose of keeping this here.

>   } LibJxlEncodeContext;
>   
>   /**
> @@ -183,6 +188,8 @@ static av_cold int libjxl_encode_init(AVCodecContext *avctx)
>           return AVERROR(ENOMEM);
>       }
>   
> +    ctx->animated = 0;
> +
>       return 0;
>   }
>   
> @@ -237,28 +244,19 @@ static int libjxl_populate_primaries(void *avctx, JxlColorEncoding *jxl_color, e
>       return 0;
>   }
>   
> -/**
> - * Encode an entire frame. Currently animation, is not supported by
> - * this encoder, so this will always reinitialize a new still image
> - * and encode a one-frame image (for image2 and image2pipe).
> - */
> -static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet)
> +static int libjxl_encode_init_image(AVCodecContext *avctx, const AVFrame *frame)
>   {
>       LibJxlEncodeContext *ctx = avctx->priv_data;
>       AVFrameSideData *sd;
>       const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(frame->format);
>       JxlBasicInfo info;
>       JxlColorEncoding jxl_color;
> -    JxlPixelFormat jxl_fmt;
> +    JxlPixelFormat *jxl_fmt = &ctx->jxl_fmt;
>       int bits_per_sample;
>   #if JPEGXL_NUMERIC_VERSION >= JPEGXL_COMPUTE_NUMERIC_VERSION(0, 8, 0)
>       JxlBitDepth jxl_bit_depth;
>   #endif
> -    JxlEncoderStatus jret;
>       int ret;
> -    size_t available = ctx->buffer_size;
> -    size_t bytes_written = 0;
> -    uint8_t *next_out = ctx->buffer;
>   
>       ret = libjxl_init_jxl_encoder(avctx);
>       if (ret) {
> @@ -268,23 +266,30 @@ static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFra
>   
>       /* populate the basic info settings */
>       JxlEncoderInitBasicInfo(&info);
> -    jxl_fmt.num_channels = pix_desc->nb_components;
> +    jxl_fmt->num_channels = pix_desc->nb_components;
>       info.xsize = frame->width;
>       info.ysize = frame->height;
> -    info.num_extra_channels = (jxl_fmt.num_channels + 1) % 2;
> -    info.num_color_channels = jxl_fmt.num_channels - info.num_extra_channels;
> -    bits_per_sample = av_get_bits_per_pixel(pix_desc) / jxl_fmt.num_channels;
> +    info.num_extra_channels = (jxl_fmt->num_channels + 1) % 2;
> +    info.num_color_channels = jxl_fmt->num_channels - info.num_extra_channels;
> +    bits_per_sample = av_get_bits_per_pixel(pix_desc) / jxl_fmt->num_channels;
>       info.bits_per_sample = avctx->bits_per_raw_sample > 0 && !(pix_desc->flags & AV_PIX_FMT_FLAG_FLOAT)
>                              ? avctx->bits_per_raw_sample : bits_per_sample;
>       info.alpha_bits = (info.num_extra_channels > 0) * info.bits_per_sample;
>       if (pix_desc->flags & AV_PIX_FMT_FLAG_FLOAT) {
>           info.exponent_bits_per_sample = info.bits_per_sample > 16 ? 8 : 5;
>           info.alpha_exponent_bits = info.alpha_bits ? info.exponent_bits_per_sample : 0;
> -        jxl_fmt.data_type = info.bits_per_sample > 16 ? JXL_TYPE_FLOAT : JXL_TYPE_FLOAT16;
> +        jxl_fmt->data_type = info.bits_per_sample > 16 ? JXL_TYPE_FLOAT : JXL_TYPE_FLOAT16;
>       } else {
>           info.exponent_bits_per_sample = 0;
>           info.alpha_exponent_bits = 0;
> -        jxl_fmt.data_type = info.bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16;
> +        jxl_fmt->data_type = info.bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16;
> +    }
> +    if(ctx->animated) {
> +        info.have_animation = 1;
> +        info.animation.have_timecodes = 0;
> +        info.animation.num_loops = 0;
> +        info.animation.tps_numerator = frame->time_base.den;
> +        info.animation.tps_denominator = frame->time_base.num;
>       }
>   
>   #if JPEGXL_NUMERIC_VERSION >= JPEGXL_COMPUTE_NUMERIC_VERSION(0, 8, 0)
> @@ -382,19 +387,19 @@ static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFra
>               av_log(avctx, AV_LOG_WARNING, "Could not increase codestream level\n");
>       }
>   
> -    jxl_fmt.endianness = JXL_NATIVE_ENDIAN;
> -    jxl_fmt.align = frame->linesize[0];
> +    jxl_fmt->endianness = JXL_NATIVE_ENDIAN;
> +    jxl_fmt->align = frame->linesize[0];
>   
> -    if (JxlEncoderAddImageFrame(ctx->options, &jxl_fmt, frame->data[0], jxl_fmt.align * info.ysize) != JXL_ENC_SUCCESS) {
> -        av_log(avctx, AV_LOG_ERROR, "Failed to add Image Frame\n");
> -        return AVERROR_EXTERNAL;
> -    }
>   
> -    /*
> -     * Run this after the last frame in the image has been passed.
> -     * TODO support animation
> -     */
> -    JxlEncoderCloseInput(ctx->encoder);
> +    return 0;
> +}
> +
> +static int libjxl_encode_process_output(AVCodecContext *avctx, size_t *bytes_written)
> +{
> +    LibJxlEncodeContext *ctx = avctx->priv_data;
> +    JxlEncoderStatus jret;
> +    size_t available = ctx->buffer_size;
> +    uint8_t *next_out = ctx->buffer;
>   
>       while (1) {
>           jret = JxlEncoderProcessOutput(ctx->encoder, &next_out, &available);
> @@ -402,7 +407,7 @@ static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFra
>               av_log(avctx, AV_LOG_ERROR, "Unspecified libjxl error occurred\n");
>               return AVERROR_EXTERNAL;
>           }
> -        bytes_written = ctx->buffer_size - available;
> +        *bytes_written = ctx->buffer_size - available;
>           /* all data passed has been encoded */
>           if (jret == JXL_ENC_SUCCESS)
>               break;
> @@ -419,14 +424,48 @@ static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFra
>                   return AVERROR(ENOMEM);
>               ctx->buffer = temp;
>               ctx->buffer_size = new_size;
> -            next_out = ctx->buffer + bytes_written;
> -            available = new_size - bytes_written;
> +            next_out = ctx->buffer + *bytes_written;
> +            available = new_size - *bytes_written;
>               continue;
>           }
>           av_log(avctx, AV_LOG_ERROR, "Bad libjxl event: %d\n", jret);
>           return AVERROR_EXTERNAL;
>       }
>   
> +    return 0;
> +}
> +
> +/**
> + * Encode an entire frame. Currently animation, is not supported by
> + * this encoder, so this will always reinitialize a new still image
> + * and encode a one-frame image (for image2 and image2pipe).
> + */

Watch your copy/paste.

> +static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet)
> +{
> +    LibJxlEncodeContext *ctx = avctx->priv_data;
> +    JxlPixelFormat *jxl_fmt = &ctx->jxl_fmt;
> +    int ret;
> +    size_t bytes_written = 0;
> +
> +    if((ret = libjxl_encode_init_image(avctx, frame)) < 0)
> +        return ret;
> +
> +    if (JxlEncoderAddImageFrame(ctx->options,
> +                                jxl_fmt,
> +                                frame->data[0],
> +                                jxl_fmt->align * frame->height) != JXL_ENC_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to add Image Frame\n");
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    /*
> +     * Run this after the last frame in the image has been passed.
> +     */
> +    JxlEncoderCloseInput(ctx->encoder);
> +
> +    if((ret = libjxl_encode_process_output(avctx, &bytes_written)) < 0)
> +        return ret;
> +
>       ret = ff_get_encode_buffer(avctx, pkt, bytes_written, 0);
>       if (ret < 0)
>           return ret;
> @@ -437,6 +476,70 @@ static int libjxl_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFra
>       return 0;
>   }
>   
> +static int libjxl_animated_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet)
> +{
> +    LibJxlEncodeContext *ctx = avctx->priv_data;
> +    int ret = 0;
> +
> +    if(avctx->frame_num == 0)
> +        if((ret = libjxl_encode_init_image(avctx, frame)) < 0)
> +            return ret;

Don't rely on avctx->frame_num to be set. You should track the 
sent-initial-frame status separately.


> +
> +    if(!ctx->last && !avctx->internal->draining) { > +        ctx->last = av_frame_clone(frame);
> +        *got_packet = 0;
> +        return AVERROR(EAGAIN);
It looks like you're trying to emit one packet per image, rather than 
one packet per encoded frame. This is fine, but you should not be 
calling av_frame_clone, and there's no reason to use 
avctx->internal->draining here. If you are doing this, you also have no 
reason to cache ctx->last at all.


> +    } else if(!ctx->last && avctx->internal->draining && !frame) {
> +        *got_packet = 0;
> +    } else {
> +        JxlPixelFormat *jxl_fmt = &ctx->jxl_fmt;
> +        JxlFrameHeader frame_header;
> +        size_t bytes_written = 0;
> +
> +        JxlEncoderInitFrameHeader(&frame_header);
> +        frame_header.duration = ctx->last->duration;

AVFrame->duration may be zero if unknown. You need to track PTS instead.

> +        pkt->duration = ctx->last->duration;
> +        pkt->pts = AV_NOPTS_VALUE;
> +        pkt->dts = AV_NOPTS_VALUE;

Do not do this. Set the PTS and DTS correctly.


> +
> +        if(JxlEncoderSetFrameHeader(ctx->options, &frame_header) != JXL_ENC_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set frame header\n");
> +            return AVERROR_EXTERNAL;
> +        }
> +        if (JxlEncoderAddImageFrame(ctx->options,
> +                                    jxl_fmt,
> +                                    ctx->last->data[0],
> +                                    jxl_fmt->align * ctx->last->height) != JXL_ENC_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to add Image Frame\n");
> +            return AVERROR_EXTERNAL;
> +        }
> +
> +        /*
> +         * Run this after the last frame in the image has been passed.
> +         */
> +        if(avctx->internal->draining)
> +            JxlEncoderCloseInput(ctx->encoder);


avctx->internal->draining is not a reliable check. When nothing is left 
to encode, the encoder will receive NULL as the AVFrame argument.

> +
> +        if((ret = libjxl_encode_process_output(avctx, &bytes_written)) < 0)
> +            return ret;
> +
> +        ret = ff_get_encode_buffer(avctx, pkt, bytes_written, 0);
> +        if (ret < 0)
> +            return ret;
> +
> +        memcpy(pkt->data, ctx->buffer, bytes_written);
> +        *got_packet = 1;
> +
> +        if(!avctx->internal->draining) {
> +            av_frame_replace(ctx->last, frame);
> +        } else {
> +            av_frame_free(&ctx->last);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>   static av_cold int libjxl_encode_close(AVCodecContext *avctx)
>   {
>       LibJxlEncodeContext *ctx = avctx->priv_data;
> @@ -458,6 +561,17 @@ static av_cold int libjxl_encode_close(AVCodecContext *avctx)
>       return 0;
>   }
>   
> +static av_cold int libjxl_animated_encode_init(AVCodecContext *avctx)
> +{
> +    int ret;
> +    LibJxlEncodeContext *ctx = avctx->priv_data;
> +    if((ret = libjxl_encode_init(avctx)) < 0)
> +        return ret;
> +    ctx->animated = 1;
> +    ret = libjxl_init_jxl_encoder(avctx);
> +    return ret;
> +}
> +
>   #define OFFSET(x) offsetof(LibJxlEncodeContext, x)
>   #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
>   
> @@ -476,6 +590,15 @@ static const AVClass libjxl_encode_class = {
>       .version    = LIBAVUTIL_VERSION_INT,
>   };
>   
> +static const enum AVPixelFormat libjxl_pix_fmts[] = {
> +    AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
> +    AV_PIX_FMT_RGB48, AV_PIX_FMT_RGBA64,
> +    AV_PIX_FMT_GRAY8, AV_PIX_FMT_YA8,
> +    AV_PIX_FMT_GRAY16, AV_PIX_FMT_YA16,
> +    AV_PIX_FMT_GRAYF32,
> +    AV_PIX_FMT_NONE
> +};
> +
>   const FFCodec ff_libjxl_encoder = {
>       .p.name           = "libjxl",
>       CODEC_LONG_NAME("libjxl JPEG XL"),
> @@ -490,14 +613,27 @@ const FFCodec ff_libjxl_encoder = {
>       .caps_internal    = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
>                           FF_CODEC_CAP_AUTO_THREADS | FF_CODEC_CAP_INIT_CLEANUP |
>                           FF_CODEC_CAP_ICC_PROFILES,
> -    .p.pix_fmts       = (const enum AVPixelFormat[]) {
> -        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
> -        AV_PIX_FMT_RGB48, AV_PIX_FMT_RGBA64,
> -        AV_PIX_FMT_GRAY8, AV_PIX_FMT_YA8,
> -        AV_PIX_FMT_GRAY16, AV_PIX_FMT_YA16,
> -        AV_PIX_FMT_GRAYF32,
> -        AV_PIX_FMT_NONE
> -    },
> +    .p.pix_fmts       = libjxl_pix_fmts,
> +    .p.priv_class     = &libjxl_encode_class,
> +    .p.wrapper_name   = "libjxl",
> +};
> +
> +const FFCodec ff_libjxl_animated_encoder = {
> +    .p.name           = "libjxl_animated",
> +    CODEC_LONG_NAME("libjxl Animated JPEG XL"),
> +    .p.type           = AVMEDIA_TYPE_VIDEO,
> +    .p.id             = AV_CODEC_ID_JPEGXL,
> +    .priv_data_size   = sizeof(LibJxlEncodeContext),
> +    .init             = libjxl_animated_encode_init,
> +    FF_CODEC_ENCODE_CB(libjxl_animated_encode_frame),
> +    .close            = libjxl_encode_close,
> +    .p.capabilities   = AV_CODEC_CAP_OTHER_THREADS |
> +                        AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE |
> +                        AV_CODEC_CAP_DELAY,
> +    .caps_internal    = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
> +                        FF_CODEC_CAP_AUTO_THREADS | FF_CODEC_CAP_INIT_CLEANUP |
> +                        FF_CODEC_CAP_ICC_PROFILES | FF_CODEC_CAP_EOF_FLUSH,

Why FF_CODEC_CAP_EOF_FLUSH?


> +    .p.pix_fmts       = libjxl_pix_fmts,
>       .p.priv_class     = &libjxl_encode_class,
>       .p.wrapper_name   = "libjxl",
>   };


More information about the ffmpeg-devel mailing list