[FFmpeg-devel] [PATCH v5 2/4] avcodec/jpegxl_parser: add JPEG XL parser

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Sun Jul 23 13:55:36 EEST 2023


Leo Izen:
> Add a parser to libavcodec for AV_CODEC_ID_JPEGXL. It doesn't find the
> end of the stream in order to packetize the codec, but it does look at
> the headers to set preliminary information like dimensions and pixel
> format.
> 
> Note that much of this code is duplicated from avformat/jpegxl_probe.c,
> but that code will be removed and call this instead in the next commit.
> 
> Signed-off-by: Leo Izen <leo.izen at gmail.com>
> ---
>  libavcodec/Makefile        |    3 +
>  libavcodec/jpegxl.h        |   94 +++
>  libavcodec/jpegxl_parse.c  |  520 +++++++++++++
>  libavcodec/jpegxl_parse.h  |   72 ++
>  libavcodec/jpegxl_parser.c | 1455 ++++++++++++++++++++++++++++++++++++

1455 lines. That would be the biggest parser of them all. Why is this
necessary?

>  libavcodec/parsers.c       |    1 +
>  libavcodec/version.h       |    2 +-
>  7 files changed, 2146 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/jpegxl.h
>  create mode 100644 libavcodec/jpegxl_parse.c
>  create mode 100644 libavcodec/jpegxl_parse.h
>  create mode 100644 libavcodec/jpegxl_parser.c
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 1b0226c089..e289c030d3 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1056,6 +1056,8 @@ STLIBOBJS-$(CONFIG_AVFORMAT)           += to_upper4.o
>  STLIBOBJS-$(CONFIG_ISO_MEDIA)          += mpegaudiotabs.o
>  STLIBOBJS-$(CONFIG_FLV_MUXER)          += mpeg4audio_sample_rates.o
>  STLIBOBJS-$(CONFIG_HLS_DEMUXER)        += ac3_channel_layout_tab.o
> +STLIBOBJS-$(CONFIG_IMAGE_JPEGXL_PIPE_DEMUXER) += jpegxl_parse.o
> +STLIBOBJS-$(CONFIG_JPEGXL_ANIM_DEMUXER)       += jpegxl_parse.o
>  STLIBOBJS-$(CONFIG_MATROSKA_DEMUXER)   += mpeg4audio_sample_rates.o
>  STLIBOBJS-$(CONFIG_MOV_DEMUXER)        += ac3_channel_layout_tab.o
>  STLIBOBJS-$(CONFIG_MXF_MUXER)          += golomb.o
> @@ -1185,6 +1187,7 @@ OBJS-$(CONFIG_HEVC_PARSER)             += hevc_parser.o hevc_data.o
>  OBJS-$(CONFIG_HDR_PARSER)              += hdr_parser.o
>  OBJS-$(CONFIG_IPU_PARSER)              += ipu_parser.o
>  OBJS-$(CONFIG_JPEG2000_PARSER)         += jpeg2000_parser.o
> +OBJS-$(CONFIG_JPEGXL_PARSER)           += jpegxl_parser.o jpegxl_parse.o
>  OBJS-$(CONFIG_MISC4_PARSER)            += misc4_parser.o
>  OBJS-$(CONFIG_MJPEG_PARSER)            += mjpeg_parser.o
>  OBJS-$(CONFIG_MLP_PARSER)              += mlp_parse.o mlp_parser.o mlp.o
> diff --git a/libavcodec/jpegxl.h b/libavcodec/jpegxl.h
> new file mode 100644
> index 0000000000..66a6be3555
> --- /dev/null
> +++ b/libavcodec/jpegxl.h
> @@ -0,0 +1,94 @@
> +/*
> + * JPEG XL Common Header Definitions
> + * Copyright (c) 2023 Leo Izen <leo.izen at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_JPEGXL_H
> +#define AVCODEC_JPEGXL_H
> +
> +#define FF_JPEGXL_CODESTREAM_SIGNATURE_LE 0x0aff
> +#define FF_JPEGXL_CONTAINER_SIGNATURE_LE 0x204c584a0c000000
> +#define FF_JPEGXL_CODESTREAM_SIGNATURE_BE 0xff0a
> +#define FF_JPEGXL_CONTAINER_SIGNATURE_BE 0x0000000c4a584c20
> +
> +typedef enum FFJXLFrameEncoding {
> +    JPEGXL_ENC_VARDCT,
> +    JPEGXL_ENC_MODULAR
> +} FFJXLFrameEncoding;
> +
> +typedef enum FFJXLFrameType {
> +    JPEGXL_FRAME_REGULAR,
> +    JPEGXL_FRAME_LF,
> +    JPEGXL_FRAME_REFERENCE_ONLY,
> +    JPEGXL_FRAME_SKIP_PROGRESSIVE
> +} FFJXLFrameType;
> +
> +typedef enum FFJXLBlendMode {
> +    JPEGXL_BM_REPLACE,
> +    JPEGXL_BM_ADD,
> +    JPEGXL_BM_BLEND,
> +    JPEGXL_BM_MULADD,
> +    JPEGXL_BM_MUL
> +} FFJXLBlendMode;
> +
> +typedef enum FFJXLExtraChannelType {
> +    JPEGXL_CT_ALPHA = 0,
> +    JPEGXL_CT_DEPTH,
> +    JPEGXL_CT_SPOT_COLOR,
> +    JPEGXL_CT_SELECTION_MASK,
> +    JPEGXL_CT_BLACK,
> +    JPEGXL_CT_CFA,
> +    JPEGXL_CT_THERMAL,
> +    JPEGXL_CT_NON_OPTIONAL = 15,
> +    JPEGXL_CT_OPTIONAL
> +} FFJXLExtraChannelType;
> +
> +typedef enum FFJXLColorSpace {
> +    JPEGXL_CS_RGB = 0,
> +    JPEGXL_CS_GRAY,
> +    JPEGXL_CS_XYB,
> +    JPEGXL_CS_UNKNOWN
> +} FFJXLColorSpace;
> +
> +typedef enum FFJXLWhitePoint {
> +    JPEGXL_WP_D65 = 1,
> +    JPEGXL_WP_CUSTOM,
> +    JPEGXL_WP_E = 10,
> +    JPEGXL_WP_DCI = 11
> +} FFJXLWhitePoint;
> +
> +typedef enum FFJXLPrimaries {
> +    JPEGXL_PR_SRGB = 1,
> +    JPEGXL_PR_CUSTOM,
> +    JPEGXL_PR_2100 = 9,
> +    JPEGXL_PR_P3 = 11,
> +} FFJXLPrimaries;
> +
> +typedef enum FFJXLTransferCharacteristic {
> +    JPEGXL_TR_BT709 = 1,
> +    JPEGXL_TR_UNKNOWN,
> +    JPEGXL_TR_LINEAR = 8,
> +    JPEGXL_TR_SRGB = 13,
> +    JPEGXL_TR_PQ = 16,
> +    JPEGXL_TR_DCI,
> +    JPEGXL_TR_HLG,
> +    JPEGXL_TR_GAMMA = 1 << 24, /* same value the header parser adds to an enumerated transfer function; a raw gamma is read as 24 bits and stays below it */
> +} FFJXLTransferCharacteristic;
> +
> +#endif /* AVCODEC_JPEGXL_H */
> diff --git a/libavcodec/jpegxl_parse.c b/libavcodec/jpegxl_parse.c
> new file mode 100644
> index 0000000000..e22e2a7534
> --- /dev/null
> +++ b/libavcodec/jpegxl_parse.c
> @@ -0,0 +1,520 @@
> +/*
> + * JPEG XL Header Parser
> + * Copyright (c) 2023 Leo Izen <leo.izen at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stdint.h>
> +
> +#include "bytestream.h"
> +#define UNCHECKED_BITSTREAM_READER 0
> +#define BITSTREAM_READER_LE
> +#include "get_bits.h"
> +#include "jpegxl.h"
> +#include "jpegxl_parse.h"
> +
> +/*
> + * read a U32(c_i + u(u_i)):
> + * a 2-bit selector picks one of the four (constant, bit count) pairs and
> + * the result is that constant plus an unsigned value of that many bits;
> + * nothing further is read when the selected bit count is zero
> + */
> +static av_always_inline uint32_t jxl_u32(GetBitContext *gb,
> +                        uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3,
> +                        uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3)
> +{
> +    const uint32_t constants[4] = {c0, c1, c2, c3};
> +    const uint32_t ubits    [4] = {u0, u1, u2, u3};
> +    uint32_t ret, choice = get_bits(gb, 2); /* selector for the pair to use */
> +
> +    ret = constants[choice];
> +    if (ubits[choice]) /* a zero-width field contributes nothing and reads no bits */
> +        ret += get_bits_long(gb, ubits[choice]);
> +
> +    return ret;
> +}
> +
> +static av_always_inline uint32_t jxl_enum(GetBitContext *gb)
> +{ /* read an enum value: 0, 1, 2 + u(4) or 18 + u(6), so at most 81 can be returned */
> +    return jxl_u32(gb, 0, 1, 2, 18, 0, 0, 4, 6);
> +}
> +
> +/*
> + * read a U64():
> + * a 2-bit selector chooses between the constant 0, 1 + u(4), 17 + u(8)
> + * and a variable-length form that starts with 12 bits and keeps growing
> + * for as long as a one-bit continuation flag is set
> + */
> +static uint64_t jxl_u64(GetBitContext *gb)
> +{
> +    uint64_t shift = 12, ret; /* shift: bit position of the next group to merge in */
> +
> +    switch (get_bits(gb, 2)) {
> +    case 1:
> +        ret = 1 + get_bits(gb, 4);
> +        break;
> +    case 2:
> +        ret = 17 + get_bits(gb, 8);
> +        break;
> +    case 3:
> +        ret = get_bits(gb, 12);
> +        while (get_bits1(gb)) { /* continuation flag precedes every extra group */
> +            if (shift < 60) {
> +                ret |= (uint64_t)get_bits(gb, 8) << shift;
> +                shift += 8;
> +            } else { /* shift == 60: only the top 4 bits are left, then the value is complete */
> +                ret |= (uint64_t)get_bits(gb, 4) << shift;
> +                break;
> +            }
> +        }
> +        break;
> +    default: /* selector 0 */
> +        ret = 0;
> +    }
> +
> +    return ret;
> +}
> +
> +static uint32_t jpegxl_width_from_ratio(uint32_t height, int ratio)
> +{   /*
> +     * derive the width from the height and a ratio code
> +     * returns 0 for any code outside 1..7, which the callers take to
> +     * mean that the width is coded explicitly in the bitstream
> +     */
> +    uint64_t height64 = height; /* avoid integer overflow */
> +    switch (ratio) {
> +    case 1: /* 1:1 */
> +        return height;
> +    case 2: /* 12:10 */
> +        return (uint32_t)((height64 * 12) / 10);
> +    case 3: /* 4:3 */
> +        return (uint32_t)((height64 * 4) / 3);
> +    case 4: /* 3:2 */
> +        return (uint32_t)((height64 * 3) / 2);
> +    case 5: /* 16:9 */
> +        return (uint32_t)((height64 * 16) / 9);
> +    case 6: /* 5:4 */
> +        return (uint32_t)((height64 * 5) / 4);
> +    case 7: /* 2:1 */
> +        return (uint32_t)(height64 * 2);
> +    default:
> +        break;
> +    }
> +
> +    return 0; /* manual width */
> +}
> +
> +/**
> + * read a Jpeg XL Size Header and, if requested, validate it
> + *
> + * The header is always read in full so the bit position ends up after it.
> + * When meta is non-NULL the decoded dimensions are stored in both the
> + * width/height and the coded_width/coded_height fields.
> + *
> + * @param validate if nonzero, reject a width or height above 1 << 18 and
> + *                 reject (width >> 4) * (height >> 4) above 1 << 20
> + * @return >= 0 upon valid size, < 0 upon invalid size found
> + */
> +static int jpegxl_read_size_header(GetBitContext *gb, FFJXLMetadata *meta, int validate)
> +{
> +    uint32_t width, height;
> +
> +    if (get_bits1(gb)) {
> +        /* small size header */
> +        height = (get_bits(gb, 5) + 1) << 3; /* multiples of 8, from 8 to 256 */
> +        width = jpegxl_width_from_ratio(height, get_bits(gb, 3));
> +        if (!width) /* ratio code 0: width is coded like the height */
> +            width = (get_bits(gb, 5) + 1) << 3;
> +    } else {
> +        /* large size header */
> +        height = 1 + jxl_u32(gb, 0, 0, 0, 0, 9, 13, 18, 30);
> +        width = jpegxl_width_from_ratio(height, get_bits(gb, 3));
> +        if (!width) /* ratio code 0: width is coded like the height */
> +            width = 1 + jxl_u32(gb, 0, 0, 0, 0, 9, 13, 18, 30);
> +    }
> +    if (validate && (width > (1 << 18) || height > (1 << 18)
> +        || (width >> 4) * (height >> 4) > (1 << 20)))
> +        return AVERROR_INVALIDDATA;
> +
> +    if (meta) {
> +        meta->width = meta->coded_width = width;
> +        meta->height = meta->coded_height = height;
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * read a Jpeg XL Preview Header and, if requested, validate it
> + *
> + * The decoded preview dimensions are not stored anywhere; the header is
> + * read to advance the bit position and, optionally, to check the size.
> + *
> + * @param validate if nonzero, reject a preview wider or taller than 4096
> + * @return >= 0 upon valid size, < 0 upon invalid size found
> + */
> +static int jpegxl_read_preview_header(GetBitContext *gb, int validate)
> +{
> +    uint32_t width, height;
> +
> +    if (get_bits1(gb)) {
> +        /* coded height and width divided by eight */
> +        height = jxl_u32(gb, 16, 32, 1, 33, 0, 0, 5, 9) << 3;
> +        width = jpegxl_width_from_ratio(height, get_bits(gb, 3));
> +        if (!width) /* ratio code 0: width is coded like the height */
> +            width = jxl_u32(gb, 16, 32, 1, 33, 0, 0, 5, 9) << 3;
> +    } else {
> +        /* full height and width coded */
> +        height = jxl_u32(gb, 1, 65, 321, 1345, 6, 8, 10, 12);
> +        width = jpegxl_width_from_ratio(height, get_bits(gb, 3));
> +        if (!width) /* ratio code 0: width is coded like the height */
> +            width = jxl_u32(gb, 1, 65, 321, 1345, 6, 8, 10, 12);
> +    }
> +    if (validate && (width > 4096 || height > 4096))
> +        return AVERROR_INVALIDDATA;
> +
> +    return 0;
> +}
> +
> +/**
> + * get a Jpeg XL BitDepth Header. These cannot be invalid.
> + *
> + * The decoded depth is written to meta->bit_depth when meta is non-NULL;
> + * with a NULL meta the header is only skipped over.
> + */
> +static void jpegxl_get_bit_depth(GetBitContext *gb, FFJXLMetadata *meta)
> +{
> +    int bit_depth;
> +    if (get_bits1(gb)) {
> +        /* float samples */
> +        bit_depth = jxl_u32(gb, 32, 16, 24, 1, 0, 0, 0, 6); /* mantissa */
> +        skip_bits_long(gb, 4); /* exponent */
> +    } else {
> +        /* integer samples */
> +        bit_depth = jxl_u32(gb, 8, 10, 12, 1, 0, 0, 0, 6); /* 8, 10, 12 or 1 + u(6) */
> +    }
> +    if (meta)
> +        meta->bit_depth = bit_depth;
> +}
> +
> +/**
> + * read a Jpeg XL Extra Channel Info bundle and, if requested, validate it
> + *
> + * A set "default alpha" bit means the channel is alpha and nothing else
> + * of the bundle is coded. Otherwise the type, bit depth, dim-shift and
> + * name length are read, the name is skipped, and the type-specific
> + * trailing fields are skipped as well.
> + *
> + * @param meta     if non-NULL, have_alpha is set when the channel is alpha
> + * @param validate if nonzero, reject enum values above 63; values below 10
> + *                 additionally reject a black channel
> + * @return >= 0 upon valid, < 0 upon invalid or when the buffer is too short
> + *         to hold the channel name
> + */
> +static int jpegxl_read_extra_channel_info(GetBitContext *gb, FFJXLMetadata *meta, int validate)
> +{
> +    int default_alpha = get_bits1(gb);
> +    int bits_left;
> +    uint32_t type, name_len = 0;
> +
> +    if (!default_alpha) {
> +        type = jxl_enum(gb);
> +        if (validate && type > 63)
> +            return AVERROR_INVALIDDATA; /* enum types cannot be 64+ */
> +        if (validate && validate < 10 && type == JPEGXL_CT_BLACK)
> +            return AVERROR_INVALIDDATA;
> +        jpegxl_get_bit_depth(gb, NULL);
> +        jxl_u32(gb, 0, 3, 4, 1, 0, 0, 0, 3); /* dim-shift */
> +        /* max of name_len is 1071 = 48 + 2^10 - 1 */
> +        name_len = 8 * jxl_u32(gb, 0, 0, 16, 48, 0, 4, 5, 10);
> +    } else {
> +        type = JPEGXL_CT_ALPHA;
> +    }
> +
> +    /*
> +     * get_bits_left() returns a signed int that may be negative; comparing
> +     * it directly against the unsigned name_len would convert a negative
> +     * value into a huge unsigned one and make the check pass, so test the
> +     * sign explicitly before doing the unsigned comparison
> +     */
> +    bits_left = get_bits_left(gb);
> +    if (bits_left < 0 || (uint32_t)bits_left < name_len)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    /* skip over the name */
> +    skip_bits_long(gb, name_len);
> +
> +    /* one extra bit follows an explicitly coded alpha channel */
> +    if (!default_alpha && type == JPEGXL_CT_ALPHA)
> +        skip_bits1(gb);
> +
> +    /* four 16-bit values follow a spot color channel */
> +    if (type == JPEGXL_CT_SPOT_COLOR)
> +        skip_bits_long(gb, 16 * 4);
> +
> +    /* one U32 follows a CFA channel */
> +    if (type == JPEGXL_CT_CFA)
> +        jxl_u32(gb, 1, 0, 3, 19, 0, 2, 4, 8);
> +
> +    if (meta && type == JPEGXL_CT_ALPHA)
> +        meta->have_alpha = 1;
> +
> +    return 0;
> +}
> +
> +static int jpegxl_skip_extensions(GetBitContext *gb)
> +{   /*
> +     * skip an extensions bundle: a U64 bit mask, then one U64 length for
> +     * every bit set in the mask, then that many bits of payload in total
> +     * returns 0 on success and AVERROR_BUFFER_TOO_SMALL when the buffer
> +     * ends before the bundle does
> +     */
> +    uint64_t extensions = jxl_u64(gb), extensions_len = 0;
> +
> +    if (get_bits_left(gb) <= 0)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    if (!extensions) /* empty mask: no lengths and no payload follow */
> +        return 0;
> +
> +    for (int i = 0; i < 64; i++) {
> +        if (extensions & (UINT64_C(1) << i))
> +            extensions_len += jxl_u64(gb); /* NOTE(review): the sum can wrap around; confirm that is acceptable */
> +        if (get_bits_left(gb) <= 0)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +    }
> +
> +    /* get_bits_left() is known to be positive here, so the mixed-sign comparison is safe */
> +    if (extensions_len > INT_MAX || get_bits_left(gb) <= extensions_len)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    skip_bits_long(gb, extensions_len);
> +
> +    return 0;
> +}
> +
> +int ff_jpegxl_parse_codestream_header(const uint8_t *buf, int buflen, FFJXLMetadata *meta, int validate)
> +{   /*
> +     * Reads the codestream signature, the size header, the image metadata
> +     * and the default-transform data from buf.
> +     *
> +     * Returns the number of bits consumed on success. Returns
> +     * AVERROR_BUFFER_TOO_SMALL when buf ends before the header does and
> +     * AVERROR_INVALIDDATA when a check fails; most checks only run when
> +     * validate is nonzero, and some only when validate is also below 10.
> +     *
> +     * meta may be NULL. When it is not, only some fields are written on
> +     * every call: bit_depth is read back before it is necessarily set and
> +     * have_alpha is only ever set to 1, so this presumably relies on the
> +     * caller zeroing *meta beforehand - TODO confirm against the callers.
> +     */
> +    GetBitContext gbi, *gb = &gbi;
> +
> +    int all_default, extra_fields = 0;
> +    int xyb_encoded = 1, have_icc_profile = 0;
> +    int animation_offset = 0, have_timecodes = 0;
> +
> +    FFJXLPrimaries primaries = JPEGXL_PR_SRGB;
> +    FFJXLTransferCharacteristic trc = JPEGXL_TR_SRGB + (1U << 24); /* enumerated values carry the 1 << 24 marker */
> +    FFJXLWhitePoint white_point = JPEGXL_WP_D65;
> +    FFJXLColorSpace color_space = JPEGXL_CS_RGB;
> +
> +    AVRational tb;
> +    uint32_t num_extra_channels = 0;
> +    int ret;
> +
> +    ret = init_get_bits8(gb, buf, buflen);
> +    if (ret < 0)
> +        return ret;
> +
> +    /* the 16 signature bits are consumed even when they are not checked */
> +    if (get_bits(gb, 16) != FF_JPEGXL_CODESTREAM_SIGNATURE_LE && validate)
> +        return AVERROR_INVALIDDATA;
> +
> +    ret = jpegxl_read_size_header(gb, meta, validate);
> +    if (ret < 0)
> +        return ret;
> +
> +    all_default = get_bits1(gb);
> +    if (!all_default)
> +        extra_fields = get_bits1(gb);
> +
> +    if (extra_fields) {
> +        int orientation = get_bits(gb, 3);
> +        if (orientation > 3 && meta) /* only width/height are swapped, coded_width/coded_height stay as read */
> +            FFSWAP(uint32_t, meta->width, meta->height);
> +
> +        /*
> +         * intrinsic size
> +         * any size header here is valid, but as it
> +         * is variable length we have to read it
> +         */
> +        if (get_bits1(gb))
> +            jpegxl_read_size_header(gb, NULL, 0);
> +
> +        /* preview header */
> +        if (get_bits1(gb)) {
> +            ret = jpegxl_read_preview_header(gb, 0);
> +            if (ret < 0)
> +                return ret;
> +        }
> +
> +        /* animation header */
> +        if (get_bits1(gb)) {
> +            animation_offset = get_bits_count(gb);
> +            tb.den = jxl_u32(gb, 100, 1000, 1, 1, 0, 0, 10, 30);
> +            tb.num = jxl_u32(gb, 1, 1001, 1, 1, 0, 0, 8, 10);
> +            jxl_u32(gb, 0, 0, 0, 0, 0, 3, 16, 32); /* read and discarded */
> +            have_timecodes = get_bits1(gb);
> +        }
> +    }
> +
> +    if (animation_offset && meta) { /* tb is only initialized when an animation header was read */
> +        meta->animation_offset = animation_offset;
> +        meta->timebase = tb;
> +        meta->have_timecodes = have_timecodes;
> +    }
> +
> +    if (get_bits_left(gb) <= 0)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    if (!all_default) {
> +        jpegxl_get_bit_depth(gb, meta);
> +
> +        /* modular_16bit_buffers must equal 1 */
> +        if (!get_bits1(gb) && validate && validate < 10)
> +            return AVERROR_INVALIDDATA;
> +
> +        num_extra_channels = jxl_u32(gb, 0, 1, 2, 1, 0, 0, 4, 12);
> +        if (num_extra_channels > 4 && validate && validate < 10)
> +            return AVERROR_INVALIDDATA;
> +        for (uint32_t i = 0; i < num_extra_channels; i++) {
> +            ret = jpegxl_read_extra_channel_info(gb, meta, validate);
> +            if (ret < 0)
> +                return ret;
> +            if (get_bits_left(gb) <= 0)
> +                return AVERROR_BUFFER_TOO_SMALL;
> +        }
> +
> +        xyb_encoded = get_bits1(gb);
> +
> +        /* color encoding bundle */
> +        if (!get_bits1(gb)) { /* a set bit keeps the defaults initialized above */
> +            have_icc_profile = get_bits1(gb);
> +            color_space = jxl_enum(gb);
> +            if (color_space > 63 && validate)
> +                return AVERROR_INVALIDDATA;
> +            if (!have_icc_profile) {
> +                if (color_space != JPEGXL_CS_XYB) {
> +                    white_point = jxl_enum(gb);
> +                    if (white_point > 63 && validate)
> +                        return AVERROR_INVALIDDATA;
> +                    if (white_point == JPEGXL_WP_CUSTOM) {
> +                        /* ux and uy values */
> +                        jxl_u32(gb, 0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                        jxl_u32(gb, 0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                    }
> +                    if (color_space != JPEGXL_CS_GRAY) {
> +                        /* primaries */
> +                        primaries = jxl_enum(gb);
> +                        if (primaries > 63 && validate)
> +                            return AVERROR_INVALIDDATA;
> +                        if (primaries == JPEGXL_PR_CUSTOM) {
> +                            /* ux/uy values for r,g,b */
> +                            for (int i = 0; i < 6; i++) {
> +                                jxl_u32(gb, 0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                                if (get_bits_left(gb) <= 0)
> +                                    return AVERROR_BUFFER_TOO_SMALL;
> +                            }
> +                        }
> +                    }
> +                }
> +
> +                /* transfer characteristics */
> +                if (get_bits1(gb)) {
> +                    /* gamma */
> +                    trc = get_bits(gb, 24); /* stored raw, so it stays below 1 << 24 */
> +                } else {
> +                    /* transfer function */
> +                    trc = jxl_enum(gb);
> +                    if (trc > 63 && validate)
> +                        return AVERROR_INVALIDDATA;
> +                    trc += (1U << 24); /* marks the value as enumerated rather than a gamma */
> +                }
> +
> +                /* rendering intent */
> +                if (jxl_enum(gb) > 63 && validate)
> +                    return AVERROR_INVALIDDATA;
> +            }
> +        }
> +
> +        /* tone mapping bundle */
> +        if (extra_fields && !get_bits1(gb))
> +            skip_bits_long(gb, 16 + 16 + 1 + 16);
> +
> +        ret = jpegxl_skip_extensions(gb);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    if (meta) {
> +        meta->xyb_encoded = xyb_encoded;
> +        meta->have_icc_profile = have_icc_profile;
> +        meta->csp = color_space;
> +        meta->primaries = primaries;
> +        meta->wp = white_point;
> +        meta->trc = trc;
> +        if (!meta->bit_depth) /* not written above when all_default is set */
> +            meta->bit_depth = 8;
> +        meta->num_extra_channels = num_extra_channels;
> +    }
> +
> +    /* default transform */
> +    if (!get_bits1(gb)) {
> +        /* opsin inverse matrix */
> +        if (xyb_encoded && !get_bits1(gb))
> +            skip_bits_long(gb, 16 * 16);
> +        /* cw_mask and default weights */
> +        if (get_bits1(gb))
> +            skip_bits_long(gb, 16 * 15);
> +        if (get_bits1(gb))
> +            skip_bits_long(gb, 16 * 55);
> +        if (get_bits1(gb))
> +            skip_bits_long(gb, 16 * 210);
> +    }
> +
> +    if (!have_icc_profile) {
> +        int bits_remaining = 7 - ((get_bits_count(gb) - 1) & 0x7); /* bits up to the next byte boundary, 0 if already aligned */
> +        if (bits_remaining && get_bits(gb, bits_remaining)) /* nonzero padding is rejected regardless of validate */
> +            return AVERROR_INVALIDDATA;
> +    }
> +
> +    if (get_bits_left(gb) < 0)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    return get_bits_count(gb);
> +}
> +
> +/*
> + * copies as much of the codestream into the buffer as possible
> + * pass a shorter buflen to request less
> + * returns the number of bytes consumed from input, may be greater than input_len
> + * if the input doesn't end on an ISOBMFF-box boundary
> + *
> + * The input is walked box by box: the payload of jxlc and jxlp boxes is
> + * appended to buffer at offset *copied, every other box is skipped.
> + * Walking stops after a jxlc box, after a jxlp box whose index has the
> + * top bit set, when the input is exhausted or when buffer is full.
> + *
> + * *copied is both read and updated, so the caller has to initialize it.
> + * Returns AVERROR_BUFFER_TOO_SMALL when a box header is cut short and
> + * AVERROR_INVALIDDATA when a box size is smaller than its own header.
> + */
> +int ff_jpegxl_collect_codestream_header(const uint8_t *input_buffer, int input_len,
> +                                        uint8_t *buffer, int buflen, int *copied)
> +{
> +    GetByteContext gb;
> +    int pos = 0, last_box = 0;
> +    bytestream2_init(&gb, input_buffer, input_len);
> +
> +    while (1) {
> +        uint64_t size;
> +        uint32_t tag;
> +        int head_size = 8; /* 32-bit size + tag */
> +
> +        if (bytestream2_get_bytes_left(&gb) < 8)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +
> +        size = bytestream2_get_be32(&gb);
> +        if (size == 1) {
> +            /* a size of 1 announces a 64-bit size; 8 bytes of it + 4 of tag remain */
> +            if (bytestream2_get_bytes_left(&gb) < 12)
> +                return AVERROR_BUFFER_TOO_SMALL;
> +            size = bytestream2_get_be64(&gb);
> +            head_size = 16;
> +        }
> +        /* invalid ISOBMFF size */
> +        if (size && size <= head_size)
> +            return AVERROR_INVALIDDATA;
> +        if (size)
> +            size -= head_size;
> +
> +        tag = bytestream2_get_le32(&gb);
> +
> +        if (tag == MKTAG('j','x','l','p')) {
> +            uint32_t idx;
> +            if (bytestream2_get_bytes_left(&gb) < 4)
> +                return AVERROR_BUFFER_TOO_SMALL;
> +            idx = bytestream2_get_be32(&gb);
> +            if (idx >= UINT32_C(0x80000000))
> +                last_box = 1;
> +            if (size) {
> +                if (size <= 4)
> +                    return AVERROR_INVALIDDATA;
> +                size -= 4;
> +            }
> +            /*
> +             * the index has been consumed from the input and is not part
> +             * of the payload any more, so it has to count as header;
> +             * otherwise the returned position is 4 bytes short for every
> +             * jxlp box
> +             */
> +            head_size += 4;
> +        }
> +        if (tag == MKTAG('j','x','l','c'))
> +            last_box = 1;
> +
> +        /*
> +         * size = 0 means "until EOF". this is legal but uncommon
> +         * here we just set it to the remaining size of the probe buffer
> +         */
> +        if (!size)
> +            size = bytestream2_get_bytes_left(&gb);
> +
> +        /* NOTE(review): size comes from the file and may not fit in an int; confirm callers cope */
> +        pos += size + head_size;
> +
> +        if (tag == MKTAG('j','x','l','c') || tag == MKTAG('j','x','l','p')) {
> +            if (size > buflen - *copied)
> +                size = buflen - *copied;
> +            /*
> +             * arbitrary chunking of the payload makes this memcpy hard to avoid
> +             * in practice this will only be performed one or two times at most
> +             */
> +            *copied += bytestream2_get_buffer(&gb, buffer + *copied, size);
> +        } else {
> +            bytestream2_skip(&gb, size);
> +        }
> +        if (last_box || bytestream2_get_bytes_left(&gb) <= 0 || *copied >= buflen)
> +            break;
> +    }
> +
> +    return pos;
> +}
> diff --git a/libavcodec/jpegxl_parse.h b/libavcodec/jpegxl_parse.h
> new file mode 100644
> index 0000000000..0602f4d409
> --- /dev/null
> +++ b/libavcodec/jpegxl_parse.h
> @@ -0,0 +1,72 @@
> +/*
> + * JPEG XL Header Parser
> + * Copyright (c) 2023 Leo Izen <leo.izen at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_JPEGXL_PARSE_H
> +#define AVCODEC_JPEGXL_PARSE_H
> +
> +#include <stdint.h>
> +
> +#include "libavutil/rational.h"
> +
> +#include "jpegxl.h"
> +
> +typedef struct FFJXLMetadata {
> +    uint32_t width;  /* exchanged with height by the header parser when the orientation field is above 3 */
> +    uint32_t height;
> +    uint32_t coded_width;  /* as read from the size header, never exchanged */
> +    uint32_t coded_height;
> +    int bit_depth; /* the header parser falls back to 8 when this is still 0 */
> +    int have_alpha; /* set to 1 when an alpha extra channel is found, never reset by the parser */
> +    /*
> +     * offset, in bits, of the animation header
> +     * zero if not animated
> +     */
> +    int animation_offset;
> +    AVRational timebase; /* only written when an animation header is present */
> +    FFJXLColorSpace csp;
> +    FFJXLWhitePoint wp;
> +    FFJXLPrimaries primaries;
> +    FFJXLTransferCharacteristic trc; /* raw 24-bit gamma, or an enumerated value plus 1 << 24 */
> +
> +    /* used by the parser */
> +    int xyb_encoded;
> +    int have_icc_profile;
> +    int have_timecodes; /* only written when an animation header is present */
> +    uint32_t num_extra_channels;
> +} FFJXLMetadata;
> +
> +/*
> + * copies as much of the codestream into the buffer as possible
> + * pass a shorter buflen to request less
> + * returns the number of bytes consumed from input, may be greater than input_len
> + * if the input doesn't end on an ISOBMFF-box boundary
> + */
> +int ff_jpegxl_collect_codestream_header(const uint8_t *input_buffer, int input_len,
> +                                        uint8_t *buffer, int buflen, int *copied);
> +
> +/*
> + * Parse the codestream header with the provided buffer. Returns negative upon failure,
> + * or the number of bits consumed upon success.
> + * The FFJXLMetadata parameter may be NULL, in which case it's ignored.
> + */
> +int ff_jpegxl_parse_codestream_header(const uint8_t *buf, int buflen, FFJXLMetadata *meta, int validate);
> +
> +#endif /* AVCODEC_JPEGXL_PARSE_H */
> diff --git a/libavcodec/jpegxl_parser.c b/libavcodec/jpegxl_parser.c
> new file mode 100644
> index 0000000000..2cceaa5a7b
> --- /dev/null
> +++ b/libavcodec/jpegxl_parser.c
> @@ -0,0 +1,1455 @@
> +/**
> + * JPEG XL parser
> + * Copyright (c) 2023 Leo Izen <leo.izen at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <errno.h>
> +#include <stdint.h>
> +#include <string.h>
> +
> +#include "libavutil/attributes.h"
> +#include "libavutil/error.h"
> +#include "libavutil/intmath.h"
> +#include "libavutil/macros.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/pixfmt.h"
> +
> +#include "bytestream.h"
> +#include "codec_id.h"
> +#define UNCHECKED_BITSTREAM_READER 0
> +#define BITSTREAM_READER_LE
> +#include "get_bits.h"
> +#include "jpegxl.h"
> +#include "jpegxl_parse.h"
> +#include "parser.h"
> +#include "vlc.h"
> +
> +#define JXL_FLAG_NOISE 1
> +#define JXL_FLAG_PATCHES 2
> +#define JXL_FLAG_SPLINES 16
> +#define JXL_FLAG_USE_LF_FRAME 32
> +#define JXL_FLAG_SKIP_ADAPTIVE_LF_SMOOTH 128
> +
> +#define clog1p(x) (ff_log2(x) + !!(x)) /* ff_log2(x) plus one for nonzero x; evaluates x twice */
> +#define unpack_signed(x) (((x) & 1 ? -(x)-1 : (x))/2) /* even x maps to x / 2, odd x to -(x + 1) / 2; evaluates x more than once */
> +#define div_ceil(x, y) (((x) - 1) / (y) + 1) /* x / y rounded up, for x >= 1 */
> +
> +typedef struct JXLHybridUintConf {
> +    int split_exponent;
> +    uint32_t msb_in_token;
> +    uint32_t lsb_in_token;
> +} JXLHybridUintConf;
> +
> +typedef struct JXLSymbolDistribution {
> +    JXLHybridUintConf config;
> +    int log_bucket_size;
> +    /* this is the actual size of the alphabet */
> +    int alphabet_size;
> +    /* ceil(log(alphabet_size)) */
> +    int log_alphabet_size;
> +
> +    /* for prefix code distributions */
> +    VLC vlc;
> +    /* in case bits == 0 */
> +    uint32_t default_symbol;
> +
> +    /*
> +     * each (1 << log_alphabet_size) length
> +     * with log_alphabet_size <= 8
> +     */
> +    /* frequencies associated with this Distribution */
> +    uint32_t freq[258];
> +    /* cutoffs for using the symbol table */
> +    uint16_t cutoffs[258];
> +    /* the symbol table for this distribution */
> +    uint16_t symbols[258];
> +    /* the offset for symbols */
> +    uint16_t offsets[258];
> +
> +    /* if this distribution contains only one symbol this is its index */
> +    int uniq_pos;
> +} JXLSymbolDistribution;
> +
> +typedef struct JXLDistributionBundle {
> +    /* lz77 flags */
> +    int lz77_enabled;
> +    uint32_t lz77_min_symbol;
> +    uint32_t lz77_min_length;
> +    JXLHybridUintConf lz_len_conf;
> +
> +    /* one entry for each distribution */
> +    uint8_t *cluster_map;
> +    /* length of cluster_map */
> +    int num_dist;
> +
> +    /* one for each cluster */
> +    JXLSymbolDistribution *dists;
> +    int num_clusters;
> +
> +    /* whether to use brotli prefixes or ans */
> +    int use_prefix_code;
> +    /* bundle log alphabet size, dist ones may be smaller */
> +    int log_alphabet_size;
> +} JXLDistributionBundle;
> +
> +typedef struct JXLEntropyDecoder {
> +
> +    /* state is a positive 32-bit integer, or -1 if unset */
> +    int64_t state;
> +
> +    /* lz77 values */
> +    uint32_t num_to_copy;
> +    uint32_t copy_pos;
> +    uint32_t num_decoded;
> +
> +    /* length is (1 << 20) */
> +    /* if lz77 is enabled for this bundle */
> +    /* if lz77 is disabled it's NULL */
> +    uint32_t *window;
> +
> +    /* primary bundle associated with this distribution */
> +    JXLDistributionBundle bundle;
> +
> +    /* for av_log */
> +    void *logctx;
> +} JXLEntropyDecoder;
> +
> +typedef struct JXLFrame {
> +    FFJXLFrameType type;
> +    FFJXLFrameEncoding encoding;
> +
> +    int is_last;
> +    int full_frame;
> +
> +    uint32_t total_length;
> +    uint32_t body_length;
> +} JXLFrame;
> +
> +typedef struct JXLCodestream {
> +    FFJXLMetadata meta;
> +    JXLFrame frame;
> +} JXLCodestream;
> +
> +typedef struct JXLParseContext {
> +    ParseContext pc;
> +    JXLCodestream codestream;
> +
> +    /* using ISOBMFF-based container */
> +    int container;
> +    int skip;
> +    int copied;
> +    int collected_size;
> +    int codestream_length;
> +    int skipped_icc;
> +    int next;
> +
> +    uint8_t cs_buffer[4096];
> +} JXLParseContext;
> +
> +/* used for reading brotli prefixes */
> +static const VLCElem level0_table[16] = {
> +    {0, 2}, {4, 2}, {3, 2}, {2, 3}, {0, 2}, {4, 2}, {3, 2}, {1, 4},
> +    {0, 2}, {4, 2}, {3, 2}, {2, 3}, {0, 2}, {4, 2}, {3, 2}, {5, 4},

VLCElem is a struct, not an array. You should not presume anything about
the order of the elements in a struct, i.e. use designated initializers
(hide it behind a macro).

(VLCElem.len actually fits into one byte; I always pondered whether to
make it an int8_t and add another (u)int8_t field to add more
information; VLC_RL_ELEM already uses this.)

> +};
> +
> +/* prefix table for populating ANS distribution */
> +static const VLCElem dist_prefix_table[128] = {
> +    {10, 3}, {12, 7}, {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},
> +    {10, 3}, {0, 5},  {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},
> +    {10, 3}, {11, 6}, {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},
> +    {10, 3}, {0, 5},  {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},
> +    {10, 3}, {13, 7}, {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},
> +    {10, 3}, {0, 5},  {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},
> +    {10, 3}, {11, 6}, {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},
> +    {10, 3}, {0, 5},  {7, 3}, {3, 4}, {6, 3}, {8, 3}, {9, 3}, {5, 4},
> +    {10, 3}, {4, 4},  {7, 3}, {1, 4}, {6, 3}, {8, 3}, {9, 3}, {2, 4},

How much would it cost to create this table at runtime?

> +};
> +/*
> + * Order in which the 18 level-1 code lengths are transmitted:
> + * read_vlc_prefix() stores the i-th length it reads into
> + * level1_lens[prefix_codelen_map[i]].
> + */
> +static const uint8_t prefix_codelen_map[18] = {
> +    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
> +};
> +
> +/**
> + * Read a variable-length 8-bit integer.
> + * Used when populating the ANS frequency tables.
> + *
> + * Layout: one flag bit (a cleared flag means the value is 0), then a 3-bit
> + * count n, then n further bits. The result is those n bits with bit n set
> + * on top of them.
> + */
> +static av_always_inline uint8_t jxl_u8(GetBitContext *gb)
> +{
> +    int n;
> +    if (!get_bits1(gb))
> +        return 0;
> +    n = get_bits(gb, 3);
> +
> +    return get_bitsz(gb, n) | (1 << n);
> +}
> +
> +/*
> + * Read a U32(c_i + u(u_i)).
> + *
> + * A 2-bit selector picks one of the four (constant, bit count) pairs; the
> + * result is the chosen constant plus that many bits read from the stream.
> + * Nothing further is read when the chosen bit count is 0.
> + */
> +static av_always_inline uint32_t jxl_u32(GetBitContext *gb,
> +                        uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3,
> +                        uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3)
> +{
> +    const uint32_t constants[4] = {c0, c1, c2, c3};
> +    const uint32_t ubits    [4] = {u0, u1, u2, u3};
> +    uint32_t ret, choice = get_bits(gb, 2);
> +
> +    ret = constants[choice];
> +    if (ubits[choice])
> +        ret += get_bits_long(gb, ubits[choice]);
> +
> +    return ret;
> +}
> +/* Read an enum value: a U32 whose four choices are 0, 1, 2 + u(4) and 18 + u(6). */
> +static av_always_inline uint32_t jxl_enum(GetBitContext *gb)
> +{
> +    return jxl_u32(gb, 0, 1, 2, 18, 0, 0, 4, 6);
> +}
> +
> +/*
> + * Read a U64().
> + *
> + * A 2-bit selector chooses between 0, 1 + u(4), 17 + u(8) and a
> + * variable-length form. The latter starts with 12 bits and is extended by
> + * groups that are each announced by a set flag bit: 8-bit groups while fewer
> + * than 60 bits have been collected, then one final 4-bit group.
> + */
> +static uint64_t jxl_u64(GetBitContext *gb)
> +{
> +    uint64_t shift = 12, ret;
> +
> +    switch (get_bits(gb, 2)) {
> +    case 1:
> +        ret = 1 + get_bits(gb, 4);
> +        break;
> +    case 2:
> +        ret = 17 + get_bits(gb, 8);
> +        break;
> +    case 3:
> +        ret = get_bits(gb, 12);
> +        while (get_bits1(gb)) {
> +            if (shift < 60) {
> +                ret |= (uint64_t)get_bits(gb, 8) << shift;
> +                shift += 8;
> +            } else {
> +                ret |= (uint64_t)get_bits(gb, 4) << shift;  // bits 60..63 complete the value
> +                break;
> +            }
> +        }
> +        break;
> +    default:
> +        ret = 0;
> +    }
> +
> +    return ret;
> +}
> +/*
> + * Read a hybrid-integer configuration (split_exponent, msb_in_token,
> + * lsb_in_token) into conf.
> + *
> + * When split_exponent equals log_alphabet_size the other two fields are set
> + * to 0 and nothing more is read.
> + *
> + * Returns 0 on success, or AVERROR_INVALIDDATA when msb_in_token alone or
> + * msb_in_token + lsb_in_token exceeds split_exponent.
> + */
> +static int read_hybrid_uint_conf(GetBitContext *gb, JXLHybridUintConf *conf, int log_alphabet_size)
> +{
> +    conf->split_exponent = get_bitsz(gb, clog1p(log_alphabet_size));
> +    if (conf->split_exponent == log_alphabet_size) {
> +        conf->msb_in_token = conf->lsb_in_token = 0;
> +        return 0;
> +    }
> +
> +    conf->msb_in_token = get_bitsz(gb, clog1p(conf->split_exponent));
> +    if (conf->msb_in_token > conf->split_exponent)
> +        return AVERROR_INVALIDDATA;
> +    conf->lsb_in_token = get_bitsz(gb, clog1p(conf->split_exponent - conf->msb_in_token));
> +    if (conf->msb_in_token + conf->lsb_in_token > conf->split_exponent)
> +        return AVERROR_INVALIDDATA;
> +
> +    return 0;
> +}
> +/*
> + * Expand a token into a full integer according to conf and store it in
> + * *hybrid_uint.
> + *
> + * Tokens below 1 << split_exponent are returned as they are. A larger token
> + * carries lsb_in_token low bits and msb_in_token high bits (placed under an
> + * implicit leading 1); n extra bits read from gb are inserted between them.
> + *
> + * Returns 0 on success, or AVERROR_INVALIDDATA when n would be 32 or more.
> + */
> +static int read_hybrid_uint(GetBitContext *gb, const JXLHybridUintConf *conf, uint32_t token, uint32_t *hybrid_uint)
> +{
> +    uint32_t n, low, split = 1 << conf->split_exponent;
> +
> +    if (token < split) {
> +        *hybrid_uint = token;
> +        return 0;
> +    }
> +
> +    n = conf->split_exponent - conf->lsb_in_token - conf->msb_in_token +
> +        ((token - split) >> (conf->msb_in_token + conf->lsb_in_token));
> +    if (n >= 32)
> +        return AVERROR_INVALIDDATA;
> +    low = token & ((1 << conf->lsb_in_token) - 1);
> +    token >>= conf->lsb_in_token;
> +    token &= (1 << conf->msb_in_token) - 1;
> +    token |= 1 << conf->msb_in_token;  // implicit leading 1 above the msb bits
> +    *hybrid_uint = (((token << n) | get_bits_long(gb, n)) << conf->lsb_in_token ) | low;
> +
> +    return 0;
> +}
> +/*
> + * Read one prefix-coded symbol from gb.
> + *
> + * A distribution whose VLC has zero bits holds a single symbol; that symbol
> + * (default_symbol) is returned without consuming any input.
> + */
> +static inline uint32_t read_prefix_symbol(GetBitContext *gb, const JXLSymbolDistribution *dist)
> +{
> +    if (!dist->vlc.bits)
> +        return dist->default_symbol;
> +
> +    return get_vlc2(gb, dist->vlc.table, dist->vlc.bits, 1);
> +}
> +/*
> + * Decode one ANS symbol through the alias table of dist and advance
> + * dec->state.
> + *
> + * A negative state means that it has not been loaded yet, so 32 bits are
> + * read first. The low 12 bits of the state select a bucket and a position
> + * inside it; positions at or beyond the bucket's cutoff belong to the
> + * aliased symbol. Whenever the updated state falls below 1 << 16, 16 more
> + * bits are shifted in.
> + */
> +static uint32_t read_ans_symbol(GetBitContext *gb, JXLEntropyDecoder *dec, const JXLSymbolDistribution *dist)
> +{
> +    uint32_t index, i, pos, symbol, offset;
> +
> +    if (dec->state < 0)
> +        dec->state = get_bits_long(gb, 32);
> +
> +    index = dec->state & 0xFFF;
> +    i = index >> dist->log_bucket_size;
> +    pos = index & ((1 << dist->log_bucket_size) - 1);
> +    symbol = pos >= dist->cutoffs[i] ? dist->symbols[i] : i;
> +    offset = pos >= dist->cutoffs[i] ? dist->offsets[i] + pos : pos;
> +    dec->state = dist->freq[symbol] * (dec->state >> 12) + offset;
> +    if (dec->state < (1 << 16))
> +        dec->state = (dec->state << 16) | get_bits(gb, 16);
> +    dec->state &= 0xFFFFFFFF;
> +
> +    return symbol;
> +}
> +/*
> + * Decode the next integer for the given context from bundle and store it in
> + * *hybrid_uint.
> + *
> + * While an LZ77 copy is pending (dec->num_to_copy > 0) values are replayed
> + * from dec->window, which is indexed modulo 1 << 20. Otherwise a token is
> + * read with the prefix or ANS decoder of the cluster that context maps to.
> + * A token at or above lz77_min_symbol starts a new copy: its length comes
> + * from the token, its distance from a second token read with the
> + * distribution of the last context, and the function then calls itself to
> + * deliver the first copied value.
> + *
> + * Returns 0 on success, AVERROR(EINVAL) when context is out of range, or
> + * AVERROR_INVALIDDATA for a bad cluster index or hybrid integer.
> + */
> +static int decode_hybrid_varlen_uint(GetBitContext *gb, JXLEntropyDecoder *dec,
> +                                     const JXLDistributionBundle *bundle,
> +                                     uint32_t context, uint32_t *hybrid_uint)
> +{
> +    int ret;
> +    uint32_t token, distance;
> +    const JXLSymbolDistribution *dist;
> +
> +    if (dec->num_to_copy > 0) {
> +        *hybrid_uint = dec->window[dec->copy_pos++ & 0xFFFFF];
> +        dec->num_to_copy--;
> +        dec->window[dec->num_decoded++ & 0xFFFFF] = *hybrid_uint;
> +        return 0;
> +    }
> +
> +    if (context >= bundle->num_dist)
> +        return AVERROR(EINVAL);
> +    if (bundle->cluster_map[context] >= bundle->num_clusters)
> +        return AVERROR_INVALIDDATA;
> +
> +    dist = &bundle->dists[bundle->cluster_map[context]];
> +    if (bundle->use_prefix_code)
> +        token = read_prefix_symbol(gb, dist);
> +    else
> +        token = read_ans_symbol(gb, dec, dist);
> +
> +    if (bundle->lz77_enabled && token >= bundle->lz77_min_symbol) {
> +        const JXLSymbolDistribution *lz77dist = &bundle->dists[bundle->cluster_map[bundle->num_dist - 1]];
> +        ret = read_hybrid_uint(gb, &bundle->lz_len_conf, token - bundle->lz77_min_symbol, &dec->num_to_copy);
> +        if (ret < 0)
> +            return ret;
> +        dec->num_to_copy += bundle->lz77_min_length;
> +        if (bundle->use_prefix_code)
> +            token = read_prefix_symbol(gb, lz77dist);
> +        else
> +            token = read_ans_symbol(gb, dec, lz77dist);
> +        ret = read_hybrid_uint(gb, &lz77dist->config, token, &distance);
> +        if (ret < 0)
> +            return ret;
> +        distance++;
> +        distance = FFMIN3(distance, dec->num_decoded, 1 << 20);  // never reach back past the start or the window size
> +        dec->copy_pos = dec->num_decoded - distance;
> +        return decode_hybrid_varlen_uint(gb, dec, bundle, context, hybrid_uint);
> +    }
> +    ret = read_hybrid_uint(gb, &dist->config, token, hybrid_uint);
> +    if (ret < 0)
> +        return ret;
> +    if (bundle->lz77_enabled)
> +        dec->window[dec->num_decoded++ & 0xFFFFF] = *hybrid_uint;
> +
> +    return 0;
> +}
> +/*
> + * Read one ANS frequency distribution into dist->freq and set
> + * dist->alphabet_size.
> + *
> + * Three encodings are handled:
> + *  - simple code: one or two symbols that share 1 << 12 between them;
> + *    dist->uniq_pos records the symbol that holds all of it, if any;
> + *  - flat code: 1 << 12 spread as evenly as possible over the alphabet;
> + *  - general code: a log-count per symbol (value 13 introduces a run that
> + *    repeats the previous frequency), refined by extra bits; the symbol with
> + *    the largest log-count receives whatever is left of 1 << 12.
> + *
> + * dist->uniq_pos is -1 unless the simple code sets it.
> + * NOTE(review): log_alphabet_size is not used by this function.
> + *
> + * Returns 0 on success or AVERROR_INVALIDDATA.
> + */
> +static int populate_distribution(GetBitContext *gb, JXLSymbolDistribution *dist, int log_alphabet_size)
> +{
> +    int len = 0, shift, omit_log = -1, omit_pos = -1;
> +    int prev = 0, num_same = 0;
> +    uint32_t total_count = 0;
> +    uint8_t logcounts[258] = { 0 };
> +    uint8_t same[258] = { 0 };
> +    dist->uniq_pos = -1;
> +
> +    if (get_bits1(gb)) {
> +        /* simple code */
> +        dist->alphabet_size = 256;
> +        if (get_bits1(gb)) {
> +            uint8_t v1 = jxl_u8(gb);
> +            uint8_t v2 = jxl_u8(gb);
> +            if (v1 == v2)
> +                return AVERROR_INVALIDDATA;
> +            dist->freq[v1] = get_bits(gb, 12);
> +            dist->freq[v2] = (1 << 12) - dist->freq[v1];
> +            if (!dist->freq[v1])
> +                dist->uniq_pos = v2;
> +        } else {
> +            uint8_t x = jxl_u8(gb);
> +            dist->freq[x] = 1 << 12;
> +            dist->uniq_pos = x;
> +        }
> +        return 0;
> +    }
> +
> +    if (get_bits1(gb)) {
> +        /* flat code */
> +        dist->alphabet_size = jxl_u8(gb) + 1;
> +        for (int i = 0; i < dist->alphabet_size; i++)
> +            dist->freq[i] = (1 << 12) / dist->alphabet_size;
> +        for (int i = 0; i < (1 << 12) % dist->alphabet_size; i++)
> +            dist->freq[i]++;
> +        return 0;
> +    }
> +
> +    do {
> +        if (!get_bits1(gb))
> +            break;
> +    } while (++len < 3);
> +
> +    shift = (get_bitsz(gb, len) | (1 << len)) - 1;
> +    if (shift > 13)
> +        return AVERROR_INVALIDDATA;
> +
> +    dist->alphabet_size = jxl_u8(gb) + 3;
> +    for (int i = 0; i < dist->alphabet_size; i++) {
> +        logcounts[i] = get_vlc2(gb, dist_prefix_table, 7, 1);
> +        if (logcounts[i] == 13) {
> +            int rle = jxl_u8(gb);
> +            same[i] = rle + 5;
> +            i += rle + 3;  // together with the loop increment this skips the whole run
> +            continue;
> +        }
> +        if (logcounts[i] > omit_log) {
> +            omit_log = logcounts[i];
> +            omit_pos = i;
> +        }
> +    }
> +    if (omit_pos < 0 || omit_pos + 1 < dist->alphabet_size && logcounts[omit_pos + 1] == 13)
> +        return AVERROR_INVALIDDATA;
> +
> +    for (int i = 0; i < dist->alphabet_size; i++) {
> +        if (same[i]) {
> +            num_same = same[i] - 1;
> +            prev = i > 0 ? dist->freq[i - 1] : 0;
> +        }
> +        if (num_same) {
> +            dist->freq[i] = prev;
> +            num_same--;
> +        } else {
> +            if (i == omit_pos || !logcounts[i])
> +                continue;
> +            if (logcounts[i] == 1) {
> +                dist->freq[i] = 1;
> +            } else {
> +                int bitcount = FFMIN(FFMAX(0, shift - ((12 - logcounts[i] + 1) >> 1)), logcounts[i] - 1);
> +                dist->freq[i] = (1 << (logcounts[i] - 1)) + (get_bitsz(gb, bitcount) << (logcounts[i] - 1 - bitcount));
> +            }
> +        }
> +        total_count += dist->freq[i];
> +    }
> +    dist->freq[omit_pos] = (1 << 12) - total_count;
> +
> +    return 0;
> +}
> +/*
> + * Release what a bundle owns: the per-cluster VLC tables (prefix-coded
> + * bundles only), the distribution array and the cluster map.
> + */
> +static void dist_bundle_close(JXLDistributionBundle *bundle)
> +{
> +    if (bundle->use_prefix_code && bundle->dists)
> +        for (int i = 0; i < bundle->num_clusters; i++)
> +            ff_free_vlc(&bundle->dists[i].vlc);
> +    av_freep(&bundle->dists);
> +    av_freep(&bundle->cluster_map);
> +}
> +
> +/* Forward declaration: read_dist_clustering() and read_distribution_bundle() call each other. */
> +static int read_distribution_bundle(GetBitContext *gb, JXLEntropyDecoder *dec,
> +                                    JXLDistributionBundle *bundle, int num_dist);
> +/*
> + * Read the map that assigns each of bundle->num_dist contexts to a cluster,
> + * and derive bundle->num_clusters as the largest map entry plus one.
> + *
> + * A single context always maps to cluster 0. Otherwise the map is either
> + * "simple" (every entry is a fixed-width field of 0..3 bits) or "complex"
> + * (entries are entropy-coded with a nested single-context bundle and may be
> + * passed through an inverse move-to-front transform).
> + *
> + * bundle->cluster_map is allocated here and stays allocated on failure; the
> + * callers visible in this file release it with dist_bundle_close().
> + *
> + * Returns 0 on success, AVERROR(ENOMEM), an error from the nested decoder,
> + * or AVERROR_INVALIDDATA when there would be more clusters than contexts.
> + */
> +static int read_dist_clustering(GetBitContext *gb, JXLEntropyDecoder *dec, JXLDistributionBundle *bundle)
> +{
> +    int ret;
> +
> +    bundle->cluster_map = av_malloc(bundle->num_dist);
> +    if (!bundle->cluster_map)
> +        return AVERROR(ENOMEM);
> +
> +    if (bundle->num_dist == 1) {
> +        bundle->cluster_map[0] = 0;
> +        bundle->num_clusters = 1;
> +        return 0;
> +    }
> +
> +    if (get_bits1(gb)) {
> +        /* simple clustering */
> +        uint32_t nbits = get_bits(gb, 2);
> +        for (int i = 0; i < bundle->num_dist; i++)
> +            bundle->cluster_map[i] = get_bitsz(gb, nbits);
> +    } else {
> +        /* complex clustering */
> +        int use_mtf = get_bits1(gb);
> +        JXLDistributionBundle nested = { 0 };
> +        /* num_dist == 1 prevents this from recursing again */
> +        ret = read_distribution_bundle(gb, dec, &nested, 1);
> +        if (ret < 0) {
> +            dist_bundle_close(&nested);
> +            return ret;
> +        }
> +        for (int i = 0; i < bundle->num_dist; i++) {
> +            uint32_t clust;
> +            ret = decode_hybrid_varlen_uint(gb, dec, &nested, 0, &clust);
> +            if (ret < 0) {
> +                dist_bundle_close(&nested);
> +                return ret;
> +            }
> +            bundle->cluster_map[i] = clust;  /* NOTE(review): 32-bit value stored unchecked; the map is allocated one byte per entry, so large values presumably truncate -- confirm */
> +        }
> +        dec->state = -1;  // the next ANS read has to load a fresh state
> +        /* it's not going to necessarily be zero after reading */
> +        dec->num_to_copy = 0;
> +        dist_bundle_close(&nested);
> +        if (use_mtf) {
> +            uint8_t mtf[256];
> +            for (int i = 0; i < 256; i++)
> +                mtf[i] = i;
> +            for (int i = 0; i < bundle->num_dist; i++) {
> +                int index = bundle->cluster_map[i];
> +                bundle->cluster_map[i] = mtf[index];
> +                if (index) {
> +                    int value = mtf[index];
> +                    for (int j = index; j > 0; j--)
> +                        mtf[j] = mtf[j - 1];
> +                    mtf[0] = value;
> +                }
> +            }
> +        }
> +    }
> +    for (int i = 0; i < bundle->num_dist; i++) {
> +        if (bundle->cluster_map[i] >= bundle->num_clusters)
> +            bundle->num_clusters = bundle->cluster_map[i] + 1;
> +    }
> +
> +    if (bundle->num_clusters > bundle->num_dist)
> +        return AVERROR_INVALIDDATA;
> +
> +    return 0;
> +}
> +/*
> + * Build the alias table (cutoffs, symbols, offsets) that read_ans_symbol()
> + * uses to turn the low 12 bits of the ANS state into a symbol.
> + *
> + * The table has 1 << log_alphabet_size buckets of
> + * 1 << (12 - log_alphabet_size) slots each. Only symbols below the table
> + * size own a bucket, so a distribution that gives a non-zero frequency to a
> + * symbol beyond the table is rejected instead of being written past the
> + * table and past the 256-entry work stacks.
> + *
> + * dec is not used.
> + *
> + * Returns 0 on success, AVERROR(EINVAL) when log_alphabet_size is outside
> + * 0..8, or AVERROR_INVALIDDATA when the frequencies cannot be arranged into
> + * the table.
> + */
> +static int gen_alias_map(JXLEntropyDecoder *dec, JXLSymbolDistribution *dist, int log_alphabet_size)
> +{
> +    uint32_t bucket_size, table_size;
> +    uint8_t overfull[256], underfull[256];
> +    int overfull_pos = 0, underfull_pos = 0;
> +    int num_syms;
> +
> +    /* bucket indices are kept in the byte-sized stacks above */
> +    if (log_alphabet_size < 0 || log_alphabet_size > 8)
> +        return AVERROR(EINVAL);
> +
> +    dist->log_bucket_size = 12 - log_alphabet_size;
> +    bucket_size = 1 << dist->log_bucket_size;
> +    table_size = 1 << log_alphabet_size;
> +
> +    if (dist->uniq_pos >= 0) {
> +        /* the only symbol must itself fit into the table */
> +        if ((uint32_t)dist->uniq_pos >= table_size)
> +            return AVERROR_INVALIDDATA;
> +        for (int i = 0; i < table_size; i++) {
> +            dist->symbols[i] = dist->uniq_pos;
> +            dist->offsets[i] = bucket_size * i;
> +            dist->cutoffs[i] = 0;
> +        }
> +        return 0;
> +    }
> +
> +    /* symbols past the end of the table have no bucket to live in */
> +    for (int i = table_size; i < dist->alphabet_size; i++)
> +        if (dist->freq[i])
> +            return AVERROR_INVALIDDATA;
> +    num_syms = FFMIN(dist->alphabet_size, table_size);
> +
> +    for (int i = 0; i < num_syms; i++) {
> +        dist->cutoffs[i] = dist->freq[i];
> +        dist->symbols[i] = i;
> +        if (dist->cutoffs[i] > bucket_size)
> +            overfull[overfull_pos++] = i;
> +        else if (dist->cutoffs[i] < bucket_size)
> +            underfull[underfull_pos++] = i;
> +    }
> +
> +    /* buckets without a symbol of their own start out empty */
> +    for (int i = num_syms; i < table_size; i++) {
> +        dist->cutoffs[i] = 0;
> +        underfull[underfull_pos++] = i;
> +    }
> +
> +    /* move the excess of overfull buckets into underfull ones, one pair at
> +     * a time; every step pops one entry from each stack and pushes at most
> +     * one back, so neither stack grows beyond its initial fill */
> +    while (overfull_pos) {
> +        int o, u, by;
> +        /* this should be impossible */
> +        if (!underfull_pos)
> +            return AVERROR_INVALIDDATA;
> +        u = underfull[--underfull_pos];
> +        o = overfull[--overfull_pos];
> +        by = bucket_size - dist->cutoffs[u];
> +        dist->cutoffs[o] -= by;
> +        dist->symbols[u] = o;
> +        dist->offsets[u] = dist->cutoffs[o];
> +        if (dist->cutoffs[o] < bucket_size)
> +            underfull[underfull_pos++] = o;
> +        else if (dist->cutoffs[o] > bucket_size)
> +            overfull[overfull_pos++] = o;
> +    }
> +
> +    for (int i = 0; i < table_size; i++) {
> +        if (dist->cutoffs[i] == bucket_size) {
> +            /* exactly full: the bucket belongs entirely to its own symbol */
> +            dist->symbols[i] = i;
> +            dist->offsets[i] = 0;
> +            dist->cutoffs[i] = 0;
> +        } else {
> +            dist->offsets[i] -= dist->cutoffs[i];
> +        }
> +    }
> +
> +    return 0;
> +}
> +/*
> + * Read a "simple" prefix code of one to four explicitly listed symbols and
> + * build dist->vlc from it.
> + *
> + * Each symbol is dist->log_alphabet_size bits wide. With four symbols an
> + * extra bit selects between the length sets {1, 2, 3, 3} and {2, 2, 2, 2}.
> + * Symbols that share a code length are put into ascending order before the
> + * table is built. A single symbol needs no table: dist->vlc.bits is set to
> + * 0 and the symbol is stored in dist->default_symbol.
> + *
> + * Returns 0 or the result of ff_init_vlc_from_lengths().
> + */
> +static int read_simple_vlc_prefix(GetBitContext *gb, JXLEntropyDecoder *dec, JXLSymbolDistribution *dist)
> +{
> +    int nsym, tree_select, bits;
> +
> +    int8_t lens[4];
> +    int16_t symbols[4];
> +
> +    nsym = 1 + get_bits(gb, 2);
> +    for (int i = 0; i < nsym; i++)
> +        symbols[i] = get_bitsz(gb, dist->log_alphabet_size);
> +    if (nsym == 4)
> +        tree_select = get_bits1(gb);  // only present, and only read below, for four symbols
> +    switch (nsym) {
> +    case 1:
> +        dist->vlc.bits = 0;
> +        dist->default_symbol = symbols[0];
> +        return 0;
> +    case 2:
> +        bits = 1;
> +        lens[0] = 1, lens[1] = 1, lens[2] = 0, lens[3] = 0;
> +        if (symbols[1] < symbols[0])
> +            FFSWAP(int16_t, symbols[0], symbols[1]);
> +        break;
> +    case 3:
> +        bits = 2;
> +        lens[0] = 1, lens[1] = 2, lens[2] = 2, lens[3] = 0;
> +        if (symbols[2] < symbols[1])
> +            FFSWAP(int16_t, symbols[1], symbols[2]);
> +        break;
> +    case 4:
> +        if (tree_select) {
> +            bits = 3;
> +            lens[0] = 1, lens[1] = 2, lens[2] = 3, lens[3] = 3;
> +            if (symbols[3] < symbols[2])
> +                FFSWAP(int16_t, symbols[2], symbols[3]);
> +        } else {
> +            bits = 2;
> +            lens[0] = 2, lens[1] = 2, lens[2] = 2, lens[3] = 2;
> +            while (1) {  // repeat the compare-and-swap passes until all four are ascending
> +                if (symbols[1] < symbols[0])
> +                    FFSWAP(int16_t, symbols[0], symbols[1]);
> +                if (symbols[3] < symbols[2])
> +                    FFSWAP(int16_t, symbols[2], symbols[3]);
> +                if (symbols[1] <= symbols[2])
> +                    break;
> +                FFSWAP(int16_t, symbols[1], symbols[2]);
> +            }
> +        }
> +        break;
> +    default:
> +        // Challenge Complete! How did we get here?
> +        return AVERROR_BUG;
> +    }
> +
> +    return ff_init_vlc_from_lengths(&dist->vlc, bits, nsym, lens, 1, symbols,
> +                                    2, 2, 0, INIT_VLC_LE, dec->logctx);
> +}
> +
> +static int read_vlc_prefix(GetBitContext *gb, JXLEntropyDecoder *dec, JXLSymbolDistribution *dist)
> +{
> +    int8_t level1_lens[18] = { 0 };
> +    int8_t level1_lens_s[18] = { 0 };
> +    int16_t level1_syms[18] = { 0 };
> +    int level1_codecounts[19] = { 0 };
> +
> +    int8_t level2_lens[32768] = { 0 };
> +    int8_t level2_lens_s[32768] = { 0 };
> +    int16_t level2_syms[32768] = { 0 };
> +    int level2_codecounts[32769] = { 0 };

That is quite a lot of stack here.

> +
> +    int repeat_count_prev = 0, repeat_count_zero = 0, prev = 8;
> +    int total_code = 0, code, hskip, num_codes = 0;
> +
> +    VLC level1_vlc;
> +
> +    if (dist->alphabet_size == 1) {
> +        dist->vlc.bits = 0;
> +        dist->default_symbol = 0;
> +        return 0;
> +    }
> +
> +    hskip = get_bits(gb, 2);
> +    if (hskip == 1)
> +        return read_simple_vlc_prefix(gb, dec, dist);
> +
> +    level1_codecounts[0] = hskip;
> +    for (int i = hskip; i < 18; i++) {
> +        code = level1_lens[prefix_codelen_map[i]] = get_vlc2(gb, level0_table, 4, 1);
> +        level1_codecounts[code]++;
> +        if (code) {
> +            total_code += (32 >> code);

It also looks like your "code" variable should be renamed to len.
(I dislike that you duplicate part of ff_init_vlc_from_lengths() here,
but given the break condition below I don't see how to avoid it.)

> +            num_codes++;
> +        }
> +        if (total_code >= 32) {
> +            level1_codecounts[0] += 18 - i - 1;
> +            break;
> +        }
> +    }
> +
> +    if (total_code != 32 && num_codes >= 2 || num_codes < 1)
> +        return AVERROR_INVALIDDATA;
> +
> +    for (int i = 1; i < 19; i++)
> +         level1_codecounts[i] += level1_codecounts[i - 1];
> +
> +    for (int i = 17; i >= 0; i--) {
> +        int idx = --level1_codecounts[level1_lens[i]];
> +        level1_lens_s[idx] = level1_lens[i];
> +        level1_syms[idx] = i;
> +    }
> +
> +    ff_init_vlc_from_lengths(&level1_vlc, 5, 18, level1_lens_s, 1, level1_syms, 2, 2, 0, INIT_VLC_LE, dec->logctx);

Missing check.

> +
> +    total_code = 0;
> +    for (int i = 0; i < dist->alphabet_size; i++) {
> +        code = get_vlc2(gb, level1_vlc.table, 5, 1);
> +        if (code == 16) {
> +            int extra = 3 + get_bits(gb, 2);
> +            if (repeat_count_prev)
> +                extra = 4 * (repeat_count_prev - 2) - repeat_count_prev + extra;
> +            for (int j = 0; j < extra; j++)
> +                level2_lens[i + j] = prev;
> +            total_code += (32768 >> prev) * extra;
> +            i += extra - 1;
> +            repeat_count_prev += extra;
> +            repeat_count_zero = 0;
> +            level2_codecounts[prev] += extra;
> +        } else if (code == 17) {
> +            int extra = 3 + get_bits(gb, 3);
> +            if (repeat_count_zero > 0)
> +                extra = 8 * (repeat_count_zero - 2) - repeat_count_zero + extra;
> +            i += extra - 1;
> +            repeat_count_prev = 0;
> +            repeat_count_zero += extra;
> +            level2_codecounts[0] += extra;
> +        } else {
> +            level2_lens[i] = code;
> +            repeat_count_prev = repeat_count_zero = 0;
> +            if (code) {
> +                total_code += (32768 >> code);
> +                prev = code;
> +            }
> +            level2_codecounts[code]++;
> +        }
> +        if (total_code >= 32768) {
> +            level2_codecounts[0] += dist->alphabet_size - i - 1;
> +            break;
> +        }
> +    }
> +    ff_free_vlc(&level1_vlc);
> +
> +    if (total_code != 32768 && level2_codecounts[0] < dist->alphabet_size - 1)
> +        return AVERROR_INVALIDDATA;
> +
> +    for (int i = 1; i < dist->alphabet_size + 1; i++)
> +        level2_codecounts[i] += level2_codecounts[i - 1];
> +
> +    for (int i = dist->alphabet_size - 1; i >= 0; i--) {
> +        int idx = --level2_codecounts[level2_lens[i]];
> +        level2_lens_s[idx] = level2_lens[i];
> +        level2_syms[idx] = i;
> +    }
> +
> +    return ff_init_vlc_from_lengths(&dist->vlc, 15, dist->alphabet_size, level2_lens_s,
> +                                    1, level2_syms, 2, 2, 0, INIT_VLC_LE, dec->logctx);
> +}
> +/*
> + * Read a complete distribution bundle: the optional LZ77 parameters, the
> + * context-to-cluster map, and one symbol distribution per cluster (either a
> + * prefix code or an ANS frequency table with its alias map).
> + *
> + * The LZ77 window in dec is allocated on first use. Nothing that was
> + * allocated is released here on failure; the callers visible in this file
> + * clean up with dist_bundle_close() or entropy_decoder_close().
> + *
> + * Returns 0 on success, AVERROR(EINVAL) for num_dist <= 0, AVERROR(ENOMEM),
> + * AVERROR_INVALIDDATA for malformed data, or AVERROR_BUFFER_TOO_SMALL when
> + * reading ran past the end of gb.
> + */
> +static int read_distribution_bundle(GetBitContext *gb, JXLEntropyDecoder *dec,
> +                                    JXLDistributionBundle *bundle, int num_dist)
> +{
> +    int ret;
> +
> +    if (num_dist <= 0)
> +        return AVERROR(EINVAL);
> +
> +    bundle->num_dist = num_dist;
> +    bundle->lz77_enabled = get_bits1(gb);
> +    if (bundle->lz77_enabled) {
> +        bundle->lz77_min_symbol = jxl_u32(gb, 224, 512, 4096, 8, 0, 0, 0, 15);
> +        bundle->lz77_min_length = jxl_u32(gb, 3, 4, 5, 9, 0, 0, 2, 8);
> +        /* one extra context at the end carries the LZ77 distances */
> +        bundle->num_dist++;
> +        ret = read_hybrid_uint_conf(gb, &bundle->lz_len_conf, 8);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    if (bundle->lz77_enabled && !dec->window) {
> +        dec->window = av_malloc_array(1 << 20, sizeof(uint32_t));
> +        if (!dec->window)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    ret = read_dist_clustering(gb, dec, bundle);
> +    if (ret < 0)
> +        return ret;
> +    if (get_bits_left(gb) < 0)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    bundle->dists = av_calloc(bundle->num_clusters, sizeof(JXLSymbolDistribution));
> +    if (!bundle->dists)
> +        return AVERROR(ENOMEM);
> +
> +    bundle->use_prefix_code = get_bits1(gb);
> +    bundle->log_alphabet_size = bundle->use_prefix_code ? 15 : 5 + get_bits(gb, 2);
> +
> +    for (int i = 0; i < bundle->num_clusters; i++) {
> +        ret = read_hybrid_uint_conf(gb, &bundle->dists[i].config, bundle->log_alphabet_size);
> +        if (ret < 0)
> +            return ret;
> +        if (get_bits_left(gb) < 0)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +    }
> +
> +    if (bundle->use_prefix_code) {
> +        for (int i = 0; i < bundle->num_clusters; i++) {
> +            JXLSymbolDistribution *dist = &bundle->dists[i];
> +            if (get_bits1(gb)) {
> +                int n = get_bits(gb, 4);
> +                dist->alphabet_size = 1 + (1 << n) + get_bitsz(gb, n);
> +                /* n == 15 can encode up to 1 << 16 symbols, but
> +                 * read_vlc_prefix() indexes 1 << 15 entry stack arrays by
> +                 * symbol and stores symbols as int16_t, so anything above
> +                 * 1 << log_alphabet_size would be written out of bounds */
> +                if (dist->alphabet_size > (1 << bundle->log_alphabet_size))
> +                    return AVERROR_INVALIDDATA;
> +            } else {
> +                dist->alphabet_size = 1;
> +            }
> +            dist->log_alphabet_size = clog1p(dist->alphabet_size - 1);
> +        }
> +        for (int i = 0; i < bundle->num_clusters; i++) {
> +            ret = read_vlc_prefix(gb, dec, &bundle->dists[i]);
> +            if (ret < 0)
> +                return ret;
> +            if (get_bits_left(gb) < 0)
> +                return AVERROR_BUFFER_TOO_SMALL;
> +        }
> +    } else {
> +        for (int i = 0; i < bundle->num_clusters; i++) {
> +            ret = populate_distribution(gb, &bundle->dists[i], bundle->log_alphabet_size);
> +            if (ret < 0)
> +                return ret;
> +            if (get_bits_left(gb) < 0)
> +                return AVERROR_BUFFER_TOO_SMALL;
> +        }
> +        for (int i = 0; i < bundle->num_clusters; i++) {
> +            ret = gen_alias_map(dec, &bundle->dists[i], bundle->log_alphabet_size);
> +            if (ret < 0)
> +                return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +/* Free the LZ77 window and the primary bundle of dec; a NULL dec is ignored. */
> +static void entropy_decoder_close(JXLEntropyDecoder *dec)
> +{
> +    if (!dec)
> +        return;
> +    av_freep(&dec->window);
> +    dist_bundle_close(&dec->bundle);
> +}
> +/*
> + * Reset dec and read its primary distribution bundle with num_dist contexts
> + * from gb. avctx is kept only as the logging context.
> + *
> + * On failure everything allocated so far is released and the error code is
> + * returned, so the caller must not close dec again for cleanup's sake.
> + * Returns 0 on success.
> + */
> +static int entropy_decoder_init(void *avctx, GetBitContext *gb, JXLEntropyDecoder *dec, int num_dist)
> +{
> +    int ret;
> +
> +    memset(dec, 0, sizeof(*dec));
> +    dec->logctx = avctx;
> +    dec->state = -1;  // negative: read_ans_symbol() loads the state on first use
> +
> +    ret = read_distribution_bundle(gb, dec, &dec->bundle, num_dist);
> +    if (ret < 0) {
> +        entropy_decoder_close(dec);
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +/*
> + * Decode one value for the given context from the primary bundle of dec.
> + *
> + * Returns the decoded 32-bit value as a non-negative int64_t, or a negative
> + * error code from decode_hybrid_varlen_uint().
> + */
> +static int64_t entropy_decoder_read_symbol(GetBitContext *gb, JXLEntropyDecoder *dec, uint32_t context)
> +{
> +    int ret;
> +    uint32_t hybrid_uint;
> +
> +    ret = decode_hybrid_varlen_uint(gb, dec, &dec->bundle, context, &hybrid_uint);
> +    if (ret < 0)
> +        return ret;
> +
> +    return hybrid_uint;
> +}
> +/*
> + * Select the entropy-coding context for symbol number i of the encoded ICC
> + * profile from the two previously decoded values b1 (most recent) and b2.
> + *
> + * The first 129 symbols (i <= 128) all use context 0. After that b1 is
> + * sorted into one of eight classes (p1) and b2 into one of five (p2), and
> + * the context is 1 + p1 + 8 * p2, i.e. a value in 1..40.
> + */
> +static inline uint32_t icc_context(uint64_t i, uint32_t b1, uint32_t b2)
> +{
> +    uint32_t p1, p2;
> +    if (i <= 128)
> +        return 0;
> +    if (b1 >= 'a' && b1 <= 'z' || b1 >= 'A' && b1 <= 'Z')
> +        p1 = 0;
> +    else if (b1 >= '0' && b1 <= '9' || b1 == '.' || b1 == ',')
> +        p1 = 1;
> +    else if (b1 <= 1)
> +        p1 = b1 + 2;
> +    else if (b1 > 1 && b1 < 16)
> +        p1 = 4;
> +    else if (b1 > 240 && b1 < 255)
> +        p1 = 5;
> +    else if (b1 == 255)
> +        p1 = 6;
> +    else
> +        p1 = 7;
> +
> +    if (b2 >= 'a' && b2 <= 'z' || b2 >= 'A' && b2 <= 'Z')
> +        p2 = 0;
> +    else if (b2 >= '0' && b2 <= '9' || b2 == '.' || b2 == ',')
> +        p2 = 1;
> +    else if (b2 < 16)
> +        p2 = 2;
> +    else if (b2 > 240)
> +        p2 = 3;
> +    else
> +        p2 = 4;
> +
> +    return 1 + p1 + p2 * 8;
> +}
> +/*
> + * Context for a value x: clog1p(x) capped at 7.
> + * NOTE(review): clog1p() is defined outside this excerpt; presumably it is
> + * ceil(log2(x + 1)) -- confirm.
> + */
> +static inline uint32_t toc_context(uint32_t x)
> +{
> +    return FFMIN(7, clog1p(x));
> +}
> +/*
> + * Export the parsed image metadata: width, height and pixel format go to
> + * the parser context s; colorspace, primaries and transfer characteristic
> + * go to avctx. Anything without a matching FFmpeg constant is reported as
> + * unspecified (or AV_PIX_FMT_NONE for gray + alpha above 16 bits).
> + */
> +static void populate_fields(AVCodecParserContext *s, AVCodecContext *avctx, const FFJXLMetadata *meta)
> +{
> +    s->width = meta->width;
> +    s->height = meta->height;
> +
> +    switch (meta->csp) {
> +    case JPEGXL_CS_RGB:
> +    case JPEGXL_CS_XYB:
> +        avctx->colorspace = AVCOL_SPC_RGB;
> +        break;
> +    default:
> +        avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
> +    }
> +
> +    if (meta->wp == JPEGXL_WP_D65) {
> +        switch (meta->primaries) {
> +        case JPEGXL_PR_SRGB:
> +            avctx->color_primaries = AVCOL_PRI_BT709;
> +            break;
> +        case JPEGXL_PR_P3:
> +            avctx->color_primaries = AVCOL_PRI_SMPTE432;
> +            break;
> +        case JPEGXL_PR_2100:
> +            avctx->color_primaries = AVCOL_PRI_BT2020;
> +            break;
> +        default:
> +            avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
> +        }
> +    } else if (meta->wp == JPEGXL_WP_DCI && meta->primaries == JPEGXL_PR_P3) {
> +        avctx->color_primaries = AVCOL_PRI_SMPTE431;
> +    } else {
> +        avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
> +    }
> +
> +    if (meta->trc > JPEGXL_TR_GAMMA) {  // values above JPEGXL_TR_GAMMA carry an enum offset by that constant
> +        FFJXLTransferCharacteristic trc = meta->trc - JPEGXL_TR_GAMMA;
> +        switch (trc) {
> +        case JPEGXL_TR_BT709:
> +            avctx->color_trc = AVCOL_TRC_BT709;
> +            break;
> +        case JPEGXL_TR_LINEAR:
> +            avctx->color_trc = AVCOL_TRC_LINEAR;
> +            break;
> +        case JPEGXL_TR_SRGB:
> +            avctx->color_trc = AVCOL_TRC_IEC61966_2_1;
> +            break;
> +        case JPEGXL_TR_PQ:
> +            avctx->color_trc = AVCOL_TRC_SMPTEST2084;
> +            break;
> +        case JPEGXL_TR_DCI:
> +            avctx->color_trc = AVCOL_TRC_SMPTE428;
> +            break;
> +        case JPEGXL_TR_HLG:
> +            avctx->color_trc = AVCOL_TRC_ARIB_STD_B67;
> +            break;
> +        default:
> +            avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
> +        }
> +    } else if (meta->trc > 0) {  /* NOTE(review): presumably a gamma value scaled by 1e5, so the windows sit around 1/2.2 and 1/2.8 -- confirm */
> +        if (meta->trc > 45355 && meta->trc < 45555)
> +            avctx->color_trc = AVCOL_TRC_GAMMA22;
> +        else if (meta->trc > 35614 && meta->trc < 35814)
> +            avctx->color_trc = AVCOL_TRC_GAMMA28;
> +        else
> +            avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
> +    } else {
> +        avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
> +    }
> +
> +    if (meta->csp == JPEGXL_CS_GRAY) {
> +        if (meta->bit_depth <= 8)
> +            s->format = meta->have_alpha ? AV_PIX_FMT_YA8 : AV_PIX_FMT_GRAY8;
> +        else if (meta->bit_depth <= 16)
> +            s->format = meta->have_alpha ? AV_PIX_FMT_YA16 : AV_PIX_FMT_GRAY16;
> +        else
> +            s->format = meta->have_alpha ? AV_PIX_FMT_NONE : AV_PIX_FMT_GRAYF32;
> +    } else {
> +        if (meta->bit_depth <= 8)
> +            s->format = meta->have_alpha ? AV_PIX_FMT_RGBA : AV_PIX_FMT_RGB24;
> +        else if (meta->bit_depth <= 16)
> +            s->format = meta->have_alpha ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48;
> +        else
> +            s->format = meta->have_alpha ? AV_PIX_FMT_RGBAF32 : AV_PIX_FMT_RGBF32;
> +    }
> +}
> +/*
> + * Advance gb past an entropy-coded ICC profile without keeping its content.
> + *
> + * The encoded size is read as a U64, a 41-context entropy decoder is set
> + * up, and that many symbols are decoded and dropped; only the two most
> + * recent values are kept because icc_context() needs them. ctx is not used.
> + *
> + * Returns 0 on success, AVERROR_INVALIDDATA for a zero size, an error from
> + * the entropy decoder, or AVERROR_BUFFER_TOO_SMALL when reading ran past the
> + * end of gb. The decoder is closed on every path.
> + */
> +static int skip_icc_profile(void *avctx, JXLParseContext *ctx, GetBitContext *gb)
> +{
> +    int64_t ret;
> +    uint32_t last = 0, last2 = 0;
> +    JXLEntropyDecoder dec;
> +    uint64_t enc_size = jxl_u64(gb);
> +
> +    if (!enc_size)
> +        return AVERROR_INVALIDDATA;
> +
> +    ret = entropy_decoder_init(avctx, gb, &dec, 41);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (get_bits_left(gb) < 0) {
> +        entropy_decoder_close(&dec);
> +        return AVERROR_BUFFER_TOO_SMALL;
> +    }
> +
> +    for (uint64_t read = 0; read < enc_size; read++) {
> +        ret = entropy_decoder_read_symbol(gb, &dec, icc_context(read, last, last2));
> +        if (ret < 0 || get_bits_left(gb) < 0) {
> +            entropy_decoder_close(&dec);
> +            return ret < 0 ? ret : AVERROR_BUFFER_TOO_SMALL;
> +        }
> +        last2 = last;
> +        last = ret;
> +    }
> +
> +    entropy_decoder_close(&dec);
> +
> +    return 0;
> +}
> +/*
> + * Skip an extensions block: a U64 bit mask, one U64 length for every set
> + * bit of the mask, then the payload, whose size in bits is the sum of those
> + * lengths.
> + *
> + * Returns 0 on success (including when no extension is present), or
> + * AVERROR_BUFFER_TOO_SMALL when gb does not hold the whole block or the
> + * summed length exceeds INT_MAX.
> + */
> +static int skip_extensions(GetBitContext *gb)
> +{
> +    uint64_t extensions = jxl_u64(gb), extensions_len = 0;
> +
> +    if (get_bits_left(gb) < 0)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    if (!extensions)
> +        return 0;
> +
> +    for (int i = 0; i < 64; i++) {
> +        if (extensions & (UINT64_C(1) << i))
> +            extensions_len += jxl_u64(gb);
> +        if (get_bits_left(gb) < 0)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +    }
> +
> +    if (extensions_len > INT_MAX || get_bits_left(gb) < extensions_len)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    skip_bits_long(gb, extensions_len);
> +
> +    return 0;
> +}
> +
> +/**
> + * Parse one frame header together with its table of contents (TOC).
> + *
> + * Fills ctx->codestream.frame: type, encoding and is_last come from the
> + * header, body_length is the sum of all TOC entries and total_length adds the
> + * size of the header and TOC themselves. Both lengths are counted in bits.
> + *
> + * @param avctx logging context handed to entropy_decoder_init()
> + * @param ctx   parser context supplying the image metadata and receiving the frame
> + * @param gb    bit reader positioned at the start of the frame header
> + * @return 0 on success, AVERROR_BUFFER_TOO_SMALL if the reader ran out of
> + *         data, AVERROR_INVALIDDATA or another negative error code otherwise
> + */
> +static int parse_frame_header(void *avctx, JXLParseContext *ctx, GetBitContext *gb)
> +{
> +    int all_default, do_yCbCr = 0, num_passes = 1, ret;
> +    int group_size_shift = 1, lf_level = 0, save_as_ref = 0;
> +    int have_crop = 0, full_frame = 1, resets_canvas = 1, upsampling = 1;
> +    JXLFrame *frame = &ctx->codestream.frame;
> +    const FFJXLMetadata *meta = &ctx->codestream.meta;
> +    int32_t x0 = 0, y0 = 0;
> +    uint32_t duration = 0, width = meta->coded_width, height = meta->coded_height;
> +    uint32_t name_len, num_groups, num_lf_groups, group_dim, lf_group_dim, toc_count;
> +    uint64_t flags = 0;
> +    int start_len = get_bits_count(gb);
> +
> +    memset(frame, 0, sizeof(*frame));
> +    frame->is_last = 1;
> +
> +    all_default = get_bits1(gb);
> +    if (!all_default) {
> +        frame->type = get_bits(gb, 2);
> +        frame->encoding = get_bits1(gb);
> +        flags = jxl_u64(gb);
> +        if (!meta->xyb_encoded)
> +            do_yCbCr = get_bits1(gb);
> +        if (!(flags & JXL_FLAG_USE_LF_FRAME)) {
> +            if (do_yCbCr)
> +                skip_bits(gb, 6); // jpeg upsampling
> +            upsampling = jxl_u32(gb, 1, 2, 4, 8, 0, 0, 0, 0);
> +            skip_bits_long(gb, 2 * meta->num_extra_channels);
> +            if (get_bits_left(gb) < 0)
> +                return AVERROR_BUFFER_TOO_SMALL;
> +        }
> +        if (frame->encoding == JPEGXL_ENC_MODULAR)
> +            group_size_shift = get_bits(gb, 2);
> +        else if (meta->xyb_encoded)
> +            skip_bits(gb, 6); // xqm and bqm scales
> +        if (frame->type != JPEGXL_FRAME_REFERENCE_ONLY) {
> +            num_passes = jxl_u32(gb, 1, 2, 3, 4, 0, 0, 0, 3);
> +            if (num_passes != 1) {
> +                int num_ds = jxl_u32(gb, 0, 1, 2, 3, 0, 0, 0, 1);
> +                skip_bits(gb, 2 * (num_passes - 1)); // shift
> +                skip_bits(gb, 2 * num_ds); // downsample
> +                for (int i = 0; i < num_ds; i++)
> +                    jxl_u32(gb, 0, 1, 2, 0, 0, 0, 0, 3);
> +            }
> +        }
> +        if (frame->type == JPEGXL_FRAME_LF)
> +            lf_level = 1 + get_bits(gb, 2);
> +        else
> +            have_crop = get_bits1(gb);
> +        if (have_crop) {
> +            if (frame->type != JPEGXL_FRAME_REFERENCE_ONLY) {
> +                uint32_t ux0 = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30);
> +                uint32_t uy0 = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30);
> +                x0 = unpack_signed(ux0);
> +                y0 = unpack_signed(uy0);
> +            } else {
> +                x0 = y0 = 0;
> +            }
> +            width = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30); // width
> +            height = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30); // height
> +            full_frame = x0 <= 0 && y0 <= 0 && width >= meta->coded_width && height >= meta->coded_height;
> +        }
> +        if (get_bits_left(gb) < 0)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +        if (frame->type == JPEGXL_FRAME_REGULAR || frame->type == JPEGXL_FRAME_SKIP_PROGRESSIVE) {
> +            // one blending record for the colour channels plus one per extra channel
> +            for (int i = 0; i <= meta->num_extra_channels; i++) {
> +                int mode = jxl_u32(gb, 0, 1, 2, 3, 0, 0, 0, 2);
> +                if (meta->num_extra_channels && (mode == JPEGXL_BM_BLEND || mode == JPEGXL_BM_MULADD))
> +                    jxl_u32(gb, 0, 1, 2, 3, 0, 0, 0, 2);
> +                if (meta->num_extra_channels && (mode == JPEGXL_BM_BLEND || mode == JPEGXL_BM_MULADD
> +                                              || mode == JPEGXL_BM_MUL))
> +                    skip_bits1(gb);
> +                if (!i)
> +                    resets_canvas = mode == JPEGXL_BM_REPLACE && full_frame;
> +                if (!resets_canvas)
> +                    skip_bits(gb, 2);
> +                if (get_bits_left(gb) < 0)
> +                    return AVERROR_BUFFER_TOO_SMALL;
> +            }
> +            if (meta->animation_offset)
> +                duration = jxl_u32(gb, 0, 1, 0, 0, 0, 0, 8, 32);
> +            if (meta->have_timecodes)
> +                skip_bits_long(gb, 32);
> +            frame->is_last = get_bits1(gb);
> +        } else {
> +            frame->is_last = 0;
> +        }
> +        if (frame->type != JPEGXL_FRAME_LF && !frame->is_last)
> +            save_as_ref = get_bits(gb, 2);
> +        if (frame->type == JPEGXL_FRAME_REFERENCE_ONLY ||
> +                (resets_canvas && !frame->is_last && (!duration || save_as_ref)
> +                && frame->type != JPEGXL_FRAME_LF))
> +            skip_bits1(gb); // save before color transform
> +        name_len = 8 * jxl_u32(gb, 0, 0, 16, 48, 0, 4, 5, 10);
> +        if (get_bits_left(gb) < name_len)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +        skip_bits_long(gb, name_len);
> +    }
> +
> +    // restoration filter
> +    if (!all_default) {
> +        int restd = get_bits1(gb), gab = 1;
> +        if (!restd)
> +            gab = get_bits1(gb);
> +        if (gab && get_bits1(gb))
> +            // gab custom
> +            skip_bits_long(gb, 16 * 6);
> +        if (get_bits_left(gb) < 0)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +        if (!restd) {
> +            int epf = get_bits(gb, 2);
> +            if (epf) {
> +                if (frame->encoding == JPEGXL_ENC_VARDCT && get_bits1(gb)) {
> +                    skip_bits_long(gb, 16 * 8); // custom epf sharpness
> +                    if (get_bits_left(gb) < 0)
> +                        return AVERROR_BUFFER_TOO_SMALL;
> +                }
> +                if (get_bits1(gb)) {
> +                    skip_bits_long(gb, 3 * 16 + 32); // custom epf weight
> +                    if (get_bits_left(gb) < 0)
> +                        return AVERROR_BUFFER_TOO_SMALL;
> +                }
> +                if (get_bits1(gb)) { // custom epf sigma
> +                    if (frame->encoding == JPEGXL_ENC_VARDCT)
> +                        skip_bits(gb, 16);
> +                    skip_bits_long(gb, 16 * 3);
> +                    if (get_bits_left(gb) < 0)
> +                        return AVERROR_BUFFER_TOO_SMALL;
> +                }
> +                if (frame->encoding == JPEGXL_ENC_MODULAR)
> +                    skip_bits(gb, 16);
> +            }
> +            // extensions of the restoration filter
> +            ret = skip_extensions(gb);
> +            if (ret < 0)
> +                return ret;
> +        }
> +        // extensions of the frame header itself
> +        ret = skip_extensions(gb);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    width = div_ceil(width, upsampling);
> +    height = div_ceil(height, upsampling);
> +    width = div_ceil(width, 1 << (3 * lf_level));
> +    height = div_ceil(height, 1 << (3 * lf_level));
> +    group_dim = 128 << group_size_shift;
> +    lf_group_dim = group_dim << 3;
> +    num_groups = div_ceil(width, group_dim) * div_ceil(height, group_dim);
> +    num_lf_groups = div_ceil(width, lf_group_dim) * div_ceil(height, lf_group_dim);
> +    if (num_groups == 1 && num_passes == 1)
> +        toc_count = 1;
> +    else
> +        toc_count = 2 + num_lf_groups + num_groups * num_passes;
> +
> +    // permuted toc
> +    if (get_bits1(gb)) {
> +        JXLEntropyDecoder dec;
> +        int64_t sym;
> +        uint32_t end, lehmer = 0;
> +        ret = entropy_decoder_init(avctx, gb, &dec, 8);
> +        if (ret < 0)
> +            return ret;
> +        if (get_bits_left(gb) < 0) {
> +            entropy_decoder_close(&dec);
> +            return AVERROR_BUFFER_TOO_SMALL;
> +        }
> +        /* Keep the decoder result in a signed 64-bit variable first so that a
> +         * negative error code is reported as such rather than being mistaken
> +         * for a (huge) symbol value. */
> +        sym = entropy_decoder_read_symbol(gb, &dec, toc_context(toc_count));
> +        if (sym < 0) {
> +            entropy_decoder_close(&dec);
> +            return sym;
> +        }
> +        end = sym;
> +        if (end > toc_count) {
> +            entropy_decoder_close(&dec);
> +            return AVERROR_INVALIDDATA;
> +        }
> +        for (uint32_t i = 0; i < end; i++) {
> +            sym = entropy_decoder_read_symbol(gb, &dec, toc_context(lehmer));
> +            if (sym < 0 || get_bits_left(gb) < 0) {
> +                entropy_decoder_close(&dec);
> +                return sym < 0 ? sym : AVERROR_BUFFER_TOO_SMALL;
> +            }
> +            // the previous value selects the context of the next symbol
> +            lehmer = sym;
> +        }
> +        entropy_decoder_close(&dec);
> +    }
> +    align_get_bits(gb);
> +
> +    // every TOC entry is a section size in bytes; accumulate them in bits
> +    for (uint32_t i = 0; i < toc_count; i++) {
> +        frame->body_length += 8 * jxl_u32(gb, 0, 1024, 17408, 4211712, 10, 14, 22, 30);
> +        if (get_bits_left(gb) < 0)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +    }
> +    align_get_bits(gb);
> +
> +    frame->total_length = frame->body_length + get_bits_count(gb) - start_len;
> +
> +    return 0;
> +}
> +
> +/**
> + * Advance ctx->skip over container boxes until a codestream or container
> + * signature is found at the current position.
> + *
> + * Every box starts with a 32-bit big-endian size followed by a four-byte type
> + * tag. A size of 1 means that the real size follows the tag as a 64-bit
> + * big-endian value. The size always includes the box header.
> + *
> + * @param ctx      parser context; ctx->skip is the offset to start at and is
> + *                 increased by the size of every box that is stepped over
> + * @param buf      start of the buffered data
> + * @param buf_size number of bytes available in buf
> + * @return 0 once a signature has been reached, AVERROR_BUFFER_TOO_SMALL if
> + *         more data is needed, AVERROR_INVALIDDATA for an impossible box size
> + */
> +static int skip_boxes(JXLParseContext *ctx, const uint8_t *buf, int buf_size)
> +{
> +    GetByteContext gb;
> +
> +    if (ctx->skip > buf_size)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    buf += ctx->skip;
> +    buf_size -= ctx->skip;
> +    bytestream2_init(&gb, buf, buf_size);
> +
> +    while (1) {
> +        uint64_t size;
> +        int head_size = 8; /* 32-bit size + type tag */
> +
> +        if (bytestream2_peek_le16(&gb) == FF_JPEGXL_CODESTREAM_SIGNATURE_LE)
> +            break;
> +        if (bytestream2_peek_le64(&gb) == FF_JPEGXL_CONTAINER_SIGNATURE_LE)
> +            break;
> +
> +        if (bytestream2_get_bytes_left(&gb) < 8)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +
> +        size = bytestream2_get_be32(&gb);
> +        /* the type tag sits between the 32-bit size and the optional 64-bit
> +         * size, so it has to be stepped over before reading the latter */
> +        bytestream2_skip(&gb, 4);
> +        if (size == 1) {
> +            if (bytestream2_get_bytes_left(&gb) < 8)
> +                return AVERROR_BUFFER_TOO_SMALL;
> +            size = bytestream2_get_be64(&gb);
> +            head_size = 16;
> +        }
> +        if (!size)
> +            return AVERROR_INVALIDDATA;
> +        /* invalid ISOBMFF size */
> +        if (size <= head_size)
> +            return AVERROR_INVALIDDATA;
> +        /* the size is untrusted: refuse anything that would overflow the
> +         * running offset, which is bounded by an int-sized buffer anyway */
> +        if (size > INT_MAX - ctx->skip)
> +            return AVERROR_INVALIDDATA;
> +
> +        ctx->skip += size;
> +        bytestream2_skip(&gb, size - head_size);
> +        if (bytestream2_get_bytes_left(&gb) <= 0)
> +            return AVERROR_BUFFER_TOO_SMALL;
> +    }
> +
> +    return 0;
> +}
> +/**
> + * Try to find the end of the codestream in the data buffered so far.
> + *
> + * Parsing starts ctx->skip bytes into buf. For container input (flagged in
> + * ctx->container or detected by the container signature) the codestream header
> + * bytes are first collected into ctx->cs_buffer; for a bare codestream buf is
> + * parsed directly. The image header is parsed once, while
> + * ctx->codestream_length is still zero, and its fields are published through
> + * populate_fields().
> + *
> + * @return for container input the value returned by
> + *         ff_jpegxl_collect_codestream_header(); for a bare codestream the
> + *         length in bytes up to the end of the frame flagged as last;
> + *         AVERROR_BUFFER_TOO_SMALL when more data is needed, or another
> + *         negative error code
> + */
> +static int try_parse(AVCodecParserContext *s, AVCodecContext *avctx, JXLParseContext *ctx,
> +                     const uint8_t *buf, int buf_size)
> +{
> +    int ret, cs_buflen, header_skip;
> +    const uint8_t *cs_buffer;
> +    GetBitContext gb;
> +
> +    if (ctx->skip > buf_size)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    buf += ctx->skip;
> +    buf_size -= ctx->skip;
> +
> +    if (ctx->container || AV_RL64(buf) == FF_JPEGXL_CONTAINER_SIGNATURE_LE) {
> +        ctx->container = 1;
> +        ret = ff_jpegxl_collect_codestream_header(buf, buf_size, ctx->cs_buffer,
> +                                                  sizeof(ctx->cs_buffer), &ctx->copied);
> +        if (ret < 0)
> +            return ret;
> +        ctx->collected_size = ret;
> +        if (!ctx->copied) {
> +            ctx->skip += ret; // no codestream bytes copied: advance the start offset by the returned count
> +            return AVERROR_BUFFER_TOO_SMALL;
> +        }
> +        cs_buffer = ctx->cs_buffer;
> +        cs_buflen = FFMIN(sizeof(ctx->cs_buffer), ctx->copied);
> +    } else {
> +        cs_buffer = buf;
> +        cs_buflen = buf_size;
> +    }
> +
> +    if (!ctx->codestream_length) {
> +        header_skip = ff_jpegxl_parse_codestream_header(cs_buffer, cs_buflen, &ctx->codestream.meta, 0);
> +        if (header_skip < 0)
> +            return header_skip;
> +        ctx->codestream_length = header_skip; // used below as a bit offset into the codestream
> +        populate_fields(s, avctx, &ctx->codestream.meta);
> +    }
> +
> +    if (ctx->container)
> +        return ctx->collected_size; // frame headers are only walked for bare codestreams
> +
> +    ret = init_get_bits8(&gb, cs_buffer, cs_buflen);
> +    if (ret < 0)
> +        return ret;
> +
> +    skip_bits_long(&gb, ctx->codestream_length); // resume after everything accounted for so far
> +
> +    if (!ctx->skipped_icc && ctx->codestream.meta.have_icc_profile) {
> +        ret = skip_icc_profile(avctx, ctx, &gb);
> +        if (ret < 0)
> +            return ret;
> +        ctx->skipped_icc = 1;
> +        align_get_bits(&gb);
> +        ctx->codestream_length = get_bits_count(&gb);
> +    }
> +
> +    if (get_bits_left(&gb) <= 0)
> +        return AVERROR_BUFFER_TOO_SMALL;
> +
> +    while (1) {
> +        ret = parse_frame_header(avctx, ctx, &gb);
> +        if (ret < 0)
> +            return ret;
> +        ctx->codestream_length += ctx->codestream.frame.total_length;
> +        if (ctx->codestream.frame.is_last)
> +            return ctx->codestream_length / 8; // bits -> bytes
> +        if (get_bits_left(&gb) <= ctx->codestream.frame.body_length)
> +            return AVERROR_BUFFER_TOO_SMALL; // the next frame header is not buffered yet
> +        skip_bits_long(&gb, ctx->codestream.frame.body_length);
> +    }
> +}
> +/**
> + * parser_parse callback (installed in ff_jpegxl_parser).
> + *
> + * Runs try_parse() - and skip_boxes() for container input - over the data
> + * accumulated in ctx->pc to locate the end of the current image, then lets
> + * ff_combine_frame() either buffer the new input or hand out a complete image
> + * through poutbuf / poutbuf_size. All per-image state in ctx is cleared once
> + * an image has been handed out.
> + *
> + * @return buf_size while ff_combine_frame() reports that no complete image is
> + *         available, otherwise the value of next passed to ff_combine_frame()
> + */
> +static int jpegxl_parse(AVCodecParserContext *s, AVCodecContext *avctx,
> +                        const uint8_t **poutbuf, int *poutbuf_size,
> +                        const uint8_t *buf, int buf_size)
> +{
> +    JXLParseContext *ctx = s->priv_data;
> +    int next = END_NOT_FOUND, ret;
> +
> +    *poutbuf_size = 0;
> +    *poutbuf = NULL;
> +
> +    if (!ctx->pc.index) // nothing accumulated yet, so there is nothing to parse
> +        goto flush;
> +
> +    if (!ctx->container || !ctx->codestream_length) {
> +        ret = try_parse(s, avctx, ctx, ctx->pc.buffer, ctx->pc.index);
> +        if (ret < 0)
> +            goto flush; // leaves next at END_NOT_FOUND
> +        ctx->next = ret;
> +        if (ctx->container)
> +            ctx->skip += ctx->next;
> +    }
> +
> +    if (ctx->container) {
> +        ret = skip_boxes(ctx, ctx->pc.buffer, ctx->pc.index);
> +        if (ret < 0)
> +            goto flush;
> +        ctx->next = ret + ctx->skip;
> +    }
> +
> +    next = ctx->next - ctx->pc.index; // presumably rebases the end offset onto the new input; confirm against ff_combine_frame()
> +
> +flush:
> +    if (next > buf_size)
> +        next = END_NOT_FOUND;
> +
> +    ret = ff_combine_frame(&ctx->pc, next, &buf, &buf_size);
> +    if (ret < 0)
> +        return buf_size; // no complete image yet; the outputs stay NULL / 0
> +
> +    *poutbuf      = buf;
> +    *poutbuf_size = buf_size;
> +
> +    ctx->codestream_length = 0;
> +    ctx->collected_size = 0;
> +    ctx->container = 0;
> +    ctx->copied = 0;
> +    ctx->skip = 0;
> +    ctx->skipped_icc = 0;
> +    ctx->next = 0;
> +    memset(&ctx->codestream, 0, sizeof(ctx->codestream));
> +
> +    return next;
> +}
> +/** Parser table entry for AV_CODEC_ID_JPEGXL: parsing is done by jpegxl_parse(), closing by ff_parse_close(). */
> +const AVCodecParser ff_jpegxl_parser = {
> +    .codec_ids      = { AV_CODEC_ID_JPEGXL },
> +    .priv_data_size = sizeof(JXLParseContext),
> +    .parser_parse   = jpegxl_parse,
> +    .parser_close   = ff_parse_close,
> +};
> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
> index 285f81a901..a663b9e253 100644
> --- a/libavcodec/parsers.c
> +++ b/libavcodec/parsers.c
> @@ -55,6 +55,7 @@ extern const AVCodecParser ff_hevc_parser;
>  extern const AVCodecParser ff_hdr_parser;
>  extern const AVCodecParser ff_ipu_parser;
>  extern const AVCodecParser ff_jpeg2000_parser;
> +extern const AVCodecParser ff_jpegxl_parser;
>  extern const AVCodecParser ff_misc4_parser;
>  extern const AVCodecParser ff_mjpeg_parser;
>  extern const AVCodecParser ff_mlp_parser;
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index 9411511e04..728ab8839d 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -29,7 +29,7 @@
>  
>  #include "version_major.h"
>  
> -#define LIBAVCODEC_VERSION_MINOR  22
> +#define LIBAVCODEC_VERSION_MINOR  23
>  #define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \



More information about the ffmpeg-devel mailing list