[FFmpeg-devel] [PATCH] avcodec/libx264: fix extradata when config annexb=0
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Wed Mar 6 18:23:38 EET 2024
Zhao Zhili:
>
>
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of Andreas Rheinhardt
>> Sent: 2024年3月6日 22:19
>> To: ffmpeg-devel at ffmpeg.org
>> Subject: Re: [FFmpeg-devel] [PATCH] avcodec/libx264: fix extradata when config annexb=0
>>
>> Zhao Zhili:
>>> From: Zhao Zhili <zhilizhao at tencent.com>
>>>
>>> ---
>>> configure | 2 +-
>>> libavcodec/libx264.c | 148 ++++++++++++++++++++++++++++++++++++-------
>>> 2 files changed, 125 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/configure b/configure
>>> index bbf1a70731..ef58c3fe97 100755
>>> --- a/configure
>>> +++ b/configure
>>> @@ -3491,7 +3491,7 @@ libwebp_encoder_deps="libwebp"
>>> libwebp_anim_encoder_deps="libwebp"
>>> libx262_encoder_deps="libx262"
>>> libx264_encoder_deps="libx264"
>>> -libx264_encoder_select="atsc_a53"
>>> +libx264_encoder_select="atsc_a53 h264parse"
>>> libx264rgb_encoder_deps="libx264"
>>> libx264rgb_encoder_select="libx264_encoder"
>>> libx265_encoder_deps="libx265"
>>> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
>>> index 10d646bd76..10dfda125d 100644
>>> --- a/libavcodec/libx264.c
>>> +++ b/libavcodec/libx264.c
>>> @@ -34,9 +34,12 @@
>>> #include "avcodec.h"
>>> #include "codec_internal.h"
>>> #include "encode.h"
>>> +#include "get_bits.h"
>>> +#include "h264_ps.h"
>>> #include "internal.h"
>>> #include "packet_internal.h"
>>> #include "atsc_a53.h"
>>> +#include "put_bits.h"
>>> #include "sei.h"
>>>
>>> #include <x264.h>
>>> @@ -865,6 +868,124 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
>>> return 0;
>>> }
>>>
>>> +static int set_avcc_extradata(AVCodecContext *avctx, x264_nal_t *nal, int nnal)
>>> +{
>>> + x264_nal_t *sps_nal = NULL;
>>> + x264_nal_t *pps_nal = NULL;
>>> + GetBitContext gbc;
>>> + PutBitContext pbc;
>>> + H264ParamSets ps = { 0 };
>>> + uint8_t *p;
>>> + int ret, profile;
>>> +
>>> + /* We know it's in the order of SPS/PPS/SEI, but it's not documented in x264 API.
>>> + * The x264 param i_sps_id implies there is a single pair of SPS/PPS.
>>> + */
>>> + for (int i = 0; i < nnal; i++) {
>>> + if (nal[i].i_type == NAL_SPS)
>>> + sps_nal = &nal[i];
>>> + else if (nal[i].i_type == NAL_PPS)
>>> + pps_nal = &nal[i];
>>> + }
>>> + av_assert0(sps_nal);
>>> + av_assert0(pps_nal);
>>
>> We should never assert on the output of external libraries.
>
> Ok, will just remove the assert. I don't think a check is needed.
>
>>
>>> +
>>> + // +11 for AVCDecoderConfigurationRecord, will shrink to the real size finally
>>> + avctx->extradata_size = sps_nal->i_payload + pps_nal->i_payload + 11;
>>> + avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_MIN_SIZE);
>>
>> AV_INPUT_BUFFER_PADDING_SIZE
>
> Oops.
>
>>
>>> + if (!avctx->extradata)
>>> + return AVERROR(ENOMEM);
>>> +
>>> + // Skip size part
>>> + p = sps_nal->p_payload + 4;
>>> + init_get_bits8(&gbc, p, sps_nal->i_payload - 4);
>>> + skip_bits(&gbc, 8);
>>> + ret = ff_h264_decode_seq_parameter_set(&gbc, avctx, &ps, 0);
>>
>> This is completely overblown. You only read three fields: profile_idc,
>> the constraint fields and level_idc. All three are at the start of the
>> SPS and occupy a whole byte, so there is no need for a GetBitContext at all.
>> (Yes, I am aware of the hypothetical scenario in which we could have a
>> 0x03 escape byte before level_idc (in case of a hypothetical future
>> profile zero), but even then using ff_h264_decode_seq_parameter_set() is
>> overblown.)
>
> You missed chroma_format_idc, bit_depth_luma/bit_depth_chroma.
> I can get that from pixel format, but there are a lot of them.
>
Ok, but even then I fail to see a convenience advantage here. After all,
using it e.g. forces the following loop onto you. And it forces cleanup
code here. Also, there is a gotcha here. See below.
>>
>>> + if (ret < 0)
>>> + return ret;
>>> +
>>> + for (int i = 0; i < FF_ARRAY_ELEMS(ps.sps_list); i++) {
>>> + if (ps.sps_list[i]) {
>>> + ps.sps = ps.sps_list[i];
>>> + break;
>>> + }
>>> + }
>>> +
>>> + // Now create AVCDecoderConfigurationRecord
>>> + init_put_bits(&pbc, avctx->extradata, avctx->extradata_size);
>>> + put_bits(&pbc, 8, 1); // version
>>> + put_bits(&pbc, 8, ps.sps->profile_idc); // AVCProfileIndication
>>> + put_bits(&pbc, 8, ps.sps->constraint_set_flags); // profile_compatibility
Sure this is correct at all? It seems to me that h264_ps.c reverses the
order of the constraint_set_flags (constraint_set0_flag (which is the
0x80 bit in a file) will be mapped to 0x01 etc.) and it discards
reserved_zero_2bits. You should instead simply copy the relevant byte
from the SPS.
>>> + put_bits(&pbc, 8, ps.sps->level_idc); // AVCLevelIndication
>>> + put_bits(&pbc, 11, 0x7FF);
>>> +
>>> + p = sps_nal->p_payload + 4;
>>> + put_bits(&pbc, 5, 1); // numOfSequenceParameterSets
>>> + put_bits(&pbc, 16, sps_nal->i_payload - 4); // sequenceParameterSetLength
>>
>> I also think that using a PutBitContext is overblown here (most fields
>> don't cross a byte-boundary, the bit-alignment of every field is fixed),
>> in particular if you copy the NALUs eight bits at a time:
>
> It's readable, close related to the spec, and it counts how many bytes actually used
> automatically. I choose it for these convenience over performance.
>
I regard a simple memcpy() as more convenient than the following loop:
>>
>>> + for (int i = 0; i < sps_nal->i_payload - 4; i++)
>>> + put_bits(&pbc, 8, p[i]);
>>> +
>>> + p = pps_nal->p_payload + 4;
>>> + put_bits(&pbc, 8, 1); // numOfPictureParameterSets
>>> + put_bits(&pbc, 16, pps_nal->i_payload - 4); // pictureParameterSetLength
>>> + for (int i = 0; i < pps_nal->i_payload - 4; i++)
>>> + put_bits(&pbc, 8, p[i]);
>>> +
>>> + profile = ps.sps->profile_idc;
>>> + if (profile != 66 && profile != 77 && profile != 88) {
>>> + put_bits(&pbc, 6, 0x3F);
>>> + put_bits(&pbc, 2, ps.sps->chroma_format_idc);
>>> + put_bits(&pbc, 5, 0x1F);
>>> + put_bits(&pbc, 3, ps.sps->bit_depth_luma - 8);
>>> + put_bits(&pbc, 5, 0x1F);
>>> + put_bits(&pbc, 3, ps.sps->bit_depth_chroma - 8);
>>> + put_bits(&pbc, 8, 0);
>>> + }
>>> + flush_put_bits(&pbc);
>>> + avctx->extradata_size = put_bytes_output(&pbc);
>>> +
>>> + ff_h264_ps_uninit(&ps);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int set_extradata(AVCodecContext *avctx)
>>> +{
>>> + X264Context *x4 = avctx->priv_data;
>>> + x264_nal_t *nal;
>>> + uint8_t *p;
>>> + int nnal, s;
>>> +
>>> + s = x264_encoder_headers(x4->enc, &nal, &nnal);
>>> + if (s < 0)
>>> + return AVERROR_EXTERNAL;
>>> +
>>> + if (!x4->params.b_annexb)
>>> + return set_avcc_extradata(avctx, nal, nnal);
>>> +
>>> + avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
>>> + if (!p)
>>> + return AVERROR(ENOMEM);
>>> +
>>> + for (int i = 0; i < nnal; i++) {
>>> + /* Don't put the SEI in extradata. */
>>> + if (nal[i].i_type == NAL_SEI) {
>>> + av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload + 25);
>>> + x4->sei_size = nal[i].i_payload;
>>> + x4->sei = av_malloc(x4->sei_size);
>>> + if (!x4->sei)
>>> + return AVERROR(ENOMEM);
>>> + memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
>>> + continue;
>>> + }
>>> + memcpy(p, nal[i].p_payload, nal[i].i_payload);
>>> + p += nal[i].i_payload;
>>> + }
>>> + avctx->extradata_size = p - avctx->extradata;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> #define PARSE_X264_OPT(name, var)\
>>> if (x4->var && x264_param_parse(&x4->params, name, x4->var) < 0) {\
>>> av_log(avctx, AV_LOG_ERROR, "Error parsing option '%s' with value '%s'.\n", name, x4->var);\
>>> @@ -1233,30 +1354,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
>>> return AVERROR_EXTERNAL;
>>>
>>> if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
>>> - x264_nal_t *nal;
>>> - uint8_t *p;
>>> - int nnal, s, i;
>>> -
>>> - s = x264_encoder_headers(x4->enc, &nal, &nnal);
>>> - avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
>>> - if (!p)
>>> - return AVERROR(ENOMEM);
>>> -
>>> - for (i = 0; i < nnal; i++) {
>>> - /* Don't put the SEI in extradata. */
>>> - if (nal[i].i_type == NAL_SEI) {
>>> - av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
>>> - x4->sei_size = nal[i].i_payload;
>>> - x4->sei = av_malloc(x4->sei_size);
>>> - if (!x4->sei)
>>> - return AVERROR(ENOMEM);
>>> - memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
>>> - continue;
>>> - }
>>> - memcpy(p, nal[i].p_payload, nal[i].i_payload);
>>> - p += nal[i].i_payload;
>>> - }
>>> - avctx->extradata_size = p - avctx->extradata;
>>> + ret = set_extradata(avctx);
>>> + if (ret < 0)
>>> + return ret;
>>> }
>>>
>>> cpb_props = ff_encode_add_cpb_side_data(avctx);
>>
More information about the ffmpeg-devel
mailing list