[FFmpeg-devel] [PATCH 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)
Lynne
dev at lynne.ee
Thu May 16 18:00:25 EEST 2024
On 16/05/2024 12:26, Andreas Rheinhardt wrote:
> Lynne via ffmpeg-devel:
>> This commit adds a decoder for the frequency-domain part of USAC.
>>
>> What works:
>> - Mono
>> - Stereo (no prediction)
>> - Stereo (mid/side coding)
>>
>> What doesn't:
>> - Preroll decoding (every single decoder seems faulty or weird?)
>> - Complex stereo prediction
>>
>> Known issues:
>> - Spec non-compliance (noise synthesis in particular)
>> - Lack of robustness
>> ---
>
>
>> diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h
>> index 20545a24d4..3e6592cf0e 100644
>> --- a/libavcodec/aac/aacdec.h
>> +++ b/libavcodec/aac/aacdec.h
>> @@ -42,6 +42,8 @@
>> #include "libavcodec/avcodec.h"
>> #include "libavcodec/mpeg4audio.h"
>>
>> +#include "aacdec_ac.h"
>> +
>> typedef struct AACDecContext AACDecContext;
>>
>> /**
>> @@ -69,6 +71,32 @@ enum CouplingPoint {
>> AFTER_IMDCT = 3,
>> };
>>
>> +enum AACUsacElem {
>> + ID_USAC_SCE = 0,
>> + ID_USAC_CPE = 1,
>> + ID_USAC_LFE = 2,
>> + ID_USAC_EXT = 3,
>> +};
>> +
>> +enum ExtensionHeaderType {
>> + ID_CONFIG_EXT_FILL = 0,
>> + ID_CONFIG_EXT_LOUDNESS_INFO = 2,
>> + ID_CONFIG_EXT_STREAM_ID = 7,
>> +};
>> +
>> +enum AACUsacExtension {
>> + ID_EXT_ELE_FILL,
>> + ID_EXT_ELE_MPEGS,
>> + ID_EXT_ELE_SAOC,
>> + ID_EXT_ELE_AUDIOPREROLL,
>> + ID_EXT_ELE_UNI_DRC,
>> +};
>> +
>> +enum AACUSACLoudnessExt {
>> + UNIDRCLOUDEXT_TERM = 0x0,
>> + UNIDRCLOUDEXT_EQ = 0x1,
>> +};
>> +
>> // Supposed to be equal to AAC_RENAME() in case of USE_FIXED.
>> #define RENAME_FIXED(name) name ## _fixed
>>
>> @@ -93,6 +121,40 @@ typedef struct LongTermPrediction {
>> int8_t used[MAX_LTP_LONG_SFB];
>> } LongTermPrediction;
>>
>> +/* Per channel core mode */
>> +typedef struct AACUsacElemData {
>> + uint8_t core_mode;
>> + uint8_t scale_factor_grouping;
>> +
>> + /* Timewarping ratio */
>> +#define NUM_TW_NODES 16
>> + uint8_t tw_ratio[NUM_TW_NODES];
>> +
>> + struct {
>> + uint8_t acelp_core_mode : 3;
>> + uint8_t lpd_mode : 5;
>> +
>> + uint8_t bpf_control_info : 1;
>> + uint8_t core_mode_last : 1;
>> + uint8_t fac_data_present : 1;
>> +
>> + int last_lpd_mode;
>> + } ldp;
>> +
>> + struct {
>> + unsigned int seed;
>> + uint8_t level : 3;
>> + uint8_t offset : 5;
>> + } noise;
>> +
>> + struct {
>> + uint8_t gain;
>> + uint32_t kv[8 /* (1024 / 16) / 8 */][8];
>> + } fac;
>> +
>> + AACArithState ac;
>> +} AACUsacElemData;
>> +
>> /**
>> * Individual Channel Stream
>> */
>> @@ -145,6 +207,7 @@ typedef struct ChannelCoupling {
>> */
>> typedef struct SingleChannelElement {
>> IndividualChannelStream ics;
>> + AACUsacElemData ue; ///< USAC element data
>> TemporalNoiseShaping tns;
>> enum BandType band_type[128]; ///< band types
>> int sfo[128]; ///< scalefactor offsets
>> @@ -163,25 +226,141 @@ typedef struct SingleChannelElement {
>> };
>> } SingleChannelElement;
>>
>> +typedef struct AACUsacStereo {
>> + uint8_t common_window;
>> + uint8_t common_tw;
>> +
>> + uint8_t ms_mask_mode;
>> + uint8_t config_idx;
>> +
>> + struct {
>> + uint8_t use_prev_frame;
>> + uint8_t pred_dir;
>> + uint8_t delta_code_time;
>> + uint8_t pred_used[8][64];
>> +
>> + AVComplexFloat pred[8][64];
>> + } cplx;
>> +} AACUsacStereo;
>> +
>> /**
>> * channel element - generic struct for SCE/CPE/CCE/LFE
>> */
>> typedef struct ChannelElement {
>> int present;
>> // CPE specific
>> + uint8_t max_sfb_ste; ///< (USAC) Maximum of both max_sfb values
>> uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band
>> // shared
>> SingleChannelElement ch[2];
>> // CCE specific
>> ChannelCoupling coup;
>> + // USAC stereo coupling data
>> + AACUsacStereo us;
>> } ChannelElement;
>>
>> +typedef struct AACUSACLoudnessInfo {
>> + uint8_t drc_set_id : 6;
>> + uint8_t downmix_id : 7;
>> + struct {
>> + uint16_t lvl : 12;
>> + uint8_t present : 1;
>> + } sample_peak;
>> +
>> + struct {
>> + uint16_t lvl : 12;
>> + uint8_t measurement : 4;
>> + uint8_t reliability : 2;
>> + uint8_t present : 1;
>> + } true_peak;
>> +
>> + uint8_t nb_measurements : 4;
>> + struct {
>> + uint8_t method_def : 4;
>> + uint8_t method_val;
>> + uint8_t measurement : 4;
>> + uint8_t reliability : 2;
>> + } measurements[16];
>> +} AACUSACLoudnessInfo;
>> +
>> +typedef struct AACUsacElemConfig {
>> + enum AACUsacElem type;
>> +
>> + uint8_t tw_mdct : 1;
>> + uint8_t noise_fill : 1;
>> +
>> + uint8_t stereo_config_index;
>> +
>> + struct {
>> + int ratio;
>> +
>> + uint8_t harmonic_sbr : 1; /* harmonicSBR */
>> + uint8_t bs_intertes : 1; /* bs_interTes */
>> + uint8_t bs_pvc : 1; /* bs_pvc */
>> +
>> + struct {
>> + uint8_t start_freq; /* dflt_start_freq */
>> + uint8_t stop_freq; /* dflt_stop_freq */
>> +
>> + uint8_t freq_scale; /* dflt_freq_scale */
>> + uint8_t alter_scale : 1; /* dflt_alter_scale */
>> + uint8_t noise_scale; /* dflt_noise_scale */
>> +
>> + uint8_t limiter_bands; /* dflt_limiter_bands */
>> + uint8_t limiter_gains; /* dflt_limiter_gains */
>> + uint8_t interpol_freq : 1; /* dflt_interpol_freq */
>> + uint8_t smoothing_mode : 1; /* dflt_smoothing_mode */
>> + } dflt;
>> + } sbr;
>> +
>> + struct {
>> + uint8_t freq_res; /* bsFreqRes */
>> + uint8_t fixed_gain; /* bsFixedGainDMX */
>> + uint8_t temp_shape_config; /* bsTempShapeConfig */
>> + uint8_t decorr_config; /* bsDecorrConfig */
>> + uint8_t high_rate_mode : 1; /* bsHighRateMode */
>> + uint8_t phase_coding : 1; /* bsPhaseCoding */
>> +
>> + uint8_t otts_bands_phase; /* bsOttBandsPhase */
>> + uint8_t residual_coding; /* bsResidualCoding */
>> + uint8_t residual_bands; /* bsResidualBands */
>> + uint8_t pseudo_lr : 1; /* bsPseudoLr */
>> + uint8_t env_quant_mode : 1; /* bsEnvQuantMode */
>
> Is using bitfields really worth it, given that they force the use of
> masking for accesses?
>
>> + } mps;
>> +
>> + struct {
>> + enum AACUsacExtension type;
>> + uint8_t payload_frag;
>> + uint32_t default_len;
>> + uint32_t pl_data_offset;
>> + uint8_t *pl_data;
>> + } ext;
>> +} AACUsacElemConfig;
>> +
>> +typedef struct AACUSACConfig {
>> + uint8_t core_sbr_frame_len_idx; /* coreSbrFrameLengthIndex */
>> + uint8_t rate_idx;
>> + uint16_t core_frame_len;
>> + uint16_t stream_identifier;
>> +
>> + AACUsacElemConfig elems[64];
>> + int nb_elems;
>> +
>> + struct {
>> + uint8_t nb_album;
>> + AACUSACLoudnessInfo album_info[64];
>> + uint8_t nb_info;
>> + AACUSACLoudnessInfo info[64];
>> + } loudness;
>> +} AACUSACConfig;
>> +
>> typedef struct OutputConfiguration {
>> MPEG4AudioConfig m4ac;
>> uint8_t layout_map[MAX_ELEM_ID*4][3];
>> int layout_map_tags;
>> AVChannelLayout ch_layout;
>> enum OCStatus status;
>> + AACUSACConfig usac;
>> } OutputConfiguration;
>>
>> /**
>> diff --git a/libavcodec/aac/aacdec_ac.c b/libavcodec/aac/aacdec_ac.c
>> new file mode 100644
>> index 0000000000..326d716bd3
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_ac.c
>> @@ -0,0 +1,224 @@
>> +/*
>> + * AAC definitions and structures
>> + * Copyright (c) 2024 Lynne
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "libavcodec/aactab.h"
>> +#include "aacdec_ac.h"
>> +
>> +uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int N)
>> +{
>> + float ratio;
>> + if (reset) {
>> + memset(state->last, 0, sizeof(state->last));
>> + state->last_len = N;
>> + memset(state->cur, 0, sizeof(state->cur));
>> + state->cur[3] = 0;
>> + state->cur[2] = 0;
>> + state->cur[1] = 0;
>> + state->cur[0] = 1;
>> + state->last[0] = 0 << 12;
>> + state->state_pre = 0;
>> + return 0;
>> + } else if (state->last_len != N) {
>> + int i;
>> + uint8_t last[512 /* 2048 / 4 */];
>> + memcpy(last, state->last, sizeof(last));
>> +
>> + ratio = state->last_len / (float)N;
>> + for (i = 0; i < N/2; i++) {
>> + int k = (int)(i * ratio);
>> + state->last[i] = last[k];
>> + }
>> +
>> + for (; i < FF_ARRAY_ELEMS(state->last); i++)
>> + state->last[i] = 0;
>> +
>> + state->last_len = N;
>> + }
>> +
>> + memset(state->cur, 0, sizeof(state->cur));
>> + state->cur[3] = 0;
>> + state->cur[2] = 0;
>> + state->cur[1] = 0;
>> + state->cur[0] = 1;
>> +
>> + state->state_pre = state->last[0] << 12;
>> + return state->last[0] << 12;
>> +}
>> +
>> +extern int ec_debug;
>> +
>> +int trig = 0;
>> +
>> +uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t c, int i, int N)
>> +{
>> + c = state->state_pre >> 8;
>> + c = c + (state->last[i + 1] << 8);
>> + c = (c << 4);
>> + c += state->cur[1];
>> +
>> + state->state_pre = c;
>> +
>> + if (i > 3 &&
>> + ((state->cur[3] + state->cur[2] + state->cur[1]) < 5))
>> + return c + 0x10000;
>> +
>> + return c;
>> +}
>> +
>> +uint32_t ff_aac_ac_get_pk(uint32_t c)
>> +{
>> + int i_min = -1;
>> + int i, j;
>> + int i_max = FF_ARRAY_ELEMS(ff_aac_ac_lookup_m) - 1;
>> + while ((i_max - i_min) > 1) {
>> + i = i_min + ((i_max - i_min) / 2);
>> + j = ff_aac_ac_hash_m[i];
>> + if (c < (j >> 8))
>> + i_max = i;
>> + else if (c > (j >> 8))
>> + i_min = i;
>> + else
>> + return (j & 0xFF);
>> + }
>> + return ff_aac_ac_lookup_m[i_max];
>> +}
>> +
>> +void ff_aac_ac_update_context(AACArithState *state, int idx,
>> + uint16_t a, uint16_t b)
>> +{
>> + state->cur[0] = a + b + 1;
>> + if (state->cur[0] > 0xF)
>> + state->cur[0] = 0xF;
>> +
>> + state->cur[3] = state->cur[2];
>> + state->cur[2] = state->cur[1];
>> + state->cur[1] = state->cur[0];
>> +
>> + state->last[idx] = state->cur[0];
>> +}
>> +
>> +/* Initialize AC */
>> +void ff_aac_ac_init(AACArith *ac, GetBitContext *gb)
>> +{
>> + ac->low = 0;
>> + ac->high = UINT16_MAX;
>> + ac->val = get_bits(gb, 16);
>> +}
>> +
>> +uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
>> + const uint16_t *cdf, uint16_t cdf_len)
>> +{
>> + int val = ac->val;
>> + int low = ac->low;
>> + int high = ac->high;
>> +
>> + int rng = high - low + 1;
>> + int c = ((((int)(val - low + 1)) << 14) - ((int)1));
>> +
>> + /* Note: this could be done faster via heuristics, the total number of
>> + * configurations is low */
>> + const uint16_t *p = cdf - 1;
>> + const uint16_t *q;
>> +
>> + switch (cdf_len) {
>> + case 2:
>> + if ((p[1] * rng) > c)
>> + p += 1;
>> + break;
>> + case 4:
>> + if ((p[2] * rng) > c)
>> + p += 2;
>> + if ((p[1] * rng) > c)
>> + p += 1;
>> + break;
>> + case 17:
>> + /* First check if the current probability is even met at all */
>> + if ((p[1] * rng) <= c)
>> + break;
>> + p += 1;
>> + for (int i = 8; i >= 1; i >>= 1)
>> + if ((p[i] * rng) > c)
>> + p += i;
>> + break;
>> + case 27:
>> + const uint16_t *p_24 = p + 24;
>> +
>> + if ((p[16] * rng) > c)
>> + p += 16;
>> + if ((p[8] * rng) > c)
>> + p += 8;
>> + if (p != p_24)
>> + if ((p[4] * rng) > c)
>> + p += 4;
>> + if ((p[2] * rng) > c)
>> + p += 2;
>> +
>> + if (p != &p_24[2])
>> + if ((p[1] * rng) > c)
>> + p += 1;
>> + break;
>> + default:
>> + /* This should never happen */
>> + av_assert2(0);
>> + }
>> +
>> + int sym = (int)((ptrdiff_t)(p - cdf)) + 1;
>> + if (sym)
>> + high = low + ((rng * cdf[sym - 1]) >> 14) - 1;
>> + low += (rng * cdf[sym]) >> 14;
>> +
>> + /* This loop could be done faster */
>> + while (1) {
>> + if (high < 32768) {
>> + ;
>> + } else if (low >= 32768) {
>> + val -= 32768;
>> + low -= 32768;
>> + high -= 32768;
>> + } else if (low >= 16384 && high < 49152) {
>> + val -= 16384;
>> + low -= 16384;
>> + high -= 16384;
>> + } else {
>> + break;
>> + }
>> + low += low;
>> + high += high + 1;
>> + val = (val << 1) | get_bits1(gb);
>> + };
>> +
>> + ac->low = low;
>> + ac->high = high;
>> + ac->val = val;
>> +
>> + return sym;
>> +}
>> +
>> +void ff_aac_ac_finish(AACArithState *state, int offset, int N)
>> +{
>> + int i;
>> +
>> + for (i = offset; i < N/2; i++)
>> + state->last[i] = 1;
>> +
>> + for (; i < FF_ARRAY_ELEMS(state->last); i++)
>> + state->last[i] = 0;
>> +}
>> diff --git a/libavcodec/aac/aacdec_ac.h b/libavcodec/aac/aacdec_ac.h
>> new file mode 100644
>> index 0000000000..ef96bed770
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_ac.h
>> @@ -0,0 +1,54 @@
>> +/*
>> + * AAC definitions and structures
>> + * Copyright (c) 2024 Lynne
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVCODEC_AACDEC_AC_H
>> +#define AVCODEC_AACDEC_AC_H
>> +
>> +#include "libavcodec/get_bits.h"
>> +
>> +typedef struct AACArithState {
>> + uint8_t last[512 /* 2048 / 4 */];
>> + int last_len;
>> + uint8_t cur[4];
>> + uint16_t state_pre;
>> +} AACArithState;
>> +
>> +typedef struct AACArith {
>> + uint16_t low;
>> + uint16_t high;
>> + uint16_t val;
>> +} AACArith;
>> +
>> +#define FF_AAC_AC_ESCAPE 16
>> +
>> +uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int len);
>> +uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t old_c, int idx, int len);
>> +uint32_t ff_aac_ac_get_pk(uint32_t c);
>> +
>> +void ff_aac_ac_update_context(AACArithState *state, int idx, uint16_t a, uint16_t b);
>> +void ff_aac_ac_init(AACArith *ac, GetBitContext *gb);
>> +
>> +uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
>> + const uint16_t *cdf, uint16_t cdf_len);
>> +
>> +void ff_aac_ac_finish(AACArithState *state, int offset, int nb);
>> +
>> +#endif /* AVCODEC_AACDEC_AC_H */
>> diff --git a/libavcodec/aac/aacdec_dsp_template.c b/libavcodec/aac/aacdec_dsp_template.c
>> index 59a69d88f3..8d31af22f8 100644
>> --- a/libavcodec/aac/aacdec_dsp_template.c
>> +++ b/libavcodec/aac/aacdec_dsp_template.c
>> @@ -88,8 +88,8 @@ static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, ChannelElement
>> INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
>> const uint16_t *offsets = ics->swb_offset;
>> for (int g = 0; g < ics->num_window_groups; g++) {
>> - for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
>> - const int idx = g*ics->max_sfb + sfb;
>> + for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
>> + const int idx = g*cpe->max_sfb_ste + sfb;
>> if (cpe->ms_mask[idx] &&
>> cpe->ch[0].band_type[idx] < NOISE_BT &&
>> cpe->ch[1].band_type[idx] < NOISE_BT) {
>> diff --git a/libavcodec/aac/aacdec_latm.h b/libavcodec/aac/aacdec_latm.h
>> index e40a2fe1a7..047c11e0fb 100644
>> --- a/libavcodec/aac/aacdec_latm.h
>> +++ b/libavcodec/aac/aacdec_latm.h
>> @@ -56,7 +56,8 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>> {
>> AACDecContext *ac = &latmctx->aac_ctx;
>> AVCodecContext *avctx = ac->avctx;
>> - MPEG4AudioConfig m4ac = { 0 };
>> + OutputConfiguration oc = { 0 };
>> + MPEG4AudioConfig *m4ac = &oc.m4ac;
>> GetBitContext gbc;
>> int config_start_bit = get_bits_count(gb);
>> int sync_extension = 0;
>> @@ -76,7 +77,7 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>> if (get_bits_left(gb) <= 0)
>> return AVERROR_INVALIDDATA;
>>
>> - bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
>> + bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &oc,
>> &gbc, config_start_bit,
>> sync_extension);
>>
>> @@ -88,11 +89,12 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>> asclen = bits_consumed;
>>
>> if (!latmctx->initialized ||
>> - ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
>> - ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
>> + ac->oc[1].m4ac.sample_rate != m4ac->sample_rate ||
>> + ac->oc[1].m4ac.chan_config != m4ac->chan_config) {
>>
>> if (latmctx->initialized) {
>> - av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
>> + av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n",
>> + m4ac->sample_rate, m4ac->chan_config);
>> } else {
>> av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
>> }
>> @@ -280,7 +282,7 @@ static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
>> } else {
>> push_output_configuration(&latmctx->aac_ctx);
>> if ((err = decode_audio_specific_config(
>> - &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
>> + &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1],
>> avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
>> pop_output_configuration(&latmctx->aac_ctx);
>> return err;
>> diff --git a/libavcodec/aac/aacdec_lpd.c b/libavcodec/aac/aacdec_lpd.c
>> new file mode 100644
>> index 0000000000..be39e2c175
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_lpd.c
>> @@ -0,0 +1,192 @@
>> +/*
>> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "aacdec_lpd.h"
>> +#include "aacdec_usac.h"
>> +
>> +const uint8_t ff_aac_lpd_mode_tab[32][4] = {
>> + { 0, 0, 0, 0 },
>> + { 1, 0, 0, 0 },
>> + { 0, 1, 0, 0 },
>> + { 1, 1, 0, 0 },
>> + { 0, 0, 1, 0 },
>> + { 1, 0, 1, 0 },
>> + { 0, 1, 1, 0 },
>> + { 1, 1, 1, 0 },
>> + { 0, 0, 0, 1 },
>> + { 1, 0, 0, 1 },
>> + { 0, 1, 0, 1 },
>> + { 1, 1, 0, 1 },
>> + { 0, 0, 1, 1 },
>> + { 1, 0, 1, 1 },
>> + { 0, 1, 1, 1 },
>> + { 1, 1, 1, 1 },
>> + { 2, 2, 0, 0 },
>> + { 2, 2, 1, 0 },
>> + { 2, 2, 0, 1 },
>> + { 2, 2, 1, 1 },
>> + { 0, 0, 2, 2 },
>> + { 1, 0, 2, 2 },
>> + { 0, 1, 2, 2 },
>> + { 1, 1, 2, 2 },
>> + { 2, 2, 2, 2 },
>> + { 3, 3, 3, 3 },
>> + /* Larger values are reserved, but permit them for resilience */
>> + { 0, 0, 0, 0 },
>> + { 0, 0, 0, 0 },
>> + { 0, 0, 0, 0 },
>> + { 0, 0, 0, 0 },
>> + { 0, 0, 0, 0 },
>> + { 0, 0, 0, 0 },
>> +};
>> +
>> +static void parse_qn(GetBitContext *gb, int *qn, int nk_mode, int no_qn)
>> +{
>> + if (nk_mode == 1) {
>> + for (int k = 0; k < no_qn; k++) {
>> + qn[k] = ff_aac_get_vlclbf(gb);
>> + if (qn[k])
>> + qn[k]++;
>> + }
>> + return;
>> + }
>> +
>> + for (int k = 0; k < no_qn; k++)
>> + qn[k] = get_bits(gb, 2) + 2;
>> +
>> + if (nk_mode == 2) {
>> + for (int k = 0; k < no_qn; k++) {
>> + if (qn[k] > 4) {
>> + qn[k] = ff_aac_get_vlclbf(gb);
>> + if (qn[k])
>> + qn[k] += 4;
>> + }
>> + }
>> + return;
>> + }
>> +
>> + for (int k = 0; k < no_qn; k++) {
>> + if (qn[k] > 4) {
>> + int qn_ext = ff_aac_get_vlclbf(gb);
>> + switch (qn_ext) {
>> + case 0: qn[k] = 5; break;
>> + case 1: qn[k] = 6; break;
>> + case 2: qn[k] = 0; break;
>> + default: qn[k] = qn_ext + 4; break;
>> + }
>> + }
>> + }
>> +}
>> +
>> +static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv,
>> + int nk_mode, int no_qn)
>> +{
>> + int n, nk;
>> +
>> + int qn[2];
>> + parse_qn(gb, qn, nk_mode, no_qn);
>> +
>> + for (int k = 0; k < no_qn; k++) {
>> + if (qn[k] > 4) {
>> + nk = (qn[k] - 3) / 2;
>> + n = qn[k] - nk*2;
>> + } else {
>> + nk = 0;
>> + n = qn[k];
>> + }
>> + }
>> +
>> + int idx = get_bits(gb, 4*n);
>> +
>> + if (nk > 0)
>> + for (int i = 0; i < 8; i++)
>> + kv[i] = get_bits(gb, nk);
>> +
>> + return 0;
>> +}
>> +
>> +int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
>> + int use_gain, int len)
>> +{
>> + int ret;
>> + if (use_gain)
>> + ce->fac.gain = get_bits(gb, 7);
>> +
>> + for (int i = 0; i < len/8; i++) {
>> + ret = parse_codebook_idx(gb, ce->fac.kv[i], 1, 1);
>> + if (ret < 0)
>> + return ret;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
>> + AACUsacElemData *ce, GetBitContext *gb)
>> +{
>> + ce->ldp.acelp_core_mode = get_bits(gb, 3);
>> + ce->ldp.lpd_mode = get_bits(gb, 5);
>> +
>> + ce->ldp.bpf_control_info = get_bits1(gb);
>> + ce->ldp.core_mode_last = get_bits1(gb);
>> + ce->ldp.fac_data_present = get_bits1(gb);
>> +
>> + const uint8_t *mod = ff_aac_lpd_mode_tab[ce->ldp.lpd_mode];
>> +
>> + int first_ldp_flag = !ce->ldp.core_mode_last;
>> + int first_tcx_flag = 1;
>> + if (first_ldp_flag)
>> + ce->ldp.last_lpd_mode = -1; /* last_ldp_mode is a **STATEFUL** value */
>> +
>> + int k = 0;
>> + while (k < 0) {
>> + if (!k) {
>> + if (ce->ldp.core_mode_last && ce->ldp.fac_data_present)
>> + ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
>> + } else {
>> + if (!ce->ldp.last_lpd_mode && mod[k] > 0 ||
>> + ce->ldp.last_lpd_mode && !mod[k])
>> + ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
>> + }
>> + if (!mod[k]) {
>> +// parse_acelp_coding();
>> + ce->ldp.last_lpd_mode = 0;
>> + k++;
>> + } else {
>> +// parse_tcx_coding();
>> + ce->ldp.last_lpd_mode = mod[k];
>> + k += (1 << (mod[k] - 1));
>> + first_tcx_flag = 0;
>> + }
>> + }
>> +
>> +// parse_lpc_data(first_lpd_flag);
>> +
>> + if (!ce->ldp.core_mode_last && ce->ldp.fac_data_present) {
>> + uint16_t len_8 = usac->core_frame_len / 8;
>> + uint16_t len_16 = usac->core_frame_len / 16;
>> + uint16_t fac_len = get_bits1(gb) /* short_fac_flag */ ? len_8 : len_16;
>> + int ret = ff_aac_parse_fac_data(ce, gb, 1, fac_len);
>> + if (ret < 0)
>> + return ret;
>> + }
>> +
>> + return 0;
>> +}
>> diff --git a/libavcodec/aac/aacdec_lpd.h b/libavcodec/aac/aacdec_lpd.h
>> new file mode 100644
>> index 0000000000..924ff75e52
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_lpd.h
>> @@ -0,0 +1,33 @@
>> +/*
>> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVCODEC_AAC_AACDEC_LPD_H
>> +#define AVCODEC_AAC_AACDEC_LPD_H
>> +
>> +#include "aacdec.h"
>> +#include "libavcodec/get_bits.h"
>> +
>> +int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
>> + int use_gain, int len);
>> +
>> +int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
>> + AACUsacElemData *ce, GetBitContext *gb);
>> +
>> +#endif /* AVCODEC_AAC_AACDEC_LPD_H */
>> diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c
>> new file mode 100644
>> index 0000000000..4b48c4d6ca
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_usac.c
>> @@ -0,0 +1,1230 @@
>> +/*
>> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "aacdec_usac.h"
>> +#include "aacdec_tab.h"
>> +#include "aacdec_lpd.h"
>> +#include "aacdec_ac.h"
>> +
>> +#include "libavcodec/opusdsp.h"
>> +#include "libavcodec/aactab.h"
>> +#include "libavutil/mem.h"
>> +#include "libavcodec/mpeg4audio.h"
>> +
>> +/* Number of scalefactor bands per complex prediction band, equal to 2. */
>> +#define SFB_PER_PRED_BAND 2
>> +
>> +static inline uint32_t get_escaped_value(GetBitContext *gb, int nb1, int nb2, int nb3)
>> +{
>> + uint32_t val = get_bits(gb, nb1);
>> + if (val < ((1 << nb1) - 1))
>> + return val;
>> +
>> + val += get_bits(gb, nb2);
>> + if (val == ((1 << nb2) - 1))
>> + val += get_bits(gb, nb3);
>> +
>> + return val;
>> +}
>> +
>> +static int aac_usac_samplerate[] = {
>
> Missing const
>
>> + 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
>> + 16000, 12000, 11025, 8000, 7350, -1, -1, 57600, 51200,
>> + 40000, 38400, 34150, 28800, 25600, 20000, 19200, 17075, 14400, 12800, 9600, -1, -1, -1, -1,
>> +};
>> +
>
>> +static int parse_ext_ele(AACDecContext *ac, AACUsacElemConfig *e,
>> + GetBitContext *gb)
>> +{
>> + if (get_bits1(gb)) { /* usacExtElementPresent */
>> + uint32_t len;
>> + if (get_bits1(gb)) { /* usacExtElementUseDefaultLength */
>> + len = e->ext.default_len;
>> + } else {
>> + len = get_bits(gb, 8); /* usacExtElementPayloadLength */
>> + if (len == 255)
>> + len += get_bits(gb, 16) - 2;
>> + }
>> +
>> + if (len) {
>> + uint8_t *tmp;
>> + uint8_t pl_frag_start = 1;
>> + uint8_t pl_frag_end = 1;
>> + if (e->ext.payload_frag) {
>> + pl_frag_start = get_bits1(gb); /* usacExtElementStart */
>> + pl_frag_end = get_bits1(gb); /* usacExtElementStop */
>> + }
>> +
>> + if (pl_frag_start)
>> + e->ext.pl_data_offset = 0;
>> +
>> + tmp = av_realloc(e->ext.pl_data, e->ext.pl_data_offset + len);
>> + if (!tmp) {
>> + free(e->ext.pl_data);
>
> Wrong deallocator: memory obtained from av_realloc() must be released
> with av_free()/av_freep(), not free().
>
>> + return AVERROR(ENOMEM);
>> + }
>> + e->ext.pl_data = tmp;
>> +
>> + for (int i = 0; i < len; i++)
>> + e->ext.pl_data[e->ext.pl_data_offset + i] = get_bits(gb, 8);
>> +
>> + if (pl_frag_end) {
>> + int ret;
>> + e->ext.pl_data_offset = 0;
>> + switch (e->ext.type) {
>> + case ID_EXT_ELE_FILL:
>> + av_freep(&e->ext.pl_data);
>> + break;
>> + case ID_EXT_ELE_AUDIOPREROLL:
>> + ret = parse_audio_preroll(ac, e->ext.pl_data,
>> + e->ext.pl_data_offset);
>> + if (ret < 0) {
>> + av_freep(&e->ext.pl_data);
>> + return ret;
>> + }
>> + break;
>> + default:
>> + av_freep(&e->ext.pl_data);
>
> Pointless if you abort in the next line
>
>> + /* This should never happen */
>> + av_assert0(0);
>> + }
>> + }
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>
>
>> +#include "libavcodec/opusdsp.h"
>> +
>> +#ifndef AVCODEC_AAC_AACDEC_USAC_H
>> +#define AVCODEC_AAC_AACDEC_USAC_H
>> +
>> +#include "aacdec.h"
>> +
>> +#include "libavcodec/get_bits.h"
>> +
>> +static inline uint8_t ff_aac_get_vlclbf(GetBitContext *gb)
>> +{
>> + uint8_t ret = 0;
>> + while (get_bits1(gb) && ret <= 36)
>> + ret++;
>> + return ret;
>> +}
>
> Look at unary.h
That's convenient, thanks.
I've synced my changes on my repo in
https://github.com/cyanreg/FFmpeg/tree/xhe
Though I'll likely post the patchset to the ML again tomorrow, with
some fixes for preroll parsing and complex synth.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_0xA2FEA5F03F034464.asc
Type: application/pgp-keys
Size: 624 bytes
Desc: OpenPGP public key
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20240516/b654cae8/attachment.key>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature.asc
Type: application/pgp-signature
Size: 236 bytes
Desc: OpenPGP digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20240516/b654cae8/attachment.sig>
More information about the ffmpeg-devel mailing list