[FFmpeg-devel] [PATCH] ALS decoder
Måns Rullgård
mans
Thu Aug 20 02:40:31 CEST 2009
Thilo Borgmann <thilo.borgmann at googlemail.com> writes:
> Hi,
>
> the second part of my GSoC project is the decoder itself.
>
> Refers to 14496-3, subpart 11.
Some comments below. This is not a thorough review. Someone who
actually understands these algorithms will have to do that.
> +typedef struct {
> + uint32_t als_id; ///< ALS identifier
> + uint32_t samp_freq; ///< sampling frequency in Hz
> + uint32_t samples; ///< number of samples (per channel), 0xFFFFFFFF if unknown
> + int channels; ///< number of channels
> + int file_type; ///< not used, provided for debugging
> + int resolution; ///< 000 = 8-bit; 001 = 16-bit; 010 = 24-bit; 011 = 32-bit
> + int floating; ///< 1 = IEEE 32-bit floating-point, 0 = integer
> + int msb_first; ///< original byte order of the input audio data
> + int frame_length; ///< frame Length
> + int random_access; ///< distance between RA frames (in frames, 0...255)
> + enum RA_Flag ra_flag; ///< indicates where the size of ra units is stored
> + int adapt_order; ///< adaptive order: 1 = on, 0 = off
> + int coef_table; ///< table index of Rice code parameters
> + int long_term_prediction; ///< long term prediction (LTP): 1 = on, 0 = off
> + int max_order; ///< maximum prediction order (0..1023)
> + int block_switching; ///< number of block switching levels
> + int bgmc_mode; ///< BGMC Mode: 1 = on, 0 = off (Rice coding only)
> + int sb_part; ///< sub-block partition
> + int joint_stereo; ///< joint Stereo: 1 = on, 0 = off
> + int mc_coding; ///< extended inter-channel coding: 1 = on, 0 = off
> + int chan_config; ///< indicates that a chan_config_info field is present
> + int chan_sort; ///< channel rearrangement: 1 = on, 0 = off
> + int crc_enabled; ///< indicates that the crc field is present
> + int rlslms; ///< use RLS-LMS predictor: 1 = on, 0 = off
> + int aux_data_enabled; ///< indicates that auxiliary data is present
> + int chan_config_info; ///< mapping of channels to loudspeaker locations
> + int *chan_pos; ///< original channel positions
> + uint32_t header_size; ///< header size of original audio file in bytes, provided for debugging
> + uint32_t trailer_size; ///< Trailer size of original audio file in bytes, provided for debugging
> + uint32_t crc; ///< 32-bit CCITT-32 CRC checksum
> +} ALSSpecificConfig;
Please try not to make *every* line exceed 80 columns. You could
easily shorten them here by reducing the amount of whitespace between
the type and name and before the comment.
> +typedef struct {
> + AVCodecContext *avctx;
> + ALSSpecificConfig sconf;
> + GetBitContext gb; ///< a bit reader context
> + unsigned int num_frames; ///< number of frames to decode, 0 if unknown
> + unsigned int cur_frame_length; ///< length of the current frame to decode
> + unsigned int last_frame_length; ///< length of the last frame to decode, 0 if unknown
> + unsigned int frame_id; ///< the frame id / number of the current frame
> + unsigned int js_switch; ///< if true, joint-stereo decoding is enforced
> + unsigned int num_blocks; ///< number of blocks used in the current frame
> + int64_t *quant_cof; ///< quantized parcor coefficients
> + int64_t *lpc_cof; ///< coefficients of the direct form prediction filter
> + int64_t *prev_raw_samples; ///< contains unshifted raw samples from the previous block
> + int64_t **raw_samples; ///< decoded raw samples for each channel
> + int64_t *raw_buffer; ///< contains all decoded raw samples including carryover samples
> +} ALSDecContext;
Ditto.
> +/** Computes ceil(log2(x)) using av_log2.
> + */
> +static inline int ceil_log2(int x) {
> + return x > 0 ? av_log2((x - 1) << 1) : 0;
> +}
Little functions like this are likely to be needed again some time.
They should be placed in a common location.
> +/** Reads an ALSSpecificConfig from a buffer into the output struct.
> + */
> +static av_cold int read_specific_config(ALSDecContext *ctx)
> +{
> + GetBitContext gb;
> + uint64_t ht_size;
> + int i, config_offset;
> + MPEG4AudioConfig m4ac;
> + ALSSpecificConfig *sconf = &ctx->sconf;
> + const uint8_t *buffer = ctx->avctx->extradata;
> + int buffer_size = ctx->avctx->extradata_size;
> +
> + init_get_bits(&gb, buffer, buffer_size * 8);
> +
> + config_offset = ff_mpeg4audio_get_config(&m4ac, buffer, buffer_size);
> +
> + if (config_offset < 0)
> + return -1;
> +
> + skip_bits_long(&gb, config_offset);
> + buffer_size -= config_offset >> 3;
> +
> + if (buffer_size < 22)
> + return -1;
> +
> + // read the fixed items
> + sconf->als_id = get_bits_long(&gb, 32);
> + sconf->samp_freq = m4ac.sample_rate;
> + skip_bits_long(&gb, 32);
Maybe comment what is being skipped here and elsewhere.
> + sconf->samples = get_bits_long(&gb, 32);
> + sconf->channels = m4ac.channels;
> + skip_bits(&gb, 16);
> + sconf->file_type = get_bits(&gb, 3);
> + sconf->resolution = get_bits(&gb, 3);
> + sconf->floating = get_bits1(&gb);
> + sconf->msb_first = get_bits1(&gb);
> + sconf->frame_length = get_bits(&gb, 16) + 1;
> + sconf->random_access = get_bits(&gb, 8);
> + sconf->ra_flag = get_bits(&gb, 2);
> + sconf->adapt_order = get_bits1(&gb);
> + sconf->coef_table = get_bits(&gb, 2);
> + sconf->long_term_prediction = get_bits1(&gb);
> + sconf->max_order = get_bits(&gb, 10);
> + sconf->block_switching = get_bits(&gb, 2);
> + sconf->bgmc_mode = get_bits1(&gb);
> + sconf->sb_part = get_bits1(&gb);
> + sconf->joint_stereo = get_bits1(&gb);
> + sconf->mc_coding = get_bits1(&gb);
> + sconf->chan_config = get_bits1(&gb);
> + sconf->chan_sort = get_bits1(&gb);
> + sconf->crc_enabled = get_bits1(&gb);
> + sconf->rlslms = get_bits1(&gb);
> + skip_bits(&gb, 5); // skip 5 reserved bits
Please lose some of that whitespace. Compliments on the alignment
though. Diego will love you.
> + sconf->aux_data_enabled = get_bits1(&gb);
> + buffer_size -= 22;
> +
> +
> + // check for ALSSpecificConfig struct
> + if (sconf->als_id != MKBETAG('A','L','S','\0'))
> + return -1;
> +
> + ctx->cur_frame_length = sconf->frame_length;
> +
> + // allocate quantized parcor coefficient buffer
> + if (!(ctx->quant_cof = av_malloc(sizeof(int64_t) * sconf->max_order))) {
> + av_log(ctx->avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
> + return AVERROR(ENOMEM);
> + }
> +
> + // allocate LPC coefficients
> + if (!(ctx->lpc_cof = av_malloc(sizeof(int64_t) * sconf->max_order))) {
> + av_log(ctx->avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
> + return AVERROR(ENOMEM);
> + }
> +
> + // calculate total number of frames to decode if possible
> + if (sconf->samples != 0xFFFFFFFF) {
> + ctx->num_frames = ((sconf->samples - 1) / sconf->frame_length) + 1;
> + ctx->last_frame_length = sconf->samples % ctx->sconf.frame_length;
> + if (!ctx->last_frame_length) {
> + ctx->last_frame_length = sconf->frame_length;
> + }
> + } else {
> + ctx->num_frames = 0;
> + ctx->last_frame_length = 0;
> + }
> +
> +
> + // read channel config
> + if (sconf->chan_config) {
> + if (buffer_size < 2)
> + return -1;
> + sconf->chan_config_info = get_bits(&gb, 16);
> + buffer_size -= 2;
> + // TODO: use this to set avctx->channel_layout
> + }
> +
> +
> + // read channel sorting
> + if (sconf->chan_sort && sconf->channels > 1) {
> + int chan_pos_bits = ceil_log2(sconf->channels);
> + int bytes_needed = (sconf->channels * chan_pos_bits + 7) >> 3;
> + if (buffer_size < bytes_needed)
> + return -1;
> +
> + if(!(sconf->chan_pos = av_malloc(sconf->channels * sizeof(int))))
> + return -1;
> +
> + for (i = 0; i < sconf->channels; i++) {
> + sconf->chan_pos[i] = get_bits(&gb, chan_pos_bits);
> + }
We usually omit {} for single-line for/if/while statements...
> + align_get_bits(&gb);
> + buffer_size -= bytes_needed;
> + // TODO: use this to actually do channel sorting
> + } else {
> + sconf->chan_sort = 0;
> + }
... except for one-line else clauses after a multiline if.
> + // read fixed header and trailer sizes, if size = 0xFFFFFFFF then there is no data field!
> + if (buffer_size < 8)
> + return -1;
> +
> + sconf->header_size = get_bits_long(&gb, 32);
> + sconf->trailer_size = get_bits_long(&gb, 32);
> + if (sconf->header_size == 0xFFFFFFFF)
> + sconf->header_size = 0;
> + if (sconf->trailer_size == 0xFFFFFFFF)
> + sconf->trailer_size = 0;
> +
> + ht_size = sconf->header_size + sconf->trailer_size;
> +
> + buffer_size -= 8;
> +
> +
> + // skip the header and trailer data
> + if (buffer_size < ht_size)
> + return -1;
> +
> + ht_size <<= 3;
> +
> + while (ht_size > 0) {
> + int len = FFMIN(ht_size, INT32_MAX);
> + skip_bits_long(&gb, len);
> + ht_size -= len;
> + }
> +
> + buffer_size -= ht_size >> 3;
> +
> +
> + // read the crc data
> + if (sconf->crc_enabled) {
> + if (buffer_size < 4)
> + return -1;
> +
> + sconf->crc = get_bits_long(&gb, 32);
> + }
> +
> +
> + // no need to read the rest of ALSSpecificConfig (ra_unit_size & aux data)
> +#ifdef DEBUG
> + dprint_specific_config(ctx);
> +#endif
> + return 0;
> +}
> +
> +
> +/** Checks the ALSSpecificConfig for unsupported features.
> + */
> +static int check_specific_config(ALSDecContext *ctx)
> +{
> + ALSSpecificConfig *sconf = &ctx->sconf;
> + int error = 0;
> +
> + if (sconf->floating) {
> + av_log_missing_feature(ctx->avctx, "Floating point decoding", 0);
> + error = -1;
> + }
> +
> + if (sconf->long_term_prediction) {
> + av_log_missing_feature(ctx->avctx, "Long-term prediction", 0);
> + error = -1;
> + }
> +
> + if (sconf->bgmc_mode) {
> + av_log_missing_feature(ctx->avctx, "BGMC entropy decoding", 0);
> + error = -1;
> + }
> +
> + if (sconf->mc_coding) {
> + av_log_missing_feature(ctx->avctx, "Multi-channel correlation", 0);
> + error = -1;
> + }
> +
> + if (sconf->chan_sort) {
> + av_log_missing_feature(ctx->avctx, "Channel sorting", 0);
> + }
> +
> + if (sconf->rlslms) {
> + av_log_missing_feature(ctx->avctx, "Adaptive RLS-LMS prediction", 0);
> + error = -1;
> + }
> +
> + return error;
> +}
> +
> +
> +/** Parses the bs_info item to extract the block partitioning.
> + */
> +static void parse_bs_info(uint32_t bs_info, unsigned int n, unsigned int div,
> + unsigned int **div_blocks, unsigned int *num_blocks)
> +{
> + if (n < 31 && ((bs_info >> (30 - n)) & 1)) {
> + // if the level is valid and the investigated bit n is set
> + // then recursively check both children at bits (2n+1) and (2n+2)
> + n *= 2;
> + div += 1;
> + parse_bs_info(bs_info, n + 1, div, div_blocks, num_blocks);
> + parse_bs_info(bs_info, n + 2, div, div_blocks, num_blocks);
> + } else {
> + // else the bit is not set or the last level has been reached
> + // (bit implicitly not set)
> + **div_blocks = div;
> + (*div_blocks)++;
> + (*num_blocks)++;
> + }
> +}
> +
> +
> +/** Reads and decodes a Rice codeword.
> + */
> +static int64_t decode_rice(GetBitContext *gb, unsigned int k)
> +{
> + int64_t value = 0;
> + int64_t q = 0;
> + int max = gb->size_in_bits - get_bits_count(gb) - k;
> +
> + if (!k) {
> + q = get_unary(gb, 0, max);
> + return (q & 1) ? -((q + 1) >> 1) : ((q + 1) >> 1);
> + } else if (k == 1) {
> + q = get_unary(gb, 0, max);
> + return get_bits1(gb) ? q : -(q + 1);
> + } else {
> + unsigned int r, sub_sign;
> +
> + q = get_unary(gb, 0, max);
> + sub_sign = get_bits1(gb);
> + r = get_bits_long(gb, k - 1);
> +
> + value = (q << (k - 1)) + r;
> +
> + return sub_sign ? value : -(value + 1);
> + }
> +}
This function looks like it was designed specifically to make gcc fail:
1. 64-bit variables used unnecessarily.
2. GCC hates complex conditionals.
3. Compilers in general are bad at bit-hacks.
It seems to me that int is enough for 'q' in the first two cases.
get_unary() returns int, so we start with at most that many bits. To
avoid overflow in q+1, do this instead:
int r = q & 1;
return r ? -((q >> 1) + r) : ((q >> 1) + r);
Furthermore, gcc does stupid things with that code even with plain int
variables. Rewriting in a few more steps helps massively:
int r = q & 1;
q = (q >> 1) + r;
return r ? -q : q;
This reduces the number of instructions on ARM from 6 to 4 (from 4 to
3 with armcc). On MIPS it goes from 9 with a branch to 5 branch-free.
Finally, -(v + 1) is equivalent to ~v on two's complement machines,
which we have previously agreed to assume we have. Hence we should
write the return values in the second and third cases like this:
return sign ? value : ~value;
This prevents 32-bit overflow in the k==1 case and gives better code
with several compilers in the final case, where we must resort to
64-bit maths.
> +/** Converts PARCOR coefficient k to direct filter coefficient.
> + */
> +static void parcor_to_lpc(unsigned int k, int64_t *par, int64_t *cof)
> +{
> + int i;
> + int64_t tmp1, tmp2;
> +
> + for (i = 0; i < (k+1) >> 1; i++) {
> + tmp1 = cof[ i ] + ((par[k] * cof[k - i - 1] + (1 << 19)) >> 20);
> + tmp2 = cof[k - i - 1] + ((par[k] * cof[ i ] + (1 << 19)) >> 20);
> + cof[k - i - 1] = tmp2;
> + cof[ i ] = tmp1;
> + }
> +
> + cof[k] = par[k];
> +}
This is again a perfect trap for gcc. 64-bit maths in tight loops is
something it is *really* bad at. Is there no way to avoid 64-bit
elements here? Is perhaps the range usually smaller, so we could
choose a 32-bit version most of the time and fall back on 64-bit only
when required? Regardless of compiler, 64-bit multiplication is
expensive on any 32-bit machine.
> +/** Reformat block sizes from log2 format to direct form. Also assure that the
> + * block sizes of the last frame correspond to the actual number of samples.
> + */
> +static void reconstruct_block_sizes(ALSDecContext *ctx, uint32_t *div_blocks)
> +{
> + unsigned int b;
> +
> + // The last frame may have an overdetermined block structure given in
> + // the bitstream. In that case the defined block structure would need
> + // more samples than available to be consistent.
> + // The block structure is actually used but the block sizes are adapted
> + // to fit the actual number of available samples.
> + // Example: 5 samples, 2nd level block sizes: 2 2 2 2.
> + // This results in the actual block sizes: 2 2 1 0.
> + // This is not specified in 14496-3 but actually done by the reference
> + // codec RM22 revision 2.
> + // This appears to happen in case of an odd number of samples in the last
> + // frame which is actually not allowed by the block length switching part
> + // of 14496-3.
> + // The ALS conformance files feature an odd number of samples in the last
> + // frame.
> + if (ctx->cur_frame_length == ctx->last_frame_length) {
> + unsigned int remaining = ctx->cur_frame_length;
> +
> + for (b = 0; b < ctx->num_blocks; b++) {
> + div_blocks[b] = ctx->sconf.frame_length >> div_blocks[b];
> +
> + if (remaining < div_blocks[b]) {
> + div_blocks[b] = remaining;
> + ctx->num_blocks = b + 1;
> + break;
> + } else {
> + remaining -= div_blocks[b];
> + }
> + }
> + } else {
> + for (b = 0; b < ctx->num_blocks; b++)
> + div_blocks[b] = ctx->sconf.frame_length >> div_blocks[b];
> + }
> +}
> +
> +
> +/** Reads the block data.
> + */
> +static int read_block_data(ALSDecContext *ctx, unsigned int ra_block,
> + int64_t *raw_samples, unsigned int block_length,
> + unsigned int *js_blocks, int64_t *raw_other)
> +{
> + ALSSpecificConfig *sconf = &ctx->sconf;
> + AVCodecContext *avctx = ctx->avctx;
> + GetBitContext *gb = &ctx->gb;
> + unsigned int shift_lsbs = 0;
> + unsigned int block_type;
> + unsigned int k;
> +
> + block_type = get_bits1(gb);
> +
> + if (block_type == 0) {
> + unsigned int const_block;
> + int32_t const_val = 0;
> +
> + const_block = get_bits1(gb); // 1 = constant value, 0 = zero block (silence)
> + *js_blocks = get_bits1(gb);
> +
> + // skip 5 reserved bits
> + skip_bits(gb, 5);
> +
> + if (const_block) {
> + unsigned int const_val_bits;
> +
> + if (sconf->resolution == 2 || sconf->floating)
> + const_val_bits = 24;
> + else
> + const_val_bits = avctx->bits_per_raw_sample;
> +
> + const_val = get_sbits_long(gb, const_val_bits);
> + }
> +
> + // write raw samples into buffer
> + for (k = 0; k < block_length; k++)
> + raw_samples[k] = const_val;
> + } else {
> + unsigned int s[8];
> + unsigned int sub_blocks, sb_length;
> + unsigned int opt_order = 1;
> + int64_t *quant_cof = ctx->quant_cof;
> + int64_t *lpc_cof = ctx->lpc_cof;
> + unsigned int start = 0;
> + int sb, smp;
> + int64_t y;
> +
> + *js_blocks = get_bits1(gb);
> +
> + // determine the number of sub blocks for entropy decoding
> + if (!sconf->bgmc_mode && !sconf->sb_part)
> + sub_blocks = 1;
> + else if (sconf->bgmc_mode && sconf->sb_part)
> + sub_blocks = 1 << get_bits(gb, 2);
> + else
> + sub_blocks = get_bits1(gb) ? 4 : 1;
> +
> + // do not continue in case of a damaged stream since
> + // block_length must be evenly divisible by sub_blocks
> + if (block_length % sub_blocks) {
> + av_log(avctx, AV_LOG_WARNING,
> + "Block length is not evenly divisible by the number of sub blocks.\n");
> + return -1;
> + }
> +
> + sb_length = block_length / sub_blocks;
> +
> +
> + if (!sconf->bgmc_mode) {
> + s[0] = get_bits(gb, (sconf->resolution > 1) ? 5 : 4);
> + for (k = 1; k < sub_blocks; k++)
> + s[k] = s[k - 1] + decode_rice(gb, 0);
> + } else {
> + // TODO: BGMC mode
> + }
> +
> + if (get_bits1(gb)) {
> + shift_lsbs = get_bits(gb, 4) + 1;
> + }
> +
> +
> + if (!sconf->rlslms) {
> + int64_t quant_index;
> +
> + if (sconf->adapt_order) {
> + int opt_order_length =
> + FFMIN(
> + ceil_log2(sconf->max_order+1),
> + FFMAX(ceil_log2((block_length >> 3) - 1), 1)
> + );
> + opt_order = get_bits(gb, opt_order_length);
> + } else {
> + opt_order = sconf->max_order;
> + }
> +
> + if (opt_order) {
> + if (sconf->coef_table == 3) {
> + // read coefficient 0
> + quant_index = get_bits(gb, 7) - 64;
> + quant_cof[0] = parcor_scaled_values[quant_index + 64];
> +
> + // read coefficient 1
> + quant_index = get_bits(gb, 7) - 64;
> + quant_cof[1] = -parcor_scaled_values[quant_index + 64];
> +
> + // read coefficients 2 to opt_order
> + for (k = 2; k < opt_order; k++) {
> + quant_index = get_bits(gb, 7) - 64;
> + quant_cof[k] = (quant_index << 14) + (1 << 13);
> + }
> + } else {
> + int offset, rice_param, k_max;
> +
> + // read coefficient 0
> + offset = parcor_rice_table[sconf->coef_table][0][0];
> + rice_param = parcor_rice_table[sconf->coef_table][0][1];
> + quant_index = decode_rice(gb, rice_param) + offset;
> + quant_cof[0] = parcor_scaled_values[quant_index + 64];
> +
> + // read coefficient 1
> + offset = parcor_rice_table[sconf->coef_table][1][0];
> + rice_param = parcor_rice_table[sconf->coef_table][1][1];
> + quant_index = decode_rice(gb, rice_param) + offset;
> + quant_cof[1] = -parcor_scaled_values[quant_index + 64];
> +
> + // read coefficients 2 to 19
> + k_max = FFMIN(20, opt_order);
> + for (k = 2; k < k_max; k++) {
> + offset = parcor_rice_table[sconf->coef_table][k][0];
> + rice_param = parcor_rice_table[sconf->coef_table][k][1];
> + quant_index = decode_rice(gb, rice_param) + offset;
> + quant_cof[k] = (quant_index << 14) + (1 << 13);
> + }
> +
> + // read coefficients 20 to 126
> + k_max = FFMIN(127, opt_order);
> + for (k = 20; k < k_max; k++) {
> + offset = k & 1;
> + rice_param = 2;
> + quant_index = decode_rice(gb, rice_param) + offset;
> + quant_cof[k] = (quant_index << 14) + (1 << 13);
> + }
> +
> + // read coefficients 127 to opt_order
> + for (k = 127; k < opt_order; k++) {
> + offset = 0;
> + rice_param = 1;
> + quant_index = decode_rice(gb, rice_param) + offset;
> + quant_cof[k] = (quant_index << 14) + (1 << 13);
> + }
> + }
> + }
> + }
The block nesting is very deep here. Perhaps splitting this large
function into several smaller ones would help readability. Even gcc
should manage to inline them.
> + if (sconf->long_term_prediction) {
> + // TODO: LTP mode
> + }
> +
> + start = 0;
> +
> + // read first value and residuals in case of a random access block
> + if (ra_block) {
> + if (opt_order)
> + raw_samples[0] = decode_rice(gb, avctx->bits_per_raw_sample - 4);
> + if (opt_order > 1)
> + raw_samples[1] = decode_rice(gb, s[0] + 3);
> + if (opt_order > 2)
> + raw_samples[2] = decode_rice(gb, s[0] + 1);
> +
> + start = FFMIN(opt_order, 3);
> + } else {
> + for (k = 0; k < opt_order; k++)
> + parcor_to_lpc(k, quant_cof, lpc_cof);
> + }
> +
> + // read all residuals
> + // TODO: decode directly into ctx->raw_samples[] instead of storing the residuals
> + if (sconf->bgmc_mode) {
> + // TODO: BGMC mode
> + } else {
> + int64_t *current_res = raw_samples;
> +
> + for (sb = 0; sb < sub_blocks; sb++) {
> + for (k = start; k < sb_length; k++) {
> + current_res[k] = decode_rice(gb, s[sb]);
> + }
> + current_res += sb_length;
> + start = 0;
> + }
> + }
> +
> + // reconstruct all samples from residuals
> + if (ra_block) {
> + unsigned int progressive = FFMIN(block_length, opt_order);
> +
> + for (smp = 0; smp < block_length; smp++) {
> + unsigned int max, dequant;
> +
> + dequant = smp < progressive;
> + max = dequant ? smp : progressive;
> +
> + y = 1 << 19;
> +
> + for (sb = 0; sb < max; sb++)
> + y += lpc_cof[sb] * raw_samples[smp - (sb + 1)];
> +
> + raw_samples[smp] -= y >> 20;
> + if (dequant)
> + parcor_to_lpc(smp, quant_cof, lpc_cof);
> + }
> + } else {
> + int store_prev_samples = (*js_blocks && raw_other) || shift_lsbs;
> +
> + // store previous smaples in case that they have to be altered
> + if (store_prev_samples)
> + memcpy(ctx->prev_raw_samples, raw_samples - sconf->max_order,
> + sizeof(int64_t) * sconf->max_order);
> +
> + // reconstruct difference signal for prediction (joint-stereo)
> + if (*js_blocks && raw_other) {
> + int i;
> + if (raw_other > raw_samples) { // D = R - L
> + for (i = -1; i >= -sconf->max_order; i--)
> + raw_samples[i] = raw_other[i] - raw_samples[i];
> + } else { // D = R - L
Are those two comments meant to be the same?
> + for (i = -1; i >= -sconf->max_order; i--)
> + raw_samples[i] = raw_samples[i] - raw_other[i];
> + }
> + }
> +
> + // reconstruct shifted signal
> + if (shift_lsbs) {
> + for (smp = -1; smp >= -sconf->max_order; smp--)
> + raw_samples[smp] >>= shift_lsbs;
> + }
> +
> + // reconstruct raw samples
> + for (smp = 0; smp < block_length; smp++) {
> + y = 1 << 19;
> +
> + for (sb = 0; sb < opt_order; sb++)
> + y += lpc_cof[sb] * raw_samples[smp - (sb + 1)];
> +
> + raw_samples[smp] -= y >> 20;
> + }
> +
> + // restore previous samples in case that they have been altered
> + if (store_prev_samples)
> + memcpy(raw_samples - sconf->max_order, ctx->prev_raw_samples,
> + sizeof(int64_t) * sconf->max_order);
> + }
> + }
> +
> + if (sconf->rlslms) {
> + // TODO: read RLSLMS extension data
> + }
> +
> + if (!sconf->mc_coding || ctx->js_switch) {
> + align_get_bits(gb);
> + }
> +
> + if (shift_lsbs) {
> + for (k = 0; k < block_length; k++)
> + raw_samples[k] <<= shift_lsbs;
> + }
> +
> + return 0;
> +}
Many of the loops above look like they could be easily simdified. I
don't know how much time is spent in each, so I haven't thought about
it in detail. Breaking the function apart would also help in finding
where most time is spent.
> +
> +/** Reads the frame data.
> + */
> +static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
> +{
> + ALSSpecificConfig *sconf = &ctx->sconf;
> + GetBitContext *gb = &ctx->gb;
> + unsigned int div_blocks[32]; ///< Block sizes.
> + unsigned int c, b, ra_block;
> + int64_t *raw_samples_L;
> + int64_t *raw_samples_R;
> + unsigned int js_blocks[2];
> +
> + uint32_t bs_info = 0;
> + unsigned int *ptr_div_blocks;
> +
> + // skip ra_unit_size if present
> + if (sconf->ra_flag == RA_FLAG_FRAMES && ra_frame)
> + skip_bits_long(gb, 32);
> +
> + if (sconf->mc_coding && sconf->joint_stereo) {
> + ctx->js_switch = get_bits1(gb);
> + align_get_bits(gb);
> + }
> +
> + if (!sconf->mc_coding || ctx->js_switch) {
> + int independent_bs = !sconf->joint_stereo;
> +
> + for (c = 0; c < sconf->channels; c++) {
> + js_blocks[0] = 0;
> + js_blocks[1] = 0;
> +
> + if (sconf->block_switching) {
> + unsigned int bs_info_len = 1 << (sconf->block_switching + 2);
> + bs_info = get_bits_long(gb, bs_info_len);
> + bs_info <<= (32 - bs_info_len);
> + }
> +
> + ctx->num_blocks = 0;
> + ptr_div_blocks = &div_blocks[0];
> + parse_bs_info(bs_info, 0, 0, &ptr_div_blocks, &ctx->num_blocks);
> + reconstruct_block_sizes(ctx, div_blocks);
> +
> + // if joint_stereo and block_switching is set, independent decoding
> + // is signaled via the first bit of bs_info
> + if(sconf->joint_stereo && sconf->block_switching) {
> + if (bs_info >> 31)
> + independent_bs = 2;
> + }
> +
> + // if this is the last channel, it has to be decoded independently
> + if (c == sconf->channels - 1)
> + independent_bs = 1;
> +
> + if (independent_bs) {
> + raw_samples_L = ctx->raw_samples[c];
> +
> + for (b = 0; b < ctx->num_blocks; b++) {
> + ra_block = !b && ra_frame;
> + if (read_block_data(ctx, ra_block, raw_samples_L,
> + div_blocks[b], &js_blocks[0], NULL)) {
> + // damaged block, write zero for the rest of the frame
> + while (b < ctx->num_blocks) {
> + memset(raw_samples_L, 0, div_blocks[b]);
> + raw_samples_L += div_blocks[b];
> + b++;
> + }
> + return -1;
> + }
> + raw_samples_L += div_blocks[b];
> + }
> +
> + // store carryover raw samples
> + memmove((ctx->raw_samples[c]) - sconf->max_order,
> + (ctx->raw_samples[c]) - sconf->max_order + sconf->frame_length,
> + sizeof(int64_t) * sconf->max_order);
> +
> + if(independent_bs)
> + independent_bs--;
> + } else {
> + unsigned int offset = 0;
> +
> + // decode all blocks
> + for (b = 0; b < ctx->num_blocks; b++) {
> + unsigned int s;
> + raw_samples_L = ctx->raw_samples[c ] + offset;
> + raw_samples_R = ctx->raw_samples[c + 1] + offset;
> + ra_block = !b && ra_frame;
> + if (read_block_data(ctx, ra_block, raw_samples_L, div_blocks[b],
> + &js_blocks[0], raw_samples_R) ||
> + read_block_data(ctx, ra_block, raw_samples_R, div_blocks[b],
> + &js_blocks[1], raw_samples_L)) {
> + // damaged block, write zero for the rest of the frame
> + while (b < ctx->num_blocks) {
> + memset(raw_samples_L, 0, div_blocks[b]);
> + memset(raw_samples_R, 0, div_blocks[b]);
> + raw_samples_L += div_blocks[b];
> + raw_samples_R += div_blocks[b];
> + b++;
> + }
> + return -1;
> + }
Again, very deep nesting makes code hard to read. Consider making
some blocks separate functions.
> + // reconstruct joint-stereo blocks
> + if (js_blocks[0]) {
> + if (js_blocks[1])
> + av_log(ctx->avctx, AV_LOG_WARNING, "Invalid channel pair!\n");
> +
> + for (s = 0; s < div_blocks[b]; s++)
> + raw_samples_L[s] = raw_samples_R[s] - raw_samples_L[s];
> + } else if (js_blocks[1]) {
> + for (s = 0; s < div_blocks[b]; s++)
> + raw_samples_R[s] = raw_samples_R[s] + raw_samples_L[s];
> + }
> +
> + offset += div_blocks[b];
> + }
> +
> + // store carryover raw samples
> + memmove((ctx->raw_samples[c]) - sconf->max_order,
> + (ctx->raw_samples[c]) - sconf->max_order + sconf->frame_length,
> + sizeof(int64_t) * sconf->max_order);
> +
> + memmove((ctx->raw_samples[c + 1]) - sconf->max_order,
> + (ctx->raw_samples[c + 1]) - sconf->max_order + sconf->frame_length,
> + sizeof(int64_t) * sconf->max_order);
> +
> + c++;
> + }
> + }
> + } else { // multi-channel coding
> + if (sconf->block_switching) {
> + unsigned int bs_info_len = 1 << (sconf->block_switching + 2);
> + bs_info = get_bits_long(gb, bs_info_len);
> + bs_info <<= (32 - bs_info_len);
> + }
> +
> + ctx->num_blocks = 0;
> + ptr_div_blocks = &div_blocks[0];
> + parse_bs_info(bs_info, 0, 0, &ptr_div_blocks, &ctx->num_blocks);
> + reconstruct_block_sizes(ctx, div_blocks);
> +
> + // TODO: multi channel coding might use a temporary buffer instead as
> + // the actual channel is not known when read_block-data is called
> + raw_samples_L = ctx->raw_samples[0];
> +
> + for (b = 0; b < ctx->num_blocks; b++) {
> + ra_block = !b && ra_frame;
> + if (read_block_data(ctx, ra_block, raw_samples_L,
> + div_blocks[b], &js_blocks[0], NULL)) {
> + // damaged block, write zero for the rest of the frame
> + while (b < ctx->num_blocks) {
> + memset(raw_samples_L, 0, div_blocks[b]);
> + raw_samples_L += div_blocks[b];
> + b++;
> + }
> + return -1;
> + }
> + raw_samples_L += div_blocks[b];
> + // TODO: read_channel_data
> + }
> + }
> +
> + if (sconf->floating) {
> + // TODO: read_diff_float_data
> + }
> +
> + return 0;
> +}
Maybe you already tried to fix it, but there's a scary number of
memcpy/memmove calls above.
> +/** Decodes an ALS frame.
> + */
> +static int decode_frame(AVCodecContext *avctx,
> + void *data, int *data_size,
> + AVPacket *avpkt)
> +{
> + ALSDecContext *ctx = avctx->priv_data;
> + ALSSpecificConfig *sconf = &ctx->sconf;
> + const uint8_t *buffer = avpkt->data;
> + int buffer_size = avpkt->size;
> + int invalid_frame = 0;
> + unsigned int c, sample, ra_frame, bytes_read, shift;
> +
> + init_get_bits(&ctx->gb, buffer, buffer_size * 8);
> + ra_frame = sconf->random_access && ((!ctx->frame_id) ||
> + !(ctx->frame_id % sconf->random_access));
> +
> + // the last frame to decode might have a different length
> + if (ctx->num_frames && ctx->num_frames - 1 == ctx->frame_id) {
> + ctx->cur_frame_length = ctx->last_frame_length;
> + }
> +
> + // decode the frame data
> + if ((invalid_frame = read_frame_data(ctx, ra_frame))) {
> + av_log(ctx->avctx, AV_LOG_WARNING,
> + "Reading frame data failed. Skipping RA unit.\n");
> + }
> +
> + // increment the frame counter
> + ctx->frame_id++;
> +
> + // transform decoded frame into output format
> + #define INTERLEAVE_OUTPUT(bps) \
That's an awful lot of whitespace. Please cut it back so the lines
fit in 80 columns, which will be easily possible when you address my
next comment.
> + { \
> + int##bps##_t *dest = (int##bps##_t*) data; \
> + shift = bps - ctx->avctx->bits_per_raw_sample; \
> + for (sample = 0; sample < ctx->cur_frame_length; sample++) { \
> + for (c = 0; c < sconf->channels; c++) { \
> + *(dest++) = (int##bps##_t) (ctx->raw_samples[c][sample] << shift); \
Useless parens: *dest++ is good. Useless cast => more useless parens.
> + } \
> + } \
> + }
> +
> + if (ctx->avctx->bits_per_raw_sample <= 16) {
> + INTERLEAVE_OUTPUT(16)
> + } else {
> + INTERLEAVE_OUTPUT(32)
> + }
> +
> + *data_size = ctx->cur_frame_length * sconf->channels
> + * (av_get_bits_per_sample_format(avctx->sample_fmt) >> 3);
> +
> + bytes_read = invalid_frame ? buffer_size :
> + (get_bits_count(&ctx->gb) + 7) >> 3;
> +
> + return bytes_read;
> +}
> +
> +
> +/** Uninitializes the ALS decoder.
> + */
> +static av_cold int decode_end(AVCodecContext *avctx)
> +{
> + ALSDecContext *ctx = avctx->priv_data;
> +
> + av_freep(&ctx->sconf.chan_pos);
> +
> + av_freep(&ctx->quant_cof);
> + av_freep(&ctx->lpc_cof);
> + av_freep(&ctx->prev_raw_samples);
> + av_freep(&ctx->raw_samples);
> + av_freep(&ctx->raw_buffer);
> +
> + return 0;
> +}
> +
> +
> +/** Initializes the ALS decoder.
> + */
> +static av_cold int decode_init(AVCodecContext *avctx)
> +{
> + unsigned int c;
> + unsigned int channel_size;
> + ALSDecContext *ctx = avctx->priv_data;
> + ALSSpecificConfig *sconf = &ctx->sconf;
> + ctx->avctx = avctx;
> +
> + if (!avctx->extradata) {
> + av_log(avctx, AV_LOG_ERROR, "Missing required ALS extradata.\n");
> + return -1;
> + }
> +
> + if (read_specific_config(ctx)) {
> + av_log(avctx, AV_LOG_ERROR, "Reading ALSSpecificConfig failed.\n");
> + decode_end(avctx);
> + return -1;
> + }
> +
> + if (check_specific_config(ctx)) {
> + decode_end(avctx);
> + return -1;
> + }
> +
> + avctx->sample_rate = sconf->samp_freq;
> + avctx->channels = sconf->channels;
> +
> + if (sconf->floating) {
> + avctx->sample_fmt = SAMPLE_FMT_FLT;
> + avctx->bits_per_raw_sample = 32;
> + } else {
> + avctx->sample_fmt = sconf->resolution > 1
> + ? SAMPLE_FMT_S32 : SAMPLE_FMT_S16;
> + avctx->bits_per_raw_sample = (sconf->resolution + 1) * 8;
> + }
> +
> + avctx->frame_size = sconf->frame_length;
> + channel_size = sconf->frame_length + sconf->max_order;
> +
> + // allocate previous raw sample buffer
> + if (!(ctx->prev_raw_samples = av_malloc(sizeof(int64_t) * sconf->max_order))) {
> + av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
> + decode_end(avctx);
> + return AVERROR(ENOMEM);
> + }
> +
> + // allocate raw and carried sample buffer
> + if (!(ctx->raw_buffer = av_mallocz(sizeof(int64_t) *
> + avctx->channels * channel_size))) {
> + av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
> + decode_end(avctx);
> + return AVERROR(ENOMEM);
> + }
> +
> + // allocate raw sample array buffer
> + if (!(ctx->raw_samples = av_malloc(sizeof(int64_t*) * avctx->channels))) {
> + av_log(avctx, AV_LOG_ERROR, "Allocating buffer array failed.\n");
> + decode_end(avctx);
> + return AVERROR(ENOMEM);
> + }
This looks very repetitive...
> + // allocate raw and carried samples buffers
> + ctx->raw_samples[0] = ctx->raw_buffer + sconf->max_order;
> + for (c = 1; c < avctx->channels; c++) {
> + ctx->raw_samples[c] = ctx->raw_samples[c - 1] + channel_size;
> + }
> +
> + return 0;
> +}
> +
> +
> +AVCodec als_decoder = {
> + "als",
> + CODEC_TYPE_AUDIO,
> + CODEC_ID_MP4ALS,
> + sizeof(ALSDecContext),
> + decode_init,
> + NULL,
> + decode_end,
> + decode_frame,
> + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 Audio Lossless Coding (ALS)"),
> +};
> +
> Index: libavcodec/als_data.h
> ===================================================================
> --- libavcodec/als_data.h (revision 0)
> +++ libavcodec/als_data.h (revision 0)
> @@ -0,0 +1,147 @@
> +/*
> + * ALS header file for common data
> + * Copyright (c) 2009 Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file libavcodec/als_data.h
> + * MPEG-4 ALS header file for common data
> + * @author Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
> + */
> +
> +
> +#ifndef AVCODEC_ALS_DATA_H
> +#define AVCODEC_ALS_DATA_H
> +
> +
> +#include <stdint.h>
> +
> +/** Rice parameters and corresponding index offsets for decoding the
> + * indices of scaled PARCOR values. The table choosen is set globally
> + * by the encoder and stored in ALSSpecificConfig.
> + */
> +int8_t parcor_rice_table[3][20][2] = {
> + {
> + {-52, 4},
WTF happened to the indentation here?
> +
> +/** Scaled PARCOR values used for the first two PARCOR coefficients.
> + * To be indexed by the Rice coded indices.
> + * Generated by: parcor_scaled_values[i] = 32 + ((i * (i+1)) << 7) - (1 << 20)
> + */
> +int32_t parcor_scaled_values[] = {-1048544, -1048288, -1047776, -1047008,
> + -1045984, -1044704, -1043168, -1041376,
And here.
> +#endif /* AVCODEC_ALS_DATA_H */
> Index: libavcodec/allcodecs.c
> ===================================================================
> --- libavcodec/allcodecs.c (revision 19671)
> +++ libavcodec/allcodecs.c (working copy)
> @@ -198,6 +198,7 @@
> REGISTER_ENCDEC (AAC, aac);
> REGISTER_ENCDEC (AC3, ac3);
> REGISTER_ENCDEC (ALAC, alac);
> + REGISTER_DECODER (ALS, als);
> REGISTER_DECODER (APE, ape);
> REGISTER_DECODER (ATRAC3, atrac3);
> REGISTER_DECODER (COOK, cook);
> Index: libavcodec/Makefile
> ===================================================================
> --- libavcodec/Makefile (revision 19671)
> +++ libavcodec/Makefile (working copy)
> @@ -42,6 +42,7 @@
> OBJS-$(CONFIG_AC3_ENCODER) += ac3enc.o ac3tab.o ac3.o
> OBJS-$(CONFIG_ALAC_DECODER) += alac.o
> OBJS-$(CONFIG_ALAC_ENCODER) += alacenc.o lpc.o
> +OBJS-$(CONFIG_ALS_DECODER) += alsdec.o
> OBJS-$(CONFIG_AMV_DECODER) += sp5xdec.o mjpegdec.o mjpeg.o
> OBJS-$(CONFIG_APE_DECODER) += apedec.o
> OBJS-$(CONFIG_ASV1_DECODER) += asv1.o mpeg12data.o
> Index: Changelog
> ===================================================================
> --- Changelog (revision 19671)
> +++ Changelog (working copy)
> @@ -32,6 +32,7 @@
> - RTMP support in libavformat
> - noX handling for OPT_BOOL X options
> - Wave64 demuxer
> +- MPEG-4 ALS decoder
>
> Index: doc/general.texi
> ===================================================================
> --- doc/general.texi (revision 19671)
> +++ doc/general.texi (working copy)
> @@ -564,6 +564,7 @@
> @item MP2 (MPEG audio layer 2) @tab IX @tab IX
> @item MP3 (MPEG audio layer 3) @tab E @tab IX
> @tab encoding supported through external library LAME, ADU MP3 and MP3onMP4 also supported
> + at item MPEG-4 Audio Lossless Coding (ALS) @tab @tab X
> @item Musepack SV7 @tab @tab X
> @item Musepack SV8 @tab @tab X
> @item Nellymoser Asao @tab X @tab X
--
M?ns Rullg?rd
mans at mansr.com
More information about the ffmpeg-devel
mailing list