[FFmpeg-devel] [PATCH] wmapro decoder

Sun Aug 30 23:26:39 CEST 2009

On Sat, Aug 29, 2009 at 11:51:54PM +0200, Sascha Sommer wrote:
> Hi,
> 
> new patch attached...

[...]
> +/**
> + * @brief main decoder context
> + */
> +typedef struct WMA3DecodeContext {
> +    /* generic decoder variables */
> +    AVCodecContext*  avctx;                         ///< codec context for av_log
> +    DSPContext       dsp;                           ///< accelerated DSP functions
> +    uint8_t          frame_data[MAX_FRAMESIZE +
> +                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
> +    PutBitContext    pb;                            ///< context for filling the frame_data buffer
> +    MDCTContext      mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
> +    DECLARE_ALIGNED_16(float, tmp[WMAPRO_BLOCK_MAX_SIZE]); ///< IMDCT output buffer
> +    float*           windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes
> +
> +    /* frame size dependent frame information (set during initialization) */
> +    uint32_t         decode_flags;                  ///< used compression features
> +    uint8_t          len_prefix;                    ///< frame is prefixed with its length
> +    uint8_t          dynamic_range_compression;     ///< frame contains DRC data
> +    uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
> +    uint16_t         samples_per_frame;             ///< number of samples to output
> +    uint16_t         log2_frame_size;
> +    int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.num_channels)
> +    int8_t           lfe_channel;                   ///< lfe channel index
> +    uint8_t          max_num_subframes;
> +    uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
> +    uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
> +    uint16_t         min_samples_per_subframe;
> +    int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
> +    int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
> +    int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
> +    int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
> +
> +    /* packet decode state */
> +    uint8_t          packet_sequence_number;        ///< current packet number
> +    int              num_saved_bits;                ///< saved number of bits
> +    int              frame_offset;                  ///< frame offset in the bit reservoir
> +    int              subframe_offset;               ///< subframe offset in the bit reservoir
> +    uint8_t          packet_loss;                   ///< set in case of bitstream error
> +
> +    /* frame decode state */

> +    uint32_t         frame_num;                     ///< current frame number

This field is unused.

[...]
> +/**
> + *@brief Decode the subframe length.
> + *@param s context
> + *@param offset sample offset in the frame
> + *@return decoded subframe length on success, 0 in case of an error
> + */
> +static int decode_subframe_length(WMA3DecodeContext *s, int offset)
> +{
> +    int log2_subframe_len = 0;
> +    int subframe_len;
> +
> +    /** no need to read from the bitstream when only one length is possible */
> +    if (offset == s->samples_per_frame - s->min_samples_per_subframe)
> +        return s->min_samples_per_subframe;
> +
> +    /** 1 bit indicates if the subframe is of maximum length */
> +    if (s->max_subframe_len_bit) {
> +        if (get_bits1(&s->gb))
> +            log2_subframe_len = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
> +    } else
> +        log2_subframe_len = get_bits(&s->gb, s->subframe_len_bits);
> +

> +    subframe_len = s->samples_per_frame / (1 << log2_subframe_len);

subframe_len = s->samples_per_frame >> log2_subframe_len;

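Also, I would guess the names are not good, because one would expect
1 << log2_X == X, whereas here 1 << log2_subframe_len is
samples_per_frame / subframe_len rather than subframe_len.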

> +
> +    /** sanity check the length */
> +    if (subframe_len < s->min_samples_per_subframe
> +              || subframe_len > s->samples_per_frame) {
> +        av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
> +               subframe_len);
> +        return 0;

Within FFmpeg, errors are reported as negative values.

> +    }
> +    return subframe_len;
> +}
> +
> +/**
> + *@brief Decode how the data in the frame is split into subframes.
> + *       Every WMA frame contains the encoded data for a fixed number of
> + *       samples per channel. The data for every channel might be split
> + *       into several subframes. This function will reconstruct the list of
> + *       subframes for every channel.
> + *
> + *       If the subframes are not evenly split, the algorithm estimates the
> + *       channels with the lowest number of total samples.
> + *       Afterwards, for each of these channels a bit is read from the
> + *       bitstream that indicates if the channel contains a subframe with the
> + *       next subframe size that is going to be read from the bitstream or not.
> + *       If a channel contains such a subframe, the subframe size gets added to
> + *       the channel's subframe list.
> + *       The algorithm repeats these steps until the frame is properly divided
> + *       between the individual channels.
> + *
> + *@param s context
> + *@return 0 on success, < 0 in case of an error
> + */
> +static int decode_tilehdr(WMA3DecodeContext *s)
> +{

> +    uint16_t num_samples[WMAPRO_MAX_CHANNELS];
> +    uint8_t  contains_subframe[WMAPRO_MAX_CHANNELS];
> +    int channels_for_cur_subframe = s->num_channels;
> +    int fixed_channel_layout = 0;
> +    int min_channel_len = 0;

These could benefit from some Doxygen comments.

[...]
> @@ -241,6 +838,98 @@
>  }
>  
>  /**
> + *@brief Extract scale factors from the bitstream.
> + *@param s codec context
> + *@return 0 on success, < 0 in case of bitstream errors
> + */
> +static int decode_scale_factors(WMA3DecodeContext* s)
> +{
> +    int i;
> +
> +    /** should never consume more than 5344 bits
> +     *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
> +     */
> +
> +    for (i = 0; i < s->channels_for_cur_subframe; i++) {
> +        int c = s->channel_indexes_for_cur_subframe[i];
> +        int* sf;
> +        int* sf_end = s->channel[c].scale_factors + s->num_bands;
> +
> +        /** resample scale factors for the new block size */
> +        if (s->channel[c].reuse_sf) {
> +            const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
> +            int b;
> +            for (b = 0; b < s->num_bands; b++)
> +                s->channel[c].scale_factors[b] =
> +                                   s->channel[c].saved_scale_factors[*sf_offsets++];
> +        }
> +
> +        if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
> +
> +            if (!s->channel[c].reuse_sf) {
> +                int val;
> +                /** decode DPCM coded scale factors */
> +                s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
> +                val = 45 / s->channel[c].scale_factor_step;
> +                for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
> +                    val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
> +                    *sf = val;
> +                }
> +            } else {
> +                int i;
> +                /** run level decode differences to the resampled factors */
> +                for (i = 0; i < s->num_bands; i++) {
> +                    int idx;
> +                    int skip;
> +                    int val;
> +                    int sign;
> +
> +                    idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
> +
> +                    if ( !idx ) {
> +                        uint32_t code = get_bits(&s->gb, 14);
> +                        val  =  code >> 6;
> +                        sign = (code & 1) - 1;
> +                        skip = (code & 0x3f) >> 1;
> +                    } else if (idx == 1) {
> +                        break;
> +                    } else {
> +                        skip = scale_rl_run[idx];
> +                        val  = scale_rl_level[idx];
> +                        sign = get_bits1(&s->gb)-1;
> +                    }
> +
> +                    i += skip;
> +                    if (i >= s->num_bands) {
> +                        av_log(s->avctx,AV_LOG_ERROR,
> +                               "invalid scale factor coding\n");
> +                        return AVERROR_INVALIDDATA;
> +                    }
> +                    s->channel[c].scale_factors[i] += (val ^ sign) - sign;
> +                }
> +            }
> +

> +            /** save transmitted scale factors so that they can be reused for
> +                the next subframe */
> +            memcpy(s->channel[c].saved_scale_factors,
> +                   s->channel[c].scale_factors, s->num_bands *
> +                   sizeof(*s->channel[c].saved_scale_factors));

What happens to s->channel[c].scale_factors such that it can't just be used directly?

[...]
> +
> +/**
> + *@brief Decode a single subframe (block).
> + *@param s codec context
> + *@return 0 on success, < 0 when decoding failed
> + */
> +static int decode_subframe(WMA3DecodeContext *s)
> +{
> +    int offset = s->samples_per_frame;
> +    int subframe_len = s->samples_per_frame;
> +    int i;
> +    int total_samples   = s->samples_per_frame * s->num_channels;
> +    int transmit_coeffs = 0;
> +    int cur_subwoofer_cutoff;
> +
> +    s->subframe_offset = get_bits_count(&s->gb);
> +
> +    /** reset channel context and find the next block offset and size
> +        == the next block of the channel with the smallest number of
> +        decoded samples
> +    */
> +    for (i = 0; i < s->num_channels; i++) {
> +        s->channel[i].grouped = 0;
> +        if (offset > s->channel[i].decoded_samples) {
> +            offset = s->channel[i].decoded_samples;
> +            subframe_len =
> +                s->channel[i].subframe_len[s->channel[i].cur_subframe];
> +        }
> +    }
> +
> +    dprintf(s->avctx,
> +           "processing subframe with offset %i len %i\n", offset, subframe_len);
> +
> +    /** get a list of all channels that contain the estimated block */
> +    s->channels_for_cur_subframe = 0;
> +    for (i = 0; i < s->num_channels; i++) {
> +        const int cur_subframe = s->channel[i].cur_subframe;
> +        /** substract already processed samples */
> +        total_samples -= s->channel[i].decoded_samples;
> +
> +        /** and count if there are multiple subframes that match our profile */
> +        if (offset == s->channel[i].decoded_samples &&
> +           subframe_len == s->channel[i].subframe_len[cur_subframe]) {
> +            total_samples -= s->channel[i].subframe_len[cur_subframe];
> +            s->channel[i].decoded_samples +=
> +                s->channel[i].subframe_len[cur_subframe];
> +            s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
> +            ++s->channels_for_cur_subframe;
> +        }
> +    }
> +
> +    /** check if the frame will be complete after processing the
> +        estimated block */
> +    if (!total_samples)
> +        s->parsed_all_subframes = 1;
> +
> +
> +    dprintf(s->avctx, "subframe is part of %i channels\n",
> +           s->channels_for_cur_subframe);
> +
> +    /** calculate number of scale factor bands and their offsets */
> +    s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
> +    s->num_bands         = s->num_sfb[s->table_idx];
> +    s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
> +    cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
> +
> +    /** configure the decoder for the current subframe */
> +    for (i = 0; i < s->channels_for_cur_subframe; i++) {
> +        int c = s->channel_indexes_for_cur_subframe[i];
> +
> +        s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame>>1)
> +                                                  + offset];

> +        memset(s->channel[c].coeffs, 0,
> +               sizeof(*s->channel[c].coeffs) * subframe_len);

IMHO it would be better to do this in an else clause of the if (transmit_coeffs),
and before the RLE decoding is started.

[...]
> +    /** interleave samples and write them to the output buffer */
> +    for (i = 0; i < s->num_channels; i++) {
> +        float* ptr;
> +        int incr = s->num_channels;
> +        float* iptr = s->channel[i].out;
> +        int x;
> +
> +        ptr = s->samples + i;
> +
> +        for (x = 0; x < s->samples_per_frame; x++) {
> +            *ptr = av_clipf(*iptr++, -1.0, 32767.0 / 32768.0);
> +            ptr += incr;
> +        }
> +
> +        /** reuse second half of the IMDCT output for the next frame */
> +        memcpy(&s->channel[i].out[0],
> +               &s->channel[i].out[s->samples_per_frame],
> +               s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
> +    }

Did you try not doing several passes over the output buffer?
(Maybe it's faster ...)

[...]
> +/**
> + *@brief WMA9 decoder
> + */
> +AVCodec wmapro_decoder = {
> +    "wmapro",
> +    CODEC_TYPE_AUDIO,
> +    CODEC_ID_WMAPRO,
> +    sizeof(WMA3DecodeContext),

"WMA9 decoder", WMAPRO, WMA3DecodeContext:
isn't that naming a little inconsistent?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Old school: Use the lowest level language in which you can solve the problem
            conveniently.
New school: Use the highest level language in which the latest supercomputer
            can solve the problem without the user falling asleep waiting.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090830/b42f23a7/attachment.pgp>