[FFmpeg-devel] Bink audio decoder
Michael Niedermayer
michaelni
Wed Apr 16 00:44:38 CEST 2008
On Wed, Apr 09, 2008 at 11:47:32PM +1000, pross at xvid.org wrote:
> Hi,
>
> Enclosed is a working demuxer and audio decoder for the Bink multimedia
> format. The patch has been tested on PPC hardware only, but should run
> elsewhere.
>
> The audio codec makes use of a windowed real-value FFT (and DCT for more
> recent files). Benjamin L tells me that there are NO equivalent dsp
> routines in FFmpeg, so as a starting point a well known public domain
> routine is included.
We have a FFT
DCT and real value FFT can be efficiently implemented with FFT + trivial
pre/post transforms.
read http://www.nrbook.com/a/bookcpdf/c12-3.pdf
[...]
> +#include <stdio.h>
> +#include <stdlib.h>
are both really needed?
> +#include "avcodec.h"
> +#define ALT_BITSTREAM_READER_LE
> +#include "bitstream.h"
> +#include "dsputil.h"
> +#include "fft4g.h" // rdft(), ddct()
> +
> +// from wmadata.h
> +static const uint16_t wma_critical_freqs[25] = {
> + 100, 200, 300, 400, 510, 630, 770, 920,
> + 1080, 1270, 1480, 1720, 2000, 2320, 2700, 3150,
> + 3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500,
> + 24500,
> +};
duplicate
> +
> +#define MAX_CHANNELS 2
> +
> +#define BLOCK_MAX_SIZE ((1<<11)*MAX_CHANNELS)
> +
> +typedef struct {
> + AVCodecContext *avctx;
> + GetBitContext gb;
> + int first;
> + int use_mdct;
> + int frame_len; // transform size (samples)
> + int overlap_len; // overlap size (samples)
comments are not doxygen compatible
> + int channels;
> + int num_bands;
> + unsigned int * bands;
> + float root;
> + DECLARE_ALIGNED_16(short, window[BLOCK_MAX_SIZE/16]);
> +} BinkAudioContext;
> +
> +/*
> + */
> +static av_cold int binkaudio_decode_init(AVCodecContext *avctx)
useless comment
> +{
> + BinkAudioContext * s = avctx->priv_data;
> + int sample_rate = avctx->sample_rate;
> + int sample_rate_half;
> + int i;
> + int frame_len_bits;
> + s->avctx = avctx;
> +
> + s->use_mdct = avctx->extradata_size>0 && *(char*)avctx->extradata == 1;
useless cast
> +
> + /* determine frame length */
> + if (avctx->sample_rate<22050) {
> + frame_len_bits = 9;
> + }else if (avctx->sample_rate<44100) {
> + frame_len_bits = 10;
> + }else{
> + frame_len_bits = 11;
> + }
> + s->frame_len = 1<<frame_len_bits;
> +
> + if (s->use_mdct) {
> + s->channels = avctx->channels;
> + }else{
indention ...
[...]
> +static float get_float(GetBitContext *gb)
> +{
> + int power = get_bits(gb, 5);
> + float f = get_bits(gb, 23) / (float)(1 << (23-power));
is power > 23 disallowed?
> + if(get_bits1(gb))
> + f = -f;
> + return f;
> +}
> +
> +static const int rle_length_tab[16] = {
> + 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64
> +};
fits in uint8_t
> +
> +static int decode_block(BinkAudioContext *s, short *out)
> +{
> + // coeffs is a double because rdft() and ddct() take doubles
> + DECLARE_ALIGNED_16(double, coeffs[BLOCK_MAX_SIZE]);
> +
> + // temporary workspace for ddct() and rdft()
> + DECLARE_ALIGNED_16(double, ddct_w[BLOCK_MAX_SIZE*5/4]); ///< max size = block_size*5/4
> + DECLARE_ALIGNED_16(double, rdft_w[BLOCK_MAX_SIZE/2]); ///< max size = block_size*5/4
> + DECLARE_ALIGNED_16(int, fft4g_ip[48]); ///< max size = 2+sqrt(block_max_size/2)
Please put these things somewhere else than the stack, some compilers
have difficulty aligning variables on the stack.
Also why double and not float?
> +
> + int ch,i,j,k;
> + float q, quant[25];
> + int width,coeff;
> + int channels = s->channels;
> +
> + if (s->use_mdct) {
> + skip_bits(&s->gb, 2);
> + }
> +
> + for(ch=0; ch<channels; ch++) {
> + q = 0.0;
> + coeffs[0] = get_float(&s->gb);
> + coeffs[1] = get_float(&s->gb);
> +
> + for(i=0; i<s->num_bands; i++) {
> + int value = get_bits(&s->gb, 8);
> + quant[i] = pow(10.0, FFMIN(value, 95) * 0.0664);
> + }
> +
> + // find band (k)
> + for(k=0; s->bands[k]*2 < 2; k++) {
> + q = quant[k];
> + }
s->bands[0] == 1 -> 2 >= 2 -> q=0.0
or did i miss something ?
[...]
> + if (width==0) {
> + memset(coeffs+i, 0, (j-i)*sizeof(double));
sizeof(*coeffs) is safer if it ever is changed to float
> + i=j;
> + while(s->bands[k]*2 < i)
> + q = quant[k++];
> + }else{
> + while(i<j) {
> + if (s->bands[k]*2 == i)
> + q = quant[k++];
> + coeff = get_bits(&s->gb, width);
> + if (coeff) {
> + if (get_bits1(&s->gb))
> + coeffs[i] = -q*coeff;
> + else
> + coeffs[i] = q*coeff;
> + }else{
> + coeffs[i] = 0.0;
> + }
> + i++;
> + }
> + }
> + }
> +
> + if (s->use_mdct) {
> + ddct(s->frame_len, 0, coeffs, fft4g_ip, ddct_w);
> + }else{
> + rdft(s->frame_len, -1, coeffs, fft4g_ip, rdft_w);
> + }
> +
> + for(i=0; i<s->frame_len; i++) { //FIXME: dsp.float_to_int16?
> + out[i*channels + ch] = av_clip_int16(
> + lrintf(coeffs[i]*s->root));
> + }
yes that should be fixme-ed and the root should be multiplied into above q
> + }
> +
> + // windowing
> + if (s->first>0) {
> + int count = s->overlap_len*channels;
> + for(i=0; i<count; i++) {
> + out[i] = (s->window[i]*(count-i) + out[i]*i) / count;
> + }
divisions are slow ...
Also i suspect that the windowing
would be more efficient before lrintf() in floats.
[...]
> +/**
> + */
> +static int binkaudio_decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size)
> +{
> + BinkAudioContext * s = avctx->priv_data;
> + short *samples = (short*)data;
useless cast
> + s->first = 0;
this looks a little odd, also if its correct then its duplicated.
[...]
> Index: libavcodec/fft4g.h
> ===================================================================
> --- libavcodec/fft4g.h (revision 0)
> +++ libavcodec/fft4g.h (revision 0)
> @@ -0,0 +1,1368 @@
> +/*
> +General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package
> +
> +Description:
> + A package to calculate Discrete Fourier/Cosine/Sine Transforms of
> + 1-dimensional sequences of length 2^N.
> +
> +Reference:
> + * Masatake MORI, Makoto NATORI, Tatuo TORII: Suchikeisan,
> + Iwanamikouzajyouhoukagaku18, Iwanami, 1982 (Japanese)
> + * Henri J. Nussbaumer: Fast Fourier Transform and Convolution
> + Algorithms, Springer Verlag, 1982
> + * C. S. Burrus, Notes on the FFT (with large FFT paper list)
> + http://www-dsp.rice.edu/research/fft/fftnote.asc
> +
> +Copyright:
> + Copyright(C) 1996-2001 Takuya OOURA
> + email: ooura at mmm.t.u-tokyo.ac.jp
> + download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html
> + You may use, copy, modify this code for any purpose and
> + without fee. You may distribute this ORIGINAL package.
we cannot accept that due to the non free license.
[...]
> Index: libavformat/bink.c
> ===================================================================
> --- libavformat/bink.c (revision 0)
> +++ libavformat/bink.c (revision 0)
> @@ -0,0 +1,190 @@
> +/*
> + * Bink file Demuxer
> + * Copyright (c) 2008 Peter Ross (pross at xvid.org)
The demuxer should be in a seperate patch (smaller patches, easier to
handle, easier to review, ...)
[...]
> +static int bink_probe(AVProbeData *p)
> +{
> + const uint8_t *b = p->buf;
> +
> + if (b[0] == 'B' && b[1] == 'I' && b[2] == 'K')
> + return AVPROBE_SCORE_MAX;
please check more than 3 bytes if possible
> + return 0;
> +}
> +
> +#define BINK_EXTRADATA_SIZE 1
> +static int bink_read_header(AVFormatContext *s, AVFormatParameters *ap)
> +{
> + BinkDemuxContext *bink = s->priv_data;
> + ByteIOContext *pb = s->pb;
> + unsigned int version,width,height;
> + AVRational fps;
> + AVStream *st;
some of these are redundant
[...]
> + st = av_new_stream(s, 0);
> + if (!st)
> + return AVERROR(ENOMEM);
> + st->codec->codec_type = CODEC_TYPE_VIDEO;
> + st->codec->codec_tag = 0; /* no fourcc */
> + st->codec->codec_id = CODEC_ID_BINKVIDEO;
> + st->codec->width = width;
> + st->codec->height = height;
vertical align:
st->codec->width = width;
st->codec->height = height;
[...]
> + if ((get_le16(pb) & 0x1000)) { //dct flag
> + st[i]->codec->extradata = av_malloc(BINK_EXTRADATA_SIZE);
> + st[i]->codec->extradata_size = BINK_EXTRADATA_SIZE;
> + *(char*)st[i]->codec->extradata = 1;
> + }
I think you should just place these 2 bytes as they are in extradata. This
seems simpler and more natural
> + }
> + url_fskip(pb, 4 * bink->num_tracks);
> + av_free(st);
> + }
> +
> + bink->video_pts = bink->audio_pts = 0;
This should be unneeded as things are zeroed automatically.
[...]
> +static int bink_read_packet(AVFormatContext *s, AVPacket *pkt)
> +{
> + BinkDemuxContext *bink = s->priv_data;
> + ByteIOContext *pb = s->pb;
> +
> + if (bink->current_track<0) {
> + int packet_pos = bink->packet_pos;
> +
> + url_fseek(pb, bink->index_pos + (bink->video_pts+1)*4, SEEK_SET);
> + if (bink->video_pts+1 < bink->total_frames) {
> + bink->packet_pos = get_le32(pb); /* store next packet offset */
> + bink->remain_packet_size = bink->packet_pos - packet_pos; /* calculate size of this packet */
> + }else if (bink->video_pts < bink->total_frames) {
> + bink->remain_packet_size = bink->file_size - packet_pos;
> + }else {
> + return AVERROR(EIO);
> + }
> + url_fseek(pb, packet_pos, SEEK_SET);
> + bink->current_track = 0;
> + }
it looks like bink can be read without randomly seeking around, so please
dont randomly seek around. The index should be read in with av_add_index_entry()
> +
> + while (bink->current_track < bink->num_tracks) {
> + unsigned int audio_size = get_le32(pb);
> + bink->remain_packet_size -= 4 + audio_size;
> + bink->current_track++;
> + if (audio_size > 0) {
> + url_fskip(pb, 4);
> + audio_size -= 4;
> + if (av_get_packet(s->pb, pkt, audio_size) != audio_size)
> + return AVERROR(EIO);
memleak
> + pkt->stream_index = bink->current_track;
> + pkt->pts = bink->audio_pts++;
somehow i do not think this will be correct for >1 audio stream
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Observe your enemies, for they first find out your faults. -- Antisthenes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080416/85713e8c/attachment.pgp>
More information about the ffmpeg-devel
mailing list