[FFmpeg-devel] [PATCH] NellyMoser audio decoder
Michael Niedermayer
michaelni
Tue Sep 11 14:59:14 CEST 2007
Hi
On Tue, Sep 11, 2007 at 12:13:34PM +0200, Lo?c Minier wrote:
> Hi there,
>
> Please find attached a NellyMoser decoder for ffmpeg. It uses code
> downloaded from:
> <http://code.google.com/p/nelly2pcm/downloads/list>
> (this is where the funny copyrights come from)
>
> Comments are of course welcome. I tested the decoder against 8kHz,
> 11kHz, and 22kHz samples via ffplay and also via libavcodec.
[...]
> diff --git a/libavcodec/nelly.c b/libavcodec/nelly.c
> new file mode 100644
> index 0000000..89f4117
> --- /dev/null
> +++ b/libavcodec/nelly.c
> @@ -0,0 +1,521 @@
> +/*
> + * Copyright (c) 2007 a840bda5870ba11f19698ff6eb9581dfb0f95fa5,
> + * 539459aeb7d425140b62a3ec7dbf6dc8e408a306, and
> + * 520e17cd55896441042b14df2566a6eb610ed444
> + *
trailing whitespace is forbidden in ffmpeg-svn
[...]
> +extern unsigned char nelly_center_table[64];
> +extern short nelly_init_table[64];
> +extern float nelly_inv_dft_table[129];
> +extern short nelly_delta_table[32];
> +extern float nelly_pos_unpack_table[64];
> +extern float nelly_state_table[128];
> +extern float nelly_signal_table[64];
> +extern float nelly_neg_unpack_table[64];
> +extern int nelly_copy_count[23];
> +extern float nelly_huff_table[127];
at least some of these tables are used at just one point, so should be
static, the others need a ff_ prefix or they will colide with the nelly
code in an app linking to ffmpeg and the original code
also they should be const either way
> +
> +static void center(float *audio)
> +{
> + int i, j;
tabs are forbidden in ffmpeg svn
also as all the whitespace must be changed anyway, please ensure its
4-space indented ...
> + float ftmp;
> +
> + for (i = 0; i < NELLY_BUF_LEN; i+=2) {
> + j = nelly_center_table[i/2];
> + if (j > i) {
> + ftmp = audio[j];
> + audio[j] = audio[i];
> + audio[i] = ftmp;
> + ftmp = audio[j+1];
> + audio[j+1] = audio[i+1];
> + audio[i+1] = ftmp;
FFSWAP
> + }
> + }
> +}
> +
> +static void inverse_dft(float *audio)
> +{
if this is a standard dft (discrete fourier transform) then please use
the existing code from fft.c
if its not a dft then please elaborate on what it is
[...]
> +
> +static void unpack_coeffs(float *buf, float *audio)
> +{
> + int i, end, mid_hi, mid_lo;
> + float a, b, c, d, e, f;
> +
> + end = NELLY_BUF_LEN-1;
> + mid_hi = NELLY_BUF_LEN/2;
> + mid_lo = mid_hi-1;
> +
> + for (i = 0; i < NELLY_BUF_LEN/4; i++) {
> + a = buf[end-(2*i)];
superflous ()
> + b = buf[2*i];
> + c = buf[(2*i)+1];
> + d = buf[end-(2*i)-1];
> + e = nelly_pos_unpack_table[i];
> + f = nelly_neg_unpack_table[i];
> +
> + audio[2*i] = b*e-a*f;
> + audio[(2*i)+1] = a*e+b*f;
> +
> + a = nelly_neg_unpack_table[mid_lo-i];
> + b = nelly_pos_unpack_table[mid_lo-i];
> +
> + audio[end-(2*i)-1] = b*d-a*c;
> + audio[end-(2*i)] = b*c+a*d;
> + }
> +}
also this function doesnt unpack anything, it outputs the same number
of coeffs as are input, it looks more like some antialias like filter
similar to what mp3 does
[...]
> + if (b < 0)
> + b = 0;
> + b = ((b>>(shift-1))+1)>>1;
> + if (b > NELLY_BIT_CAP)
> + ret += NELLY_BIT_CAP;
> + else
> + ret += b;
av_clip with 0 and NELLY_BIT_CAP
[...]
> +static int headroom(int *la, short *sa)
> +{
> + if (*la == 0)
> + *sa += 31;
> + else if (*la < 0) {
> + while (*la > -1<<30) {
> + *la <<= 1;
> + (*sa)++;
> + }
> + } else {
> + while (*la < 1<<30) {
> + *la <<= 1;
> + (*sa)++;
> + }
> + }
31-av_log2(FFABS(*la)) or something similar should do the same
> +
> + return *la;
> +}
> +
> +static void get_sample_bits(float *buf, int *bits)
> +{
> + int i, j;
> + short sbuf[128];
> + int bitsum = 0, last_bitsum, small_bitsum, big_bitsum;
> + short shift, shift_saved;
> + int tmp;
> + int big_off;
> + int off, diff;
> +
> + tmp = 0;
> + for (i = 0; i < NELLY_FILL_LEN; i++) {
> + if (buf[i] > tmp)
> + tmp = buf[i];
FFMAX() and tmp should be renamed to max
> + }
> + shift = -16;
> + headroom(&tmp, &shift);
this is a VERY ugly way to return something, following is a little
better
shift += headroom(&tmp);
> +
> + if (shift < 0)
> + for (i = 0; i < NELLY_FILL_LEN; i++)
> + sbuf[i] = ((int)buf[i]) >> -shift;
> + else
> + for (i = 0; i < NELLY_FILL_LEN; i++)
> + sbuf[i] = ((int)buf[i]) << shift;
> +
> + for (i = 0; i < NELLY_FILL_LEN; i++)
> + sbuf[i] = (3*sbuf[i])>>2;
> +
> + tmp = 0;
> + for (i = 0; i < NELLY_FILL_LEN; i++)
> + tmp += sbuf[i];
sum=0; (or a more appropriate name)
for(i=0; i<NELLY_FILL_LEN; i++){
if(shift < 0)
sbuf[i] = buf[i] >> -shift; (yes buf should be int anyway)
else
sbuf[i] = buf[i] << shift;
sbuf[i] = (3*sbuf[i])>>2;
sum += sbuf[i];
}
[...]
> + if (diff > 0) {
> + while (diff <= 16383) {
> + shift++;
> + diff *= 2;
> + }
> + } else {
> + while (diff >= -16383) {
> + shift++;
> + diff *= 2;
> + }
> + }
this can be simpified with FFABS
> +
> + diff = (diff * NELLY_BASE_OFF) >> 15;
> + shift = shift_saved-(NELLY_BASE_SHIFT+shift-15);
> +
> + if (shift > 0) {
> + diff <<= shift;
> + } else {
> + diff >>= -shift;
> + }
this one occured often enough to be its own function ...
[...]
> + tmp = sbuf[i]-off;
> + if (tmp < 0)
> + tmp = 0;
> + else
> + tmp = ((tmp>>(shift_saved-1))+1)>>1;
> +
> + if (tmp > NELLY_BIT_CAP)
> + tmp = NELLY_BIT_CAP;
> + bits[i] = tmp;
tmp = sbuf[i]-off;
tmp = ((tmp>>(shift_saved-1))+1)>>1;
bits[i] = av_clip(tmp, 0, NELLY_BIT_CAP);
[...]
> +static unsigned char get_bits(unsigned char block[NELLY_BLOCK_LEN], int *off, int n)
> +{
> + char ret;
> + int boff = *off/8, bitpos = *off%8, mask = (1<<n)-1;
> +
> + if (bitpos+n > 8) {
> + ret = block[boff%NELLY_BLOCK_LEN] >> bitpos;
> + mask >>= 8-bitpos;
> + ret |= (block[(boff+1)%NELLY_BLOCK_LEN] & mask) << (8-bitpos);
> + } else {
> + ret = (block[boff%NELLY_BLOCK_LEN] >> bitpos) & mask;
> + }
> +
> + *off += n;
> + return ret;
> +}
please use the bitreader from bitstream.c/h
> +
> +void nelly_decode_block(NellyMoserDecodeContext *s, unsigned char block[NELLY_BLOCK_LEN], float audio[256])
> +{
should be static
> + int i,j;
> + float buf[NELLY_BUF_LEN], pows[NELLY_BUF_LEN];
> + float *aptr, *bptr, *pptr, val, pval;
> + int bits[NELLY_BUF_LEN];
> + unsigned char v;
> + int bit_offset = 0;
> +
> + bptr = buf;
> + pptr = pows;
> + val = nelly_init_table[get_bits(block, &bit_offset, 6)];
> + for (i = 0; i < 23; i++) {
> + if (i > 0)
> + val += nelly_delta_table[get_bits(block, &bit_offset, 5)];
> + pval = pow(2, val/2048);
> + for (j = 0; j < nelly_copy_count[i]; j++) {
> + *bptr = val;
> + *pptr = pval;
> + bptr++;
> + pptr++;
*bptr++ = ...
> + }
> +
> + }
> +
> + for (i = NELLY_FILL_LEN; i < NELLY_BUF_LEN; i++)
> + buf[i] = pows[i] = 0.0;
> +
buf can only contains integers so it should be int not float
[...]
> + if (av_random(&s->random_state) % 2)
> + buf[j] *= -1.0;
id prefer &1 over%2
[...]
> + unpack_coeffs(buf, aptr);
> + center(aptr);
this reorders the coefficients for the fft, dont give functions nonsense
names
> + inverse_dft(aptr);
> + complex2signal(aptr);
without lookint to carefull at the code i think the whole is likely a IMDCT
or something, find out what it is and use the existing functions from
libavcodec
> + apply_state(s->state, aptr);
this applies a window name it appropriately
> + }
> +}
> +
> +void nelly_util_floats2shorts(float audio[256], short shorts[256])
> +{
> + int i;
> +
> + for (i = 0; i < 256; i++) {
> + if (audio[i] >= 32767.0)
> + shorts[i] = 32767;
> + else if (audio[i] <= -32768.0)
> + shorts[i] = -32768;
> + else
> + shorts[i] = (short)audio[i];
> + }
> +}
duplicate of the respective code in dsputil
[...]
> + int seen_audio_format;
> + uint8_t audio_format;
both unused
[...]
> +/* size of decoded audio data for a block */
> +#define NELLY_SAMPLE_SIZE 512
comment is not doxygen compatible
[...]
> +float nelly_huff_table[127] = {
no this table has nothing to do with huffman decoding its a dequantization
table
[...]
> +int nelly_copy_count[23] = {
> +2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 8, 9, 10, 12, 14, 15
> +};
this table represents band sizes name it appropriately
[...]
> +static int decode_tag(AVCodecContext * avctx,
> + void *data, int *data_size,
> + uint8_t * buf, int buf_size) {
> + NellyMoserDecodeContext *s = avctx->priv_data;
> + int remaining_size = buf_size;
> + *data_size = 0;
> +
> + while (remaining_size >= NELLY_BLOCK_LEN) {
> + nelly_decode_block(s, buf, s->float_buf);
> + nelly_util_floats2shorts(s->float_buf, data);
> + data += NELLY_SAMPLE_SIZE;
> + *data_size += NELLY_SAMPLE_SIZE;
> + buf += NELLY_BLOCK_LEN;
> + remaining_size -= NELLY_BLOCK_LEN;
> + }
there should be a parser which splits these so the decoder is only feeded
with individual blocks
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Thouse who are best at talking, realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070911/55d3eb4c/attachment.pgp>
More information about the ffmpeg-devel
mailing list