[FFmpeg-devel] [PATCH] NellyMoser audio decoder

Tue Sep 11 14:59:14 CEST 2007

Hi

On Tue, Sep 11, 2007 at 12:13:34PM +0200, Loïc Minier wrote:
>         Hi there,
> 
>  Please find attached a NellyMoser decoder for ffmpeg.  It uses code
>  downloaded from:
>     <http://code.google.com/p/nelly2pcm/downloads/list>
>  (this is where the funny copyrights come from)
> 
>  Comments are of course welcome.  I tested the decoder against 8kHz,
>  11kHz, and 22kHz samples via ffplay and also via libavcodec.

[...]
> diff --git a/libavcodec/nelly.c b/libavcodec/nelly.c
> new file mode 100644
> index 0000000..89f4117
> --- /dev/null
> +++ b/libavcodec/nelly.c
> @@ -0,0 +1,521 @@
> +/*
> + * Copyright (c) 2007 a840bda5870ba11f19698ff6eb9581dfb0f95fa5,
> + *                    539459aeb7d425140b62a3ec7dbf6dc8e408a306, and
> + *                    520e17cd55896441042b14df2566a6eb610ed444
> + * 

trailing whitespace is forbidden in ffmpeg-svn

[...]
> +extern unsigned char nelly_center_table[64];
> +extern short nelly_init_table[64];
> +extern float nelly_inv_dft_table[129];
> +extern short nelly_delta_table[32];
> +extern float nelly_pos_unpack_table[64];
> +extern float nelly_state_table[128];
> +extern float nelly_signal_table[64];
> +extern float nelly_neg_unpack_table[64];
> +extern int nelly_copy_count[23];
> +extern float nelly_huff_table[127];

at least some of these tables are used at just one point, so should be
static; the others need an ff_ prefix or they will collide with the nelly
code in an app linking to both ffmpeg and the original code

also they should be const either way

> +
> +static void center(float *audio)
> +{
> +	int i, j;

tabs are forbidden in ffmpeg svn
also, as all the whitespace must be changed anyway, please ensure it's
4-space indented ...

> +	float ftmp;
> +
> +	for (i = 0; i < NELLY_BUF_LEN; i+=2) {
> +		j = nelly_center_table[i/2];
> +		if (j > i) {
> +			ftmp = audio[j];
> +			audio[j] = audio[i];
> +			audio[i] = ftmp;
> +			ftmp = audio[j+1];
> +			audio[j+1] = audio[i+1];
> +			audio[i+1] = ftmp;

FFSWAP

> +		}
> +	}
> +}
> +

> +static void inverse_dft(float *audio)
> +{

if this is a standard dft (discrete fourier transform) then please use
the existing code from fft.c
if it's not a dft then please elaborate on what it is

[...]

> +

> +static void unpack_coeffs(float *buf, float *audio)
> +{
> +	int i, end, mid_hi, mid_lo;
> +	float a, b, c, d, e, f;
> +
> +	end = NELLY_BUF_LEN-1;
> +	mid_hi = NELLY_BUF_LEN/2;
> +	mid_lo = mid_hi-1;
> +

> +	for (i = 0; i < NELLY_BUF_LEN/4; i++) {
> +		a = buf[end-(2*i)];

superfluous ()

> +		b = buf[2*i];
> +		c = buf[(2*i)+1];
> +		d = buf[end-(2*i)-1];
> +		e = nelly_pos_unpack_table[i];
> +		f = nelly_neg_unpack_table[i];
> +
> +		audio[2*i] = b*e-a*f;
> +		audio[(2*i)+1] = a*e+b*f;
> +
> +		a = nelly_neg_unpack_table[mid_lo-i];
> +		b = nelly_pos_unpack_table[mid_lo-i];
> +
> +		audio[end-(2*i)-1] = b*d-a*c;
> +		audio[end-(2*i)] = b*c+a*d;
> +	}
> +}

also this function doesn't unpack anything; it outputs the same number
of coeffs as are input. it looks more like some antialias-like filter
similar to what mp3 does

[...]
> +		if (b < 0)
> +			b = 0;
> +		b = ((b>>(shift-1))+1)>>1;
> +		if (b > NELLY_BIT_CAP)
> +			ret += NELLY_BIT_CAP;
> +		else
> +			ret += b;

av_clip with 0 and NELLY_BIT_CAP

[...]
> +static int headroom(int *la, short *sa)
> +{
> +	if (*la == 0)
> +		*sa += 31;
> +	else if (*la < 0) {
> +		while (*la > -1<<30) {
> +			*la <<= 1;
> +			(*sa)++;
> +		}
> +	} else {
> +		while (*la < 1<<30) {
> +			*la <<= 1;
> +			(*sa)++;
> +		}
> +	}

31-av_log2(FFABS(*la)) or something similar should do the same

> +
> +	return *la;
> +}
> +
> +static void get_sample_bits(float *buf, int *bits)
> +{
> +	int i, j;
> +	short sbuf[128];
> +	int bitsum = 0, last_bitsum, small_bitsum, big_bitsum;
> +	short shift, shift_saved;
> +	int tmp;
> +	int big_off;
> +	int off, diff;
> +
> +	tmp = 0;
> +	for (i = 0; i < NELLY_FILL_LEN; i++) {
> +		if (buf[i] > tmp)
> +			tmp = buf[i];

FFMAX() and tmp should be renamed to max

> +	}
> +	shift = -16;
> +	headroom(&tmp, &shift);

this is a VERY ugly way to return something, following is a little
better
shift += headroom(&tmp);

> +
> +	if (shift < 0)
> +		for (i = 0; i < NELLY_FILL_LEN; i++)
> +			sbuf[i] = ((int)buf[i]) >> -shift;
> +	else
> +		for (i = 0; i < NELLY_FILL_LEN; i++)
> +			sbuf[i] = ((int)buf[i]) << shift;
> +
> +	for (i = 0; i < NELLY_FILL_LEN; i++)
> +		sbuf[i] = (3*sbuf[i])>>2;
> +
> +	tmp = 0;
> +	for (i = 0; i < NELLY_FILL_LEN; i++)
> +		tmp += sbuf[i];

sum=0; (or a more appropriate name)
for(i=0; i<NELLY_FILL_LEN; i++){
    if(shift < 0)
        sbuf[i] = buf[i] >> -shift;  (yes buf should be int anyway)
    else
        sbuf[i] = buf[i] <<  shift;
    sbuf[i] = (3*sbuf[i])>>2;
    sum += sbuf[i];
}

[...]
> +		if (diff > 0) {
> +			while (diff <= 16383) {
> +				shift++;
> +				diff *= 2;
> +			}
> +		} else {
> +			while (diff >= -16383) {
> +				shift++;
> +				diff *= 2;
> +			}
> +		}

this can be simplified with FFABS

> +
> +		diff = (diff * NELLY_BASE_OFF) >> 15;
> +		shift = shift_saved-(NELLY_BASE_SHIFT+shift-15);
> +

> +		if (shift > 0) {
> +			diff <<= shift;
> +		} else {
> +			diff >>= -shift;
> +		}

this one occurred often enough to be its own function ...

[...]
> +		tmp = sbuf[i]-off;
> +		if (tmp < 0)
> +			tmp = 0;
> +		else
> +			tmp = ((tmp>>(shift_saved-1))+1)>>1;
> +
> +		if (tmp > NELLY_BIT_CAP)
> +			tmp = NELLY_BIT_CAP;
> +		bits[i] = tmp;

tmp = sbuf[i]-off;
tmp = ((tmp>>(shift_saved-1))+1)>>1;
bits[i] = av_clip(tmp, 0, NELLY_BIT_CAP);

[...]
> +static unsigned char get_bits(unsigned char block[NELLY_BLOCK_LEN], int *off, int n)
> +{
> +	char ret;
> +	int boff = *off/8, bitpos = *off%8, mask = (1<<n)-1;
> +
> +	if (bitpos+n > 8) {
> +		ret = block[boff%NELLY_BLOCK_LEN] >> bitpos;
> +		mask >>= 8-bitpos;
> +		ret |= (block[(boff+1)%NELLY_BLOCK_LEN] & mask) << (8-bitpos);
> +	} else {
> +		ret = (block[boff%NELLY_BLOCK_LEN] >> bitpos) & mask;
> +	}
> +	
> +	*off += n;
> +	return ret;
> +}

please use the bitreader from bitstream.c/h

> +
> +void nelly_decode_block(NellyMoserDecodeContext *s, unsigned char block[NELLY_BLOCK_LEN], float audio[256])
> +{

should be static

> +	int i,j;
> +	float buf[NELLY_BUF_LEN], pows[NELLY_BUF_LEN];
> +	float *aptr, *bptr, *pptr, val, pval;
> +	int bits[NELLY_BUF_LEN];
> +	unsigned char v;
> +	int bit_offset = 0;
> +
> +	bptr = buf;
> +	pptr = pows;
> +	val = nelly_init_table[get_bits(block, &bit_offset, 6)];
> +	for (i = 0; i < 23; i++) {
> +		if (i > 0)
> +			val += nelly_delta_table[get_bits(block, &bit_offset, 5)];
> +		pval = pow(2, val/2048);
> +		for (j = 0; j < nelly_copy_count[i]; j++) {

> +			*bptr = val;
> +			*pptr = pval;
> +			bptr++;
> +			pptr++;

*bptr++ = ...

> +		}
> +
> +	}
> +

> +	for (i = NELLY_FILL_LEN; i < NELLY_BUF_LEN; i++)
> +		buf[i] = pows[i] = 0.0;
> +

buf can only contain integers so it should be int not float

[...]
> +				if (av_random(&s->random_state) % 2)
> +					buf[j] *= -1.0;

I'd prefer &1 over %2

[...]
> +		unpack_coeffs(buf, aptr);

> +		center(aptr);

this reorders the coefficients for the fft, don't give functions nonsense
names

> +		inverse_dft(aptr);
> +		complex2signal(aptr);

without looking too carefully at the code, I think the whole thing is likely an
IMDCT or something; find out what it is and use the existing functions from
libavcodec

> +		apply_state(s->state, aptr);

this applies a window; name it appropriately

> +	}
> +}
> +

> +void nelly_util_floats2shorts(float audio[256], short shorts[256])
> +{
> +	int i;
> +
> +	for (i = 0; i < 256; i++) {
> +		if (audio[i] >= 32767.0)
> +			shorts[i] = 32767;
> +		else if (audio[i] <= -32768.0)
> +			shorts[i] = -32768;
> +		else
> +			shorts[i] = (short)audio[i];
> +	}
> +}

duplicate of the respective code in dsputil

[...]

> +    int     seen_audio_format;
> +    uint8_t audio_format;

both unused

[...]
> +/* size of decoded audio data for a block */
> +#define NELLY_SAMPLE_SIZE 512

comment is not doxygen compatible

[...]
> +float nelly_huff_table[127] = {

no, this table has nothing to do with huffman decoding; it's a dequantization
table

[...]
> +int nelly_copy_count[23] = {
> +2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 5, 6, 6, 7, 8, 9, 10, 12, 14, 15
> +};

this table represents band sizes; name it appropriately

[...]
> +static int decode_tag(AVCodecContext * avctx,
> +                      void *data, int *data_size,
> +                      uint8_t * buf, int buf_size) {
> +    NellyMoserDecodeContext *s = avctx->priv_data;
> +    int remaining_size = buf_size;
> +    *data_size = 0;
> +
> +    while (remaining_size >= NELLY_BLOCK_LEN) {
> +        nelly_decode_block(s, buf, s->float_buf);
> +        nelly_util_floats2shorts(s->float_buf, data);
> +        data += NELLY_SAMPLE_SIZE;
> +        *data_size += NELLY_SAMPLE_SIZE;
> +        buf += NELLY_BLOCK_LEN;
> +        remaining_size -= NELLY_BLOCK_LEN;
> +    }

there should be a parser which splits these so the decoder is only fed
with individual blocks

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are best at talking, realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070911/55d3eb4c/attachment.pgp>