[Ffmpeg-devel] [PATCH] DTS decoder
Michael Niedermayer
michaelni
Fri Feb 23 00:06:18 CET 2007
Hi
On Wed, Feb 21, 2007 at 01:01:13PM +0100, Benjamin Larsson wrote:
> This is the current working version of the DCA decoder. It is mostly a
> port of libdts/libdca to ffmpeg, Kostya have replaced all the GPL
> downmixing code and ported the huffman stuff plus some other various
> things, I played abit with the transform and the initial lavc port. The
> rest of the patch files can be obtained by using svn co
> svn://svn.mplayerhq.hu/dca dca (there are over 300kb of tables and add
> little for a review). The current code just downmix to 2 channels like
> the libdts glue code also did. This will hopefully change in the future.
[...]
> #define SAMPLE(x) (x)
> #define LEVEL(x) (x)
>
> #define MUL(a,b) ((a) * (b))
> #define MUL_L(a,b) ((a) * (b))
> #define MUL_C(a,b) ((a) * (b))
> #define DIV(a,b) ((a) / (b))
> #define BIAS(x) ((x) + bias)
iam against such arithmetic obfuscation macros unless they are somehow
performence relevant but that cant be here considering how often they are
used
[...]
also remove all other unused #defines unless theres a reason to keep them
[...]
> #define BIAS(x) ((x) + bias)
duplicate
[...]
> #define GET_BITALLOC(gb, ba, idx) \
> get_vlc2(gb, ba.vlc[idx].table, ba.vlc[idx].bits, ba.wrap) + ba.offset
this should be a function theres no reason for it not to be, or remove
it completely
>
> typedef struct {
> AVCodecContext *avctx;
> /* Frame header */
> int frame_type; /* type of the current frame */
> int samples_deficit; /* deficit sample count */
> int crc_present; /* crc is present in the bitstream */
> int sample_blocks; /* number of PCM sample blocks */
> int frame_size; /* primary frame byte size */
> int amode; /* audio channels arrangement */
> int sample_rate; /* audio sampling rate */
> int bit_rate; /* transmission bit rate */
not doxygen compatible
> int flags;
> int frame_length;
>
> int downmix; /* embedded downmix enabled */
> int dynrange; /* embedded dynamic range flag */
> int timestamp; /* embedded time stamp flag */
> int aux_data; /* auxiliary data flag */
> int hdcd; /* source material is mastered in HDCD */
> int ext_descr; /* extension audio descriptor flag */
> int ext_coding; /* extended coding flag */
> int aspf; /* audio sync word insertion flag */
> int lfe; /* low frequency effects flag */
> int predictor_history; /* predictor history flag */
> int header_crc; /* header crc check bytes */
> int multirate_inter; /* multirate interpolator switch */
> int version; /* encoder software revision */
> int copy_history; /* copy history */
> int source_pcm_res; /* source pcm resolution */
> int front_sum; /* front sum/difference flag */
> int surround_sum; /* surround sum/difference flag */
> int dialog_norm; /* dialog normalisation parameter */
>
> /* Primary audio coding header */
> int subframes; /* number of subframes */
> int prim_channels; /* number of primary audio channels */
> /* subband activity count */
> int subband_activity[DCA_PRIM_CHANNELS_MAX];
> /* high frequency vq start subband */
> int vq_start_subband[DCA_PRIM_CHANNELS_MAX];
> /* joint intensity coding index */
> int joint_intensity[DCA_PRIM_CHANNELS_MAX];
> /* transient mode code book */
> int transient_huffman[DCA_PRIM_CHANNELS_MAX];
> /* scale factor code book */
> int scalefactor_huffman[DCA_PRIM_CHANNELS_MAX];
> /* bit allocation quantizer select */
> int bitalloc_huffman[DCA_PRIM_CHANNELS_MAX];
> /* quantization index codebook select */
> int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];
> /* scale factor adjustment */
> float scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];
please put comments to the right of fields, that makes them much more readable
[...]
> dca_bitalloc_index.offset = 1;
> dca_bitalloc_index.wrap = 1;
> for (i = 0; i < BITALLOC_12_COUNT; i++)
> init_vlc(&dca_bitalloc_index.vlc[i], bitalloc_12_vlc_bits[i], 12,
> bitalloc_12_bits[i], 1, 1,
> bitalloc_12_codes[i], 2, 2, 1);
> dca_scalefactor.offset = -64;
> dca_scalefactor.wrap = 2;
> for (i = 0; i < SCALES_COUNT; i++)
> init_vlc(&dca_scalefactor.vlc[i], SCALES_VLC_BITS, 129,
> scales_bits[i], 1, 1,
> scales_codes[i], 2, 2, 1);
> dca_tmode.offset = 0;
> dca_tmode.wrap = 1;
> for (i = 0; i < TMODE_COUNT; i++)
> init_vlc(&dca_tmode.vlc[i], tmode_vlc_bits[i], 4,
> tmode_bits[i], 1, 1,
> tmode_codes[i], 2, 2, 1);
*_COUNT should be replaced by appropriat sizeof() constructs
>
> #define INIT_VLC_BITALLOC(num, off, mb, bn) \
> dca_smpl_bitalloc[num].offset = off; \
> dca_smpl_bitalloc[num].wrap = 2; \
> for(i = 0; i < BITALLOC_ ## bn ## _COUNT; i++) \
> init_vlc(&dca_smpl_bitalloc[num].vlc[i], bitalloc_ ## bn ## _vlc_bits[i], bn, \
> bitalloc_##bn##_bits[i], 1, 1, \
> bitalloc_##bn##_codes[i], 2, 2, 1);
>
> INIT_VLC_BITALLOC( 1, 2, -1, 3);
> INIT_VLC_BITALLOC( 2, 4, -2, 5);
> INIT_VLC_BITALLOC( 3, 5, -3, 7);
> INIT_VLC_BITALLOC( 4, 6, -4, 9);
> INIT_VLC_BITALLOC( 5, 7, -6, 13);
> INIT_VLC_BITALLOC( 6, 9, -8, 17);
> INIT_VLC_BITALLOC( 7, 9, -12, 25);
> INIT_VLC_BITALLOC( 8, 9, -16, 33);
> INIT_VLC_BITALLOC( 9, 9, -32, 65);
> INIT_VLC_BITALLOC(10, 9, -64, 129);
ugly, a for() loop and table would be so much cleaner
[...]
> /* Frame type */
> s->frame_type = get_bits(&s->gb, 1);
> /* Samples deficit */
> s->samples_deficit = get_bits(&s->gb, 5);
> /* CRC present */
> s->crc_present = get_bits(&s->gb, 1);
these comments are completely useless and make the code hard to read
>
> s->frame_length = (get_bits(&s->gb, 7) + 1) * 32;
> s->frame_size = get_bits(&s->gb, 14) + 1;
>
> if ((s->frame_size <= 0) || (s->frame_size > DCA_MAX_FRAME_SIZE))
> return 0;
>
> /* Audio channel arrangement */
> s->flags = get_bits(&s->gb, 6);
> if (s->flags > 63)
> return 0;
how is this supposed to be ever true?
>
> sample_rate_index = get_bits(&s->gb, 4);
> if (sample_rate_index >= sizeof(dca_sample_rates) / sizeof(uint32_t))
> return 0;
the table has 16 entries please remove such silly checks
[...]
> s->output = DCA_STEREO;
> if (s->output < 0)
> return 1;
and that does what?
[...]
> if (s->subband_activity[i] > DCA_SUBBANDS)
> s->subband_activity[i] = DCA_SUBBANDS;
FFMIN
> }
> for (i = 0; i < s->prim_channels; i++) {
> s->vq_start_subband[i] = get_bits(&s->gb, 5) + 1;
> #ifdef DEBUG
> av_log(s->avctx, AV_LOG_DEBUG, "vq start subband: %i\n",
> s->vq_start_subband[i]);
> #endif
> if (s->vq_start_subband[i] > DCA_SUBBANDS)
> s->vq_start_subband[i] = DCA_SUBBANDS;
FFMIN
[...]
> for (i = 0; i < s->prim_channels; i++) {
> if (s->quant_index_huffman[i][1] == 0) {
> /* Transmitted only if quant_index_huffman=0 (Huffman code used) */
> s->scalefactor_adj[i][1] = adj_table[get_bits(&s->gb, 2)];
> }
> }
> for (j = 2; j < 6; j++)
> for (i = 0; i < s->prim_channels; i++)
> if (s->quant_index_huffman[i][j] < 3) {
> /* Transmitted only if quant_index_huffman < 3 */
> s->scalefactor_adj[i][j] = adj_table[get_bits(&s->gb, 2)];
> }
> for (j = 6; j < 11; j++)
> for (i = 0; i < s->prim_channels; i++)
> if (s->quant_index_huffman[i][j] < 7) {
> /* Transmitted only if quant_index_huffman < 7 */
> s->scalefactor_adj[i][j] = adj_table[get_bits(&s->gb, 2)];
> }
the previous loops can be merged or factored into their own function
[...]
>
> static int dca_subframe_header(DCAContext * s)
> {
> /* Primary audio coding side information */
> int j, k;
>
> /* Subsubframe count */
> s->subsubframes = get_bits(&s->gb, 2) + 1;
> #ifdef DEBUG
> av_log(s->avctx, AV_LOG_DEBUG, "subsubframes: %i\n", s->subsubframes);
> #endif
>
> /* Partial subsubframe sample count */
> s->partial_samples = get_bits(&s->gb, 3);
> #ifdef DEBUG
> av_log(s->avctx, AV_LOG_DEBUG, "partial samples: %i\n",
> s->partial_samples);
> #endif
>
> /* Get prediction mode for each subband */
> for (j = 0; j < s->prim_channels; j++) {
> for (k = 0; k < s->subband_activity[j]; k++)
> s->prediction_mode[j][k] = get_bits(&s->gb, 1);
> #ifdef DEBUG
> av_log(s->avctx, AV_LOG_DEBUG, "prediction mode:");
> for (k = 0; k < s->subband_activity[j]; k++)
> av_log(s->avctx, AV_LOG_DEBUG, " %i", s->prediction_mode[j][k]);
> av_log(s->avctx, AV_LOG_DEBUG, "\n");
> #endif
and please get rid of the debug code interspaced everywhere
use #define TRACE or if needed extent the TRACE related code
[...]
> for (k = 0; k < s->subband_activity[j]; k++) {
> if (k >= s->vq_start_subband[j] || s->bitalloc[j][k] > 0) {
> if (s->scalefactor_huffman[j] < 5) {
> /* huffman encoded */
> scale_sum += GET_BITALLOC(&s->gb, dca_scalefactor, j);
> } else if (s->scalefactor_huffman[j] == 5) {
> scale_sum = get_bits(&s->gb, 6);
> } else if (s->scalefactor_huffman[j] == 6) {
> scale_sum = get_bits(&s->gb, 7);
> }
>
> s->scale_factor[j][k][0] = scale_table[scale_sum];
> }
>
> if (k < s->vq_start_subband[j] && s->transition_mode[j][k]) {
> /* Get second scale factor */
> if (s->scalefactor_huffman[j] < 5) {
> /* huffman encoded */
> scale_sum += GET_BITALLOC(&s->gb, dca_scalefactor, j);
> } else if (s->scalefactor_huffman[j] == 5) {
> scale_sum = get_bits(&s->gb, 6);
> } else if (s->scalefactor_huffman[j] == 6) {
> scale_sum = get_bits(&s->gb, 7);
> }
>
> s->scale_factor[j][k][1] = scale_table[scale_sum];
> }
code duplication
[...]
> for (k = s->subband_activity[j];
> k < s->subband_activity[source_channel]; k++) {
> if (s->joint_huff[j] < 5) {
> /* huffman encoded */
> scale = GET_BITALLOC(&s->gb, dca_scalefactor, j);
> } else if (s->joint_huff[j] == 5) {
> scale = get_bits(&s->gb, 6);
> } else if (s->joint_huff[j] == 6) {
> scale = get_bits(&s->gb, 7);
> }
code triplification ...
[...]
> for (j = lfe_samples; j < lfe_samples * 2; j++) {
> /* Signed 8 bits int */
> s->lfe_data[j] =
> (signed int) (signed char) get_bits(&s->gb, 8);
get_sbits() and remove the casts
[...]
> /* Reconstructed channel sample index */
> for (nSubIndex = nStart; nSubIndex < nEnd; nSubIndex++) {
> float A[16], B[16], SUM[16], DIFF[16];
uppercase identifers are generally constants, here they are
not and the reader might get confused by that
>
> /* Load in one sample from each subband */
> for (i = 0; i < s->subband_activity[chans]; i++)
> raXin[i] = samples_in[i][nSubIndex];
>
> /* Clear inactive subbands */
> for (i = s->subband_activity[chans]; i < NumSubband; i++)
> raXin[i] = 0.0;
>
> /* Multiply by cosine modulation coefficients and
> * create temporary arrays SUM and DIFF */
> for (j = 0, k = 0; k < 16; k++) {
> A[k] = 0.0;
> //START_TIMER
> // for (i=0;i<16;i++) 2255 vs 592
> // A[k]+=(raXin[2*i]+raXin[2*i+1])*s->cos_mod[j++];
> A[k] += (raXin[2 * 0] + raXin[2 * 0 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 1] + raXin[2 * 1 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 2] + raXin[2 * 2 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 3] + raXin[2 * 3 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 4] + raXin[2 * 4 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 5] + raXin[2 * 5 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 6] + raXin[2 * 6 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 7] + raXin[2 * 7 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 8] + raXin[2 * 8 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 9] + raXin[2 * 9 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 10] + raXin[2 * 10 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 11] + raXin[2 * 11 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 12] + raXin[2 * 12 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 13] + raXin[2 * 13 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 14] + raXin[2 * 14 + 1]) * s->cos_mod[j++];
> A[k] += (raXin[2 * 15] + raXin[2 * 15 + 1]) * s->cos_mod[j++];
#define FOOBAR(i) A[k]+=(raXin[2*(i)]+raXin[2*(i)+1])*s->cos_mod[j++];
#define FOOBAR2(i) FOOBAR(i) FOOBAR(i+1) FOOBAR(i+2) FOOBAR(i+3)
#ifdef CONFIG_SMALL
for (i=0;i<16;i++) FOOBAR(i)
#else
FOOBAR2(0) FOOBAR2(4) FOOBAR2(8) FOOBAR2(12)
#endif
the same can be done with the other similar loops
of course only if this is speed critical code otherwise a plain for loop
is best
[...]
>
> static void lfe_interpolation_fir(int nDecimationSelect,
> int nNumDeciSample, float *samples_in,
> float *samples_out, float scale,
> float bias)
> {
> /* samples_in: An array holding decimated samples.
> * Samples in current subframe starts from samples_in[0],
> * while samples_in[-1], samples_in[-2], ..., stores samples
> * from last subframe as history.
> *
> * samples_out: An array holding interpolated samples
> */
>
> int nDeciFactor, k, J;
upper case J is ugly and might confuse some developers
> float *prCoeff;
>
> int NumFIRCoef = 512; /* Number of FIR coefficients */
> int nInterpIndex = 0; /* Index to the interpolated samples */
> int nDeciIndex;
>
> /* Select decimation filter */
> if (nDecimationSelect == 1) {
> /* 128 decimation */
> nDeciFactor = 128;
> /* Pointer to the FIR coefficients array */
> prCoeff = (float *) lfe_fir_128;
> } else {
> /* 64 decimation */
> nDeciFactor = 64;
the /* * decimation */ is useless, as this is obvious from the variable names
> prCoeff = (float *) lfe_fir_64;
> }
> //START_TIMER
> /* Interpolation */
> for (nDeciIndex = 0; nDeciIndex < nNumDeciSample; nDeciIndex++) {
> /* One decimated sample generates nDeciFactor interpolated ones */
> for (k = 0; k < nDeciFactor; k++) {
> /* Clear accumulation */
> float rTmp = 0.0;
> //FIXME the coeffs are symetric, fix that
> /* Accumulate */
(Clear) Accumulate comments are not very helpfull in understanding as they
just say what everyone sees anyway ...
[...]
> void dca_downmix(float *samples, int srcfmt)
add ff_ prefix or static
> {
> int i;
> float t;
>
> switch (srcfmt) {
> case DCA_MONO:
> case DCA_CHANNEL:
> case DCA_STEREO_TOTAL:
> case DCA_STEREO_SUMDIFF:
> case DCA_4F2R:
> av_log(NULL, 0, "Not implemented!\n");
> break;
> case DCA_STEREO:
> break;
> case DCA_3F:
> DOWNMIX_TO_STEREO(MIX_FRONT3(samples),);
> break;
> case DCA_2F1R:
> DOWNMIX_TO_STEREO(MIX_REAR1(samples, i + 512),);
> break;
> case DCA_3F1R:
> DOWNMIX_TO_STEREO(MIX_FRONT3(samples),
> MIX_REAR1(samples, i + 768));
> break;
> case DCA_2F2R:
> DOWNMIX_TO_STEREO(MIX_REAR2(samples, i + 512, i + 768),);
> break;
> case DCA_3F2R:
> DOWNMIX_TO_STEREO(MIX_FRONT3(samples),
> MIX_REAR2(samples, i + 768, i + 1024));
> break;
> }
> }
all the downmixig code of the various audio codecs should be merged ideally
>
>
> /* Very compact version of the block code decoder that does not use table
> * look-up but is slightly slower */
> static int decode_blockcode(int code, int levels, int *values)
> {
> int i;
> int offset = (levels - 1) >> 1;
>
> for (i = 0; i < 4; i++) {
> values[i] = (code % levels) - offset;
> code /= levels;
> }
>
> if (code == 0)
> return 1;
> else {
> av_log(NULL, AV_LOG_ERROR, "ERROR: block code look-up failed\n");
> return 0;
error return should be -1
[...]
> case 2: /* No further encoding */
> for (m = 0; m < 8; m++) {
> /* Extract (signed) quantization index */
> int q_index = get_bits(&s->gb, abits - 3);
> if (q_index & (1 << (abits - 4))) {
> q_index = (1 << (abits - 3)) - q_index;
> q_index = -q_index;
> }
get_sbits() ?
> subband_samples[k][l][m] = q_index;
> }
> break;
>
> case 3: /* Block code */
> {
> int block_code1, block_code2, size, levels;
> int block[8];
>
> switch (abits) {
> case 1:
> size = 7;
> levels = 3;
> break;
> case 2:
> size = 10;
> levels = 5;
> break;
> case 3:
> size = 12;
> levels = 7;
> break;
> case 4:
> size = 13;
> levels = 9;
> break;
> case 5:
> size = 15;
> levels = 13;
> break;
> case 6:
> size = 17;
> levels = 17;
> break;
> case 7:
> default:
> size = 19;
> levels = 25;
> break;
> }
this would be much simpler with a table
[...]
> /*
> * Inverse ADPCM if in prediction mode
> */
> if (s->prediction_mode[k][l]) {
> int n;
> for (m = 0; m < 8; m++) {
> for (n = 1; n <= 4; n++)
> if (m - n >= 0)
if(m>=n)
[...]
> for (m = 0; m < 8; m++) {
> subband_samples[k][l][m] =
> high_freq_vq[s->high_freq_vq[k][l]][subsubframe * 8 +
> m]
> * (float) s->scale_factor[k][l][0] / 16.0;
* (1/16.0) might be faster depening on how stupid gcc is
[...]
> /* Backup predictor history for adpcm */
> for (k = 0; k < s->prim_channels; k++) {
> for (l = 0; l < s->vq_start_subband[k]; l++) {
> int m;
> for (m = 0; m < 4; m++)
> s->subband_samples_hist[k][l][m] =
> subband_samples[k][l][4 + m];
memcpy()
> }
> }
>
> /* 32 subbands QMF */
> for (k = 0; k < s->prim_channels; k++) {
> /* static float pcm_to_double[8] =
> {32768.0, 32768.0, 524288.0, 524288.0, 0, 8388608.0, 8388608.0};*/
> START_TIMER //982747 vs 884090 vs 874210 vs 706959 vs 686880 vs 677672 vs 557540 vs 429267 vs 420059 (times 16350)
> qmf_32_subbands(s, k,
> subband_samples[k],
> &s->samples[256 * k],
> 3 / 2 /*pcm_to_double[s->source_pcm_res] */ ,
3/2= 1 is this intended?
> 0 /*s->bias */ );
> STOP_TIMER("qmf_32_subbands")
> }
>
> /* Down mixing */
>
> if (s->prim_channels > dca_channels[s->output & DCA_CHANNEL_MASK]) {
> dca_downmix(s->samples, s->amode);
> }
cant the downmixing be moved before the qmf and qmf skiped for "lost" channels?
[...]
> /**
> * Convert bitstream to one representation based on sync marker
> */
> int dca_convert_bitstream(uint8_t * src, int src_size, uint8_t * dst,
> int max_size)
static or ff_ prefix
> {
> uint32_t tmp = 0, mrk;
> int bits = 0;
> int dsize = 0;
> int i;
> uint16_t *ssrc = (uint16_t *) src, *sdst = (uint16_t *) dst;
>
> mrk = AV_RB32(src);
> switch (mrk) {
> case DCA_MARKER_RAW_BE:
> memcpy(dst, src, FFMIN(src_size, max_size));
cant that memcpy be avoided?
[...]
> case DCA_MARKER_14B_BE:
> case DCA_MARKER_14B_LE:
> for (i = 0; i < (src_size + 1) >> 1 && dsize < max_size; i++) {
> tmp <<= 14;
> tmp |=
> ((mrk ==
> DCA_MARKER_14B_BE) ? AV_RB16(src) : AV_RL16(src)) &
> 0x3FFF;
> src += 2;
> bits += 14;
> while (bits >= 8) {
> bits -= 8;
> *dst++ = tmp >> bits;
> dsize++;
> if (dsize >= max_size)
> break;
> }
> }
> if (bits) {
> *dst++ = tmp << (8 - bits);
> dsize++;
> }
a put_bits(14) could simplify this quite a bit ...
[...]
>
> /**
> * Main frame decoding function
> * FIXME add arguments
> */
> static int dca_decode_frame(AVCodecContext * avctx,
> void *data, int *data_size,
> uint8_t * buf, int buf_size)
> {
>
> int i, j, k;
> int16_t *samples = data;
> DCAContext *s = avctx->priv_data;
> int channels;
>
>
> *data_size = 0;
you should check that the buffer is large enough
[...]
> //if the AVCodecContext is missing some values, fill in the ones we have
> if (!avctx_check(avctx)) {
> s->flags = 2; //force stereo for now FIXME
> avctx->sample_rate = s->sample_rate;
> avctx->channels = channels_multi(s->flags);
> avctx->bit_rate = s->bit_rate;
> }
hmm
if(!avctx->sample_rate) avctx->sample_rate = s->sample_rate
...
seems better, or even always set them no matter if they have been set or not
[...]
> static int dca_decode_close(AVCodecContext * avctx)
> {
> //DCAContext *s = avctx->priv_data;
> return 0;
> }
you could as well set .close to NULL
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Democracy is the form of government in which you can choose your dictator
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070223/e8e5c39f/attachment.pgp>
More information about the ffmpeg-devel
mailing list