[FFmpeg-devel] [PATCH] ALAC Encoder

Mon Aug 18 19:17:42 CEST 2008

Hi,

On Monday 18 Aug 2008 9:55:10 pm Jai Menon wrote:
> Hi,
>
> On Monday 18 Aug 2008 3:46:23 am Michael Niedermayer wrote:
> > On Mon, Aug 18, 2008 at 02:38:24AM +0530, Jai Menon wrote:
> > > Hi,
> > >
> > > On Sunday 17 Aug 2008 5:17:52 pm Michael Niedermayer wrote:
> > > > On Sun, Aug 17, 2008 at 11:17:10AM +0530, Jai Menon wrote:
> >
> > [...]
> >
> > [...]
> >
> > > Index: libavcodec/alacenc.c
> > > ===================================================================
> > > --- libavcodec/alacenc.c	(revision 14818)
> > > +++ libavcodec/alacenc.c	(working copy)
> > > @@ -33,15 +33,58 @@
> > >
> > >  #define ALAC_ESCAPE_CODE          0x1FF
> > >  #define ALAC_MAX_LPC_ORDER        30
> > > +#define DEFAULT_MAX_PRED_ORDER    6
> > > +#define DEFAULT_MIN_PRED_ORDER    4
> > > +#define ALAC_MAX_LPC_PRECISION    9
> > > +#define ALAC_MAX_LPC_SHIFT        9
> >
> > ok
> >
> > > +#define ALAC_CHMODE_LEFT_RIGHT    1
> > > +#define ALAC_CHMODE_LEFT_SIDE     8
> > > +#define ALAC_CHMODE_RIGHT_SIDE    9
> > > +#define ALAC_CHMODE_MID_SIDE     10
> > > +
> > >
> > > +typedef struct RiceContext {
> > > +    int history_mult;
> > > +    int initial_history;
> > > +    int k_modifier;
> > > +    int rice_modifier;
> > > +} RiceContext;
> > > +
> > > +typedef struct LPCContext {
> > > +    int lpc_order;
> > > +    int lpc_coeff[ALAC_MAX_LPC_ORDER+1];
> > > +    int lpc_quant;
> > > +} LPCContext;
> > > +
> > > +typedef struct AlacEncodeContext {
> > > +    int compression_level;
> > > +    int max_coded_frame_size;
> > > +    int write_sample_size;
> > > +    int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE];
> >
> > ok
> >
> > > +    int32_t predictor_buf[DEFAULT_FRAME_SIZE];
> > >      int interlacing_shift;
> > >      int interlacing_leftweight;
> > >      PutBitContext pbctx;
> > >
> > > +    RiceContext rc;
> > > +    LPCContext lpc[MAX_CHANNELS];
> >
> > ok
> >
> > >      DSPContext dspctx;
> > >      AVCodecContext *avctx;
> > >  } AlacEncodeContext;
> > >
> > >
> > >
> > > > +static void init_sample_buffers(AlacEncodeContext *s, int16_t *input_samples)
> > > > +{
> > > +    int ch, i;
> > > +
> > > +    for(ch=0;ch<s->avctx->channels;ch++) {
> > > +        int16_t *sptr = input_samples + ch;
> > > +        for(i=0;i<s->avctx->frame_size;i++) {
> > > +            s->sample_buf[ch][i] = *sptr;
> > > +            sptr += s->avctx->channels;
> > > +        }
> > > +    }
> > > +}
> > > +
> > > >  static void encode_scalar(AlacEncodeContext *s, int x, int k, int write_sample_size)
> > > >  {
> > >      int divisor, q, r;
> >
> > ok
> >
> > > @@ -71,7 +114,7 @@
> > >
> > >  static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
> > >  {
> > > > -    put_bits(&s->pbctx, 3,  s->channels-1);                 // No. of channels -1
> > > > +    put_bits(&s->pbctx, 3,  s->avctx->channels-1);          // No. of channels -1
> > > >      put_bits(&s->pbctx, 16, 0);                             // Seems to be zero
> > > >      put_bits(&s->pbctx, 1,  1);                             // Sample count is in the header
> > > >      put_bits(&s->pbctx, 2,  0);                             // FIXME: Wasted bytes field
> >
> > ok
> >
> > > @@ -79,6 +122,205 @@
> > > >      put_bits(&s->pbctx, 32, s->avctx->frame_size);          // No. of samples in the frame
> > > >  }
> > >
> > > +static void calc_predictor_params(AlacEncodeContext *s, int ch)
> > > +{
> > > +    int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
> > > +    int shift[MAX_LPC_ORDER];
> > > +    int opt_order;
> > > +
> > > > +    opt_order = ff_lpc_calc_coefs(&s->dspctx, s->sample_buf[ch], s->avctx->frame_size, DEFAULT_MIN_PRED_ORDER, DEFAULT_MAX_PRED_ORDER,
> > > > +                                   ALAC_MAX_LPC_PRECISION, coefs, shift, 1, ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);
> > > > +
> > > +    s->lpc[ch].lpc_order = opt_order;
> > > +    s->lpc[ch].lpc_quant = shift[opt_order-1];
> > > > +    memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
> > > > +}
> > > +
> >
> > I think this should be using AVCodecContext.min/max_prediction_order
> >
> > > > +static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
> > > > +{
> > > +    int i, best;
> > > +    int32_t lt, rt;
> > > +    uint64_t sum[4];
> > > +    uint64_t score[4];
> > > +
> > > +    /* calculate sum of 2nd order residual for each channel */
> > > +    sum[0] = sum[1] = sum[2] = sum[3] = 0;
> > > +    for(i=2; i<n; i++) {
> > > +        lt = left_ch[i] - 2*left_ch[i-1] + left_ch[i-2];
> > > +        rt = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];
> > > +        sum[2] += FFABS((lt + rt) >> 1);
> > > +        sum[3] += FFABS(lt - rt);
> > > +        sum[0] += FFABS(lt);
> > > +        sum[1] += FFABS(rt);
> > > +    }
> > > +
> > > +    /* calculate score for each mode */
> > > +    score[0] = sum[0] + sum[1];
> > > +    score[1] = sum[0] + sum[3];
> > > +    score[2] = sum[1] + sum[3];
> > > +    score[3] = sum[2] + sum[3];
> > > +
> > > +    /* return mode with lowest score */
> > > +    best = 0;
> > > +    for(i=1; i<4; i++) {
> > > +        if(score[i] < score[best]) {
> > > +            best = i;
> > > +        }
> > > +    }
> >
> > ok
> >
> > > +    if(best == 0) {
> > > +        return ALAC_CHMODE_LEFT_RIGHT;
> > > +    } else if(best == 1) {
> > > +        return ALAC_CHMODE_LEFT_SIDE;
> > > +    } else if(best == 2) {
> > > +        return ALAC_CHMODE_RIGHT_SIDE;
> > > +    } else {
> > > +        return ALAC_CHMODE_MID_SIDE;
> > > +    }
> > > +}
> >
> > I think best could simply be returned
> >
> > > +
> > > +static void alac_stereo_decorrelation(AlacEncodeContext *s)
> > > +{
> > > +    int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
> > > +    int i, mode, n = s->avctx->frame_size;
> > > +
> > > +    mode = estimate_stereo_mode(left, right, n);
> > > +
> > > +    if(mode == ALAC_CHMODE_LEFT_RIGHT) {
> > > +        s->interlacing_leftweight = 0;
> > > +        s->interlacing_shift = 0;
> > > +        return;
> > > +    }
> > > +
> > > +    if(mode == ALAC_CHMODE_LEFT_SIDE) {
> > > +        for(i=0; i<n; i++) {
> > > +            right[i] = left[i] - right[i];
> > > +        }
> > > +        s->interlacing_leftweight = 1;
> > > +        s->interlacing_shift = 0;
> > > +
> > > +    } else {
> > > +        int32_t tmp;
> > > +        for(i=0; i<n; i++) {
> > > +            tmp = left[i];
> > > +            left[i] = (tmp + right[i]) >> 1;
> > > +            right[i] = tmp - right[i];
> > > +        }
> > > +        s->interlacing_leftweight = 1;
> > > +        s->interlacing_shift = 1;
> > > +    }
> >
> > I think one mode is missing
>
> I left out the right-side mode because I really don't see how the decoder
> could support it without accidentally swapping channels. Or am I missing
> something?
>

Michael, do you want me to introduce another decorrelation scheme, or are 3
okay? I was thinking of one with a shift value of 2, but the difference
during entropy coding is negligible.

Regards,

Jai Menon