[FFmpeg-devel] [PATCH] ALAC Encoder
Jai Menon
realityman
Mon Aug 18 19:17:42 CEST 2008
Hi,
On Monday 18 Aug 2008 9:55:10 pm Jai Menon wrote:
> Hi,
>
> On Monday 18 Aug 2008 3:46:23 am Michael Niedermayer wrote:
> > On Mon, Aug 18, 2008 at 02:38:24AM +0530, Jai Menon wrote:
> > > Hi,
> > >
> > > On Sunday 17 Aug 2008 5:17:52 pm Michael Niedermayer wrote:
> > > > On Sun, Aug 17, 2008 at 11:17:10AM +0530, Jai Menon wrote:
> >
> > [...]
> >
> > [...]
> >
> > > Index: libavcodec/alacenc.c
> > > ===================================================================
> > > --- libavcodec/alacenc.c (revision 14818)
> > > +++ libavcodec/alacenc.c (working copy)
> > > @@ -33,15 +33,58 @@
> > >
> > > #define ALAC_ESCAPE_CODE 0x1FF
> > > #define ALAC_MAX_LPC_ORDER 30
> > > +#define DEFAULT_MAX_PRED_ORDER 6
> > > +#define DEFAULT_MIN_PRED_ORDER 4
> > > +#define ALAC_MAX_LPC_PRECISION 9
> > > +#define ALAC_MAX_LPC_SHIFT 9
> >
> > ok
> >
> > > +#define ALAC_CHMODE_LEFT_RIGHT 1
> > > +#define ALAC_CHMODE_LEFT_SIDE 8
> > > +#define ALAC_CHMODE_RIGHT_SIDE 9
> > > +#define ALAC_CHMODE_MID_SIDE 10
> > > +
> > >
> > > +typedef struct RiceContext {
> > > + int history_mult;
> > > + int initial_history;
> > > + int k_modifier;
> > > + int rice_modifier;
> > > +} RiceContext;
> > > +
> > > +typedef struct LPCContext {
> > > + int lpc_order;
> > > + int lpc_coeff[ALAC_MAX_LPC_ORDER+1];
> > > + int lpc_quant;
> > > +} LPCContext;
> > > +
> > > +typedef struct AlacEncodeContext {
> > > + int compression_level;
> > > + int max_coded_frame_size;
> > > + int write_sample_size;
> > > + int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE];
> >
> > ok
> >
> > > + int32_t predictor_buf[DEFAULT_FRAME_SIZE];
> > > int interlacing_shift;
> > > int interlacing_leftweight;
> > > PutBitContext pbctx;
> > >
> > > + RiceContext rc;
> > > + LPCContext lpc[MAX_CHANNELS];
> >
> > ok
> >
> > > DSPContext dspctx;
> > > AVCodecContext *avctx;
> > > } AlacEncodeContext;
> > >
> > >
> > >
> > > +static void init_sample_buffers(AlacEncodeContext *s, int16_t
> > > *input_samples) +{
> > > + int ch, i;
> > > +
> > > + for(ch=0;ch<s->avctx->channels;ch++) {
> > > + int16_t *sptr = input_samples + ch;
> > > + for(i=0;i<s->avctx->frame_size;i++) {
> > > + s->sample_buf[ch][i] = *sptr;
> > > + sptr += s->avctx->channels;
> > > + }
> > > + }
> > > +}
> > > +
> > > static void encode_scalar(AlacEncodeContext *s, int x, int k, int
> > > write_sample_size) {
> > > int divisor, q, r;
> >
> > ok
> >
> > > @@ -71,7 +114,7 @@
> > >
> > > static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
> > > {
> > > - put_bits(&s->pbctx, 3, s->channels-1); // No. of
> > > channels -1 + put_bits(&s->pbctx, 3, s->avctx->channels-1);
> > > // No. of channels -1 put_bits(&s->pbctx, 16, 0);
> > > // Seems to be zero put_bits(&s->pbctx, 1, 1);
> > > // Sample count is in the header put_bits(&s->pbctx, 2, 0);
> > > // FIXME: Wasted bytes field
> >
> > ok
> >
> > > @@ -79,6 +122,205 @@
> > > put_bits(&s->pbctx, 32, s->avctx->frame_size); // No. of
> > > samples in the frame }
> > >
> > > +static void calc_predictor_params(AlacEncodeContext *s, int ch)
> > > +{
> > > + int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
> > > + int shift[MAX_LPC_ORDER];
> > > + int opt_order;
> > > +
> > > + opt_order = ff_lpc_calc_coefs(&s->dspctx, s->sample_buf[ch],
> > > s->avctx->frame_size, DEFAULT_MIN_PRED_ORDER, DEFAULT_MAX_PRED_ORDER, +
> > > ALAC_MAX_LPC_PRECISION, coefs, shift,
> > > 1, ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1); +
> > > + s->lpc[ch].lpc_order = opt_order;
> > > + s->lpc[ch].lpc_quant = shift[opt_order-1];
> > > + memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1],
> > > opt_order*sizeof(int)); +}
> > > +
> >
> > I think this should be using AVCodecContext.min/max_prediction_order
> >
> > > +static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch,
> > > int n) +{
> > > + int i, best;
> > > + int32_t lt, rt;
> > > + uint64_t sum[4];
> > > + uint64_t score[4];
> > > +
> > > + /* calculate sum of 2nd order residual for each channel */
> > > + sum[0] = sum[1] = sum[2] = sum[3] = 0;
> > > + for(i=2; i<n; i++) {
> > > + lt = left_ch[i] - 2*left_ch[i-1] + left_ch[i-2];
> > > + rt = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];
> > > + sum[2] += FFABS((lt + rt) >> 1);
> > > + sum[3] += FFABS(lt - rt);
> > > + sum[0] += FFABS(lt);
> > > + sum[1] += FFABS(rt);
> > > + }
> > > +
> > > + /* calculate score for each mode */
> > > + score[0] = sum[0] + sum[1];
> > > + score[1] = sum[0] + sum[3];
> > > + score[2] = sum[1] + sum[3];
> > > + score[3] = sum[2] + sum[3];
> > > +
> > > + /* return mode with lowest score */
> > > + best = 0;
> > > + for(i=1; i<4; i++) {
> > > + if(score[i] < score[best]) {
> > > + best = i;
> > > + }
> > > + }
> >
> > ok
> >
> > > + if(best == 0) {
> > > + return ALAC_CHMODE_LEFT_RIGHT;
> > > + } else if(best == 1) {
> > > + return ALAC_CHMODE_LEFT_SIDE;
> > > + } else if(best == 2) {
> > > + return ALAC_CHMODE_RIGHT_SIDE;
> > > + } else {
> > > + return ALAC_CHMODE_MID_SIDE;
> > > + }
> > > +}
> >
> > I think `best` could simply be returned
> >
> > > +
> > > +static void alac_stereo_decorrelation(AlacEncodeContext *s)
> > > +{
> > > + int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
> > > + int i, mode, n = s->avctx->frame_size;
> > > +
> > > + mode = estimate_stereo_mode(left, right, n);
> > > +
> > > + if(mode == ALAC_CHMODE_LEFT_RIGHT) {
> > > + s->interlacing_leftweight = 0;
> > > + s->interlacing_shift = 0;
> > > + return;
> > > + }
> > > +
> > > + if(mode == ALAC_CHMODE_LEFT_SIDE) {
> > > + for(i=0; i<n; i++) {
> > > + right[i] = left[i] - right[i];
> > > + }
> > > + s->interlacing_leftweight = 1;
> > > + s->interlacing_shift = 0;
> > > +
> > > + } else {
> > > + int32_t tmp;
> > > + for(i=0; i<n; i++) {
> > > + tmp = left[i];
> > > + left[i] = (tmp + right[i]) >> 1;
> > > + right[i] = tmp - right[i];
> > > + }
> > > + s->interlacing_leftweight = 1;
> > > + s->interlacing_shift = 1;
> > > + }
> >
> > I think one mode is missing
>
> I left out the right-side mode because I really don't see how the decoder
> could support it without accidentally swapping channels. Or am I missing
> something?
>
Michael, do you want me to introduce another decorrelation scheme, or are the
three existing ones okay? I was thinking of one with a shift value of 2, but
the difference during entropy coding is negligible.
Regards,
Jai Menon
More information about the ffmpeg-devel
mailing list