[FFmpeg-devel] [PATCH] ALAC Encoder

Sun Aug 17 05:39:00 CEST 2008

Hi,

On Sunday 17 Aug 2008 8:05:14 am Michael Niedermayer wrote:
> On Sun, Aug 17, 2008 at 04:14:43AM +0530, Jai Menon wrote:
> > Hi,
> >
> > The attached ALAC encoder was written as part of GSoC and mentored by
> > Justin Ruggles. I'm posting it for inclusion into FFmpeg-svn.
>
> [...]
>
> > Index: libavcodec/alacenc.c
> > ===================================================================
> > --- libavcodec/alacenc.c	(revision 0)
> > +++ libavcodec/alacenc.c	(revision 0)
> > @@ -0,0 +1,459 @@
> > +/**
> > + * ALAC audio encoder
> > + * Copyright (c) 2008  Jaikrishnan Menon <realityman at gmx.net>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#include "avcodec.h"
> > +#include "bitstream.h"
> > +#include "dsputil.h"
> > +#include "lpc.h"
> > +
> > +#define DEFAULT_FRAME_SIZE        4096
> > +#define DEFAULT_SAMPLE_SIZE       16
> > +#define MAX_CHANNELS              8
> > +#define ALAC_EXTRADATA_SIZE       36
> > +#define ALAC_FRAME_HEADER_SIZE    55
> > +#define ALAC_FRAME_FOOTER_SIZE    3
> > +
> > +#define ALAC_ESCAPE_CODE          0x1FF
> > +#define ALAC_MAX_LPC_ORDER        30
>
> ok
>
>
> [...]
>
> > +typedef struct AlacEncodeContext {
> >
> > +    int channels;
> > +    int samplerate;
>
> redundant relative to the fields in AVCodecContext
>
>
> [...]
>
> > +    int interlacing_shift;
> > +    int interlacing_leftweight;
> > +    PutBitContext pbctx;
>
> ok (pb is more common than pbctx but this is nitpicking, pbctx is ok if you
>     prefer, the same is true for dspctx)
>
>
> [...]
>
> > +    DSPContext dspctx;
> > +    AVCodecContext *avctx;
> > +} AlacEncodeContext;
>
> ok
>
> > +
> > +
> > +static void allocate_sample_buffers(AlacEncodeContext *s)
> > +{
> > +    int i = s->channels;
> > +
> > +    while(i) {
> > +        s->sample_buf[i-1] =
> > av_mallocz(s->avctx->frame_size*sizeof(int32_t)); +        i--;
> > +    }
> > +    s->predictor_buf = av_mallocz(s->avctx->frame_size*sizeof(int32_t));
> > +}
> > +
> > +static void free_sample_buffers(AlacEncodeContext *s)
> > +{
> > +    int i = s->channels;
> > +
> > +    while(i) {
> > +        av_freep(&s->sample_buf[i-1]);
> > +        i--;
> > +    }
> > +    av_freep(&s->predictor_buf);
> > +}
>
> As they have constant size they do not need a malloc() but instead can be
> part of the context
>
>
> [...]
>
> > +static void encode_scalar(AlacEncodeContext *s, int x, int k, int
> > write_sample_size) +{
> > +    int divisor, q, r;
> > +
> > +    k = FFMIN(k, s->rc.k_modifier);
> > +    divisor = (1<<k) - 1;
> > +    q = x / divisor;
> > +    r = x % divisor;
> > +
> > +    if(q > 8) {
> > +        // write escape code and sample value directly
> > +        put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE);
> > +        put_bits(&s->pbctx, write_sample_size, x);
> > +    } else {
> > +        if(q)
> > +            put_bits(&s->pbctx, q, (1<<q) - 1);
> > +        put_bits(&s->pbctx, 1, 0);
> > +
> > +        if(k != 1) {
> > +            if(r > 0)
> > +                put_bits(&s->pbctx, k, r+1);
> > +            else
> > +                put_bits(&s->pbctx, k-1, 0);
> > +        }
> > +    }
> > +}
>
> ok
>
> > +
> > +static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
> > +{
> > +    put_bits(&s->pbctx, 3,  s->channels-1);                 // No. of
> > channels -1 +    put_bits(&s->pbctx, 16, 0);                            
> > // Seems to be zero +    put_bits(&s->pbctx, 1,  1);                     
> >        // Sample count is in the header +    put_bits(&s->pbctx, 2,  0); 
> >                            // FIXME: Wasted bytes field +   
> > put_bits(&s->pbctx, 1,  is_verbatim);                   // Audio block is
> > verbatim +    put_bits(&s->pbctx, 32, s->avctx->frame_size);          //
> > No. of samples in the frame +}
>
> ok
>
>
> [...]
>
> > +static void alac_stereo_decorrelation(AlacEncodeContext *s)
> > +{
> > +    int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
> > +    int32_t tmp;
> > +    int i;
> > +
> > +    for(i=0; i<s->avctx->frame_size; i++) {
> > +        tmp = left[i];
> > +        left[i] = (tmp + right[i]) >> 1;
> > +        right[i] = tmp - right[i];
> > +    }
> >
> > +    s->interlacing_leftweight = 1;
> > +    s->interlacing_shift = 1;
>
> I do not believe this is optimal
>

It may not be optimal in the sense that I do not adaptively select the 
decorrelation scheme, but this is just the first iteration, which aims at 
getting a basic encoder into svn. And it is better than doing no 
decorrelation. I did initially try out an adaptive approach, but the difference 
in compression wasn't that great. I'm looking into how this can be done in a 
better manner. Until then, I was hoping we could go with this.

Regards,

Jai Menon