[FFmpeg-devel] [PATCH] ALAC Encoder
Jai Menon
realityman
Sun Aug 17 05:39:00 CEST 2008
Hi,
On Sunday 17 Aug 2008 8:05:14 am Michael Niedermayer wrote:
> On Sun, Aug 17, 2008 at 04:14:43AM +0530, Jai Menon wrote:
> > Hi,
> >
> > The attached ALAC encoder was written as part of GSoC and mentored by
> > Justin Ruggles. I'm posting it for inclusion into FFmpeg-svn.
>
> [...]
>
> > Index: libavcodec/alacenc.c
> > ===================================================================
> > --- libavcodec/alacenc.c (revision 0)
> > +++ libavcodec/alacenc.c (revision 0)
> > @@ -0,0 +1,459 @@
> > +/**
> > + * ALAC audio encoder
> > + * Copyright (c) 2008 Jaikrishnan Menon <realityman at gmx.net>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA + */
> > +
> > +#include "avcodec.h"
> > +#include "bitstream.h"
> > +#include "dsputil.h"
> > +#include "lpc.h"
> > +
> > +#define DEFAULT_FRAME_SIZE 4096
> > +#define DEFAULT_SAMPLE_SIZE 16
> > +#define MAX_CHANNELS 8
> > +#define ALAC_EXTRADATA_SIZE 36
> > +#define ALAC_FRAME_HEADER_SIZE 55
> > +#define ALAC_FRAME_FOOTER_SIZE 3
> > +
> > +#define ALAC_ESCAPE_CODE 0x1FF
> > +#define ALAC_MAX_LPC_ORDER 30
>
> ok
>
>
> [...]
>
> > +typedef struct AlacEncodeContext {
> >
> > + int channels;
> > + int samplerate;
>
> redundant relative to the fields in AVCodecContext
>
>
> [...]
>
> > + int interlacing_shift;
> > + int interlacing_leftweight;
> > + PutBitContext pbctx;
>
> ok (pb is more common than pbctx but this is nitpicking, pbctx is ok if you
> prefer, the same is true for dspctx)
>
>
> [...]
>
> > + DSPContext dspctx;
> > + AVCodecContext *avctx;
> > +} AlacEncodeContext;
>
> ok
>
> > +
> > +
> > +static void allocate_sample_buffers(AlacEncodeContext *s)
> > +{
> > + int i = s->channels;
> > +
> > + while(i) {
> > + s->sample_buf[i-1] =
> > av_mallocz(s->avctx->frame_size*sizeof(int32_t)); + i--;
> > + }
> > + s->predictor_buf = av_mallocz(s->avctx->frame_size*sizeof(int32_t));
> > +}
> > +
> > +static void free_sample_buffers(AlacEncodeContext *s)
> > +{
> > + int i = s->channels;
> > +
> > + while(i) {
> > + av_freep(&s->sample_buf[i-1]);
> > + i--;
> > + }
> > + av_freep(&s->predictor_buf);
> > +}
>
> As they have constant size they do not need a malloc() but instead can be
> part of the context
>
>
> [...]
>
> > +static void encode_scalar(AlacEncodeContext *s, int x, int k, int
> > write_sample_size) +{
> > + int divisor, q, r;
> > +
> > + k = FFMIN(k, s->rc.k_modifier);
> > + divisor = (1<<k) - 1;
> > + q = x / divisor;
> > + r = x % divisor;
> > +
> > + if(q > 8) {
> > + // write escape code and sample value directly
> > + put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE);
> > + put_bits(&s->pbctx, write_sample_size, x);
> > + } else {
> > + if(q)
> > + put_bits(&s->pbctx, q, (1<<q) - 1);
> > + put_bits(&s->pbctx, 1, 0);
> > +
> > + if(k != 1) {
> > + if(r > 0)
> > + put_bits(&s->pbctx, k, r+1);
> > + else
> > + put_bits(&s->pbctx, k-1, 0);
> > + }
> > + }
> > +}
>
> ok
>
> > +
> > +static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
> > +{
> > + put_bits(&s->pbctx, 3, s->channels-1); // No. of
> > channels -1 + put_bits(&s->pbctx, 16, 0);
> > // Seems to be zero + put_bits(&s->pbctx, 1, 1);
> > // Sample count is in the header + put_bits(&s->pbctx, 2, 0);
> > // FIXME: Wasted bytes field +
> > put_bits(&s->pbctx, 1, is_verbatim); // Audio block is
> > verbatim + put_bits(&s->pbctx, 32, s->avctx->frame_size); //
> > No. of samples in the frame +}
>
> ok
>
>
> [...]
>
> > +static void alac_stereo_decorrelation(AlacEncodeContext *s)
> > +{
> > + int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
> > + int32_t tmp;
> > + int i;
> > +
> > + for(i=0; i<s->avctx->frame_size; i++) {
> > + tmp = left[i];
> > + left[i] = (tmp + right[i]) >> 1;
> > + right[i] = tmp - right[i];
> > + }
> >
> > + s->interlacing_leftweight = 1;
> > + s->interlacing_shift = 1;
>
> i do not belive this is optimal
>
It may not be optimal in the sense that I do not adaptively select the
decorrelation scheme, but this is just the first iteration, which aims at
getting a basic encoder into svn. And it is better than doing no
decorrelation. I did initially try out an adaptive approach, but the difference
in compression wasn't that great. I'm looking into how this can be done in a
better manner. Until then, I was hoping we could go with this.
Regards,
Jai Menon
More information about the ffmpeg-devel
mailing list