[FFmpeg-devel] [PATCH] Add single stream LATM/LOAS decoder

Mon Oct 18 15:59:47 CEST 2010

On Mon, Oct 18, 2010 at 02:56:36PM +0200, Michael Niedermayer wrote:
> On Sun, Oct 17, 2010 at 12:47:54PM +0200, Janne Grunau wrote:
> > The decoder is basicly just a wrapper around the AAC decoder.
> > based on patch by Paul Kendall { paul <?t> kcbbs gen nz }
> > ---
> >  Changelog                |    1 +
> >  configure                |    1 +
> >  libavcodec/Makefile      |    2 +
> >  libavcodec/aaclatmdec.c  |  438 ++++++++++++++++++++++++++++++++++++++++++++++
> >  libavcodec/allcodecs.c   |    2 +
> >  libavcodec/avcodec.h     |    3 +-
> >  libavcodec/latm_parser.c |  118 +++++++++++++
> >  7 files changed, 564 insertions(+), 1 deletions(-)
> >  create mode 100644 libavcodec/aaclatmdec.c
> >  create mode 100644 libavcodec/latm_parser.c
> > 
> > diff --git a/Changelog b/Changelog
> > index 76d6b8b..1abc19d 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -46,6 +46,7 @@ version <next>:
> >  - RTP depacketization of the X-QT QuickTime format
> >  - SAP (Session Announcement Protocol, RFC 2974) muxer and demuxer
> >  - cropdetect filter
> > +- single stream LATM/LOAS decoder
> >  
> >  
> >  version 0.6:
> > diff --git a/configure b/configure
> > index 0e6e439..f3e65d4 100755
> > --- a/configure
> > +++ b/configure
> > @@ -1187,6 +1187,7 @@ rdft_select="fft"
> >  # decoders / encoders / hardware accelerators
> >  aac_decoder_select="mdct rdft"
> >  aac_encoder_select="mdct"
> > +aac_latm_decoder_select="aac_decoder"
> >  ac3_decoder_select="mdct ac3_parser"
> >  alac_encoder_select="lpc"
> >  amrnb_decoder_select="lsp"
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 385ae02..bd5f041 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -52,6 +52,7 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
> >                                            aacpsy.o aactab.o      \
> >                                            psymodel.o iirfilter.o \
> >                                            mpeg4audio.o
> > +OBJS-$(CONFIG_AAC_LATM_DECODER)        += aaclatmdec.o
> >  OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
> >  OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o
> >  OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc.o ac3tab.o ac3.o
> > @@ -576,6 +577,7 @@ OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o h264.o            \
> >                                            h264_loopfilter.o h264_cabac.o \
> >                                            h264_cavlc.o h264_ps.o \
> >                                            mpegvideo.o error_resilience.o
> > +OBJS-$(CONFIG_AAC_LATM_PARSER)         += latm_parser.o
> >  OBJS-$(CONFIG_MJPEG_PARSER)            += mjpeg_parser.o
> >  OBJS-$(CONFIG_MLP_PARSER)              += mlp_parser.o mlp.o
> >  OBJS-$(CONFIG_MPEG4VIDEO_PARSER)       += mpeg4video_parser.o h263.o \
> > diff --git a/libavcodec/aaclatmdec.c b/libavcodec/aaclatmdec.c
> > new file mode 100644
> > index 0000000..06f3a35
> > --- /dev/null
> > +++ b/libavcodec/aaclatmdec.c
> > @@ -0,0 +1,438 @@
> > +/*
> > + * AAC LATM decoder
> > + * Copyright (c) 2008-2010 Paul Kendall <paul at kcbbs.gen.nz>
> > + * Copyright (c) 2010      Janne Grunau <janne-ffmpeg at jannau.net>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * AAC LATM decoder
> > + * @author Paul Kendall <paul at kcbbs.gen.nz>
> > + * @author Janne Grunau <janne-ffmpeg at jannau.net>
> > + */
> > +
> > +/*
> > +    Note: This decoder filter is intended to decode LATM streams transferred
> > +    in MPEG transport streams which only contain one program.
> > +    To do a more complex LATM demuxing a separate LATM demuxer should be used.
> > +*/
> > +
> > +#include "get_bits.h"
> > +#include "dsputil.h"
> > +
> > +#include "aac.h"
> > +#include "aacdectab.h"
> > +#include "mpeg4audio.h"
> > +
> > +#include "libavutil/avassert.h"
> > +
> 
> > +#define LOAS_SYNC_WORD   0x2b7       // 11 bits
> 
> doxy
> 

done

> > +#define MAX_SIZE         8*1024
> > +
> > +struct LATMContext
> > +{
> > +    AACContext      aac_ctx;
> > +    AVCodec        *aac_codec;
> 
> > +    uint8_t         initialized;
> 
> why uint8_t ?

changed to int

> > +
> > +    // parser data
> > +    uint8_t         audio_mux_version_A;
> > +    uint8_t         same_time_framing;
> > +    uint8_t         frame_length_type;
> > +    uint32_t        frame_length;
> 
> maybe a bit of documentation would be a good idea

done

> > +};
> > +
> > +static inline int64_t latm_get_value(GetBitContext *b)
> > +{
> > +    uint8_t bytesForValue = get_bits(b, 2);
> 
> why uint8_t ?

chnaged to int

> > +    int64_t value = 0;
> 
> inst unsigned int enough?

yes, changed

> > +    int i;
> > +    for (i=0; i<=bytesForValue; i++) {
> > +        value <<= 8;
> > +        value |= get_bits(b, 8);
> > +    }
> > +    return value;
> > +}
> > +
> > +// copied from libavcodec/mpeg4audio.c
> > +static av_always_inline unsigned int copy_bits(PutBitContext *pb,
> > +                                               GetBitContext *gb, int bits)
> > +{
> > +    unsigned int el = get_bits(gb, bits);
> > +    put_bits(pb, bits, el);
> > +    return el;
> > +}
> > +
> 
> > +static void latm_read_ga_specific_config(int audio_object_type,
> > +                                         MPEG4AudioConfig *c,
> > +                                         GetBitContext *gb, PutBitContext *pb)
> > +{
> > +    int ext_flag;
> > +
> > +    copy_bits(pb, gb, 1);                        // framelen_flag
> > +    if (copy_bits(pb, gb, 1))                    // depends_on_coder
> > +        copy_bits(pb, gb, 14);                   // delay
> > +    ext_flag = copy_bits(pb, gb, 1);
> > +
> > +    if (!c->chan_config)
> > +        ff_copy_pce_data(pb, gb);                // program_config_element
> > +
> > +    if (audio_object_type == AOT_AAC_SCALABLE ||
> > +        audio_object_type == AOT_ER_AAC_SCALABLE)
> > +        copy_bits(pb, gb, 3);                    // layer number
> > +
> > +    if (!ext_flag)
> > +        return;
> > +
> > +    if (audio_object_type == AOT_ER_BSAC) {
> > +        copy_bits(pb, gb, 5);                    // numOfSubFrame
> > +        copy_bits(pb, gb, 11);                   // layer_length
> > +    } else if (audio_object_type == AOT_ER_AAC_LC ||
> > +               audio_object_type == AOT_ER_AAC_LTP ||
> > +               audio_object_type == AOT_ER_AAC_SCALABLE ||
> > +               audio_object_type == AOT_ER_AAC_LD)
> > +        copy_bits(pb, gb, 3);                    // stuff
> > +    copy_bits(pb, gb, 1);                        // extflag3
> > +}
> 
> isnt it easier to parse the whole normally (no copy) and then after parsing
> meassure how much was read and copy that amount ?

Yes, makes more sense, it was copied from the demuxer without rethinking
the approach.

> also it does not look like all uses of copy_bits are speed relevant as they
> write in extradata (a one time thing ...) thus marking it always_inline
> seems a bad idea

unfortunately not a one time thing. the multiplex repeats the config for
random access and nothing prevents it from changing. Not that I would
expect it to change regularly in real streams.

> > +static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
> > +                                             GetBitContext *gb)
> > +{
> > +    PutBitContext pb;
> > +    int32_t esize, bits_consumed;
> > +    uint8_t extradata[32+FF_INPUT_BUFFER_PADDING_SIZE];
> > +    AVCodecContext *avctx = latmctx->aac_ctx.avctx;
> > +
> > +    init_put_bits(&pb, extradata, 32 * 8);
> > +
> > +    bits_consumed = latm_read_audio_specific_config(gb, &pb);
> 
> that feels like asking for a stack overflow (if one copies more in on of these
> functions), besides that, it can crash due
> to insufficent alignment on some cpus with some bit writers
> 
> and 32 should be a named constant and it should be documented how much each
> function is allowed to write.

changed to parse the config first and count consumed bits, av_malloc the
right amount of extradata and copy then.

> > +
> > +    if (bits_consumed < 0)
> > +        return AVERROR_INVALIDDATA;
> > +
> > +    esize = (bits_consumed+7) / 8;
> > +
> > +    if (avctx->extradata_size != esize) {
> > +        av_free(avctx->extradata);
> > +        avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
> > +        if (!avctx->extradata)
> > +            return AVERROR(ENOMEM);
> > +
> > +        avctx->extradata_size = esize;
> > +        memcpy(avctx->extradata, extradata, esize);
> > +        memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
> > +    }
> 
> it seems easier to alloc just once and write into this, the 15 extra bytes
> allocated surely are smaller than the 2 stage alloc + copy

yes, changed.

> > +static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size,
> > +                             AVPacket *avpkt)
> > +{
> > +    struct LATMContext *latmctx = avctx->priv_data;
> > +    uint8_t            *tmp, tmpbuf[MAX_SIZE];
> > +    int                 ret, bufsize = MAX_SIZE;
> > +    GetBitContext       gb;
> > +
> > +    if(avpkt->size == 0)
> > +        return 0;
> > +
> > +    init_get_bits(&gb, avpkt->data, avpkt->size * 8);
> > +
> > +    ret = read_audio_sync_stream(latmctx, &gb, avpkt->size, tmpbuf, &bufsize);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    if (!latmctx->initialized) {
> > +        if (!avctx->extradata) {
> > +            *out_size = 0;
> > +            return avpkt->size;
> > +        } else {
> 
> > +            av_assert0(latmctx->aac_codec->init);
> > +            ret = latmctx->aac_codec->init(avctx);
> 
> that assert is pointless, abort or null pointer exception is pretty much equally
> easy to debug

switched to using the functions from aacdec.c directly. exporting the
decoding function was required for avoiding aac bitstream copying. init
and close are exported too.

> > +            if (ret < 0)
> > +                return ret;
> > +            latmctx->initialized = 1;
> > +        }
> > +    }
> > +
> > +    tmp         = avpkt->data;
> > +    avpkt->data = tmpbuf;
> > +    avpkt->size = bufsize;
> > +
> > +    av_assert0(latmctx->aac_codec->decode);
> > +    ret = latmctx->aac_codec->decode(avctx, out, out_size, avpkt);
> > +    avpkt->data = tmp;
> > +    return ret;
> > +}
> > +
> 
> 
> > +static int latm_decode_init(AVCodecContext *avctx)
> > +{
> > +    struct LATMContext *latmctx = avctx->priv_data;
> > +    int ret;
> > +
> > +    latmctx->aac_codec = avcodec_find_decoder_by_name("aac");
> 
> that seems unneeded, the AVCodec can be accessed directly

see above

thanks for the review. I'll resend the patch later today.

Janne