[FFmpeg-devel] [PATCH 03/14] libavcodec/libavutil: Implementation of AAC_fixed_decoder (LC-module) [3/4]

Sat May 16 02:12:17 CEST 2015

On Thu, May 14, 2015 at 03:33:55PM +0200, Nedeljko Babic wrote:
> From: Djordje Pesut <djordje.pesut at imgtec.com>
> 
> Add fixed point implementation
> 
> Signed-off-by: Nedeljko Babic <nedeljko.babic at imgtec.com>
> ---
>  libavcodec/aac.h             | 101 ++++++++--
>  libavcodec/aacdec.c          |   5 +
>  libavcodec/aacdec_fixed.c    | 446 +++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/aacdec_template.c | 433 ++++++++++++++++++++++++++++-------------
>  libavcodec/lpc.h             |  15 +-
>  libavcodec/mdct_template.c   |   5 +

>  libavutil/fixed_dsp.c        |  70 ++++++-
>  libavutil/fixed_dsp.h        |  53 +++++

the changes to libavutil should be split into a separate patch

>  8 files changed, 965 insertions(+), 163 deletions(-)
>  create mode 100644 libavcodec/aacdec_fixed.c
> 
> diff --git a/libavcodec/aac.h b/libavcodec/aac.h
> index 23ec085..571f8b9 100644
> --- a/libavcodec/aac.h
> +++ b/libavcodec/aac.h
> @@ -30,9 +30,62 @@
>  #ifndef AVCODEC_AAC_H
>  #define AVCODEC_AAC_H
>  
> +#ifndef USE_FIXED
> +#define USE_FIXED 0
> +#endif
> +
> +#if USE_FIXED
> +
> +#define FFT_FLOAT    0
> +#define FFT_FIXED_32 1
> +
> +#define AAC_RENAME(x)       x ## _fixed
> +#define AAC_RENAME_32(x)    x ## _fixed_32
> +#define INTFLOAT int
> +#define SHORTFLOAT int16_t

> +#define AAC_FLOAT aac_float_t

I think *_t is reserved in POSIX to some extent

[...]

> +#include <assert.h>

we use av_assert*()

> +#include <errno.h>

is that really used?

[...]
> +/**
> + * Apply dependent channel coupling (applied before IMDCT).
> + *
> + * @param   index   index into coupling gain array
> + */
> +static void apply_dependent_coupling_fixed(AACContext *ac,
> +                                     SingleChannelElement *target,
> +                                     ChannelElement *cce, int index)
> +{
> +    IndividualChannelStream *ics = &cce->ch[0].ics;
> +    const uint16_t *offsets = ics->swb_offset;
> +    int *dest = target->coeffs;
> +    const int *src = cce->ch[0].coeffs;
> +    int g, i, group, k, idx = 0;
> +    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
> +        av_log(ac->avctx, AV_LOG_ERROR,
> +               "Dependent coupling is not supported together with LTP\n");
> +        return;
> +    }

> +    for (g = 0; g < ics->num_window_groups; g++) {
> +        for (i = 0; i < ics->max_sfb; i++, idx++) {
> +            if (cce->ch[0].band_type[idx] != ZERO_BT) {
> +                const int gain = cce->coup.gain[index][idx];
> +                int shift, round, c, tmp;
> +
> +                if (gain < 0) {
> +                    c = -cce_scale_fixed[-gain & 7];
> +                    shift = (-gain-1024) >> 3;
> +                }
> +                else {
> +                    c = cce_scale_fixed[gain & 7];
> +                    shift = (gain-1024) >> 3;
> +                }
> +
> +                if (shift < 0) {
> +                  shift = -shift;
> +                  round = 1 << (shift - 1);
> +
> +                  for (group = 0; group < ics->group_len[g]; group++) {
> +                      for (k = offsets[i]; k < offsets[i + 1]; k++) {
> +                          tmp = (int)(((int64_t)src[group * 128 + k] * c + \
> +                                       (int64_t)0x1000000000) >> 37);
> +                          dest[group * 128 + k] += (tmp + round) >> shift;
> +                      }
> +                  }
> +                }

there is something wrong with the indentation level

[...]
>      memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
> @@ -2341,22 +2472,27 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce)
>  static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
>  {
>      IndividualChannelStream *ics = &sce->ics;
> -    float *in    = sce->coeffs;
> -    float *out   = sce->ret;
> -    float *saved = sce->saved;
> -    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
> -    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
> -    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
> -    float *buf  = ac->buf_mdct;
> -    float *temp = ac->temp;
> +    INTFLOAT *in    = sce->coeffs;
> +    INTFLOAT *out   = sce->ret;
> +    INTFLOAT *saved = sce->saved;
> +    const INTFLOAT *swindow      = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
> +    const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024);
> +    const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
> +    INTFLOAT *buf  = ac->buf_mdct;
> +    INTFLOAT *temp = ac->temp;
>      int i;
>  
>      // imdct
>      if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
>          for (i = 0; i < 1024; i += 128)
>              ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
> -    } else
> +    } else {
>          ac->mdct.imdct_half(&ac->mdct, buf, in);
> +#if USE_FIXED
> +        for (i=0; i<1024; i++)
> +          buf[i] = (buf[i] + 4) >> 3;
> +#endif /* USE_FIXED */
> +    }
>  
>      /* window overlapping
>       * NOTE: To simplify the overlapping code, all 'meaningless' short to long
> @@ -2368,7 +2504,7 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
>              (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
>          ac->fdsp->vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
>      } else {
> -        memcpy(                         out,               saved,            448 * sizeof(float));
> +        memcpy(                         out,               saved,            448 * sizeof(INTFLOAT));
>  
>          if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
>              ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
> @@ -2376,65 +2512,71 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
>              ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
>              ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
>              ac->fdsp->vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
> -            memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(float));
> +            memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(INTFLOAT));
>          } else {
>              ac->fdsp->vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
> -            memcpy(                     out + 576,         buf + 64,         448 * sizeof(float));
> +            memcpy(                     out + 576,         buf + 64,         448 * sizeof(INTFLOAT));
>          }
>      }
>  

>      // buffer update
>      if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
> -        memcpy(                     saved,       temp + 64,         64 * sizeof(float));
> +        memcpy(                     saved,       temp + 64,         64 * sizeof(INTFLOAT));
>          ac->fdsp->vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
>          ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
>          ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
> -        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
> +        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(INTFLOAT));
>      } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
> -        memcpy(                     saved,       buf + 512,        448 * sizeof(float));
> -        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
> +        memcpy(                     saved,       buf + 512,        448 * sizeof(INTFLOAT));
> +        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(INTFLOAT));
>      } else { // LONG_STOP or ONLY_LONG
> -        memcpy(                     saved,       buf + 512,        512 * sizeof(float));
> +        memcpy(                     saved,       buf + 512,        512 * sizeof(INTFLOAT));

these should use sizeof(*saved) or something similar, not sizeof(INTFLOAT),
so as to make the code more robust

[...]

> diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h
> index ff6f365..73859c0 100644
> --- a/libavutil/fixed_dsp.h
> +++ b/libavutil/fixed_dsp.h
> @@ -54,6 +54,8 @@
>  #include "libavcodec/mathops.h"
>  
>  typedef struct AVFixedDSPContext {
> +    /* assume len is a multiple of 16, and arrays are 32-byte aligned */
> +
>      /**
>       * Overlap/add with window function.
>       * Used primarily by MDCT-based audio codecs.
> @@ -92,6 +94,57 @@ typedef struct AVFixedDSPContext {
>       */
>      void (*vector_fmul_window)(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len);
>  
> +    /**
> +     * Fixed-point multiplication that calculates the product of two vectors of
> +     * integers and stores the result in a vector of integers.
> +     *
> +     * @param dst  output vector
> +     *             constraints: 32-byte aligned
> +     * @param src0 first input vector
> +     *             constraints: 32-byte aligned
> +     * @param src1 second input vector
> +     *             constraints: 32-byte aligned
> +     * @param len  number of elements in the input
> +     *             constraints: multiple of 16
> +     */
> +    void (*vector_fmul)(int *dst, const int *src0, const int *src1,
> +                        int len);
> +
> +    void (*vector_fmul_reverse)(int *dst, const int *src0, const int *src1, int len);
> +    /**
> +     * Calculate the product of two vectors of integers, add a third vector of
> +     * integers and store the result in a vector of integers.
> +     *
> +     * @param dst  output vector
> +     *             constraints: 32-byte aligned
> +     * @param src0 first input vector
> +     *             constraints: 32-byte aligned
> +     * @param src1 second input vector
> +     *             constraints: 32-byte aligned
> +     * @param src2 third input vector
> +     *             constraints: 32-byte aligned
> +     * @param len  number of elements in the input
> +     *             constraints: multiple of 16
> +     */
> +    void (*vector_fmul_add)(int *dst, const int *src0, const int *src1,
> +                            const int *src2, int len);
> +
> +    /**
> +     * Calculate the scalar product of two vectors of integers.
> +     * @param v1  first vector, 16-byte aligned
> +     * @param v2  second vector, 16-byte aligned
> +     * @param len length of vectors, multiple of 4
> +     */
> +    int (*scalarproduct_fixed)(const int *v1, const int *v2, int len);
> +
> +    /**
> +     * Calculate the sum and difference of two vectors of integers.
> +     *
> +     * @param v1  first input vector, sum output, 16-byte aligned
> +     * @param v2  second input vector, difference output, 16-byte aligned
> +     * @param len length of vectors, multiple of 4
> +     */
> +    void (*butterflies_fixed)(int *av_restrict v1, int *av_restrict v2, int len);

these descriptions ignore the 1.0 point / amount of shifting done;
the documentation should be sufficient so that someone could write
a working implementation based on just the documentation

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Concerning the gods, I have no means of knowing whether they exist or not
or of what sort they may be, because of the obscurity of the subject, and
the brevity of human life -- Protagoras
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150516/69b7173d/attachment.asc>