[FFmpeg-devel] [PATCH] avcodec/dcaenc: Use ffmpeg mdct instead of own implementation

Sat Jan 13 02:10:18 EET 2018

New patch attached.

Thanks.

On Sat, Jan 13, 2018 at 2:37 AM, James Almer <jamrial at gmail.com> wrote:

> On 1/12/2018 8:12 PM, Даниил Чередник wrote:
> > Historically, dcaenc has used its own implementation of the time->frequency
> > transformation used by the psychoacoustic model. But the function named fft
> > in the original dcaenc code is not actually an FFT. Its power spectrum looks
> > similar to that of an MDCT, and Alexander E. Patrakov told me it is an MDCT.
> > But to me it is still a bit strange, because of the output size and the
> > absence of phase-shift sensitivity. I was thinking about MCLT, but again,
> > the result of the original function's transformation was different. So I
> > decided to use the ffmpeg mdct transformation here.
> >
> >
> > Results:
> >
> > I could not hear any difference between the original and the modified version.
> >
> > I got an approximately 10% performance boost.
>
>
> > From 39e7f15886f1c083f3a3d37d52778882c8949a93 Mon Sep 17 00:00:00 2001
> > From: Daniil Cherednik <dan.cherednik at gmail.com>
> > Date: Sun, 7 Jan 2018 22:39:22 +0000
> > Subject: [PATCH] avcodec/dcaenc: Use ffmpeg mdct instead of own
> implementation
> >
> > Signed-off-by: Daniil Cherednik <dan.cherednik at gmail.com>
> > ---
> >  libavcodec/dcaenc.c   | 107 ++++++++++++++----------------
> --------------------
> >  tests/fate/acodec.mak |   4 +-
> >  2 files changed, 32 insertions(+), 79 deletions(-)
> >
> > diff --git a/libavcodec/dcaenc.c b/libavcodec/dcaenc.c
> > index dd601ffae0..b924c58185 100644
> > --- a/libavcodec/dcaenc.c
> > +++ b/libavcodec/dcaenc.c
> > @@ -21,6 +21,9 @@
> >   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> >   */
> >
> > +#define FFT_FLOAT 0
> > +#define FFT_FIXED_32 1
> > +
> >  #include "libavutil/avassert.h"
> >  #include "libavutil/channel_layout.h"
> >  #include "libavutil/common.h"
> > @@ -33,6 +36,7 @@
> >  #include "dca_core.h"
> >  #include "dcadata.h"
> >  #include "dcaenc.h"
> > +#include "fft.h"
> >  #include "internal.h"
> >  #include "mathops.h"
> >  #include "put_bits.h"
> > @@ -56,6 +60,7 @@ typedef struct DCAEncContext {
> >      AVClass *class;
> >      PutBitContext pb;
> >      DCAADPCMEncContext adpcm_ctx;
> > +    FFTContext mdct;
> >      CompressionOptions options;
> >      int frame_size;
> >      int frame_bits;
> > @@ -154,6 +159,7 @@ static int encode_init(AVCodecContext *avctx)
> >      DCAEncContext *c = avctx->priv_data;
> >      uint64_t layout = avctx->channel_layout;
> >      int i, j, min_frame_bits;
> > +    int rv;
>
> We normally use ret for variables meant to hold a return value.
>
> >
> >      if (subband_bufer_alloc(c))
> >          return AVERROR(ENOMEM);
> > @@ -231,6 +237,9 @@ static int encode_init(AVCodecContext *avctx)
> >
> >      avctx->frame_size = 32 * SUBBAND_SAMPLES;
> >
> > +    if ((rv = ff_mdct_init(&c->mdct, 9, 0, 1.0)) < 0)
> > +        return rv;
> > +
> >      if (!cos_table[0]) {
> >          int j, k;
> >
> > @@ -297,6 +306,7 @@ static av_cold int encode_close(AVCodecContext
> *avctx)
> >  {
> >      if (avctx->priv_data) {
> >          DCAEncContext *c = avctx->priv_data;
> > +        ff_mdct_end(&c->mdct);
> >          subband_bufer_free(c);
> >          ff_dcaadpcm_free(&c->adpcm_ctx);
> >      }
> > @@ -398,78 +408,6 @@ static void lfe_downsample(DCAEncContext *c, const
> int32_t *input)
> >      }
> >  }
> >
> > -typedef struct {
> > -    int32_t re;
> > -    int32_t im;
> > -} cplx32;
> > -
> > -static void fft(const int32_t in[2 * 256], cplx32 out[256])
> > -{
> > -    cplx32 buf[256], rin[256], rout[256];
> > -    int i, j, k, l;
> > -
> > -    /* do two transforms in parallel */
> > -    for (i = 0; i < 256; i++) {
> > -        /* Apply the Hann window */
> > -        rin[i].re = mul32(in[2 * i], 0x3fffffff - (cos_t(8 * i + 2) >>
> 1));
> > -        rin[i].im = mul32(in[2 * i + 1], 0x3fffffff - (cos_t(8 * i + 6)
> >> 1));
> > -    }
> > -    /* pre-rotation */
> > -    for (i = 0; i < 256; i++) {
> > -        buf[i].re = mul32(cos_t(4 * i + 2), rin[i].re)
> > -                  - mul32(sin_t(4 * i + 2), rin[i].im);
> > -        buf[i].im = mul32(cos_t(4 * i + 2), rin[i].im)
> > -                  + mul32(sin_t(4 * i + 2), rin[i].re);
> > -    }
> > -
> > -    for (j = 256, l = 1; j != 1; j >>= 1, l <<= 1) {
> > -        for (k = 0; k < 256; k += j) {
> > -            for (i = k; i < k + j / 2; i++) {
> > -                cplx32 sum, diff;
> > -                int t = 8 * l * i;
> > -
> > -                sum.re = buf[i].re + buf[i + j / 2].re;
> > -                sum.im = buf[i].im + buf[i + j / 2].im;
> > -
> > -                diff.re = buf[i].re - buf[i + j / 2].re;
> > -                diff.im = buf[i].im - buf[i + j / 2].im;
> > -
> > -                buf[i].re = half32(sum.re);
> > -                buf[i].im = half32(sum.im);
> > -
> > -                buf[i + j / 2].re = mul32(diff.re, cos_t(t))
> > -                                  - mul32(diff.im, sin_t(t));
> > -                buf[i + j / 2].im = mul32(diff.im, cos_t(t))
> > -                                  + mul32(diff.re, sin_t(t));
> > -            }
> > -        }
> > -    }
> > -    /* post-rotation */
> > -    for (i = 0; i < 256; i++) {
> > -        int b = ff_reverse[i];
> > -        rout[i].re = mul32(buf[b].re, cos_t(4 * i))
> > -                   - mul32(buf[b].im, sin_t(4 * i));
> > -        rout[i].im = mul32(buf[b].im, cos_t(4 * i))
> > -                   + mul32(buf[b].re, sin_t(4 * i));
> > -    }
> > -    for (i = 0; i < 256; i++) {
> > -        /* separate the results of the two transforms */
> > -        cplx32 o1, o2;
> > -
> > -        o1.re =  rout[i].re - rout[255 - i].re;
> > -        o1.im =  rout[i].im + rout[255 - i].im;
> > -
> > -        o2.re =  rout[i].im - rout[255 - i].im;
> > -        o2.im = -rout[i].re - rout[255 - i].re;
> > -
> > -        /* combine them into one long transform */
> > -        out[i].re = mul32( o1.re + o2.re, cos_t(2 * i + 1))
> > -                  + mul32( o1.im - o2.im, sin_t(2 * i + 1));
> > -        out[i].im = mul32( o1.im + o2.im, cos_t(2 * i + 1))
> > -                  + mul32(-o1.re + o2.re, sin_t(2 * i + 1));
> > -    }
> > -}
> > -
> >  static int32_t get_cb(int32_t in)
> >  {
> >      int i, res;
> > @@ -494,21 +432,36 @@ static int32_t add_cb(int32_t a, int32_t b)
> >      return a + cb_to_add[a - b];
> >  }
> >
> > -static void adjust_jnd(int samplerate_index,
> > +static void calc_power(DCAEncContext *c,
> > +                       const int32_t in[2 * 256], int32_t power[256])
> > +{
> > +    int i;
> > +    DECLARE_ALIGNED(32, int32_t, data)[512];
> > +    DECLARE_ALIGNED(32, int32_t, coeff)[256];
>
> LOCAL_ALIGNED_32(int32_t, data,  [512]);
> LOCAL_ALIGNED_32(int32_t, coeff, [256]);
>
> > +    for (i = 0; i < 512; i++) {
> > +        data[i] = norm__(mul32(in[i], 0x3fffffff - (cos_t(4 * i + 2) >>
> 1)), 4);
> > +    }
> > +    c->mdct.mdct_calc(&c->mdct, coeff, data);
> > +    for (i = 0; i < 256; i++) {
> > +        const int32_t cb = get_cb(coeff[i]);
> > +        power[i] = add_cb(cb, cb);
> > +    }
> > +}
> > +
> > +static void adjust_jnd(DCAEncContext *c,
> >                         const int32_t in[512], int32_t out_cb[256])
> >  {
> >      int32_t power[256];
> > -    cplx32 out[256];
> >      int32_t out_cb_unnorm[256];
> >      int32_t denom;
> >      const int32_t ca_cb = -1114;
> >      const int32_t cs_cb = 928;
> > +    const int samplerate_index = c->samplerate_index;
> >      int i, j;
> >
> > -    fft(in, out);
> > +    calc_power(c, in, power);
> >
> >      for (j = 0; j < 256; j++) {
> > -        power[j] = add_cb(get_cb(out[j].re), get_cb(out[j].im));
> >          out_cb_unnorm[j] = -2047; /* and can only grow */
> >      }
> >
> > @@ -586,7 +539,7 @@ static void calc_masking(DCAEncContext *c, const
> int32_t *input)
> >                  data[i] = c->history[ch][k];
> >              for (k -= 512; i < 512; i++, k++)
> >                  data[i] = input[k * c->channels + chi];
> > -            adjust_jnd(c->samplerate_index, data,
> c->masking_curve_cb[ssf]);
> > +            adjust_jnd(c, data, c->masking_curve_cb[ssf]);
> >          }
> >      for (i = 0; i < 256; i++) {
> >          int32_t m = 2048;
> > diff --git a/tests/fate/acodec.mak b/tests/fate/acodec.mak
> > index 5c3fea90c5..80d26de0f9 100644
> > --- a/tests/fate/acodec.mak
> > +++ b/tests/fate/acodec.mak
> > @@ -104,14 +104,14 @@ fate-acodec-dca: tests/data/asynth-44100-2.wav
> >  fate-acodec-dca: SRC = tests/data/asynth-44100-2.wav
> >  fate-acodec-dca: CMD = md5 -i $(TARGET_PATH)/$(SRC) -c:a dca -strict -2
> -f dts -flags +bitexact
> >  fate-acodec-dca: CMP = oneline
> > -fate-acodec-dca: REF = 7cd79a3717943a06b217f1130223a86f
> > +fate-acodec-dca: REF = 2aa580ac67820fce4f581b96ebb34acc
> >
> >  FATE_ACODEC-$(call ENCDEC, DCA, WAV) += fate-acodec-dca2
> >  fate-acodec-dca2: CMD = enc_dec_pcm dts wav s16le $(SRC) -c:a dca
> -strict -2 -flags +bitexact
> >  fate-acodec-dca2: REF = $(SRC)
> >  fate-acodec-dca2: CMP = stddev
> >  fate-acodec-dca2: CMP_SHIFT = -2048
> > -fate-acodec-dca2: CMP_TARGET = 527
> > +fate-acodec-dca2: CMP_TARGET = 535
> >  fate-acodec-dca2: SIZE_TOLERANCE = 1632
> >
> >  FATE_ACODEC-$(call ENCDEC, FLAC, FLAC) += fate-acodec-flac
> fate-acodec-flac-exact-rice
> > --
> > 2.13.5
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

-- 
Daniil Cherednik
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-avcodec-dcaenc-Use-ffmpeg-mdct-instead-of-own-implem.patch
Type: application/octet-stream
Size: 7384 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20180113/e87e0a61/attachment.obj>