[FFmpeg-devel] [PATCH v2] avcodec/rdft: remove sintable

Fri Jul 7 10:50:13 EEST 2017

On Thu, Jul 6, 2017 at 4:18 PM, Muhammad Faiz <mfcc64 at gmail.com> wrote:
> It is redundant with costable. The first half of sintable is
> identical to the second half of costable. The second half
> of sintable is the negation of the first half of sintable.
>
> The computation is changed to handle the sign of the sin values.
>
> Signed-off-by: Muhammad Faiz <mfcc64 at gmail.com>
> ---
>  libavcodec/Makefile        |  3 +-
>  libavcodec/arm/rdft_neon.S | 20 ++++++++------
>  libavcodec/rdft.c          | 68 ++++++++++++++++------------------------------
>  libavcodec/rdft.h          | 26 ++----------------
>  4 files changed, 39 insertions(+), 78 deletions(-)
>
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index b440a00..59029a8 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV)                     += qsv.o
>  OBJS-$(CONFIG_QSVDEC)                  += qsvdec.o
>  OBJS-$(CONFIG_QSVENC)                  += qsvenc.o
>  OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
> -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
> -OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
> +OBJS-$(CONFIG_RDFT)                    += rdft.o
>  OBJS-$(CONFIG_RV34DSP)                 += rv34dsp.o
>  OBJS-$(CONFIG_SHARED)                  += log2_tab.o reverse.o
>  OBJS-$(CONFIG_SINEWIN)                 += sinewin.o sinewin_fixed.o
> diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
> index 781d976..3bea8b4 100644
> --- a/libavcodec/arm/rdft_neon.S
> +++ b/libavcodec/arm/rdft_neon.S
> @@ -22,7 +22,7 @@
>  #include "libavutil/arm/asm.S"
>
>  function ff_rdft_calc_neon, export=1
> -        push            {r4-r8,lr}
> +        push            {r4-r9,lr}
>
>          ldr             r6,  [r0, #4]           @ inverse
>          mov             r4,  r0
> @@ -30,9 +30,9 @@ function ff_rdft_calc_neon, export=1
>
>          lsls            r6,  r6,  #31
>          bne             1f
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          bl              X(ff_fft_permute_neon)
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          mov             r1,  r5
>          bl              X(ff_fft_calc_neon)
>  1:
> @@ -46,8 +46,10 @@ function ff_rdft_calc_neon, export=1
>          sub             r12, r12, #2
>          ldr             r3,  [r4, #16]          @ tsin
>          mov             r7,  r0
> +        ldr             r9,  [r4, #20]          @ negative_sin
>          sub             r1,  r1,  #8
>          mov             lr,  r1
> +        lsl             r9,  r9,  #31
>          mov             r8,  #-8
>          vld1.32         {d0},     [r0,:64]!     @ d1[0,1]
>          vld1.32         {d1},     [r1,:64], r8  @ d2[0,1]
> @@ -61,8 +63,10 @@ function ff_rdft_calc_neon, export=1
>          vmov.i32        d17, #1<<31
>          pld             [r1, #-32]
>          vtrn.32         d16, d17
> +        vdup.32         d16, r9
>          pld             [r2, #32]
> -        vrev64.32       d16, d16                @ d16=1,0 d17=0,1
> +        veor            d17, d16, d17
> +        vrev64.32       d16, d17                @ negative_sin ? d16=0,1 d17=1,0 : d16=1,0 d17=0,1
>          pld             [r3, #32]
>  2:
>          veor            q1,  q0,  q8            @ -d1[0],d1[1], d2[0],-d2[1]
> @@ -136,15 +140,15 @@ function ff_rdft_calc_neon, export=1
>
>          cmp             r6,  #0
>          it              eq
> -        popeq           {r4-r8,pc}
> +        popeq           {r4-r9,pc}
>
>          vmul.f32        d22, d22, d18
>          vst1.32         {d22},    [r5,:64]
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          mov             r1,  r5
>          bl              X(ff_fft_permute_neon)
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          mov             r1,  r5
> -        pop             {r4-r8,lr}
> +        pop             {r4-r9,lr}
>          b               X(ff_fft_calc_neon)
>  endfunc
> diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c
> index c318aa8..194e0bc 100644
> --- a/libavcodec/rdft.c
> +++ b/libavcodec/rdft.c
> @@ -28,28 +28,6 @@
>   * (Inverse) Real Discrete Fourier Transforms.
>   */
>
> -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
> -#if !CONFIG_HARDCODED_TABLES
> -SINTABLE(16);
> -SINTABLE(32);
> -SINTABLE(64);
> -SINTABLE(128);
> -SINTABLE(256);
> -SINTABLE(512);
> -SINTABLE(1024);
> -SINTABLE(2048);
> -SINTABLE(4096);
> -SINTABLE(8192);
> -SINTABLE(16384);
> -SINTABLE(32768);
> -SINTABLE(65536);
> -#endif
> -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
> -    NULL, NULL, NULL, NULL,
> -    ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
> -    ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
> -};
> -
>  /** Map one real FFT into two parallel real even and odd FFTs. Then interleave
>   * the two real FFTs into one complex FFT. Unmangle the results.
>   * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
> @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data)
>      ev.re = data[0];
>      data[0] = ev.re+data[1];
>      data[1] = ev.re-data[1];
> -    for (i = 1; i < (n>>2); i++) {
> -        i1 = 2*i;
> -        i2 = n-i1;
> -        /* Separate even and odd FFTs */
> -        ev.re =  k1*(data[i1  ]+data[i2  ]);
> -        od.im = -k2*(data[i1  ]-data[i2  ]);
> -        ev.im =  k1*(data[i1+1]-data[i2+1]);
> -        od.re =  k2*(data[i1+1]+data[i2+1]);
> -        /* Apply twiddle factors to the odd FFT and add to the even FFT */
> -        data[i1  ] =  ev.re + od.re*tcos[i] - od.im*tsin[i];
> -        data[i1+1] =  ev.im + od.im*tcos[i] + od.re*tsin[i];
> -        data[i2  ] =  ev.re - od.re*tcos[i] + od.im*tsin[i];
> -        data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
> +
> +#define RDFT_UNMANGLE(sign0, sign1)                                         \
> +    for (i = 1; i < (n>>2); i++) {                                          \
> +        i1 = 2*i;                                                           \
> +        i2 = n-i1;                                                          \
> +        /* Separate even and odd FFTs */                                    \
> +        ev.re =  k1*(data[i1  ]+data[i2  ]);                                \
> +        od.im = -k2*(data[i1  ]-data[i2  ]);                                \
> +        ev.im =  k1*(data[i1+1]-data[i2+1]);                                \
> +        od.re =  k2*(data[i1+1]+data[i2+1]);                                \
> +        /* Apply twiddle factors to the odd FFT and add to the even FFT */  \
> +        data[i1  ] =  ev.re + od.re*tcos[i] sign0 od.im*tsin[i];            \
> +        data[i1+1] =  ev.im + od.im*tcos[i] sign1 od.re*tsin[i];            \
> +        data[i2  ] =  ev.re - od.re*tcos[i] sign1 od.im*tsin[i];            \
> +        data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i];            \
> +    }
> +
> +    if (s->negative_sin) {
> +        RDFT_UNMANGLE(+,-)
> +    } else {
> +        RDFT_UNMANGLE(-,+)
>      }
> +
>      data[2*i+1]=s->sign_convention*data[2*i+1];
>      if (s->inverse) {
>          data[0] *= k1;
> @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
>      s->nbits           = nbits;
>      s->inverse         = trans == IDFT_C2R || trans == DFT_C2R;
>      s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
> +    s->negative_sin    = trans == DFT_C2R || trans == DFT_R2C;
>
>      if (nbits < 4 || nbits > 16)
>          return AVERROR(EINVAL);
> @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
>
>      ff_init_ff_cos_tabs(nbits);
>      s->tcos = ff_cos_tabs[nbits];
> -    s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
> -#if !CONFIG_HARDCODED_TABLES
> -    {
> -        int i;
> -        const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n;
> -        for (i = 0; i < (n >> 2); i++)
> -            s->tsin[i] = sin(i * theta);
> -    }
> -#endif
> +    s->tsin = ff_cos_tabs[nbits] + (n >> 2);
>      s->rdft_calc   = rdft_calc_c;
>
>      if (ARCH_ARM) ff_rdft_init_arm(s);
> diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
> index 37c40e7..ffafca7 100644
> --- a/libavcodec/rdft.h
> +++ b/libavcodec/rdft.h
> @@ -25,29 +25,6 @@
>  #include "config.h"
>  #include "fft.h"
>
> -#if CONFIG_HARDCODED_TABLES
> -#   define SINTABLE_CONST const
> -#else
> -#   define SINTABLE_CONST
> -#endif
> -
> -#define SINTABLE(size) \
> -    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
> -
> -extern SINTABLE(16);
> -extern SINTABLE(32);
> -extern SINTABLE(64);
> -extern SINTABLE(128);
> -extern SINTABLE(256);
> -extern SINTABLE(512);
> -extern SINTABLE(1024);
> -extern SINTABLE(2048);
> -extern SINTABLE(4096);
> -extern SINTABLE(8192);
> -extern SINTABLE(16384);
> -extern SINTABLE(32768);
> -extern SINTABLE(65536);
> -
>  struct RDFTContext {
>      int nbits;
>      int inverse;
> @@ -55,7 +32,8 @@ struct RDFTContext {
>
>      /* pre/post rotation tables */
>      const FFTSample *tcos;
> -    SINTABLE_CONST FFTSample *tsin;
> +    const FFTSample *tsin;
> +    int negative_sin;
>      FFTContext fft;
>      void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
>  };
> --
> 2.9.3
>

This breaks FATE, so the patch is dropped. I will post a new patch.

Thanks