[FFmpeg-devel] [PATCH v3] avcodec/rdft: remove sintable

Tue Jul 11 09:53:54 EEST 2017

On Fri, Jul 7, 2017 at 2:50 PM, Muhammad Faiz <mfcc64 at gmail.com> wrote:
> It is redundant with costable. The first half of sintable is
> identical to the second half of costable. The second half
> of sintable is the negation of the first half of sintable.
>
> The computation is changed to handle the sign of the sin values
> in both the C code and the ARM assembly code.
>
> Signed-off-by: Muhammad Faiz <mfcc64 at gmail.com>
> ---
>  libavcodec/Makefile        |  3 +-
>  libavcodec/arm/rdft_neon.S | 13 ++++++---
>  libavcodec/rdft.c          | 68 ++++++++++++++++------------------------------
>  libavcodec/rdft.h          | 26 ++----------------
>  4 files changed, 36 insertions(+), 74 deletions(-)
>
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index b440a00..59029a8 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV)                     += qsv.o
>  OBJS-$(CONFIG_QSVDEC)                  += qsvdec.o
>  OBJS-$(CONFIG_QSVENC)                  += qsvenc.o
>  OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
> -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
> -OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
> +OBJS-$(CONFIG_RDFT)                    += rdft.o
>  OBJS-$(CONFIG_RV34DSP)                 += rv34dsp.o
>  OBJS-$(CONFIG_SHARED)                  += log2_tab.o reverse.o
>  OBJS-$(CONFIG_SINEWIN)                 += sinewin.o sinewin_fixed.o
> diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
> index 781d976..eabb92b 100644
> --- a/libavcodec/arm/rdft_neon.S
> +++ b/libavcodec/arm/rdft_neon.S
> @@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1
>
>          lsls            r6,  r6,  #31
>          bne             1f
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          bl              X(ff_fft_permute_neon)
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          mov             r1,  r5
>          bl              X(ff_fft_calc_neon)
>  1:
>          ldr             r12, [r4, #0]           @ nbits
>          mov             r2,  #1
> +        ldr             r8,  [r4, #20]          @ negative_sin
>          lsl             r12, r2,  r12
>          add             r0,  r5,  #8
> +        lsl             r8,  r8,  #31
>          add             r1,  r5,  r12, lsl #2
>          lsr             r12, r12, #2
> +        vdup.32         d26, r8
>          ldr             r2,  [r4, #12]          @ tcos
>          sub             r12, r12, #2
>          ldr             r3,  [r4, #16]          @ tsin
> @@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1
>          vld1.32         {d5},     [r3,:64]!     @ tsin[i]
>          vmov.f32        d18, #0.5               @ k1
>          vdup.32         d19, r6
> +        veor            d5,  d26, d5
>          pld             [r0, #32]
>          veor            d19, d18, d19           @ k2
>          vmov.i32        d16, #0
> @@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1
>          vld1.32         {d5},     [r3,:64]!     @  tsin[i]
>          veor            d24, d22, d17           @  ev.re,-ev.im
>          vrev64.32       d3,  d23                @  od.re, od.im
> +        veor            d5, d26, d5
>          pld             [r2, #32]
>          veor            d2,  d3,  d16           @ -od.re, od.im
>          pld             [r3, #32]
> @@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1
>
>          vmul.f32        d22, d22, d18
>          vst1.32         {d22},    [r5,:64]
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          mov             r1,  r5
>          bl              X(ff_fft_permute_neon)
> -        add             r0,  r4,  #20
> +        add             r0,  r4,  #24
>          mov             r1,  r5
>          pop             {r4-r8,lr}
>          b               X(ff_fft_calc_neon)
> diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c
> index c318aa8..194e0bc 100644
> --- a/libavcodec/rdft.c
> +++ b/libavcodec/rdft.c
> @@ -28,28 +28,6 @@
>   * (Inverse) Real Discrete Fourier Transforms.
>   */
>
> -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
> -#if !CONFIG_HARDCODED_TABLES
> -SINTABLE(16);
> -SINTABLE(32);
> -SINTABLE(64);
> -SINTABLE(128);
> -SINTABLE(256);
> -SINTABLE(512);
> -SINTABLE(1024);
> -SINTABLE(2048);
> -SINTABLE(4096);
> -SINTABLE(8192);
> -SINTABLE(16384);
> -SINTABLE(32768);
> -SINTABLE(65536);
> -#endif
> -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
> -    NULL, NULL, NULL, NULL,
> -    ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
> -    ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
> -};
> -
>  /** Map one real FFT into two parallel real even and odd FFTs. Then interleave
>   * the two real FFTs into one complex FFT. Unmangle the results.
>   * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
> @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data)
>      ev.re = data[0];
>      data[0] = ev.re+data[1];
>      data[1] = ev.re-data[1];
> -    for (i = 1; i < (n>>2); i++) {
> -        i1 = 2*i;
> -        i2 = n-i1;
> -        /* Separate even and odd FFTs */
> -        ev.re =  k1*(data[i1  ]+data[i2  ]);
> -        od.im = -k2*(data[i1  ]-data[i2  ]);
> -        ev.im =  k1*(data[i1+1]-data[i2+1]);
> -        od.re =  k2*(data[i1+1]+data[i2+1]);
> -        /* Apply twiddle factors to the odd FFT and add to the even FFT */
> -        data[i1  ] =  ev.re + od.re*tcos[i] - od.im*tsin[i];
> -        data[i1+1] =  ev.im + od.im*tcos[i] + od.re*tsin[i];
> -        data[i2  ] =  ev.re - od.re*tcos[i] + od.im*tsin[i];
> -        data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
> +
> +#define RDFT_UNMANGLE(sign0, sign1)                                         \
> +    for (i = 1; i < (n>>2); i++) {                                          \
> +        i1 = 2*i;                                                           \
> +        i2 = n-i1;                                                          \
> +        /* Separate even and odd FFTs */                                    \
> +        ev.re =  k1*(data[i1  ]+data[i2  ]);                                \
> +        od.im = -k2*(data[i1  ]-data[i2  ]);                                \
> +        ev.im =  k1*(data[i1+1]-data[i2+1]);                                \
> +        od.re =  k2*(data[i1+1]+data[i2+1]);                                \
> +        /* Apply twiddle factors to the odd FFT and add to the even FFT */  \
> +        data[i1  ] =  ev.re + od.re*tcos[i] sign0 od.im*tsin[i];            \
> +        data[i1+1] =  ev.im + od.im*tcos[i] sign1 od.re*tsin[i];            \
> +        data[i2  ] =  ev.re - od.re*tcos[i] sign1 od.im*tsin[i];            \
> +        data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i];            \
> +    }
> +
> +    if (s->negative_sin) {
> +        RDFT_UNMANGLE(+,-)
> +    } else {
> +        RDFT_UNMANGLE(-,+)
>      }
> +
>      data[2*i+1]=s->sign_convention*data[2*i+1];
>      if (s->inverse) {
>          data[0] *= k1;
> @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
>      s->nbits           = nbits;
>      s->inverse         = trans == IDFT_C2R || trans == DFT_C2R;
>      s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
> +    s->negative_sin    = trans == DFT_C2R || trans == DFT_R2C;
>
>      if (nbits < 4 || nbits > 16)
>          return AVERROR(EINVAL);
> @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
>
>      ff_init_ff_cos_tabs(nbits);
>      s->tcos = ff_cos_tabs[nbits];
> -    s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
> -#if !CONFIG_HARDCODED_TABLES
> -    {
> -        int i;
> -        const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n;
> -        for (i = 0; i < (n >> 2); i++)
> -            s->tsin[i] = sin(i * theta);
> -    }
> -#endif
> +    s->tsin = ff_cos_tabs[nbits] + (n >> 2);
>      s->rdft_calc   = rdft_calc_c;
>
>      if (ARCH_ARM) ff_rdft_init_arm(s);
> diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
> index 37c40e7..ffafca7 100644
> --- a/libavcodec/rdft.h
> +++ b/libavcodec/rdft.h
> @@ -25,29 +25,6 @@
>  #include "config.h"
>  #include "fft.h"
>
> -#if CONFIG_HARDCODED_TABLES
> -#   define SINTABLE_CONST const
> -#else
> -#   define SINTABLE_CONST
> -#endif
> -
> -#define SINTABLE(size) \
> -    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
> -
> -extern SINTABLE(16);
> -extern SINTABLE(32);
> -extern SINTABLE(64);
> -extern SINTABLE(128);
> -extern SINTABLE(256);
> -extern SINTABLE(512);
> -extern SINTABLE(1024);
> -extern SINTABLE(2048);
> -extern SINTABLE(4096);
> -extern SINTABLE(8192);
> -extern SINTABLE(16384);
> -extern SINTABLE(32768);
> -extern SINTABLE(65536);
> -
>  struct RDFTContext {
>      int nbits;
>      int inverse;
> @@ -55,7 +32,8 @@ struct RDFTContext {
>
>      /* pre/post rotation tables */
>      const FFTSample *tcos;
> -    SINTABLE_CONST FFTSample *tsin;
> +    const FFTSample *tsin;
> +    int negative_sin;
>      FFTContext fft;
>      void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
>  };
> --
> 2.9.3
>

Applied.

Thanks.