[FFmpeg-devel] [PATCH v2] avcodec/rdft: remove sintable
Muhammad Faiz
mfcc64 at gmail.com
Fri Jul 7 10:50:13 EEST 2017
On Thu, Jul 6, 2017 at 4:18 PM, Muhammad Faiz <mfcc64 at gmail.com> wrote:
> It is redundant with costable. The first half of sintable is
> identical to the second half of costable. The second half
> of sintable is the negation of the first half of sintable.
>
> The computation is changed to handle the sign of the sin values.
>
> Signed-off-by: Muhammad Faiz <mfcc64 at gmail.com>
> ---
> libavcodec/Makefile | 3 +-
> libavcodec/arm/rdft_neon.S | 20 ++++++++------
> libavcodec/rdft.c | 68 ++++++++++++++++------------------------------
> libavcodec/rdft.h | 26 ++----------------
> 4 files changed, 39 insertions(+), 78 deletions(-)
>
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index b440a00..59029a8 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o
> OBJS-$(CONFIG_QSVDEC) += qsvdec.o
> OBJS-$(CONFIG_QSVENC) += qsvenc.o
> OBJS-$(CONFIG_RANGECODER) += rangecoder.o
> -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
> -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes)
> +OBJS-$(CONFIG_RDFT) += rdft.o
> OBJS-$(CONFIG_RV34DSP) += rv34dsp.o
> OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o
> OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o
> diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
> index 781d976..3bea8b4 100644
> --- a/libavcodec/arm/rdft_neon.S
> +++ b/libavcodec/arm/rdft_neon.S
> @@ -22,7 +22,7 @@
> #include "libavutil/arm/asm.S"
>
> function ff_rdft_calc_neon, export=1
> - push {r4-r8,lr}
> + push {r4-r9,lr}
>
> ldr r6, [r0, #4] @ inverse
> mov r4, r0
> @@ -30,9 +30,9 @@ function ff_rdft_calc_neon, export=1
>
> lsls r6, r6, #31
> bne 1f
> - add r0, r4, #20
> + add r0, r4, #24
> bl X(ff_fft_permute_neon)
> - add r0, r4, #20
> + add r0, r4, #24
> mov r1, r5
> bl X(ff_fft_calc_neon)
> 1:
> @@ -46,8 +46,10 @@ function ff_rdft_calc_neon, export=1
> sub r12, r12, #2
> ldr r3, [r4, #16] @ tsin
> mov r7, r0
> + ldr r9, [r4, #20] @ negative_sin
> sub r1, r1, #8
> mov lr, r1
> + lsl r9, r9, #31
> mov r8, #-8
> vld1.32 {d0}, [r0,:64]! @ d1[0,1]
> vld1.32 {d1}, [r1,:64], r8 @ d2[0,1]
> @@ -61,8 +63,10 @@ function ff_rdft_calc_neon, export=1
> vmov.i32 d17, #1<<31
> pld [r1, #-32]
> vtrn.32 d16, d17
> + vdup.32 d16, r9
> pld [r2, #32]
> - vrev64.32 d16, d16 @ d16=1,0 d17=0,1
> + veor d17, d16, d17
> + vrev64.32 d16, d17 @ negative_sin ? d16=0,1 d17=1,0 : d16=1,0 d17=0,1
> pld [r3, #32]
> 2:
> veor q1, q0, q8 @ -d1[0],d1[1], d2[0],-d2[1]
> @@ -136,15 +140,15 @@ function ff_rdft_calc_neon, export=1
>
> cmp r6, #0
> it eq
> - popeq {r4-r8,pc}
> + popeq {r4-r9,pc}
>
> vmul.f32 d22, d22, d18
> vst1.32 {d22}, [r5,:64]
> - add r0, r4, #20
> + add r0, r4, #24
> mov r1, r5
> bl X(ff_fft_permute_neon)
> - add r0, r4, #20
> + add r0, r4, #24
> mov r1, r5
> - pop {r4-r8,lr}
> + pop {r4-r9,lr}
> b X(ff_fft_calc_neon)
> endfunc
> diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c
> index c318aa8..194e0bc 100644
> --- a/libavcodec/rdft.c
> +++ b/libavcodec/rdft.c
> @@ -28,28 +28,6 @@
> * (Inverse) Real Discrete Fourier Transforms.
> */
>
> -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
> -#if !CONFIG_HARDCODED_TABLES
> -SINTABLE(16);
> -SINTABLE(32);
> -SINTABLE(64);
> -SINTABLE(128);
> -SINTABLE(256);
> -SINTABLE(512);
> -SINTABLE(1024);
> -SINTABLE(2048);
> -SINTABLE(4096);
> -SINTABLE(8192);
> -SINTABLE(16384);
> -SINTABLE(32768);
> -SINTABLE(65536);
> -#endif
> -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
> - NULL, NULL, NULL, NULL,
> - ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
> - ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
> -};
> -
> /** Map one real FFT into two parallel real even and odd FFTs. Then interleave
> * the two real FFTs into one complex FFT. Unmangle the results.
> * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
> @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data)
> ev.re = data[0];
> data[0] = ev.re+data[1];
> data[1] = ev.re-data[1];
> - for (i = 1; i < (n>>2); i++) {
> - i1 = 2*i;
> - i2 = n-i1;
> - /* Separate even and odd FFTs */
> - ev.re = k1*(data[i1 ]+data[i2 ]);
> - od.im = -k2*(data[i1 ]-data[i2 ]);
> - ev.im = k1*(data[i1+1]-data[i2+1]);
> - od.re = k2*(data[i1+1]+data[i2+1]);
> - /* Apply twiddle factors to the odd FFT and add to the even FFT */
> - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i];
> - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i];
> - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i];
> - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
> +
> +#define RDFT_UNMANGLE(sign0, sign1) \
> + for (i = 1; i < (n>>2); i++) { \
> + i1 = 2*i; \
> + i2 = n-i1; \
> + /* Separate even and odd FFTs */ \
> + ev.re = k1*(data[i1 ]+data[i2 ]); \
> + od.im = -k2*(data[i1 ]-data[i2 ]); \
> + ev.im = k1*(data[i1+1]-data[i2+1]); \
> + od.re = k2*(data[i1+1]+data[i2+1]); \
> + /* Apply twiddle factors to the odd FFT and add to the even FFT */ \
> + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \
> + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
> + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \
> + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
> + }
> +
> + if (s->negative_sin) {
> + RDFT_UNMANGLE(+,-)
> + } else {
> + RDFT_UNMANGLE(-,+)
> }
> +
> data[2*i+1]=s->sign_convention*data[2*i+1];
> if (s->inverse) {
> data[0] *= k1;
> @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
> s->nbits = nbits;
> s->inverse = trans == IDFT_C2R || trans == DFT_C2R;
> s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
> + s->negative_sin = trans == DFT_C2R || trans == DFT_R2C;
>
> if (nbits < 4 || nbits > 16)
> return AVERROR(EINVAL);
> @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
>
> ff_init_ff_cos_tabs(nbits);
> s->tcos = ff_cos_tabs[nbits];
> - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
> -#if !CONFIG_HARDCODED_TABLES
> - {
> - int i;
> - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n;
> - for (i = 0; i < (n >> 2); i++)
> - s->tsin[i] = sin(i * theta);
> - }
> -#endif
> + s->tsin = ff_cos_tabs[nbits] + (n >> 2);
> s->rdft_calc = rdft_calc_c;
>
> if (ARCH_ARM) ff_rdft_init_arm(s);
> diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
> index 37c40e7..ffafca7 100644
> --- a/libavcodec/rdft.h
> +++ b/libavcodec/rdft.h
> @@ -25,29 +25,6 @@
> #include "config.h"
> #include "fft.h"
>
> -#if CONFIG_HARDCODED_TABLES
> -# define SINTABLE_CONST const
> -#else
> -# define SINTABLE_CONST
> -#endif
> -
> -#define SINTABLE(size) \
> - SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
> -
> -extern SINTABLE(16);
> -extern SINTABLE(32);
> -extern SINTABLE(64);
> -extern SINTABLE(128);
> -extern SINTABLE(256);
> -extern SINTABLE(512);
> -extern SINTABLE(1024);
> -extern SINTABLE(2048);
> -extern SINTABLE(4096);
> -extern SINTABLE(8192);
> -extern SINTABLE(16384);
> -extern SINTABLE(32768);
> -extern SINTABLE(65536);
> -
> struct RDFTContext {
> int nbits;
> int inverse;
> @@ -55,7 +32,8 @@ struct RDFTContext {
>
> /* pre/post rotation tables */
> const FFTSample *tcos;
> - SINTABLE_CONST FFTSample *tsin;
> + const FFTSample *tsin;
> + int negative_sin;
> FFTContext fft;
> void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
> };
> --
> 2.9.3
>
This breaks FATE, so it is dropped. I will post a new patch.
Thanks.
More information about the ffmpeg-devel
mailing list