[FFmpeg-devel] [PATCH v3] avcodec/rdft: remove sintable
Muhammad Faiz
mfcc64 at gmail.com
Tue Jul 11 09:53:54 EEST 2017
On Fri, Jul 7, 2017 at 2:50 PM, Muhammad Faiz <mfcc64 at gmail.com> wrote:
> It is redundant with costable. The first half of sintable is
> identical to the second half of costable. The second half
> of sintable is the negation of the first half of sintable.
>
> The computation is changed to handle the sign of the sin values,
> in both the C code and the ARM assembly code.
>
> Signed-off-by: Muhammad Faiz <mfcc64 at gmail.com>
> ---
> libavcodec/Makefile | 3 +-
> libavcodec/arm/rdft_neon.S | 13 ++++++---
> libavcodec/rdft.c | 68 ++++++++++++++++------------------------------
> libavcodec/rdft.h | 26 ++----------------
> 4 files changed, 36 insertions(+), 74 deletions(-)
>
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index b440a00..59029a8 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o
> OBJS-$(CONFIG_QSVDEC) += qsvdec.o
> OBJS-$(CONFIG_QSVENC) += qsvenc.o
> OBJS-$(CONFIG_RANGECODER) += rangecoder.o
> -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
> -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes)
> +OBJS-$(CONFIG_RDFT) += rdft.o
> OBJS-$(CONFIG_RV34DSP) += rv34dsp.o
> OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o
> OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o
> diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
> index 781d976..eabb92b 100644
> --- a/libavcodec/arm/rdft_neon.S
> +++ b/libavcodec/arm/rdft_neon.S
> @@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1
>
> lsls r6, r6, #31
> bne 1f
> - add r0, r4, #20
> + add r0, r4, #24
> bl X(ff_fft_permute_neon)
> - add r0, r4, #20
> + add r0, r4, #24
> mov r1, r5
> bl X(ff_fft_calc_neon)
> 1:
> ldr r12, [r4, #0] @ nbits
> mov r2, #1
> + ldr r8, [r4, #20] @ negative_sin
> lsl r12, r2, r12
> add r0, r5, #8
> + lsl r8, r8, #31
> add r1, r5, r12, lsl #2
> lsr r12, r12, #2
> + vdup.32 d26, r8
> ldr r2, [r4, #12] @ tcos
> sub r12, r12, #2
> ldr r3, [r4, #16] @ tsin
> @@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1
> vld1.32 {d5}, [r3,:64]! @ tsin[i]
> vmov.f32 d18, #0.5 @ k1
> vdup.32 d19, r6
> + veor d5, d26, d5
> pld [r0, #32]
> veor d19, d18, d19 @ k2
> vmov.i32 d16, #0
> @@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1
> vld1.32 {d5}, [r3,:64]! @ tsin[i]
> veor d24, d22, d17 @ ev.re,-ev.im
> vrev64.32 d3, d23 @ od.re, od.im
> + veor d5, d26, d5
> pld [r2, #32]
> veor d2, d3, d16 @ -od.re, od.im
> pld [r3, #32]
> @@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1
>
> vmul.f32 d22, d22, d18
> vst1.32 {d22}, [r5,:64]
> - add r0, r4, #20
> + add r0, r4, #24
> mov r1, r5
> bl X(ff_fft_permute_neon)
> - add r0, r4, #20
> + add r0, r4, #24
> mov r1, r5
> pop {r4-r8,lr}
> b X(ff_fft_calc_neon)
> diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c
> index c318aa8..194e0bc 100644
> --- a/libavcodec/rdft.c
> +++ b/libavcodec/rdft.c
> @@ -28,28 +28,6 @@
> * (Inverse) Real Discrete Fourier Transforms.
> */
>
> -/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
> -#if !CONFIG_HARDCODED_TABLES
> -SINTABLE(16);
> -SINTABLE(32);
> -SINTABLE(64);
> -SINTABLE(128);
> -SINTABLE(256);
> -SINTABLE(512);
> -SINTABLE(1024);
> -SINTABLE(2048);
> -SINTABLE(4096);
> -SINTABLE(8192);
> -SINTABLE(16384);
> -SINTABLE(32768);
> -SINTABLE(65536);
> -#endif
> -static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
> - NULL, NULL, NULL, NULL,
> - ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
> - ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
> -};
> -
> /** Map one real FFT into two parallel real even and odd FFTs. Then interleave
> * the two real FFTs into one complex FFT. Unmangle the results.
> * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
> @@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data)
> ev.re = data[0];
> data[0] = ev.re+data[1];
> data[1] = ev.re-data[1];
> - for (i = 1; i < (n>>2); i++) {
> - i1 = 2*i;
> - i2 = n-i1;
> - /* Separate even and odd FFTs */
> - ev.re = k1*(data[i1 ]+data[i2 ]);
> - od.im = -k2*(data[i1 ]-data[i2 ]);
> - ev.im = k1*(data[i1+1]-data[i2+1]);
> - od.re = k2*(data[i1+1]+data[i2+1]);
> - /* Apply twiddle factors to the odd FFT and add to the even FFT */
> - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i];
> - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i];
> - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i];
> - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
> +
> +#define RDFT_UNMANGLE(sign0, sign1) \
> + for (i = 1; i < (n>>2); i++) { \
> + i1 = 2*i; \
> + i2 = n-i1; \
> + /* Separate even and odd FFTs */ \
> + ev.re = k1*(data[i1 ]+data[i2 ]); \
> + od.im = -k2*(data[i1 ]-data[i2 ]); \
> + ev.im = k1*(data[i1+1]-data[i2+1]); \
> + od.re = k2*(data[i1+1]+data[i2+1]); \
> + /* Apply twiddle factors to the odd FFT and add to the even FFT */ \
> + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \
> + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
> + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \
> + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
> + }
> +
> + if (s->negative_sin) {
> + RDFT_UNMANGLE(+,-)
> + } else {
> + RDFT_UNMANGLE(-,+)
> }
> +
> data[2*i+1]=s->sign_convention*data[2*i+1];
> if (s->inverse) {
> data[0] *= k1;
> @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
> s->nbits = nbits;
> s->inverse = trans == IDFT_C2R || trans == DFT_C2R;
> s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
> + s->negative_sin = trans == DFT_C2R || trans == DFT_R2C;
>
> if (nbits < 4 || nbits > 16)
> return AVERROR(EINVAL);
> @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
>
> ff_init_ff_cos_tabs(nbits);
> s->tcos = ff_cos_tabs[nbits];
> - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
> -#if !CONFIG_HARDCODED_TABLES
> - {
> - int i;
> - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n;
> - for (i = 0; i < (n >> 2); i++)
> - s->tsin[i] = sin(i * theta);
> - }
> -#endif
> + s->tsin = ff_cos_tabs[nbits] + (n >> 2);
> s->rdft_calc = rdft_calc_c;
>
> if (ARCH_ARM) ff_rdft_init_arm(s);
> diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
> index 37c40e7..ffafca7 100644
> --- a/libavcodec/rdft.h
> +++ b/libavcodec/rdft.h
> @@ -25,29 +25,6 @@
> #include "config.h"
> #include "fft.h"
>
> -#if CONFIG_HARDCODED_TABLES
> -# define SINTABLE_CONST const
> -#else
> -# define SINTABLE_CONST
> -#endif
> -
> -#define SINTABLE(size) \
> - SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
> -
> -extern SINTABLE(16);
> -extern SINTABLE(32);
> -extern SINTABLE(64);
> -extern SINTABLE(128);
> -extern SINTABLE(256);
> -extern SINTABLE(512);
> -extern SINTABLE(1024);
> -extern SINTABLE(2048);
> -extern SINTABLE(4096);
> -extern SINTABLE(8192);
> -extern SINTABLE(16384);
> -extern SINTABLE(32768);
> -extern SINTABLE(65536);
> -
> struct RDFTContext {
> int nbits;
> int inverse;
> @@ -55,7 +32,8 @@ struct RDFTContext {
>
> /* pre/post rotation tables */
> const FFTSample *tcos;
> - SINTABLE_CONST FFTSample *tsin;
> + const FFTSample *tsin;
> + int negative_sin;
> FFTContext fft;
> void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
> };
> --
> 2.9.3
>
Applied.
Thanks.
More information about the ffmpeg-devel
mailing list