[FFmpeg-devel] [PATCH v3] avcodec/fft_template: improve performance of ff_fft_init()
Steven Liu
lq at chinaffmpeg.org
Wed Dec 26 10:15:27 EET 2018
Benchmark of ff_fft_init() with nbits = 17, average cost over 10000 samples:
Before patch: 16175 us
After patch: 14989 us
Signed-off-by: Steven Liu <lq at chinaffmpeg.org>
---
libavcodec/fft_template.c | 46 +++++++++++++++++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 762c014bc8..20a62e4290 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
if (s->fft_permutation == FF_FFT_PERM_AVX) {
fft_perm_avx(s);
} else {
- for(i=0; i<n; i++) {
- int k;
- j = i;
- if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
- j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
- k = -split_radix_permutation(i, n, s->inverse) & (n-1);
- if (s->revtab)
- s->revtab[k] = j;
- if (s->revtab32)
- s->revtab32[k] = j;
- }
+#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
+ for(i = 0; i < n; i++) {\
+ int k;\
+ j = i;\
+ j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
+ k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+ s->revtab##num[k] = j;\
+ } \
+} while(0);
+
+#define PROCESS_FFT_PERM_DEFAULT(num) do {\
+ for(i = 0; i < n; i++) {\
+ int k;\
+ j = i;\
+ k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+ s->revtab##num[k] = j;\
+ } \
+} while(0);
+
+#define SPLIT_RADIX_PERMUTATION(num) do { \
+ if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
+ PROCESS_FFT_PERM_SWAP_LSBS(num) \
+ } else {\
+ PROCESS_FFT_PERM_DEFAULT(num) \
+ }\
+} while(0);
+
+ if (s->revtab)
+ SPLIT_RADIX_PERMUTATION()
+ if (s->revtab32)
+ SPLIT_RADIX_PERMUTATION(32)
+
+#undef PROCESS_FFT_PERM_DEFAULT
+#undef PROCESS_FFT_PERM_SWAP_LSBS
+#undef SPLIT_RADIX_PERMUTATION
}
return 0;
--
2.15.2 (Apple Git-101.1)
More information about the ffmpeg-devel mailing list