[FFmpeg-devel] [PATCH 3/5] avcodec/fft_fixed: Hardcode cosine tables to save space

Andreas Rheinhardt andreas.rheinhardt at gmail.com
Thu Jan 7 01:13:06 EET 2021


The tables that are used take 256B, whereas the code to initialize them
takes 281B here (GCC 9.3, x64, -O3, albeit in av_cold functions). On top
of that, removing this code also allows removing the array of AVOnce
used to guard the cosine tables against multiple initializations; this
also removes relocations.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
---
 libavcodec/Makefile       |  2 +-
 libavcodec/fft.h          | 19 +++++++++---------
 libavcodec/fft_fixed.c    | 42 +++++++++++++++++++++++++++++++++++++++
 libavcodec/fft_template.c | 27 +++++++++----------------
 4 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fea37ef3c9..8e03feb7d1 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -83,7 +83,7 @@ OBJS-$(CONFIG_EXIF)                    += exif.o tiff_common.o
 OBJS-$(CONFIG_FAANDCT)                 += faandct.o
 OBJS-$(CONFIG_FAANIDCT)                += faanidct.o
 OBJS-$(CONFIG_FDCTDSP)                 += fdctdsp.o jfdctfst.o jfdctint.o
-FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
+FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o
 OBJS-$(CONFIG_FFT)                     += avfft.o fft_fixed.o fft_float.o \
                                           fft_fixed_32.o fft_init_table.o \
                                           $(FFT-OBJS-yes)
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index fedc0c5ef0..4b54265b7f 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -114,10 +114,18 @@ struct FFTContext {
     uint32_t *revtab32;
 };
 
-#if CONFIG_HARDCODED_TABLES
+#if CONFIG_HARDCODED_TABLES || !FFT_FLOAT
 #define COSTABLE_CONST const
+#define ff_init_ff_cos_tabs(...)
 #else
 #define COSTABLE_CONST
+#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs)
+
+/**
+ * Initialize the cosine table in ff_cos_tabs[index]
+ * @param index index in ff_cos_tabs array of the table to initialize
+ */
+void ff_init_ff_cos_tabs(int index);
 #endif
 
 #define COSTABLE(size) \
@@ -138,16 +146,9 @@ extern COSTABLE(16384);
 extern COSTABLE(32768);
 extern COSTABLE(65536);
 extern COSTABLE(131072);
-#endif /* FFT_FLOAT */
 extern COSTABLE_CONST FFTSample* const FFT_NAME(ff_cos_tabs)[];
 
-#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs)
-
-/**
- * Initialize the cosine table in ff_cos_tabs[index]
- * @param index index in ff_cos_tabs array of the table to initialize
- */
-void ff_init_ff_cos_tabs(int index);
+#endif /* FFT_FLOAT */
 
 #define ff_fft_init FFT_NAME(ff_fft_init)
 #define ff_fft_end  FFT_NAME(ff_fft_end)
diff --git a/libavcodec/fft_fixed.c b/libavcodec/fft_fixed.c
index 52d225ee09..ce52dec7fd 100644
--- a/libavcodec/fft_fixed.c
+++ b/libavcodec/fft_fixed.c
@@ -19,4 +19,46 @@
 #define FFT_FLOAT 0
 #define FFT_FIXED_32 0
 #define MAX_BITS 7
+
+#include "fft.h"
+
+COSTABLE(16) = {
+     32767,  30274,  23170,  12540,
+         0,  12540,  23170,  30274,
+};
+COSTABLE(32) = {
+     32767,  32138,  30274,  27246,
+     23170,  18205,  12540,   6393,
+         0,   6393,  12540,  18205,
+     23170,  27246,  30274,  32138,
+};
+COSTABLE(64) = {
+     32767,  32610,  32138,  31357,
+     30274,  28899,  27246,  25330,
+     23170,  20788,  18205,  15447,
+     12540,   9512,   6393,   3212,
+         0,   3212,   6393,   9512,
+     12540,  15447,  18205,  20788,
+     23170,  25330,  27246,  28899,
+     30274,  31357,  32138,  32610,
+};
+COSTABLE(128) = {
+     32767,  32729,  32610,  32413,
+     32138,  31786,  31357,  30853,
+     30274,  29622,  28899,  28106,
+     27246,  26320,  25330,  24279,
+     23170,  22006,  20788,  19520,
+     18205,  16846,  15447,  14010,
+     12540,  11039,   9512,   7962,
+      6393,   4808,   3212,   1608,
+         0,   1608,   3212,   4808,
+      6393,   7962,   9512,  11039,
+     12540,  14010,  15447,  16846,
+     18205,  19520,  20788,  22006,
+     23170,  24279,  25330,  26320,
+     27246,  28106,  28899,  29622,
+     30274,  30853,  31357,  31786,
+     32138,  32413,  32610,  32729,
+};
+
 #include "fft_template.c"
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 7a7d51a6b4..9d125de073 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -42,12 +42,12 @@
 #else /* FFT_FIXED_32 */
 
 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+#if FFT_FLOAT
 #if !CONFIG_HARDCODED_TABLES
 COSTABLE(16);
 COSTABLE(32);
 COSTABLE(64);
 COSTABLE(128);
-#if FFT_FLOAT
 COSTABLE(256);
 COSTABLE(512);
 COSTABLE(1024);
@@ -58,7 +58,6 @@ COSTABLE(16384);
 COSTABLE(32768);
 COSTABLE(65536);
 COSTABLE(131072);
-#endif /* FFT_FLOAT */
 
 static av_cold void init_ff_cos_tabs(int index)
 {
@@ -87,7 +86,6 @@ INIT_FF_COS_TABS_FUNC(4, 16)
 INIT_FF_COS_TABS_FUNC(5, 32)
 INIT_FF_COS_TABS_FUNC(6, 64)
 INIT_FF_COS_TABS_FUNC(7, 128)
-#if FFT_FLOAT
 INIT_FF_COS_TABS_FUNC(8, 256)
 INIT_FF_COS_TABS_FUNC(9, 512)
 INIT_FF_COS_TABS_FUNC(10, 1024)
@@ -98,7 +96,6 @@ INIT_FF_COS_TABS_FUNC(14, 16384)
 INIT_FF_COS_TABS_FUNC(15, 32768)
 INIT_FF_COS_TABS_FUNC(16, 65536)
 INIT_FF_COS_TABS_FUNC(17, 131072)
-#endif /* FFT_FLOAT */
 
 static CosTabsInitOnce cos_tabs_init_once[] = {
     { NULL },
@@ -109,7 +106,6 @@ static CosTabsInitOnce cos_tabs_init_once[] = {
     { init_ff_cos_tabs_32, AV_ONCE_INIT },
     { init_ff_cos_tabs_64, AV_ONCE_INIT },
     { init_ff_cos_tabs_128, AV_ONCE_INIT },
-#if FFT_FLOAT
     { init_ff_cos_tabs_256, AV_ONCE_INIT },
     { init_ff_cos_tabs_512, AV_ONCE_INIT },
     { init_ff_cos_tabs_1024, AV_ONCE_INIT },
@@ -120,17 +116,20 @@ static CosTabsInitOnce cos_tabs_init_once[] = {
     { init_ff_cos_tabs_32768, AV_ONCE_INIT },
     { init_ff_cos_tabs_65536, AV_ONCE_INIT },
     { init_ff_cos_tabs_131072, AV_ONCE_INIT },
-#endif /* FFT_FLOAT */
 };
 
+av_cold void ff_init_ff_cos_tabs(int index)
+{
+    ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func);
+}
 #endif
+
 COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
     NULL, NULL, NULL, NULL,
     FFT_NAME(ff_cos_16),
     FFT_NAME(ff_cos_32),
     FFT_NAME(ff_cos_64),
     FFT_NAME(ff_cos_128),
-#if FFT_FLOAT
     FFT_NAME(ff_cos_256),
     FFT_NAME(ff_cos_512),
     FFT_NAME(ff_cos_1024),
@@ -141,8 +140,8 @@ COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
     FFT_NAME(ff_cos_32768),
     FFT_NAME(ff_cos_65536),
     FFT_NAME(ff_cos_131072),
-#endif /* FFT_FLOAT */
 };
+#endif /* FFT_FLOAT */
 
 #endif /* FFT_FIXED_32 */
 
@@ -160,13 +159,6 @@ static int split_radix_permutation(int i, int n, int inverse)
     else                  return split_radix_permutation(i, m, inverse)*4 - 1;
 }
 
-av_cold void ff_init_ff_cos_tabs(int index)
-{
-#if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32)
-    ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func);
-#endif
-}
-
 static const int avx_tab[] = {
     0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
 };
@@ -250,13 +242,12 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     if (ARCH_X86)     ff_fft_init_x86(s);
     if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
     if (HAVE_MIPSFPU) ff_fft_init_mips(s);
+    for (j = 4; !CONFIG_HARDCODED_TABLES && j <= nbits; j++)
+        ff_init_ff_cos_tabs(j);
 #else
     if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
     if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
 #endif
-    for(j=4; j<=nbits; j++) {
-        ff_init_ff_cos_tabs(j);
-    }
 #endif /* FFT_FIXED_32 */
 
 
-- 
2.25.1



More information about the ffmpeg-devel mailing list