[FFmpeg-devel] [PATCHv2 2/3] avcodec/mpegaudio_tablegen: speed up table generation

Thu Nov 26 16:23:03 CET 2015

This does some miscellaneous stuff mainly avoiding the usage of pow to
achieve significant speedups. This is not speed critical, but is
unnecessary latency and cycles wasted for a user.

All tables tested and are identical to the old ones
(bit-exact even in floating point case).

Sample benchmark (x86-64, Haswell, GNU/Linux):
old:
102329530 decicycles in mpegaudio_tableinit,       1 runs,      0 skips

new:
34111900 decicycles in mpegaudio_tableinit,       1 runs,      0 skips

Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
---
 libavcodec/mpegaudio_tablegen.h | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h
index 86b2cd3..6cd5ad6 100644
--- a/libavcodec/mpegaudio_tablegen.h
+++ b/libavcodec/mpegaudio_tablegen.h
@@ -29,9 +29,11 @@
 
 #define TABLE_4_3_SIZE (8191 + 16)*4
 #if CONFIG_HARDCODED_TABLES
+#include "libavutil/tablegen.h"
 #define mpegaudio_tableinit()
 #include "libavcodec/mpegaudio_tables.h"
 #else
+#include "libavutil/libm.h"
 static int8_t   table_4_3_exp[TABLE_4_3_SIZE];
 static uint32_t table_4_3_value[TABLE_4_3_SIZE];
 static uint32_t exp_table_fixed[512];
@@ -45,12 +47,21 @@ static float expval_table_float[512][16];
 static av_cold void mpegaudio_tableinit(void)
 {
     int i, value, exponent;
+    double pow2_lut[4] = {
+        1.00000000000000000000, /* 2 ^ (0 * 0.25) */
+        1.18920711500272106672, /* 2 ^ (1 * 0.25) */
+        M_SQRT2               , /* 2 ^ (2 * 0.25) */
+        1.68179283050742908606, /* 2 ^ (3 * 0.25) */
+    };
+    double cbrt_lut[16];
+    for (i = 0; i < 16; ++i)
+        cbrt_lut[i] = cbrt(i);
+
     for (i = 1; i < TABLE_4_3_SIZE; i++) {
         double value = i / 4;
         double f, fm;
         int e, m;
-        /* cbrtf() isn't available on all systems, so we use powf(). */
-        f  = value / IMDCT_SCALAR * pow(value, 1.0 / 3.0) * pow(2, (i & 3) * 0.25);
+        f  = value / IMDCT_SCALAR * cbrt(value) * pow2_lut[i & 3];
         fm = frexp(f, &e);
         m  = (uint32_t)(fm * (1LL << 31) + 0.5);
         e += FRAC_BITS - 31 + 5 - 100;
@@ -61,10 +72,8 @@ static av_cold void mpegaudio_tableinit(void)
     }
     for (exponent = 0; exponent < 512; exponent++) {
         for (value = 0; value < 16; value++) {
-            /* cbrtf() isn't available on all systems, so we use powf(). */
-            double f = (double)value * pow(value, 1.0 / 3.0) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5) / IMDCT_SCALAR;
-            /* llrint() isn't always available, so round and cast manually. */
-            expval_table_fixed[exponent][value] = (long long int) (f < 0xFFFFFFFF ? floor(f + 0.5) : 0xFFFFFFFF);
+            double f = value * cbrt_lut[value] * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5) / IMDCT_SCALAR;
+            expval_table_fixed[exponent][value] = (f < 0xFFFFFFFF ? llrint(f) : 0xFFFFFFFF);
             expval_table_float[exponent][value] = f;
         }
         exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
-- 
2.6.2