[FFmpeg-devel] [PATCH][WIP] lavc/cbrt_tablegen: speed up tablegen
Ganesh Ajjanagadde
gajjanagadde at gmail.com
Fri Jan 1 03:55:23 CET 2016
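This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers. Since n * cbrt(n) is multiplicative,
each table entry can be built up as a product over its prime-power
factors, so cbrt() only needs to be evaluated for primes.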
Tables are identical to previous ones.
Tested with FATE. This does not yet work with --enable-hardcoded-tables,
because the table is now a union and there is no suitable WRITE_ARRAY for
it. I would like feedback on this; if we always initialize the table
dynamically, this won't need addressing.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run counts are given: this function is called only
once, so the small run counts may give a better picture. The small-run
numbers are fairly consistent, and there is a steady downward trend from
small to large run counts, at which point the timing stabilizes to a new
value.
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
---
libavcodec/aacdec_fixed.c | 4 ++--
libavcodec/aacdec_template.c | 2 +-
libavcodec/cbrt_tablegen.h | 53 +++++++++++++++++++++++++++++++-------------
3 files changed, 40 insertions(+), 19 deletions(-)
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index 923fbe0..ebc585e 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_fixed.c
@@ -154,9 +154,9 @@ static void vector_pow43(int *coefs, int len)
for (i=0; i<len; i++) {
coef = coefs[i];
if (coef < 0)
- coef = -(int)cbrt_tab[-coef];
+ coef = -(int)cbrt_tab[-coef].i;
else
- coef = (int)cbrt_tab[coef];
+ coef = (int)cbrt_tab[coef].i;
coefs[i] = coef;
}
}
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 620600c..b3ec9e6 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1791,7 +1791,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
v = -v;
*icf++ = v;
#else
- *icf++ = cbrt_tab[n] | (bits & 1U<<31);
+ *icf++ = cbrt_tab[n].i | (bits & 1U<<31);
#endif /* USE_FIXED */
bits <<= 1;
} else {
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index 59b5a1d..f5ae03e 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -26,14 +26,9 @@
#include <stdint.h>
#include <math.h>
#include "libavutil/attributes.h"
+#include "libavutil/intfloat.h"
#include "libavcodec/aac_defines.h"
-#if USE_FIXED
-#define CBRT(x) lrint((x).f * 8192)
-#else
-#define CBRT(x) x.i
-#endif
-
#if CONFIG_HARDCODED_TABLES
#if USE_FIXED
#define cbrt_tableinit_fixed()
@@ -43,20 +38,46 @@
#include "libavcodec/cbrt_tables.h"
#endif
#else
-static uint32_t cbrt_tab[1 << 13];
+union ff_int32float64 {
+ uint32_t i;
+ double f;
+};
+static union ff_int32float64 cbrt_tab[1 << 13];
static av_cold void AAC_RENAME(cbrt_tableinit)(void)
{
- if (!cbrt_tab[(1<<13) - 1]) {
- int i;
- for (i = 0; i < 1<<13; i++) {
- union {
- float f;
- uint32_t i;
- } f;
- f.f = cbrt(i) * i;
- cbrt_tab[i] = CBRT(f);
+ int i, j, k;
+ double cbrt_val;
+
+ if (!cbrt_tab[(1<<13) - 1].i) {
+ cbrt_tab[0].f = 0;
+ for (i = 1; i < 1<<13; i++)
+ cbrt_tab[i].f = 1;
+
+ /* have to worry about non-squarefree numbers */
+ for (i = 2; i < 90; i++) {
+ if (cbrt_tab[i].f == 1) {
+ cbrt_val = i * cbrt(i);
+ for (k = i; k < (1<<13); k*= i)
+ for (j = k; j < (1<<13); j+=k)
+ cbrt_tab[j].f *= cbrt_val;
+ }
}
+
+ for (i = 91; i <= 8191; i+=2) {
+ if (cbrt_tab[i].f == 1) {
+ cbrt_val = i * cbrt(i);
+ for (j = i; j < (1<<13); j+=i)
+ cbrt_tab[j].f *= cbrt_val;
+ }
+ }
+#if USE_FIXED
+ for (i = 0; i < 1<<13; i++)
+ cbrt_tab[i].i = lrint(cbrt_tab[i].f * 8192);
+#else
+ for (i = 0; i < 1<<13; i++)
+ cbrt_tab[i].i = av_float2int((float)cbrt_tab[i].f);
+#endif
}
}
#endif /* CONFIG_HARDCODED_TABLES */
--
2.6.4
More information about the ffmpeg-devel
mailing list