[FFmpeg-devel] [PATCH] lavc/aacenc_quantization: use cbrt table
Ganesh Ajjanagadde
gajjanag at gmail.com
Thu Mar 10 00:49:20 CET 2016
There is no reason to compute c*cbrtf(c) at runtime in the encoder's
quantizer; we already have a table for this.
The extra cruft is needed because of the build system, because some people
still like using hardcoded tables, and because a single cbrt_tab must be
shared across the code.
Yields a non-negligible speedup (Haswell + GCC, -march=native):
before:
ffmpeg -i sin.flac -acodec aac -y sin_new.aac 5.22s user 0.03s system 105% cpu 4.970 total
after:
ffmpeg -i sin.flac -acodec aac -y sin_new.aac 5.15s user 0.02s system 105% cpu 4.884 total
Also reduces size of the binary:
after:
15503040 libavcodec/libavcodec.so.57
before:
15504176 libavcodec/libavcodec.so.57
Signed-off-by: Ganesh Ajjanagadde <gajjanag at gmail.com>
---
libavcodec/Makefile | 27 ++++++++++----
libavcodec/aacdec_fixed.c | 4 +--
libavcodec/aacdec_template.c | 4 +--
libavcodec/aacenc.c | 2 ++
libavcodec/aacenc_quantization.h | 3 +-
libavcodec/{cbrt_tablegen.h => cbrt_data.c} | 56 ++++++++++++-----------------
libavcodec/cbrt_tablegen.c | 20 +++++++++--
libavcodec/cbrt_tablegen.h | 53 +++++----------------------
libavcodec/cbrt_tablegen_template.c | 38 --------------------
9 files changed, 77 insertions(+), 130 deletions(-)
copy libavcodec/{cbrt_tablegen.h => cbrt_data.c} (64%)
delete mode 100644 libavcodec/cbrt_tablegen_template.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ee9a962..3315cf7 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -132,22 +132,31 @@ OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o
OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o
# decoders/encoders
+AAC_DECODER-OBJS-$(CONFIG_HARDCODED_TABLES) += cbrt_tables.o
+AAC_FIXED_DECODER-OBJS-$(CONFIG_HARDCODED_TABLES) += cbrt_tables.o
+AAC_ENCODER-OBJS-$(CONFIG_HARDCODED_TABLES) += cbrt_tables.o
+AAC_DECODER-OBJS-$(!CONFIG_HARDCODED_TABLES) += cbrt_data.o
+AAC_FIXED_DECODER-OBJS-$(!CONFIG_HARDCODED_TABLES) += cbrt_data.o
+AAC_ENCODER-OBJS-$(!CONFIG_HARDCODED_TABLES) += cbrt_data.o
OBJS-$(CONFIG_ZERO12V_DECODER) += 012v.o
OBJS-$(CONFIG_A64MULTI_ENCODER) += a64multienc.o elbg.o
OBJS-$(CONFIG_A64MULTI5_ENCODER) += a64multienc.o elbg.o
OBJS-$(CONFIG_AAC_DECODER) += aacdec.o aactab.o aacsbr.o aacps_float.o \
aacadtsdec.o mpeg4audio.o kbdwin.o \
- sbrdsp.o aacpsdsp_float.o
+ sbrdsp.o aacpsdsp_float.o \
+ $(AAC_DECODER-OBJS-yes)
OBJS-$(CONFIG_AAC_FIXED_DECODER) += aacdec_fixed.o aactab.o aacsbr_fixed.o aacps_fixed.o \
aacadtsdec.o mpeg4audio.o kbdwin.o \
- sbrdsp_fixed.o aacpsdsp_fixed.o
+ sbrdsp_fixed.o aacpsdsp_fixed.o \
+ $(AAC_FIXED_DECODER-OBJS-yes)
OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o aacenctab.o \
aacpsy.o aactab.o \
aacenc_is.o \
aacenc_tns.o \
aacenc_ltp.o \
aacenc_pred.o \
- psymodel.o mpeg4audio.o kbdwin.o
+ psymodel.o mpeg4audio.o kbdwin.o \
+ $(AAC_ENCODER-OBJS-yes)
OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o
OBJS-$(CONFIG_AC3_DECODER) += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o
OBJS-$(CONFIG_AC3_FIXED_DECODER) += ac3dec_fixed.o ac3dec_data.o ac3.o kbdwin.o
@@ -979,7 +988,6 @@ TOOLS = fourcc2pixfmt
HOSTPROGS = aacps_tablegen \
aacps_fixed_tablegen \
cbrt_tablegen \
- cbrt_fixed_tablegen \
cos_tablegen \
dv_tablegen \
motionpixels_tablegen \
@@ -993,6 +1001,7 @@ CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o
$(SUBDIR)dv_tablegen$(HOSTEXESUF): $(SUBDIR)dvdata_host.o
+$(SUBDIR)cbrt_tablegen$(HOSTEXESUF): $(SUBDIR)cbrt_data.o
TRIG_TABLES = cos cos_fixed sin
TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c)
@@ -1000,13 +1009,19 @@ TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c)
$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cos_tablegen$(HOSTEXESUF)
$(M)./$< $* > $@
+CBRT_TABLES = cbrt
+CBRT_TABLES := $(CBRT_TABLES:%=$(SUBDIR)%_tables.c)
+
+$(CBRT_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cbrt_tablegen$(HOSTEXESUF)
+ $(M)./$< $* > $@
+
ifdef CONFIG_SMALL
$(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=1
else
$(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=0
endif
-GEN_HEADERS = cbrt_tables.h cbrt_fixed_tables.h aacps_tables.h aacps_fixed_tables.h \
+GEN_HEADERS = aacps_tables.h aacps_fixed_tables.h \
dv_tables.h \
sinewin_tables.h sinewin_fixed_tables.h mpegaudio_tables.h motionpixels_tables.h \
pcm_tables.h qdm2_tables.h
@@ -1016,8 +1031,6 @@ $(GEN_HEADERS): $(SUBDIR)%_tables.h: $(SUBDIR)%_tablegen$(HOSTEXESUF)
$(M)./$< > $@
ifdef CONFIG_HARDCODED_TABLES
-$(SUBDIR)aacdec.o: $(SUBDIR)cbrt_tables.h
-$(SUBDIR)aacdec_fixed.o: $(SUBDIR)cbrt_fixed_tables.h
$(SUBDIR)aacps_float.o: $(SUBDIR)aacps_tables.h
$(SUBDIR)aacps_fixed.o: $(SUBDIR)aacps_fixed_tables.h
$(SUBDIR)aactab_fixed.o: $(SUBDIR)aac_fixed_tables.h
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index 396a874..04ebe99 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_fixed.c
@@ -155,9 +155,9 @@ static void vector_pow43(int *coefs, int len)
for (i=0; i<len; i++) {
coef = coefs[i];
if (coef < 0)
- coef = -(int)cbrt_tab[-coef];
+ coef = -(int)ff_cbrt_tab_fixed[-coef];
else
- coef = (int)cbrt_tab[coef];
+ coef = (int)ff_cbrt_tab_fixed[coef];
coefs[i] = coef;
}
}
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 6bc94c8..883ed52 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1104,7 +1104,7 @@ static av_cold void aac_static_table_init(void)
AAC_RENAME(ff_init_ff_sine_windows)( 9);
AAC_RENAME(ff_init_ff_sine_windows)( 7);
- AAC_RENAME(cbrt_tableinit)();
+ AAC_RENAME(ff_cbrt_tableinit)();
}
static AVOnce aac_table_init = AV_ONCE_INIT;
@@ -1795,7 +1795,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
v = -v;
*icf++ = v;
#else
- *icf++ = cbrt_tab[n] | (bits & 1U<<31);
+ *icf++ = ff_cbrt_tab[n] | (bits & 1U<<31);
#endif /* USE_FIXED */
bits <<= 1;
} else {
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 023260a..863df65 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -45,6 +45,7 @@
#include "aacenc.h"
#include "aacenctab.h"
#include "aacenc_utils.h"
+#include "cbrt_tablegen.h"
#include "psymodel.h"
@@ -897,6 +898,7 @@ alloc_fail:
static av_cold void aac_encode_init_tables(void)
{
ff_aac_tableinit();
+ AAC_RENAME(ff_cbrt_tableinit)();
}
static av_cold int aac_encode_init(AVCodecContext *avctx)
diff --git a/libavcodec/aacenc_quantization.h b/libavcodec/aacenc_quantization.h
index 4250407..b20669c 100644
--- a/libavcodec/aacenc_quantization.h
+++ b/libavcodec/aacenc_quantization.h
@@ -32,6 +32,7 @@
#include "aacenc.h"
#include "aacenctab.h"
#include "aacenc_utils.h"
+#include "cbrt_tablegen.h"
/**
* Calculate rate distortion cost for quantizing with given codebook
@@ -105,7 +106,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
curbits += 21;
} else {
int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13);
- quantized = c*cbrtf(c)*IQ;
+ quantized = av_int2float(ff_cbrt_tab[c])*IQ;
curbits += av_log2(c)*2 - 4 + 1;
}
} else {
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_data.c
similarity index 64%
copy from libavcodec/cbrt_tablegen.h
copy to libavcodec/cbrt_data.c
index 21e4b9a..c697581 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_data.c
@@ -1,5 +1,5 @@
/*
- * Header file for hardcoded AAC cube-root table
+ * AAC cube-root table
*
* Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger at gmx.de>
*
@@ -20,37 +20,17 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef AVCODEC_CBRT_TABLEGEN_H
-#define AVCODEC_CBRT_TABLEGEN_H
-
#include <stdint.h>
-#include <math.h>
-#include "libavutil/attributes.h"
-#include "libavutil/intfloat.h"
-#include "libavcodec/aac_defines.h"
+#include "cbrt_tablegen.h"
-#if USE_FIXED
-#define CBRT(x) lrint((x) * 8192)
-#else
-#define CBRT(x) av_float2int((float)(x))
-#endif
+uint32_t ff_cbrt_tab[1 << 13];
+uint32_t ff_cbrt_tab_fixed[1 << 13];
+static double cbrt_tab_dbl[1 << 13];
-#if CONFIG_HARDCODED_TABLES
-#if USE_FIXED
-#define cbrt_tableinit_fixed()
-#include "libavcodec/cbrt_fixed_tables.h"
-#else
-#define cbrt_tableinit()
-#include "libavcodec/cbrt_tables.h"
-#endif
-#else
-static uint32_t cbrt_tab[1 << 13];
+static av_cold void cbrt_tableinit_internal(void) {
+ int i, j, k;
-static av_cold void AAC_RENAME(cbrt_tableinit)(void)
-{
- static double cbrt_tab_dbl[1 << 13];
- if (!cbrt_tab[(1<<13) - 1]) {
- int i, j, k;
+ if (!cbrt_tab_dbl[(1<<13)-1]) {
double cbrt_val;
for (i = 1; i < 1<<13; i++)
@@ -73,11 +53,21 @@ static av_cold void AAC_RENAME(cbrt_tableinit)(void)
cbrt_tab_dbl[j] *= cbrt_val;
}
}
-
- for (i = 0; i < 1<<13; i++)
- cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]);
}
}
-#endif /* CONFIG_HARDCODED_TABLES */
-#endif /* AVCODEC_CBRT_TABLEGEN_H */
+av_cold void ff_cbrt_tableinit_fixed(void)
+{
+ cbrt_tableinit_internal();
+ if (!ff_cbrt_tab_fixed[(1<<13)-1])
+ for (int i = 0; i < 1<<13; i++)
+ ff_cbrt_tab_fixed[i] = lrint(cbrt_tab_dbl[i] * 8192);
+}
+
+av_cold void ff_cbrt_tableinit(void)
+{
+ cbrt_tableinit_internal();
+ if (!ff_cbrt_tab[(1<<13)-1])
+ for (int i = 0; i < 1<<13; i++)
+ ff_cbrt_tab[i] = av_float2int((float)cbrt_tab_dbl[i]);
+}
diff --git a/libavcodec/cbrt_tablegen.c b/libavcodec/cbrt_tablegen.c
index 8c2235e..b04d02f 100644
--- a/libavcodec/cbrt_tablegen.c
+++ b/libavcodec/cbrt_tablegen.c
@@ -20,5 +20,21 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#define USE_FIXED 0
-#include "cbrt_tablegen_template.c"
+#include <stdlib.h>
+#define CONFIG_HARDCODED_TABLES 0
+#include "libavutil/tablegen.h"
+#include "cbrt_tablegen.h"
+#include "tableprint.h"
+
+int main(void)
+{
+ ff_cbrt_tableinit();
+ ff_cbrt_tableinit_fixed();
+
+ write_fileheader();
+
+ WRITE_ARRAY("const", uint32_t, ff_cbrt_tab);
+ WRITE_ARRAY("const", uint32_t, ff_cbrt_tab_fixed);
+
+ return 0;
+}
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index 21e4b9a..446f9c1 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -29,55 +29,18 @@
#include "libavutil/intfloat.h"
#include "libavcodec/aac_defines.h"
-#if USE_FIXED
-#define CBRT(x) lrint((x) * 8192)
-#else
-#define CBRT(x) av_float2int((float)(x))
-#endif
-
#if CONFIG_HARDCODED_TABLES
-#if USE_FIXED
-#define cbrt_tableinit_fixed()
-#include "libavcodec/cbrt_fixed_tables.h"
+#define ff_cbrt_tableinit_fixed()
+#define ff_cbrt_tableinit()
+extern const uint32_t ff_cbrt_tab[1 << 13];
+extern const uint32_t ff_cbrt_tab_fixed[1 << 13];
#else
-#define cbrt_tableinit()
-#include "libavcodec/cbrt_tables.h"
-#endif
-#else
-static uint32_t cbrt_tab[1 << 13];
-
-static av_cold void AAC_RENAME(cbrt_tableinit)(void)
-{
- static double cbrt_tab_dbl[1 << 13];
- if (!cbrt_tab[(1<<13) - 1]) {
- int i, j, k;
- double cbrt_val;
-
- for (i = 1; i < 1<<13; i++)
- cbrt_tab_dbl[i] = 1;
-
- /* have to take care of non-squarefree numbers */
- for (i = 2; i < 90; i++) {
- if (cbrt_tab_dbl[i] == 1) {
- cbrt_val = i * cbrt(i);
- for (k = i; k < 1<<13; k *= i)
- for (j = k; j < 1<<13; j += k)
- cbrt_tab_dbl[j] *= cbrt_val;
- }
- }
+extern uint32_t ff_cbrt_tab[1 << 13];
+extern uint32_t ff_cbrt_tab_fixed[1 << 13];
- for (i = 91; i <= 8191; i+= 2) {
- if (cbrt_tab_dbl[i] == 1) {
- cbrt_val = i * cbrt(i);
- for (j = i; j < 1<<13; j += i)
- cbrt_tab_dbl[j] *= cbrt_val;
- }
- }
+av_cold void ff_cbrt_tableinit_fixed(void);
+av_cold void ff_cbrt_tableinit(void);
- for (i = 0; i < 1<<13; i++)
- cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]);
- }
-}
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_CBRT_TABLEGEN_H */
diff --git a/libavcodec/cbrt_tablegen_template.c b/libavcodec/cbrt_tablegen_template.c
deleted file mode 100644
index 7dcab91..0000000
--- a/libavcodec/cbrt_tablegen_template.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Generate a header file for hardcoded AAC cube-root table
- *
- * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger at gmx.de>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdlib.h>
-#define CONFIG_HARDCODED_TABLES 0
-#include "libavutil/tablegen.h"
-#include "cbrt_tablegen.h"
-#include "tableprint.h"
-
-int main(void)
-{
- AAC_RENAME(cbrt_tableinit)();
-
- write_fileheader();
-
- WRITE_ARRAY("static const", uint32_t, cbrt_tab);
-
- return 0;
-}
--
2.7.2
More information about the ffmpeg-devel mailing list