[FFmpeg-devel] [PATCH] aacenc_utils: unroll loops to allow compiler to use SIMD.
Reimar Döffinger
Reimar.Doeffinger at gmx.de
Sun Mar 6 19:35:58 CET 2016
Transcoding from MP3 to AAC with default settings
is approximately 10% faster.
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger at gmx.de>
---
libavcodec/aacenc_utils.h | 47 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 38 insertions(+), 9 deletions(-)
diff --git a/libavcodec/aacenc_utils.h b/libavcodec/aacenc_utils.h
index b9bd6bf..1639021 100644
--- a/libavcodec/aacenc_utils.h
+++ b/libavcodec/aacenc_utils.h
@@ -36,15 +36,29 @@
#define ROUND_TO_ZERO 0.1054f
#define C_QUANT 0.4054f
+#define ABSPOW(inv, outv) \
+do { \
+ float a = (inv); \
+ a = fabsf(a); \
+ (outv) = sqrtf(a * sqrtf(a)); \
+} while(0)
+
static inline void abs_pow34_v(float *out, const float *in, const int size)
{
int i;
- for (i = 0; i < size; i++) {
- float a = fabsf(in[i]);
- out[i] = sqrtf(a * sqrtf(a));
+ for (i = 0; i < size - 3; i += 4) {
+ ABSPOW(in[i], out[i]);
+ ABSPOW(in[i+1], out[i+1]);
+ ABSPOW(in[i+2], out[i+2]);
+ ABSPOW(in[i+3], out[i+3]);
+ }
+ for (; i < size; i++) {
+ ABSPOW(in[i], out[i]);
}
}
+#undef ABSPOW
+
static inline float pos_pow34(float a)
{
return sqrtf(a * sqrtf(a));
@@ -61,20 +75,35 @@ static inline int quant(float coef, const float Q, const float rounding)
return sqrtf(a * sqrtf(a)) + rounding;
}
+
+#define Q(scv, inv, outv) \
+do { \
+ float qc = (scv) * Q34; \
+ int tmp = (int)FFMIN(qc + rounding, (float)maxval); \
+ if (is_signed && (inv) < 0.0f) { \
+ tmp = -tmp; \
+ } \
+ (outv) = tmp; \
+} while(0)
+
static inline void quantize_bands(int *out, const float *in, const float *scaled,
int size, float Q34, int is_signed, int maxval,
const float rounding)
{
int i;
- for (i = 0; i < size; i++) {
- float qc = scaled[i] * Q34;
- out[i] = (int)FFMIN(qc + rounding, (float)maxval);
- if (is_signed && in[i] < 0.0f) {
- out[i] = -out[i];
- }
+ for (i = 0; i < size - 3; i += 4) {
+ Q(scaled[i], in[i], out[i]);
+ Q(scaled[i+1], in[i+1], out[i+1]);
+ Q(scaled[i+2], in[i+2], out[i+2]);
+ Q(scaled[i+3], in[i+3], out[i+3]);
+ }
+ for (; i < size; i++) {
+ Q(scaled[i], in[i], out[i]);
}
}
+#undef Q
+
static inline float find_max_val(int group_len, int swb_size, const float *scaled)
{
float maxval = 0.0f;
--
2.7.0
More information about the ffmpeg-devel mailing list