[FFmpeg-devel] [PATCH 2/3] ARM optimised vp56_rac_get_prob()
Mans Rullgard
mans
Wed Feb 2 17:36:51 CET 2011
Approximately 3% faster on Cortex-A8.
Signed-off-by: Mans Rullgard <mans at mansr.com>
---
libavcodec/arm/vp56_arith.h | 92 +++++++++++++++++++++++++++++++++++++++++++
libavcodec/vp56.h | 6 ++-
2 files changed, 97 insertions(+), 1 deletions(-)
create mode 100644 libavcodec/arm/vp56_arith.h
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
new file mode 100644
index 0000000..9bcb466
--- /dev/null
+++ b/libavcodec/arm/vp56_arith.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2010 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_VP56_ARITH_H
+#define AVCODEC_ARM_VP56_ARITH_H
+
+#if HAVE_ARMV6 && HAVE_INLINE_ASM
+
+#define vp56_rac_get_prob vp56_rac_get_prob_armv6
+/* Renormalise *c (refilling 16 bits from c->buffer when the bit count carries
+ * and buffer <= end - 1), then decode one bit using pr; returns 0 or 1. */
+static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
+{
+    unsigned shift     = ff_vp56_norm_shift[c->high];
+    unsigned code_word = c->code_word << shift;
+    unsigned high      = c->high << shift;
+    unsigned bit;      /* %2: refill scratch first, decoded bit at the end */
+    __asm__ volatile ("adds %3, %3, %0 \n"           /* bits += shift; carry => refill */
+                      "cmpcs %7, %4 \n"              /* keep carry only if buffer <= end - 1 */
+                      "ldrcsh %2, [%4], #2 \n"       /* load halfword, buffer += 2 */
+                      "rsb %0, %6, #256 \n"          /* %0 = 256 - pr */
+                      "smlabb %0, %5, %6, %0 \n"     /* %0 += high * pr (low 16 bits each) */
+                      "rev16cs %2, %2 \n"            /* byte-swap the loaded halfword */
+                      "orrcs %1, %1, %2, lsl %3 \n"  /* code_word |= loaded << bits */
+                      "subcs %3, %3, #16 \n"         /* bits -= 16 */
+                      "lsr %0, %0, #8 \n"            /* low = %0 >> 8 */
+                      "cmp %1, %0, lsl #16 \n"       /* code_word vs. low << 16 */
+                      "subge %1, %1, %0, lsl #16 \n" /* >=: code_word -= low << 16 */
+                      "subge %0, %5, %0 \n"          /* >=: new high = high - low */
+                      "movge %2, #1 \n"              /* >=: bit = 1 */
+                      "movlt %2, #0 \n"              /* <:  bit = 0, new high stays low */
+                      : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
+                        "+&r"(c->bits), "+&r"(c->buffer)
+                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift), "1"(code_word)
+                      : "cc"); /* adds/cmp overwrite the condition flags */
+    return bit;
+}
+
+#define vp56_rac_get_prob_branchy vp56_rac_get_prob_branchy_armv6
+/* Renormalise *c in asm (16-bit refill when the bit count carries), then do
+ * the compare/update in C so callers can branch on it; returns 0 or 1. */
+static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
+{
+    unsigned shift     = ff_vp56_norm_shift[c->high];
+    unsigned code_word = c->code_word << shift;
+    unsigned high      = c->high << shift;
+    unsigned low, tmp; /* tmp (%2): refill scratch, then low << 16 */
+    __asm__ volatile ("adds %3, %3, %0 \n"           /* bits += shift; carry => refill */
+                      "cmpcs %7, %4 \n"              /* keep carry only if buffer <= end - 1 */
+                      "ldrcsh %2, [%4], #2 \n"       /* load halfword, buffer += 2 */
+                      "rsb %0, %6, #256 \n"          /* %0 = 256 - pr */
+                      "smlabb %0, %5, %6, %0 \n"     /* %0 += high * pr (low 16 bits each) */
+                      "rev16cs %2, %2 \n"            /* byte-swap the loaded halfword */
+                      "orrcs %1, %1, %2, lsl %3 \n"  /* code_word |= loaded << bits */
+                      "subcs %3, %3, #16 \n"         /* bits -= 16 */
+                      "lsr %0, %0, #8 \n"            /* low = %0 >> 8 */
+                      "lsl %2, %0, #16 \n"           /* tmp = low << 16 */
+                      : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
+                        "+&r"(c->bits), "+&r"(c->buffer)
+                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)
+                      : "cc"); /* adds/cmpcs overwrite the condition flags */
+
+    if (code_word >= tmp) {
+        c->high      = high - low;
+        c->code_word = code_word - tmp;
+        return 1;
+    }
+    c->high      = low;
+    c->code_word = code_word;
+    return 0;
+}
+
+#endif
+
+#endif
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index da6b1b6..d1735e5 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -201,7 +201,9 @@ static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
return code_word;
}
-#if ARCH_X86
+#if ARCH_ARM
+#include "arm/vp56_arith.h"
+#elif ARCH_X86
#include "x86/vp56_arith.h"
#endif
@@ -221,6 +223,7 @@ static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
}
#endif
+#ifndef vp56_rac_get_prob_branchy
// branchy variant, to be used where there's a branch based on the bit decoded
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
{
@@ -238,6 +241,7 @@ static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int pro
c->code_word = code_word;
return 0;
}
+#endif
static av_always_inline int vp56_rac_get(VP56RangeCoder *c)
{
--
1.7.4
More information about the ffmpeg-devel
mailing list