[FFmpeg-devel] [PATCH 20/21] Move alpha half-pel assembly from dsputil to hpeldsp.
Ronald S. Bultje
rsbultje at gmail.com
Mon Mar 11 00:54:46 CET 2013
From: "Ronald S. Bultje" <rsbultje at gmail.com>
---
libavcodec/alpha/Makefile | 2 +
libavcodec/alpha/dsputil_alpha.c | 187 +-----------------------------
libavcodec/alpha/dsputil_alpha.h | 2 -
libavcodec/alpha/dsputil_alpha_asm.S | 92 ---------------
libavcodec/alpha/hpeldsp_alpha.c | 212 +++++++++++++++++++++++++++++++++++
libavcodec/alpha/hpeldsp_alpha.h | 27 +++++
libavcodec/alpha/hpeldsp_alpha_asm.S | 135 ++++++++++++++++++++++
libavcodec/hpeldsp.c | 2 -
8 files changed, 377 insertions(+), 282 deletions(-)
create mode 100644 libavcodec/alpha/hpeldsp_alpha.c
create mode 100644 libavcodec/alpha/hpeldsp_alpha.h
create mode 100644 libavcodec/alpha/hpeldsp_alpha_asm.S
diff --git a/libavcodec/alpha/Makefile b/libavcodec/alpha/Makefile
index e28200d..6f22137 100644
--- a/libavcodec/alpha/Makefile
+++ b/libavcodec/alpha/Makefile
@@ -4,4 +4,6 @@ OBJS += alpha/dsputil_alpha.o \
alpha/motion_est_mvi_asm.o \
alpha/simple_idct_alpha.o \
+OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \
+ alpha/hpeldsp_alpha_asm.o
OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index cb62665..03ba0a8 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -119,197 +119,12 @@ static void clear_blocks_axp(int16_t *blocks) {
} while (n);
}
-static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
-{
- return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-static inline uint64_t avg2(uint64_t a, uint64_t b)
-{
- return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-#if 0
-/* The XY2 routines basically utilize this scheme, but reuse parts in
- each iteration. */
-static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
-{
- uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
- + ((l2 & ~BYTE_VEC(0x03)) >> 2)
- + ((l3 & ~BYTE_VEC(0x03)) >> 2)
- + ((l4 & ~BYTE_VEC(0x03)) >> 2);
- uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
- + (l2 & BYTE_VEC(0x03))
- + (l3 & BYTE_VEC(0x03))
- + (l4 & BYTE_VEC(0x03))
- + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
- return r1 + r2;
-}
-#endif
-
-#define OP(LOAD, STORE) \
- do { \
- STORE(LOAD(pixels), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_X2(LOAD, STORE) \
- do { \
- uint64_t pix1, pix2; \
- \
- pix1 = LOAD(pixels); \
- pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- STORE(AVG2(pix1, pix2), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_Y2(LOAD, STORE) \
- do { \
- uint64_t pix = LOAD(pixels); \
- do { \
- uint64_t next_pix; \
- \
- pixels += line_size; \
- next_pix = LOAD(pixels); \
- STORE(AVG2(pix, next_pix), block); \
- block += line_size; \
- pix = next_pix; \
- } while (--h); \
- } while (0)
-
-#define OP_XY2(LOAD, STORE) \
- do { \
- uint64_t pix1 = LOAD(pixels); \
- uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
- + (pix2 & BYTE_VEC(0x03)); \
- uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
- \
- do { \
- uint64_t npix1, npix2; \
- uint64_t npix_l, npix_h; \
- uint64_t avg; \
- \
- pixels += line_size; \
- npix1 = LOAD(pixels); \
- npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- npix_l = (npix1 & BYTE_VEC(0x03)) \
- + (npix2 & BYTE_VEC(0x03)); \
- npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
- avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
- + pix_h + npix_h; \
- STORE(avg, block); \
- \
- block += line_size; \
- pix_l = npix_l; \
- pix_h = npix_h; \
- } while (--h); \
- } while (0)
-
-#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
-static void OPNAME ## _pixels ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- if ((size_t) pixels & 0x7) { \
- OPKIND(uldq, STORE); \
- } else { \
- OPKIND(ldq, STORE); \
- } \
-} \
- \
-static void OPNAME ## _pixels16 ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
- OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
-}
-
-#define PIXOP(OPNAME, STORE) \
- MAKE_OP(OPNAME, , OP, STORE) \
- MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
- MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
- MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
-
-/* Rounding primitives. */
-#define AVG2 avg2
-#define AVG4 avg4
-#define AVG4_ROUNDER BYTE_VEC(0x02)
-#define STORE(l, b) stq(l, b)
-PIXOP(put, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg, STORE);
-
-/* Not rounding primitives. */
-#undef AVG2
-#undef AVG4
-#undef AVG4_ROUNDER
-#undef STORE
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define AVG4_ROUNDER BYTE_VEC(0x01)
-#define STORE(l, b) stq(l, b)
-PIXOP(put_no_rnd, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg_no_rnd, STORE);
-
-static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h)
-{
- put_pixels_axp_asm(block, pixels, line_size, h);
- put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
-}
-
av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
- c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
- c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
- c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
-
- c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
- c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
-
- c->avg_pixels_tab[0][0] = avg_pixels16_axp;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
-
- c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
- c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
- c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
- c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
-
- c->put_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_pixels_tab[1][1] = put_pixels_x2_axp;
- c->put_pixels_tab[1][2] = put_pixels_y2_axp;
- c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
-
- c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
- c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
-
- c->avg_pixels_tab[1][0] = avg_pixels_axp;
- c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
- c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
- c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
-
- c->clear_blocks = clear_blocks_axp;
+ c->clear_blocks = clear_blocks_axp;
}
/* amask clears all bits that correspond to present features. */
diff --git a/libavcodec/alpha/dsputil_alpha.h b/libavcodec/alpha/dsputil_alpha.h
index cf5ca3b..828cd16 100644
--- a/libavcodec/alpha/dsputil_alpha.h
+++ b/libavcodec/alpha/dsputil_alpha.h
@@ -26,8 +26,6 @@ void ff_simple_idct_axp(int16_t *block);
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);
-void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
int line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
diff --git a/libavcodec/alpha/dsputil_alpha_asm.S b/libavcodec/alpha/dsputil_alpha_asm.S
index 557ba57..5c5f90a 100644
--- a/libavcodec/alpha/dsputil_alpha_asm.S
+++ b/libavcodec/alpha/dsputil_alpha_asm.S
@@ -43,98 +43,6 @@
.text
/************************************************************************
- * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- * int line_size, int h)
- */
- .align 6
- .globl put_pixels_axp_asm
- .ent put_pixels_axp_asm
-put_pixels_axp_asm:
- .frame sp, 0, ra
- .prologue 0
-
- and a1, 7, t0
- beq t0, $aligned
-
- .align 4
-$unaligned:
- ldq_u t0, 0(a1)
- ldq_u t1, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t2, 0(a1)
- ldq_u t3, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t4, 0(a1)
- ldq_u t5, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t6, 0(a1)
- ldq_u t7, 8(a1)
- extql t0, a1, t0
- addq a1, a2, a1
-
- extqh t1, a1, t1
- addq a0, a2, t8
- extql t2, a1, t2
- addq t8, a2, t9
-
- extqh t3, a1, t3
- addq t9, a2, ta
- extql t4, a1, t4
- or t0, t1, t0
-
- extqh t5, a1, t5
- or t2, t3, t2
- extql t6, a1, t6
- or t4, t5, t4
-
- extqh t7, a1, t7
- or t6, t7, t6
- stq t0, 0(a0)
- stq t2, 0(t8)
-
- stq t4, 0(t9)
- subq a3, 4, a3
- stq t6, 0(ta)
- addq ta, a2, a0
-
- bne a3, $unaligned
- ret
-
- .align 4
-$aligned:
- ldq t0, 0(a1)
- addq a1, a2, a1
- ldq t1, 0(a1)
- addq a1, a2, a1
-
- ldq t2, 0(a1)
- addq a1, a2, a1
- ldq t3, 0(a1)
-
- addq a0, a2, t4
- addq a1, a2, a1
- addq t4, a2, t5
- subq a3, 4, a3
-
- stq t0, 0(a0)
- addq t5, a2, t6
- stq t1, 0(t4)
- addq t6, a2, a0
-
- stq t2, 0(t5)
- stq t3, 0(t6)
-
- bne a3, $aligned
- ret
- .end put_pixels_axp_asm
-
-/************************************************************************
* void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
* int line_size)
*/
diff --git a/libavcodec/alpha/hpeldsp_alpha.c b/libavcodec/alpha/hpeldsp_alpha.c
new file mode 100644
index 0000000..4968676
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.c
@@ -0,0 +1,212 @@
+/*
+ * Alpha optimized half-pel DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/hpeldsp.h"
+#include "hpeldsp_alpha.h"
+#include "asm.h"
+
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
+{
+ return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+{
+ return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+#if 0
+/* The XY2 routines basically utilize this scheme, but reuse parts in
+ each iteration. */
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+{
+ uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+ uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
+ + (l2 & BYTE_VEC(0x03))
+ + (l3 & BYTE_VEC(0x03))
+ + (l4 & BYTE_VEC(0x03))
+ + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+ return r1 + r2;
+}
+#endif
+
+#define OP(LOAD, STORE) \
+ do { \
+ STORE(LOAD(pixels), block); \
+ pixels += line_size; \
+ block += line_size; \
+ } while (--h)
+
+#define OP_X2(LOAD, STORE) \
+ do { \
+ uint64_t pix1, pix2; \
+ \
+ pix1 = LOAD(pixels); \
+ pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ STORE(AVG2(pix1, pix2), block); \
+ pixels += line_size; \
+ block += line_size; \
+ } while (--h)
+
+#define OP_Y2(LOAD, STORE) \
+ do { \
+ uint64_t pix = LOAD(pixels); \
+ do { \
+ uint64_t next_pix; \
+ \
+ pixels += line_size; \
+ next_pix = LOAD(pixels); \
+ STORE(AVG2(pix, next_pix), block); \
+ block += line_size; \
+ pix = next_pix; \
+ } while (--h); \
+ } while (0)
+
+#define OP_XY2(LOAD, STORE) \
+ do { \
+ uint64_t pix1 = LOAD(pixels); \
+ uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
+ + (pix2 & BYTE_VEC(0x03)); \
+ uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
+ + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
+ \
+ do { \
+ uint64_t npix1, npix2; \
+ uint64_t npix_l, npix_h; \
+ uint64_t avg; \
+ \
+ pixels += line_size; \
+ npix1 = LOAD(pixels); \
+ npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ npix_l = (npix1 & BYTE_VEC(0x03)) \
+ + (npix2 & BYTE_VEC(0x03)); \
+ npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
+ + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
+ avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
+ + pix_h + npix_h; \
+ STORE(avg, block); \
+ \
+ block += line_size; \
+ pix_l = npix_l; \
+ pix_h = npix_h; \
+ } while (--h); \
+ } while (0)
+
+#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
+static void OPNAME ## _pixels ## SUFF ## _axp \
+ (uint8_t *av_restrict block, const uint8_t *av_restrict pixels, \
+ ptrdiff_t line_size, int h) \
+{ \
+ if ((size_t) pixels & 0x7) { \
+ OPKIND(uldq, STORE); \
+ } else { \
+ OPKIND(ldq, STORE); \
+ } \
+} \
+ \
+static void OPNAME ## _pixels16 ## SUFF ## _axp \
+ (uint8_t *av_restrict block, const uint8_t *av_restrict pixels, \
+ ptrdiff_t line_size, int h) \
+{ \
+ OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
+ OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
+}
+
+#define PIXOP(OPNAME, STORE) \
+ MAKE_OP(OPNAME, , OP, STORE) \
+ MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
+ MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
+ MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
+
+/* Rounding primitives. */
+#define AVG2 avg2
+#define AVG4 avg4
+#define AVG4_ROUNDER BYTE_VEC(0x02)
+#define STORE(l, b) stq(l, b)
+PIXOP(put, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg, STORE);
+
+/* Non-rounding primitives. */
+#undef AVG2
+#undef AVG4
+#undef AVG4_ROUNDER
+#undef STORE
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd
+#define AVG4_ROUNDER BYTE_VEC(0x01)
+#define STORE(l, b) stq(l, b)
+PIXOP(put_no_rnd, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg_no_rnd, STORE);
+
+static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h)
+{
+ put_pixels_axp_asm(block, pixels, line_size, h);
+ put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
+}
+
+void ff_hpeldsp_init_alpha(HpelDSPContext* c, int flags) /* flags is not read here; NOTE(review): not av_cold, unlike ff_hpeldsp_init */
+{
+ c->put_pixels_tab[0][0] = put_pixels16_axp_asm; /* [0][*]: pixels16 variants; slot order is full-pel, x2, y2, xy2 */
+ c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
+
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; /* same routine as put_pixels_tab[0][0]: a plain copy does no averaging */
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_axp; /* avg variants combine the result with what is already in block */
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
+
+ c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp; /* indexed with a single subscript; only pixels16 variants are installed */
+ c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
+ c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
+ c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
+
+ c->put_pixels_tab[1][0] = put_pixels_axp_asm; /* [1][*]: variants handling one 64-bit word per row, same slot order */
+ c->put_pixels_tab[1][1] = put_pixels_x2_axp;
+ c->put_pixels_tab[1][2] = put_pixels_y2_axp;
+ c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
+
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; /* plain copy reused again for the no_rnd full-pel slot */
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
+
+ c->avg_pixels_tab[1][0] = avg_pixels_axp; /* avg_no_rnd has no counterpart at this width in this function */
+ c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
+ c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
+ c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
+}
diff --git a/libavcodec/alpha/hpeldsp_alpha.h b/libavcodec/alpha/hpeldsp_alpha.h
new file mode 100644
index 0000000..53e8604
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.h
@@ -0,0 +1,27 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H
+#define AVCODEC_ALPHA_HPELDSP_ALPHA_H
+
+#include <stddef.h> /* ptrdiff_t */
+#include <stdint.h> /* uint8_t */
+
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h); /* implemented in hpeldsp_alpha_asm.S; its loop consumes 4 rows per pass */
+#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */
diff --git a/libavcodec/alpha/hpeldsp_alpha_asm.S b/libavcodec/alpha/hpeldsp_alpha_asm.S
new file mode 100644
index 0000000..afc3d42
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha_asm.S
@@ -0,0 +1,135 @@
+/*
+ * Alpha optimized half-pel DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+
+/* Some nicer register names. */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+ .set noat
+ .set noreorder
+ .arch pca56
+ .text
+
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ * ptrdiff_t line_size, int h)
+ */
+ .align 6
+ .globl put_pixels_axp_asm
+ .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+ .frame sp, 0, ra
+ .prologue 0
+
+ and a1, 7, t0
+ beq t0, $aligned
+
+ .align 4
+$unaligned:
+ ldq_u t0, 0(a1)
+ ldq_u t1, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t2, 0(a1)
+ ldq_u t3, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t4, 0(a1)
+ ldq_u t5, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t6, 0(a1)
+ ldq_u t7, 8(a1)
+ extql t0, a1, t0
+ addq a1, a2, a1
+
+ extqh t1, a1, t1
+ addq a0, a2, t8
+ extql t2, a1, t2
+ addq t8, a2, t9
+
+ extqh t3, a1, t3
+ addq t9, a2, ta
+ extql t4, a1, t4
+ or t0, t1, t0
+
+ extqh t5, a1, t5
+ or t2, t3, t2
+ extql t6, a1, t6
+ or t4, t5, t4
+
+ extqh t7, a1, t7
+ or t6, t7, t6
+ stq t0, 0(a0)
+ stq t2, 0(t8)
+
+ stq t4, 0(t9)
+ subq a3, 4, a3
+ stq t6, 0(ta)
+ addq ta, a2, a0
+
+ bne a3, $unaligned
+ ret
+
+ .align 4
+$aligned:
+ ldq t0, 0(a1)
+ addq a1, a2, a1
+ ldq t1, 0(a1)
+ addq a1, a2, a1
+
+ ldq t2, 0(a1)
+ addq a1, a2, a1
+ ldq t3, 0(a1)
+
+ addq a0, a2, t4
+ addq a1, a2, a1
+ addq t4, a2, t5
+ subq a3, 4, a3
+
+ stq t0, 0(a0)
+ addq t5, a2, t6
+ stq t1, 0(t4)
+ addq t6, a2, a0
+
+ stq t2, 0(t5)
+ stq t3, 0(t6)
+
+ bne a3, $aligned
+ ret
+ .end put_pixels_axp_asm
diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c
index 31645fd..a9139bf 100644
--- a/libavcodec/hpeldsp.c
+++ b/libavcodec/hpeldsp.c
@@ -56,9 +56,7 @@ av_cold void ff_hpeldsp_init(HpelDSPContext* c, int flags)
if (ARCH_X86) ff_hpeldsp_init_x86 (c, flags);
if (ARCH_ARM) ff_hpeldsp_init_arm (c, flags);
if (HAVE_VIS) ff_hpeldsp_init_vis (c, flags);
-#if 0
if (ARCH_ALPHA) ff_hpeldsp_init_alpha (c, flags);
-#endif
if (ARCH_PPC) ff_hpeldsp_init_ppc (c, flags);
if (ARCH_SH4) ff_hpeldsp_init_sh4 (c, flags);
if (ARCH_BFIN) ff_hpeldsp_init_bfin (c, flags);
--
1.7.11.3
More information about the ffmpeg-devel
mailing list