[FFmpeg-devel] [PATCH 2/2] [RFC] libavcodec: remove DSP acceleration code for DEC Alpha
Sean McGovern
gseanmcg at gmail.com
Fri May 31 00:21:52 EEST 2024
---
Changelog | 1 +
libavcodec/alpha/Makefile | 10 -
libavcodec/alpha/asm.h | 153 --------------
libavcodec/alpha/blockdsp_alpha.c | 49 -----
libavcodec/alpha/hpeldsp_alpha.c | 213 -------------------
libavcodec/alpha/hpeldsp_alpha.h | 28 ---
libavcodec/alpha/hpeldsp_alpha_asm.S | 125 -----------
libavcodec/alpha/idctdsp_alpha.c | 127 -----------
libavcodec/alpha/idctdsp_alpha.h | 34 ---
libavcodec/alpha/idctdsp_alpha_asm.S | 167 ---------------
libavcodec/alpha/me_cmp_alpha.c | 279 ------------------------
libavcodec/alpha/me_cmp_mvi_asm.S | 179 ----------------
libavcodec/alpha/mpegvideo_alpha.c | 110 ----------
libavcodec/alpha/pixblockdsp_alpha.c | 79 -------
libavcodec/alpha/regdef.h | 77 -------
libavcodec/alpha/simple_idct_alpha.c | 303 ---------------------------
16 files changed, 1 insertion(+), 1933 deletions(-)
delete mode 100644 libavcodec/alpha/Makefile
delete mode 100644 libavcodec/alpha/asm.h
delete mode 100644 libavcodec/alpha/blockdsp_alpha.c
delete mode 100644 libavcodec/alpha/hpeldsp_alpha.c
delete mode 100644 libavcodec/alpha/hpeldsp_alpha.h
delete mode 100644 libavcodec/alpha/hpeldsp_alpha_asm.S
delete mode 100644 libavcodec/alpha/idctdsp_alpha.c
delete mode 100644 libavcodec/alpha/idctdsp_alpha.h
delete mode 100644 libavcodec/alpha/idctdsp_alpha_asm.S
delete mode 100644 libavcodec/alpha/me_cmp_alpha.c
delete mode 100644 libavcodec/alpha/me_cmp_mvi_asm.S
delete mode 100644 libavcodec/alpha/mpegvideo_alpha.c
delete mode 100644 libavcodec/alpha/pixblockdsp_alpha.c
delete mode 100644 libavcodec/alpha/regdef.h
delete mode 100644 libavcodec/alpha/simple_idct_alpha.c
diff --git a/Changelog b/Changelog
index 12770e4296..a1a40399f8 100644
--- a/Changelog
+++ b/Changelog
@@ -11,6 +11,7 @@ version <next>:
- vf_scale2ref deprecated
- qsv_params option added for QSV encoders
- VVC decoder compatible with DVB test content
+- removed libavcodec DSP code for the DEC Alpha
version 7.0:
diff --git a/libavcodec/alpha/Makefile b/libavcodec/alpha/Makefile
deleted file mode 100644
index 796d9762b3..0000000000
--- a/libavcodec/alpha/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-OBJS-$(CONFIG_BLOCKDSP) += alpha/blockdsp_alpha.o
-OBJS-$(CONFIG_ME_CMP) += alpha/me_cmp_alpha.o \
- alpha/me_cmp_mvi_asm.o
-OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \
- alpha/hpeldsp_alpha_asm.o
-OBJS-$(CONFIG_IDCTDSP) += alpha/idctdsp_alpha.o \
- alpha/idctdsp_alpha_asm.o \
- alpha/simple_idct_alpha.o
-OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o
-OBJS-$(CONFIG_PIXBLOCKDSP) += alpha/pixblockdsp_alpha.o
diff --git a/libavcodec/alpha/asm.h b/libavcodec/alpha/asm.h
deleted file mode 100644
index 6d850cecc6..0000000000
--- a/libavcodec/alpha/asm.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_ALPHA_ASM_H
-#define AVCODEC_ALPHA_ASM_H
-
-#include <inttypes.h>
-
-#include "libavutil/common.h"
-
-#if AV_GCC_VERSION_AT_LEAST(2,96)
-# define likely(x) __builtin_expect((x) != 0, 1)
-# define unlikely(x) __builtin_expect((x) != 0, 0)
-#else
-# define likely(x) (x)
-# define unlikely(x) (x)
-#endif
-
-#define AMASK_BWX (1 << 0)
-#define AMASK_FIX (1 << 1)
-#define AMASK_CIX (1 << 2)
-#define AMASK_MVI (1 << 8)
-
-static inline uint64_t BYTE_VEC(uint64_t x)
-{
- x |= x << 8;
- x |= x << 16;
- x |= x << 32;
- return x;
-}
-static inline uint64_t WORD_VEC(uint64_t x)
-{
- x |= x << 16;
- x |= x << 32;
- return x;
-}
-
-#define sextw(x) ((int16_t) (x))
-
-#ifdef __GNUC__
-#define ldq(p) \
- (((const union { \
- uint64_t __l; \
- __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \
- } *) (p))->__l)
-#define ldl(p) \
- (((const union { \
- int32_t __l; \
- __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \
- } *) (p))->__l)
-#define stq(l, p) \
- do { \
- (((union { \
- uint64_t __l; \
- __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \
- } *) (p))->__l) = l; \
- } while (0)
-#define stl(l, p) \
- do { \
- (((union { \
- int32_t __l; \
- __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \
- } *) (p))->__l) = l; \
- } while (0)
-struct unaligned_long { uint64_t l; } __attribute__((packed));
-#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
-#define uldq(a) (((const struct unaligned_long *) (a))->l)
-
-#if AV_GCC_VERSION_AT_LEAST(3,3)
-#define prefetch(p) __builtin_prefetch((p), 0, 1)
-#define prefetch_en(p) __builtin_prefetch((p), 0, 0)
-#define prefetch_m(p) __builtin_prefetch((p), 1, 1)
-#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
-#define cmpbge __builtin_alpha_cmpbge
-/* Avoid warnings. */
-#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
-#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
-#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
-#define zap __builtin_alpha_zap
-#define zapnot __builtin_alpha_zapnot
-#define amask __builtin_alpha_amask
-#define implver __builtin_alpha_implver
-#define rpcc __builtin_alpha_rpcc
-#else
-#define prefetch(p) __asm__ volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_en(p) __asm__ volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_m(p) __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
-#define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extql(a, b) ({ uint64_t __r; __asm__ ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extwl(a, b) ({ uint64_t __r; __asm__ ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extqh(a, b) ({ uint64_t __r; __asm__ ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define zap(a, b) ({ uint64_t __r; __asm__ ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define amask(a) ({ uint64_t __r; __asm__ ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
-#define implver() ({ uint64_t __r; __asm__ ("implver %0" : "=r" (__r)); __r; })
-#define rpcc() ({ uint64_t __r; __asm__ volatile ("rpcc %0" : "=r" (__r)); __r; })
-#endif
-#define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory")
-
-#if AV_GCC_VERSION_AT_LEAST(3,3) && defined(__alpha_max__)
-#define minub8 __builtin_alpha_minub8
-#define minsb8 __builtin_alpha_minsb8
-#define minuw4 __builtin_alpha_minuw4
-#define minsw4 __builtin_alpha_minsw4
-#define maxub8 __builtin_alpha_maxub8
-#define maxsb8 __builtin_alpha_maxsb8
-#define maxuw4 __builtin_alpha_maxuw4
-#define maxsw4 __builtin_alpha_maxsw4
-#define perr __builtin_alpha_perr
-#define pklb __builtin_alpha_pklb
-#define pkwb __builtin_alpha_pkwb
-#define unpkbl __builtin_alpha_unpkbl
-#define unpkbw __builtin_alpha_unpkbw
-#else
-#define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define perr(a, b) ({ uint64_t __r; __asm__ (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
-#define pklb(a) ({ uint64_t __r; __asm__ (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
-#define pkwb(a) ({ uint64_t __r; __asm__ (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
-#define unpkbl(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
-#define unpkbw(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
-#endif
-
-#else
-#error "Unknown compiler!"
-#endif
-
-#endif /* AVCODEC_ALPHA_ASM_H */
diff --git a/libavcodec/alpha/blockdsp_alpha.c b/libavcodec/alpha/blockdsp_alpha.c
deleted file mode 100644
index c6f0964607..0000000000
--- a/libavcodec/alpha/blockdsp_alpha.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Alpha optimised block operations
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/attributes.h"
-#include "libavcodec/blockdsp.h"
-#include "asm.h"
-
-static void clear_blocks_axp(int16_t *blocks) {
- uint64_t *p = (uint64_t *) blocks;
- int n = sizeof(int16_t) * 6 * 64;
-
- do {
- p[0] = 0;
- p[1] = 0;
- p[2] = 0;
- p[3] = 0;
- p[4] = 0;
- p[5] = 0;
- p[6] = 0;
- p[7] = 0;
- p += 8;
- n -= 8 * 8;
- } while (n);
-}
-
-av_cold void ff_blockdsp_init_alpha(BlockDSPContext *c)
-{
- c->clear_blocks = clear_blocks_axp;
-}
diff --git a/libavcodec/alpha/hpeldsp_alpha.c b/libavcodec/alpha/hpeldsp_alpha.c
deleted file mode 100644
index 8d54807d8f..0000000000
--- a/libavcodec/alpha/hpeldsp_alpha.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/attributes.h"
-#include "libavcodec/hpeldsp.h"
-#include "hpeldsp_alpha.h"
-#include "asm.h"
-
-static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
-{
- return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-static inline uint64_t avg2(uint64_t a, uint64_t b)
-{
- return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-#if 0
-/* The XY2 routines basically utilize this scheme, but reuse parts in
- each iteration. */
-static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
-{
- uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
- + ((l2 & ~BYTE_VEC(0x03)) >> 2)
- + ((l3 & ~BYTE_VEC(0x03)) >> 2)
- + ((l4 & ~BYTE_VEC(0x03)) >> 2);
- uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
- + (l2 & BYTE_VEC(0x03))
- + (l3 & BYTE_VEC(0x03))
- + (l4 & BYTE_VEC(0x03))
- + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
- return r1 + r2;
-}
-#endif
-
-#define OP(LOAD, STORE) \
- do { \
- STORE(LOAD(pixels), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_X2(LOAD, STORE) \
- do { \
- uint64_t pix1, pix2; \
- \
- pix1 = LOAD(pixels); \
- pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- STORE(AVG2(pix1, pix2), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_Y2(LOAD, STORE) \
- do { \
- uint64_t pix = LOAD(pixels); \
- do { \
- uint64_t next_pix; \
- \
- pixels += line_size; \
- next_pix = LOAD(pixels); \
- STORE(AVG2(pix, next_pix), block); \
- block += line_size; \
- pix = next_pix; \
- } while (--h); \
- } while (0)
-
-#define OP_XY2(LOAD, STORE) \
- do { \
- uint64_t pix1 = LOAD(pixels); \
- uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
- + (pix2 & BYTE_VEC(0x03)); \
- uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
- \
- do { \
- uint64_t npix1, npix2; \
- uint64_t npix_l, npix_h; \
- uint64_t avg; \
- \
- pixels += line_size; \
- npix1 = LOAD(pixels); \
- npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- npix_l = (npix1 & BYTE_VEC(0x03)) \
- + (npix2 & BYTE_VEC(0x03)); \
- npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
- avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
- + pix_h + npix_h; \
- STORE(avg, block); \
- \
- block += line_size; \
- pix_l = npix_l; \
- pix_h = npix_h; \
- } while (--h); \
- } while (0)
-
-#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
-static void OPNAME ## _pixels ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- if ((size_t) pixels & 0x7) { \
- OPKIND(uldq, STORE); \
- } else { \
- OPKIND(ldq, STORE); \
- } \
-} \
- \
-static void OPNAME ## _pixels16 ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
- OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
-}
-
-#define PIXOP(OPNAME, STORE) \
- MAKE_OP(OPNAME, , OP, STORE) \
- MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
- MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
- MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
-
-/* Rounding primitives. */
-#define AVG2 avg2
-#define AVG4 avg4
-#define AVG4_ROUNDER BYTE_VEC(0x02)
-#define STORE(l, b) stq(l, b)
-PIXOP(put, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg, STORE);
-
-/* Not rounding primitives. */
-#undef AVG2
-#undef AVG4
-#undef AVG4_ROUNDER
-#undef STORE
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define AVG4_ROUNDER BYTE_VEC(0x01)
-#define STORE(l, b) stq(l, b)
-PIXOP(put_no_rnd, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg_no_rnd, STORE);
-
-static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h)
-{
- put_pixels_axp_asm(block, pixels, line_size, h);
- put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
-}
-
-av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
-{
- c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
- c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
- c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
-
- c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
- c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
-
- c->avg_pixels_tab[0][0] = avg_pixels16_axp;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
-
- c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
- c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
- c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
- c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
-
- c->put_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_pixels_tab[1][1] = put_pixels_x2_axp;
- c->put_pixels_tab[1][2] = put_pixels_y2_axp;
- c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
-
- c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
- c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
-
- c->avg_pixels_tab[1][0] = avg_pixels_axp;
- c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
- c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
- c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
-}
diff --git a/libavcodec/alpha/hpeldsp_alpha.h b/libavcodec/alpha/hpeldsp_alpha.h
deleted file mode 100644
index 985182c67b..0000000000
--- a/libavcodec/alpha/hpeldsp_alpha.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H
-#define AVCODEC_ALPHA_HPELDSP_ALPHA_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-
-#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */
diff --git a/libavcodec/alpha/hpeldsp_alpha_asm.S b/libavcodec/alpha/hpeldsp_alpha_asm.S
deleted file mode 100644
index df386c429e..0000000000
--- a/libavcodec/alpha/hpeldsp_alpha_asm.S
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/*
- * These functions are scheduled for pca56. They should work
- * reasonably on ev6, though.
- */
-
-#include "regdef.h"
-
-
- .set noat
- .set noreorder
- .arch pca56
- .text
-
-/************************************************************************
- * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- * int line_size, int h)
- */
- .align 6
- .globl put_pixels_axp_asm
- .ent put_pixels_axp_asm
-put_pixels_axp_asm:
- .frame sp, 0, ra
- .prologue 0
-
- and a1, 7, t0
- beq t0, $aligned
-
- .align 4
-$unaligned:
- ldq_u t0, 0(a1)
- ldq_u t1, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t2, 0(a1)
- ldq_u t3, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t4, 0(a1)
- ldq_u t5, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t6, 0(a1)
- ldq_u t7, 8(a1)
- extql t0, a1, t0
- addq a1, a2, a1
-
- extqh t1, a1, t1
- addq a0, a2, t8
- extql t2, a1, t2
- addq t8, a2, t9
-
- extqh t3, a1, t3
- addq t9, a2, ta
- extql t4, a1, t4
- or t0, t1, t0
-
- extqh t5, a1, t5
- or t2, t3, t2
- extql t6, a1, t6
- or t4, t5, t4
-
- extqh t7, a1, t7
- or t6, t7, t6
- stq t0, 0(a0)
- stq t2, 0(t8)
-
- stq t4, 0(t9)
- subq a3, 4, a3
- stq t6, 0(ta)
- addq ta, a2, a0
-
- bne a3, $unaligned
- ret
-
- .align 4
-$aligned:
- ldq t0, 0(a1)
- addq a1, a2, a1
- ldq t1, 0(a1)
- addq a1, a2, a1
-
- ldq t2, 0(a1)
- addq a1, a2, a1
- ldq t3, 0(a1)
-
- addq a0, a2, t4
- addq a1, a2, a1
- addq t4, a2, t5
- subq a3, 4, a3
-
- stq t0, 0(a0)
- addq t5, a2, t6
- stq t1, 0(t4)
- addq t6, a2, a0
-
- stq t2, 0(t5)
- stq t3, 0(t6)
-
- bne a3, $aligned
- ret
- .end put_pixels_axp_asm
diff --git a/libavcodec/alpha/idctdsp_alpha.c b/libavcodec/alpha/idctdsp_alpha.c
deleted file mode 100644
index ff770c15fd..0000000000
--- a/libavcodec/alpha/idctdsp_alpha.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/attributes.h"
-#include "libavcodec/avcodec.h"
-#include "libavcodec/idctdsp.h"
-#include "idctdsp_alpha.h"
-#include "asm.h"
-
-void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
-void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
-
-void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
-void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
-
-#if 0
-/* These functions were the base for the optimized assembler routines,
- and remain here for documentation purposes. */
-static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size)
-{
- int i = 8;
- uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
-
- do {
- uint64_t shorts0, shorts1;
-
- shorts0 = ldq(block);
- shorts0 = maxsw4(shorts0, 0);
- shorts0 = minsw4(shorts0, clampmask);
- stl(pkwb(shorts0), pixels);
-
- shorts1 = ldq(block + 4);
- shorts1 = maxsw4(shorts1, 0);
- shorts1 = minsw4(shorts1, clampmask);
- stl(pkwb(shorts1), pixels + 4);
-
- pixels += line_size;
- block += 8;
- } while (--i);
-}
-
-void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size)
-{
- int h = 8;
- /* Keep this function a leaf function by generating the constants
- manually (mainly for the hack value ;-). */
- uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
- uint64_t signmask = zap(-1, 0x33);
- signmask ^= signmask >> 1; /* 0x8000800080008000 */
-
- do {
- uint64_t shorts0, pix0, signs0;
- uint64_t shorts1, pix1, signs1;
-
- shorts0 = ldq(block);
- shorts1 = ldq(block + 4);
-
- pix0 = unpkbw(ldl(pixels));
- /* Signed subword add (MMX paddw). */
- signs0 = shorts0 & signmask;
- shorts0 &= ~signmask;
- shorts0 += pix0;
- shorts0 ^= signs0;
- /* Clamp. */
- shorts0 = maxsw4(shorts0, 0);
- shorts0 = minsw4(shorts0, clampmask);
-
- /* Next 4. */
- pix1 = unpkbw(ldl(pixels + 4));
- signs1 = shorts1 & signmask;
- shorts1 &= ~signmask;
- shorts1 += pix1;
- shorts1 ^= signs1;
- shorts1 = maxsw4(shorts1, 0);
- shorts1 = minsw4(shorts1, clampmask);
-
- stl(pkwb(shorts0), pixels);
- stl(pkwb(shorts1), pixels + 4);
-
- pixels += line_size;
- block += 8;
- } while (--h);
-}
-#endif
-
-av_cold void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx,
- unsigned high_bit_depth)
-{
- /* amask clears all bits that correspond to present features. */
- if (amask(AMASK_MVI) == 0) {
- c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
- c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
- }
-
- put_pixels_clamped_axp_p = c->put_pixels_clamped;
- add_pixels_clamped_axp_p = c->add_pixels_clamped;
-
- if (!high_bit_depth && !avctx->lowres &&
- (avctx->idct_algo == FF_IDCT_AUTO)) {
- c->idct_put = ff_simple_idct_put_axp;
- c->idct_add = ff_simple_idct_add_axp;
- c->idct = ff_simple_idct_axp;
- }
-}
diff --git a/libavcodec/alpha/idctdsp_alpha.h b/libavcodec/alpha/idctdsp_alpha.h
deleted file mode 100644
index 8cc969d7de..0000000000
--- a/libavcodec/alpha/idctdsp_alpha.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_ALPHA_IDCTDSP_ALPHA_H
-#define AVCODEC_ALPHA_IDCTDSP_ALPHA_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-extern void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
-extern void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
-
-void ff_simple_idct_axp(int16_t *block);
-void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
-
-#endif /* AVCODEC_ALPHA_IDCTDSP_ALPHA_H */
diff --git a/libavcodec/alpha/idctdsp_alpha_asm.S b/libavcodec/alpha/idctdsp_alpha_asm.S
deleted file mode 100644
index f545df9e4f..0000000000
--- a/libavcodec/alpha/idctdsp_alpha_asm.S
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Alpha optimized IDCT-related routines
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/*
- * These functions are scheduled for pca56. They should work
- * reasonably on ev6, though.
- */
-
-#include "regdef.h"
-
- .set noat
- .set noreorder
- .arch pca56
- .text
-
-/************************************************************************
- * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
- * ptrdiff_t line_size)
- */
- .align 6
- .globl put_pixels_clamped_mvi_asm
- .ent put_pixels_clamped_mvi_asm
-put_pixels_clamped_mvi_asm:
- .frame sp, 0, ra
- .prologue 0
-
- lda t8, -1
- lda t9, 8 # loop counter
- zap t8, 0xaa, t8 # 00ff00ff00ff00ff
-
- .align 4
-1: ldq t0, 0(a0)
- ldq t1, 8(a0)
- ldq t2, 16(a0)
- ldq t3, 24(a0)
-
- maxsw4 t0, zero, t0
- subq t9, 2, t9
- maxsw4 t1, zero, t1
- lda a0, 32(a0)
-
- maxsw4 t2, zero, t2
- addq a1, a2, ta
- maxsw4 t3, zero, t3
- minsw4 t0, t8, t0
-
- minsw4 t1, t8, t1
- minsw4 t2, t8, t2
- minsw4 t3, t8, t3
- pkwb t0, t0
-
- pkwb t1, t1
- pkwb t2, t2
- pkwb t3, t3
- stl t0, 0(a1)
-
- stl t1, 4(a1)
- addq ta, a2, a1
- stl t2, 0(ta)
- stl t3, 4(ta)
-
- bne t9, 1b
- ret
- .end put_pixels_clamped_mvi_asm
-
-/************************************************************************
- * void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
- * ptrdiff_t line_size)
- */
- .align 6
- .globl add_pixels_clamped_mvi_asm
- .ent add_pixels_clamped_mvi_asm
-add_pixels_clamped_mvi_asm:
- .frame sp, 0, ra
- .prologue 0
-
- lda t1, -1
- lda th, 8
- zap t1, 0x33, tg
- nop
-
- srl tg, 1, t0
- xor tg, t0, tg # 0x8000800080008000
- zap t1, 0xaa, tf # 0x00ff00ff00ff00ff
-
- .align 4
-1: ldl t1, 0(a1) # pix0 (try to hit cache line soon)
- ldl t4, 4(a1) # pix1
- addq a1, a2, te # pixels += line_size
- ldq t0, 0(a0) # shorts0
-
- ldl t7, 0(te) # pix2 (try to hit cache line soon)
- ldl ta, 4(te) # pix3
- ldq t3, 8(a0) # shorts1
- ldq t6, 16(a0) # shorts2
-
- ldq t9, 24(a0) # shorts3
- unpkbw t1, t1 # 0 0 (quarter/op no.)
- and t0, tg, t2 # 0 1
- unpkbw t4, t4 # 1 0
-
- bic t0, tg, t0 # 0 2
- unpkbw t7, t7 # 2 0
- and t3, tg, t5 # 1 1
- addq t0, t1, t0 # 0 3
-
- xor t0, t2, t0 # 0 4
- unpkbw ta, ta # 3 0
- and t6, tg, t8 # 2 1
- maxsw4 t0, zero, t0 # 0 5
-
- bic t3, tg, t3 # 1 2
- bic t6, tg, t6 # 2 2
- minsw4 t0, tf, t0 # 0 6
- addq t3, t4, t3 # 1 3
-
- pkwb t0, t0 # 0 7
- xor t3, t5, t3 # 1 4
- maxsw4 t3, zero, t3 # 1 5
- addq t6, t7, t6 # 2 3
-
- xor t6, t8, t6 # 2 4
- and t9, tg, tb # 3 1
- minsw4 t3, tf, t3 # 1 6
- bic t9, tg, t9 # 3 2
-
- maxsw4 t6, zero, t6 # 2 5
- addq t9, ta, t9 # 3 3
- stl t0, 0(a1) # 0 8
- minsw4 t6, tf, t6 # 2 6
-
- xor t9, tb, t9 # 3 4
- maxsw4 t9, zero, t9 # 3 5
- lda a0, 32(a0) # block += 16;
- pkwb t3, t3 # 1 7
-
- minsw4 t9, tf, t9 # 3 6
- subq th, 2, th
- pkwb t6, t6 # 2 7
- pkwb t9, t9 # 3 7
-
- stl t3, 4(a1) # 1 8
- addq te, a2, a1 # pixels += line_size
- stl t6, 0(te) # 2 8
- stl t9, 4(te) # 3 8
-
- bne th, 1b
- ret
- .end add_pixels_clamped_mvi_asm
diff --git a/libavcodec/alpha/me_cmp_alpha.c b/libavcodec/alpha/me_cmp_alpha.c
deleted file mode 100644
index 0c1a4a62c5..0000000000
--- a/libavcodec/alpha/me_cmp_alpha.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/attributes.h"
-#include "libavcodec/me_cmp.h"
-#include "asm.h"
-
-int pix_abs16x16_mvi_asm(struct MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t line_size, int h);
-
-static inline uint64_t avg2(uint64_t a, uint64_t b)
-{
- return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
-{
- uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
- + ((l2 & ~BYTE_VEC(0x03)) >> 2)
- + ((l3 & ~BYTE_VEC(0x03)) >> 2)
- + ((l4 & ~BYTE_VEC(0x03)) >> 2);
- uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
- + (l2 & BYTE_VEC(0x03))
- + (l3 & BYTE_VEC(0x03))
- + (l4 & BYTE_VEC(0x03))
- + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
- return r1 + r2;
-}
-
-static int pix_abs8x8_mvi(struct MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t line_size, int h)
-{
- int result = 0;
-
- if ((size_t) pix2 & 0x7) {
- /* works only when pix2 is actually unaligned */
- do { /* do 8 pixel a time */
- uint64_t p1, p2;
-
- p1 = ldq(pix1);
- p2 = uldq(pix2);
- result += perr(p1, p2);
-
- pix1 += line_size;
- pix2 += line_size;
- } while (--h);
- } else {
- do {
- uint64_t p1, p2;
-
- p1 = ldq(pix1);
- p2 = ldq(pix2);
- result += perr(p1, p2);
-
- pix1 += line_size;
- pix2 += line_size;
- } while (--h);
- }
-
- return result;
-}
-
-static int pix_abs16x16_x2_mvi(struct MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t line_size, int h)
-{
- int result = 0;
- uint64_t disalign = (size_t) pix2 & 0x7;
-
- switch (disalign) {
- case 0:
- do {
- uint64_t p1_l, p1_r, p2_l, p2_r;
- uint64_t l, r;
-
- p1_l = ldq(pix1);
- p1_r = ldq(pix1 + 8);
- l = ldq(pix2);
- r = ldq(pix2 + 8);
- p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56));
- p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56));
- pix1 += line_size;
- pix2 += line_size;
-
- result += perr(p1_l, p2_l)
- + perr(p1_r, p2_r);
- } while (--h);
- break;
- case 7:
- /* |.......l|lllllllr|rrrrrrr*|
- This case is special because disalign1 would be 8, which
- gets treated as 0 by extqh. At least it is a bit faster
- that way :) */
- do {
- uint64_t p1_l, p1_r, p2_l, p2_r;
- uint64_t l, m, r;
-
- p1_l = ldq(pix1);
- p1_r = ldq(pix1 + 8);
- l = ldq_u(pix2);
- m = ldq_u(pix2 + 8);
- r = ldq_u(pix2 + 16);
- p2_l = avg2(extql(l, disalign) | extqh(m, disalign), m);
- p2_r = avg2(extql(m, disalign) | extqh(r, disalign), r);
- pix1 += line_size;
- pix2 += line_size;
-
- result += perr(p1_l, p2_l)
- + perr(p1_r, p2_r);
- } while (--h);
- break;
- default:
- do {
- uint64_t disalign1 = disalign + 1;
- uint64_t p1_l, p1_r, p2_l, p2_r;
- uint64_t l, m, r;
-
- p1_l = ldq(pix1);
- p1_r = ldq(pix1 + 8);
- l = ldq_u(pix2);
- m = ldq_u(pix2 + 8);
- r = ldq_u(pix2 + 16);
- p2_l = avg2(extql(l, disalign) | extqh(m, disalign),
- extql(l, disalign1) | extqh(m, disalign1));
- p2_r = avg2(extql(m, disalign) | extqh(r, disalign),
- extql(m, disalign1) | extqh(r, disalign1));
- pix1 += line_size;
- pix2 += line_size;
-
- result += perr(p1_l, p2_l)
- + perr(p1_r, p2_r);
- } while (--h);
- break;
- }
- return result;
-}
-
-static int pix_abs16x16_y2_mvi(struct MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t line_size, int h)
-{
- int result = 0;
-
- if ((size_t) pix2 & 0x7) {
- uint64_t t, p2_l, p2_r;
- t = ldq_u(pix2 + 8);
- p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
- p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
-
- do {
- uint64_t p1_l, p1_r, np2_l, np2_r;
- uint64_t t;
-
- p1_l = ldq(pix1);
- p1_r = ldq(pix1 + 8);
- pix2 += line_size;
- t = ldq_u(pix2 + 8);
- np2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
- np2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
-
- result += perr(p1_l, avg2(p2_l, np2_l))
- + perr(p1_r, avg2(p2_r, np2_r));
-
- pix1 += line_size;
- p2_l = np2_l;
- p2_r = np2_r;
-
- } while (--h);
- } else {
- uint64_t p2_l, p2_r;
- p2_l = ldq(pix2);
- p2_r = ldq(pix2 + 8);
- do {
- uint64_t p1_l, p1_r, np2_l, np2_r;
-
- p1_l = ldq(pix1);
- p1_r = ldq(pix1 + 8);
- pix2 += line_size;
- np2_l = ldq(pix2);
- np2_r = ldq(pix2 + 8);
-
- result += perr(p1_l, avg2(p2_l, np2_l))
- + perr(p1_r, avg2(p2_r, np2_r));
-
- pix1 += line_size;
- p2_l = np2_l;
- p2_r = np2_r;
- } while (--h);
- }
- return result;
-}
-
-static int pix_abs16x16_xy2_mvi(struct MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t line_size, int h)
-{
- int result = 0;
-
- uint64_t p1_l, p1_r;
- uint64_t p2_l, p2_r, p2_x;
-
- p1_l = ldq(pix1);
- p1_r = ldq(pix1 + 8);
-
- if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
- p2_l = uldq(pix2);
- p2_r = uldq(pix2 + 8);
- p2_x = (uint64_t) pix2[16] << 56;
- } else {
- p2_l = ldq(pix2);
- p2_r = ldq(pix2 + 8);
- p2_x = ldq(pix2 + 16) << 56;
- }
-
- do {
- uint64_t np1_l, np1_r;
- uint64_t np2_l, np2_r, np2_x;
-
- pix1 += line_size;
- pix2 += line_size;
-
- np1_l = ldq(pix1);
- np1_r = ldq(pix1 + 8);
-
- if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
- np2_l = uldq(pix2);
- np2_r = uldq(pix2 + 8);
- np2_x = (uint64_t) pix2[16] << 56;
- } else {
- np2_l = ldq(pix2);
- np2_r = ldq(pix2 + 8);
- np2_x = ldq(pix2 + 16) << 56;
- }
-
- result += perr(p1_l,
- avg4( p2_l, ( p2_l >> 8) | ((uint64_t) p2_r << 56),
- np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56)))
- + perr(p1_r,
- avg4( p2_r, ( p2_r >> 8) | ((uint64_t) p2_x),
- np2_r, (np2_r >> 8) | ((uint64_t) np2_x)));
-
- p1_l = np1_l;
- p1_r = np1_r;
- p2_l = np2_l;
- p2_r = np2_r;
- p2_x = np2_x;
- } while (--h);
-
- return result;
-}
-
-av_cold void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx)
-{
- /* amask clears all bits that correspond to present features. */
- if (amask(AMASK_MVI) == 0) {
- c->sad[0] = pix_abs16x16_mvi_asm;
- c->sad[1] = pix_abs8x8_mvi;
- c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
- c->pix_abs[1][0] = pix_abs8x8_mvi;
- c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
- c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
- c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
- }
-}
diff --git a/libavcodec/alpha/me_cmp_mvi_asm.S b/libavcodec/alpha/me_cmp_mvi_asm.S
deleted file mode 100644
index 183feeb40c..0000000000
--- a/libavcodec/alpha/me_cmp_mvi_asm.S
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "regdef.h"
-
-/* Some nicer register names. */
-#define ta t10
-#define tb t11
-#define tc t12
-#define td AT
-/* Danger: these overlap with the argument list and the return value */
-#define te a5
-#define tf a4
-#define tg a3
-#define th v0
-
- .set noat
- .set noreorder
- .arch pca56
- .text
-
-/*****************************************************************************
- * int pix_abs16x16_mvi_asm(const uint8_t *pix1, const uint8_t *pix2, int line_size)
- *
- * This code is written with a pca56 in mind. For ev6, one should
- * really take the increased latency of 3 cycles for MVI instructions
- * into account.
- *
- * It is important to keep the loading and first use of a register as
- * far apart as possible, because if a register is accessed before it
- * has been fetched from memory, the CPU will stall.
- */
- .align 4
- .globl pix_abs16x16_mvi_asm
- .ent pix_abs16x16_mvi_asm
-pix_abs16x16_mvi_asm:
- .frame sp, 0, ra, 0
- .prologue 0
-
- and a2, 7, t0
- clr v0
- beq t0, $aligned
- .align 4
-$unaligned:
- /* Registers:
- line 0:
- t0: left_u -> left lo -> left
- t1: mid
- t2: right_u -> right hi -> right
- t3: ref left
- t4: ref right
- line 1:
- t5: left_u -> left lo -> left
- t6: mid
- t7: right_u -> right hi -> right
- t8: ref left
- t9: ref right
- temp:
- ta: left hi
- tb: right lo
- tc: error left
- td: error right */
-
- /* load line 0 */
- ldq_u t0, 0(a2) # left_u
- ldq_u t1, 8(a2) # mid
- ldq_u t2, 16(a2) # right_u
- ldq t3, 0(a1) # ref left
- ldq t4, 8(a1) # ref right
- addq a1, a3, a1 # pix1
- addq a2, a3, a2 # pix2
- /* load line 1 */
- ldq_u t5, 0(a2) # left_u
- ldq_u t6, 8(a2) # mid
- ldq_u t7, 16(a2) # right_u
- ldq t8, 0(a1) # ref left
- ldq t9, 8(a1) # ref right
- addq a1, a3, a1 # pix1
- addq a2, a3, a2 # pix2
- /* calc line 0 */
- extql t0, a2, t0 # left lo
- extqh t1, a2, ta # left hi
- extql t1, a2, tb # right lo
- or t0, ta, t0 # left
- extqh t2, a2, t2 # right hi
- perr t3, t0, tc # error left
- or t2, tb, t2 # right
- perr t4, t2, td # error right
- addq v0, tc, v0 # add error left
- addq v0, td, v0 # add error left
- /* calc line 1 */
- extql t5, a2, t5 # left lo
- extqh t6, a2, ta # left hi
- extql t6, a2, tb # right lo
- or t5, ta, t5 # left
- extqh t7, a2, t7 # right hi
- perr t8, t5, tc # error left
- or t7, tb, t7 # right
- perr t9, t7, td # error right
- addq v0, tc, v0 # add error left
- addq v0, td, v0 # add error left
- /* loop */
- subq a4, 2, a4 # h -= 2
- bne a4, $unaligned
- ret
-
- .align 4
-$aligned:
- /* load line 0 */
- ldq t0, 0(a2) # left
- ldq t1, 8(a2) # right
- addq a2, a3, a2 # pix2
- ldq t2, 0(a1) # ref left
- ldq t3, 8(a1) # ref right
- addq a1, a3, a1 # pix1
- /* load line 1 */
- ldq t4, 0(a2) # left
- ldq t5, 8(a2) # right
- addq a2, a3, a2 # pix2
- ldq t6, 0(a1) # ref left
- ldq t7, 8(a1) # ref right
- addq a1, a3, a1 # pix1
- /* load line 2 */
- ldq t8, 0(a2) # left
- ldq t9, 8(a2) # right
- addq a2, a3, a2 # pix2
- ldq ta, 0(a1) # ref left
- ldq tb, 8(a1) # ref right
- addq a1, a3, a1 # pix1
- /* load line 3 */
- ldq tc, 0(a2) # left
- ldq td, 8(a2) # right
- addq a2, a3, a2 # pix2
- ldq te, 0(a1) # ref left
- ldq a0, 8(a1) # ref right
- /* calc line 0 */
- perr t0, t2, t0 # error left
- addq a1, a3, a1 # pix1
- perr t1, t3, t1 # error right
- addq v0, t0, v0 # add error left
- /* calc line 1 */
- perr t4, t6, t0 # error left
- addq v0, t1, v0 # add error right
- perr t5, t7, t1 # error right
- addq v0, t0, v0 # add error left
- /* calc line 2 */
- perr t8, ta, t0 # error left
- addq v0, t1, v0 # add error right
- perr t9, tb, t1 # error right
- addq v0, t0, v0 # add error left
- /* calc line 3 */
- perr tc, te, t0 # error left
- addq v0, t1, v0 # add error right
- perr td, a0, t1 # error right
- addq v0, t0, v0 # add error left
- addq v0, t1, v0 # add error right
- /* loop */
- subq a4, 4, a4 # h -= 4
- bne a4, $aligned
- ret
- .end pix_abs16x16_mvi_asm
diff --git a/libavcodec/alpha/mpegvideo_alpha.c b/libavcodec/alpha/mpegvideo_alpha.c
deleted file mode 100644
index 126fe264a1..0000000000
--- a/libavcodec/alpha/mpegvideo_alpha.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/attributes.h"
-#include "libavcodec/mpegvideo.h"
-#include "asm.h"
-
-static void dct_unquantize_h263_axp(int16_t *block, int n_coeffs,
- uint64_t qscale, uint64_t qadd)
-{
- uint64_t qmul = qscale << 1;
- uint64_t correction = WORD_VEC(qmul * 255 >> 8);
- int i;
-
- qadd = WORD_VEC(qadd);
-
- for(i = 0; i <= n_coeffs; block += 4, i += 4) {
- uint64_t levels, negmask, zeros, add, sub;
-
- levels = ldq(block);
- if (levels == 0)
- continue;
-
-#ifdef __alpha_max__
- /* I don't think the speed difference justifies runtime
- detection. */
- negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
- negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
-#else
- negmask = cmpbge(WORD_VEC(0x7fff), levels);
- negmask &= (negmask >> 1) | (1 << 7);
- negmask = zap(-1, negmask);
-#endif
-
- zeros = cmpbge(0, levels);
- zeros &= zeros >> 1;
- /* zeros |= zeros << 1 is not needed since qadd <= 255, so
- zapping the lower byte suffices. */
-
- levels *= qmul;
- levels -= correction & (negmask << 16);
-
- add = qadd & ~negmask;
- sub = qadd & negmask;
- /* Set qadd to 0 for levels == 0. */
- add = zap(add, zeros);
- levels += add;
- levels -= sub;
-
- stq(levels, block);
- }
-}
-
-static void dct_unquantize_h263_intra_axp(MpegEncContext *s, int16_t *block,
- int n, int qscale)
-{
- int n_coeffs;
- uint64_t qadd;
- int16_t block0 = block[0];
-
- if (!s->h263_aic) {
- if (n < 4)
- block0 *= s->y_dc_scale;
- else
- block0 *= s->c_dc_scale;
- qadd = (qscale - 1) | 1;
- } else {
- qadd = 0;
- }
-
- if(s->ac_pred)
- n_coeffs = 63;
- else
- n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
-
- dct_unquantize_h263_axp(block, n_coeffs, qscale, qadd);
-
- block[0] = block0;
-}
-
-static void dct_unquantize_h263_inter_axp(MpegEncContext *s, int16_t *block,
- int n, int qscale)
-{
- int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
- dct_unquantize_h263_axp(block, n_coeffs, qscale, (qscale - 1) | 1);
-}
-
-av_cold void ff_mpv_common_init_axp(MpegEncContext *s)
-{
- s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp;
- s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp;
-}
diff --git a/libavcodec/alpha/pixblockdsp_alpha.c b/libavcodec/alpha/pixblockdsp_alpha.c
deleted file mode 100644
index c2f1a1d79c..0000000000
--- a/libavcodec/alpha/pixblockdsp_alpha.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * SIMD-optimized pixel operations
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/attributes.h"
-#include "libavcodec/pixblockdsp.h"
-#include "asm.h"
-
-static void get_pixels_mvi(int16_t *restrict block,
- const uint8_t *restrict pixels, ptrdiff_t stride)
-{
- int h = 8;
-
- do {
- uint64_t p;
-
- p = ldq(pixels);
- stq(unpkbw(p), block);
- stq(unpkbw(p >> 32), block + 4);
-
- pixels += stride;
- block += 8;
- } while (--h);
-}
-
-static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- ptrdiff_t stride)
-{
- int h = 8;
- uint64_t mask = 0x4040;
-
- mask |= mask << 16;
- mask |= mask << 32;
- do {
- uint64_t x, y, c, d, a;
- uint64_t signs;
-
- x = ldq(s1);
- y = ldq(s2);
- c = cmpbge(x, y);
- d = x - y;
- a = zap(mask, c); /* We use 0x4040404040404040 here... */
- d += 4 * a; /* ...so we can use s4addq here. */
- signs = zap(-1, c);
-
- stq(unpkbw(d) | (unpkbw(signs) << 8), block);
- stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
-
- s1 += stride;
- s2 += stride;
- block += 8;
- } while (--h);
-}
-
-av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
- unsigned high_bit_depth)
-{
- if (amask(AMASK_MVI) == 0) {
- if (!high_bit_depth)
- c->get_pixels = get_pixels_mvi;
- c->diff_pixels = diff_pixels_mvi;
- }
-}
diff --git a/libavcodec/alpha/regdef.h b/libavcodec/alpha/regdef.h
deleted file mode 100644
index f05577a89b..0000000000
--- a/libavcodec/alpha/regdef.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * copyright (c) 2002 Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/* Some BSDs don't seem to have regdef.h... sigh */
-#ifndef AVCODEC_ALPHA_REGDEF_H
-#define AVCODEC_ALPHA_REGDEF_H
-
-#define v0 $0 /* function return value */
-
-#define t0 $1 /* temporary registers (caller-saved) */
-#define t1 $2
-#define t2 $3
-#define t3 $4
-#define t4 $5
-#define t5 $6
-#define t6 $7
-#define t7 $8
-
-#define s0 $9 /* saved-registers (callee-saved registers) */
-#define s1 $10
-#define s2 $11
-#define s3 $12
-#define s4 $13
-#define s5 $14
-#define s6 $15
-#define fp s6 /* frame-pointer (s6 in frame-less procedures) */
-
-#define a0 $16 /* argument registers (caller-saved) */
-#define a1 $17
-#define a2 $18
-#define a3 $19
-#define a4 $20
-#define a5 $21
-
-#define t8 $22 /* more temps (caller-saved) */
-#define t9 $23
-#define t10 $24
-#define t11 $25
-#define ra $26 /* return address register */
-#define t12 $27
-
-#define pv t12 /* procedure-variable register */
-#define AT $at /* assembler temporary */
-#define gp $29 /* global pointer */
-#define sp $30 /* stack pointer */
-#define zero $31 /* reads as zero, writes are noops */
-
-/* Some nicer register names. */
-#define ta t10
-#define tb t11
-#define tc t12
-#define td AT
-/* Danger: these overlap with the argument list and the return value */
-#define te a5
-#define tf a4
-#define tg a3
-#define th v0
-
-#endif /* AVCODEC_ALPHA_REGDEF_H */
diff --git a/libavcodec/alpha/simple_idct_alpha.c b/libavcodec/alpha/simple_idct_alpha.c
deleted file mode 100644
index 6e377ef243..0000000000
--- a/libavcodec/alpha/simple_idct_alpha.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Simple IDCT (Alpha optimized)
- *
- * Copyright (c) 2001 Michael Niedermayer <michaelni at gmx.at>
- *
- * based upon some outcommented C code from mpeg2dec (idct_mmx.c
- * written by Aaron Holtzman <aholtzma at ess.engr.uvic.ca>)
- *
- * Alpha optimizations by Måns Rullgård <mans at mansr.com>
- * and Falk Hueffner <falk at debian.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "idctdsp_alpha.h"
-#include "asm.h"
-
-// cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
-// W4 is actually exactly 16384, but using 16383 works around
-// accumulating rounding errors for some encoders
-#define W1 22725
-#define W2 21407
-#define W3 19266
-#define W4 16383
-#define W5 12873
-#define W6 8867
-#define W7 4520
-#define ROW_SHIFT 11
-#define COL_SHIFT 20
-
-/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */
-static inline int idct_row(int16_t *row)
-{
- int a0, a1, a2, a3, b0, b1, b2, b3, t;
- uint64_t l, r, t2;
- l = ldq(row);
- r = ldq(row + 4);
-
- if (l == 0 && r == 0)
- return 0;
-
- a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
-
- if (((l & ~0xffffUL) | r) == 0) {
- a0 >>= ROW_SHIFT;
- t2 = (uint16_t) a0;
- t2 |= t2 << 16;
- t2 |= t2 << 32;
-
- stq(t2, row);
- stq(t2, row + 4);
- return 1;
- }
-
- a1 = a0;
- a2 = a0;
- a3 = a0;
-
- t = extwl(l, 4); /* row[2] */
- if (t != 0) {
- t = sextw(t);
- a0 += W2 * t;
- a1 += W6 * t;
- a2 -= W6 * t;
- a3 -= W2 * t;
- }
-
- t = extwl(r, 0); /* row[4] */
- if (t != 0) {
- t = sextw(t);
- a0 += W4 * t;
- a1 -= W4 * t;
- a2 -= W4 * t;
- a3 += W4 * t;
- }
-
- t = extwl(r, 4); /* row[6] */
- if (t != 0) {
- t = sextw(t);
- a0 += W6 * t;
- a1 -= W2 * t;
- a2 += W2 * t;
- a3 -= W6 * t;
- }
-
- t = extwl(l, 2); /* row[1] */
- if (t != 0) {
- t = sextw(t);
- b0 = W1 * t;
- b1 = W3 * t;
- b2 = W5 * t;
- b3 = W7 * t;
- } else {
- b0 = 0;
- b1 = 0;
- b2 = 0;
- b3 = 0;
- }
-
- t = extwl(l, 6); /* row[3] */
- if (t) {
- t = sextw(t);
- b0 += W3 * t;
- b1 -= W7 * t;
- b2 -= W1 * t;
- b3 -= W5 * t;
- }
-
-
- t = extwl(r, 2); /* row[5] */
- if (t) {
- t = sextw(t);
- b0 += W5 * t;
- b1 -= W1 * t;
- b2 += W7 * t;
- b3 += W3 * t;
- }
-
- t = extwl(r, 6); /* row[7] */
- if (t) {
- t = sextw(t);
- b0 += W7 * t;
- b1 -= W5 * t;
- b2 += W3 * t;
- b3 -= W1 * t;
- }
-
- row[0] = (a0 + b0) >> ROW_SHIFT;
- row[1] = (a1 + b1) >> ROW_SHIFT;
- row[2] = (a2 + b2) >> ROW_SHIFT;
- row[3] = (a3 + b3) >> ROW_SHIFT;
- row[4] = (a3 - b3) >> ROW_SHIFT;
- row[5] = (a2 - b2) >> ROW_SHIFT;
- row[6] = (a1 - b1) >> ROW_SHIFT;
- row[7] = (a0 - b0) >> ROW_SHIFT;
-
- return 2;
-}
-
-static inline void idct_col(int16_t *col)
-{
- int a0, a1, a2, a3, b0, b1, b2, b3;
-
- col[0] += (1 << (COL_SHIFT - 1)) / W4;
-
- a0 = W4 * col[8 * 0];
- a1 = W4 * col[8 * 0];
- a2 = W4 * col[8 * 0];
- a3 = W4 * col[8 * 0];
-
- if (col[8 * 2]) {
- a0 += W2 * col[8 * 2];
- a1 += W6 * col[8 * 2];
- a2 -= W6 * col[8 * 2];
- a3 -= W2 * col[8 * 2];
- }
-
- if (col[8 * 4]) {
- a0 += W4 * col[8 * 4];
- a1 -= W4 * col[8 * 4];
- a2 -= W4 * col[8 * 4];
- a3 += W4 * col[8 * 4];
- }
-
- if (col[8 * 6]) {
- a0 += W6 * col[8 * 6];
- a1 -= W2 * col[8 * 6];
- a2 += W2 * col[8 * 6];
- a3 -= W6 * col[8 * 6];
- }
-
- if (col[8 * 1]) {
- b0 = W1 * col[8 * 1];
- b1 = W3 * col[8 * 1];
- b2 = W5 * col[8 * 1];
- b3 = W7 * col[8 * 1];
- } else {
- b0 = 0;
- b1 = 0;
- b2 = 0;
- b3 = 0;
- }
-
- if (col[8 * 3]) {
- b0 += W3 * col[8 * 3];
- b1 -= W7 * col[8 * 3];
- b2 -= W1 * col[8 * 3];
- b3 -= W5 * col[8 * 3];
- }
-
- if (col[8 * 5]) {
- b0 += W5 * col[8 * 5];
- b1 -= W1 * col[8 * 5];
- b2 += W7 * col[8 * 5];
- b3 += W3 * col[8 * 5];
- }
-
- if (col[8 * 7]) {
- b0 += W7 * col[8 * 7];
- b1 -= W5 * col[8 * 7];
- b2 += W3 * col[8 * 7];
- b3 -= W1 * col[8 * 7];
- }
-
- col[8 * 0] = (a0 + b0) >> COL_SHIFT;
- col[8 * 7] = (a0 - b0) >> COL_SHIFT;
- col[8 * 1] = (a1 + b1) >> COL_SHIFT;
- col[8 * 6] = (a1 - b1) >> COL_SHIFT;
- col[8 * 2] = (a2 + b2) >> COL_SHIFT;
- col[8 * 5] = (a2 - b2) >> COL_SHIFT;
- col[8 * 3] = (a3 + b3) >> COL_SHIFT;
- col[8 * 4] = (a3 - b3) >> COL_SHIFT;
-}
-
-/* If all rows but the first one are zero after row transformation,
- all rows will be identical after column transformation. */
-static inline void idct_col2(int16_t *col)
-{
- int i;
- uint64_t l, r;
-
- for (i = 0; i < 8; ++i) {
- int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
-
- a0 *= W4;
- col[i] = a0 >> COL_SHIFT;
- }
-
- l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
- stq(l, col + 2 * 4); stq(r, col + 3 * 4);
- stq(l, col + 4 * 4); stq(r, col + 5 * 4);
- stq(l, col + 6 * 4); stq(r, col + 7 * 4);
- stq(l, col + 8 * 4); stq(r, col + 9 * 4);
- stq(l, col + 10 * 4); stq(r, col + 11 * 4);
- stq(l, col + 12 * 4); stq(r, col + 13 * 4);
- stq(l, col + 14 * 4); stq(r, col + 15 * 4);
-}
-
-void ff_simple_idct_axp(int16_t *block)
-{
-
- int i;
- int rowsZero = 1; /* all rows except row 0 zero */
- int rowsConstant = 1; /* all rows consist of a constant value */
-
- for (i = 0; i < 8; i++) {
- int sparseness = idct_row(block + 8 * i);
-
- if (i > 0 && sparseness > 0)
- rowsZero = 0;
- if (sparseness == 2)
- rowsConstant = 0;
- }
-
- if (rowsZero) {
- idct_col2(block);
- } else if (rowsConstant) {
- idct_col(block);
- for (i = 0; i < 8; i += 2) {
- uint64_t v = (uint16_t) block[0];
- uint64_t w = (uint16_t) block[8];
-
- v |= v << 16;
- w |= w << 16;
- v |= v << 32;
- w |= w << 32;
- stq(v, block + 0 * 4);
- stq(v, block + 1 * 4);
- stq(w, block + 2 * 4);
- stq(w, block + 3 * 4);
- block += 4 * 4;
- }
- } else {
- for (i = 0; i < 8; i++)
- idct_col(block + i);
- }
-}
-
-void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
-{
- ff_simple_idct_axp(block);
- put_pixels_clamped_axp_p(block, dest, line_size);
-}
-
-void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
-{
- ff_simple_idct_axp(block);
- add_pixels_clamped_axp_p(block, dest, line_size);
-}
--
2.39.2
More information about the ffmpeg-devel
mailing list