[FFmpeg-devel] [PATCH v3 2/3] libavcodec/mips: Fix build errors reported by clang
yinshiyou-hf at loongson.cn
Tue Jun 1 16:02:54 EEST 2021
> -----Original Message-----
> From: "Jin Bo" <jinbo at loongson.cn>
> Sent: 2021-06-01 14:22:09 (Tuesday)
> To: ffmpeg-devel at ffmpeg.org
> Cc: "Jin Bo" <jinbo at loongson.cn>
> 主题: [FFmpeg-devel] [PATCH v3 2/3] libavcodec/mips: Fix build errors reported by clang
>
> Clang is stricter about the types of inline asm operands: float or
> double variables must use constraint 'f', while integer variables
> must use constraint 'r'.
>
> Signed-off-by: Jin Bo <jinbo at loongson.cn>
> ---
> libavcodec/mips/constants.c | 89 +++++++------
> libavcodec/mips/constants.h | 88 +++++++------
> libavcodec/mips/h264chroma_mmi.c | 157 +++++++++++------------
> libavcodec/mips/h264dsp_mmi.c | 20 +--
> libavcodec/mips/h264pred_mmi.c | 23 ++--
> libavcodec/mips/h264qpel_mmi.c | 34 ++---
> libavcodec/mips/hevcdsp_mmi.c | 59 +++++----
> libavcodec/mips/idctdsp_mmi.c | 2 +-
> libavcodec/mips/mpegvideo_mmi.c | 20 +--
> libavcodec/mips/vc1dsp_mmi.c | 176 +++++++++++++-------------
> libavcodec/mips/vp8dsp_mmi.c | 263 +++++++++++++++++++++++++++++----------
> libavutil/mips/asmdefs.h | 8 ++
> 12 files changed, 536 insertions(+), 403 deletions(-)
>
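(For reviewers following along: the root cause is that clang, unlike
gcc, refuses to bind an integer-typed lvalue to an 'f' (FPU register)
constraint in MIPS inline asm. The recurring fix below keeps the value
in a two-member union and hands the asm the double-typed view. A
minimal sketch, assuming the mmi_intfloat64 union this series adds to
libavutil/mips/asmdefs.h -- the function and variable names here are
hypothetical, not from the patch:

    #include "libavutil/mips/asmdefs.h"

    /* Broadcast the low halfword of x across a 64-bit MMI register. */
    static double broadcast_halfwords(int x)
    {
        union mmi_intfloat64 a;
        double zero;
        a.i = x;                  /* integer arithmetic stays on .i */
        __asm__ volatile (
            "pxor   %[zero], %[zero], %[zero]   \n\t"
            "pshufh %[a],    %[a],    %[zero]   \n\t"
            /* 'f' operands must be float/double under clang, so
             * the double view .f is passed, not the int view .i */
            : [a]"+f"(a.f), [zero]"=&f"(zero)
        );
        return a.f;
    }

The bit pattern is untouched; only the C-level type seen by the
constraint checker changes.)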
> diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
> index 8c990b6..6a60dd3 100644
> --- a/libavcodec/mips/constants.c
> +++ b/libavcodec/mips/constants.c
> @@ -19,50 +19,49 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> */
>
> -#include "config.h"
> -#include "libavutil/mem_internal.h"
> +#include "libavutil/intfloat.h"
> #include "constants.h"
>
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) = {0x0001000100010001ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) = {0x0002000200020002ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) = {0x0003000300030003ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) = {0x0004000400040004ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) = {0x0005000500050005ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) = {0x0006000600060006ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) = {0x0008000800080008ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) = {0x0009000900090009ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) = {0x000A000A000A000AULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_12) = {0x000C000C000C000CULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = {0x000F000F000F000FULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) = {0x0010001000100010ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_17) = {0x0011001100110011ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) = {0x0012001200120012ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = {0x0014001400140014ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_22) = {0x0016001600160016ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) = {0x001C001C001C001CULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) = {0x0020002000200020ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = {0x0035003500350035ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_64) = {0x0040004000400040ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = {0x0080008000800080ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_512) = {0x0200020002000200ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_m8tom5) = {0xFFFBFFFAFFF9FFF8ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_m4tom1) = {0xFFFFFFFEFFFDFFFCULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) = {0x0004000300020001ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) = {0x0008000700060005ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) = {0x0003000200010000ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) = {0x0007000600050004ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) = {0x000b000a00090008ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) = {0x000f000e000d000cULL};
> -
> -DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) = {0x0101010101010101ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) = {0x0303030303030303ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pb_80) = {0x8080808080808080ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) = {0xA1A1A1A1A1A1A1A1ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_pb_FE) = {0xFEFEFEFEFEFEFEFEULL};
> -
> -DECLARE_ALIGNED(8, const uint64_t, ff_rnd) = {0x0004000400040004ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) = {0x0040004000400040ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) = {0x0020002000200020ULL};
> -
> -DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL};
> -DECLARE_ALIGNED(8, const uint64_t, ff_d40000) = {0x0000000000040000ULL};
> +const union av_intfloat64 ff_pw_1 = {0x0001000100010001ULL};
> +const union av_intfloat64 ff_pw_2 = {0x0002000200020002ULL};
> +const union av_intfloat64 ff_pw_3 = {0x0003000300030003ULL};
> +const union av_intfloat64 ff_pw_4 = {0x0004000400040004ULL};
> +const union av_intfloat64 ff_pw_5 = {0x0005000500050005ULL};
> +const union av_intfloat64 ff_pw_6 = {0x0006000600060006ULL};
> +const union av_intfloat64 ff_pw_8 = {0x0008000800080008ULL};
> +const union av_intfloat64 ff_pw_9 = {0x0009000900090009ULL};
> +const union av_intfloat64 ff_pw_10 = {0x000A000A000A000AULL};
> +const union av_intfloat64 ff_pw_12 = {0x000C000C000C000CULL};
> +const union av_intfloat64 ff_pw_15 = {0x000F000F000F000FULL};
> +const union av_intfloat64 ff_pw_16 = {0x0010001000100010ULL};
> +const union av_intfloat64 ff_pw_17 = {0x0011001100110011ULL};
> +const union av_intfloat64 ff_pw_18 = {0x0012001200120012ULL};
> +const union av_intfloat64 ff_pw_20 = {0x0014001400140014ULL};
> +const union av_intfloat64 ff_pw_22 = {0x0016001600160016ULL};
> +const union av_intfloat64 ff_pw_28 = {0x001C001C001C001CULL};
> +const union av_intfloat64 ff_pw_32 = {0x0020002000200020ULL};
> +const union av_intfloat64 ff_pw_53 = {0x0035003500350035ULL};
> +const union av_intfloat64 ff_pw_64 = {0x0040004000400040ULL};
> +const union av_intfloat64 ff_pw_128 = {0x0080008000800080ULL};
> +const union av_intfloat64 ff_pw_512 = {0x0200020002000200ULL};
> +const union av_intfloat64 ff_pw_m8tom5 = {0xFFFBFFFAFFF9FFF8ULL};
> +const union av_intfloat64 ff_pw_m4tom1 = {0xFFFFFFFEFFFDFFFCULL};
> +const union av_intfloat64 ff_pw_1to4 = {0x0004000300020001ULL};
> +const union av_intfloat64 ff_pw_5to8 = {0x0008000700060005ULL};
> +const union av_intfloat64 ff_pw_0to3 = {0x0003000200010000ULL};
> +const union av_intfloat64 ff_pw_4to7 = {0x0007000600050004ULL};
> +const union av_intfloat64 ff_pw_8tob = {0x000b000a00090008ULL};
> +const union av_intfloat64 ff_pw_ctof = {0x000f000e000d000cULL};
> +const union av_intfloat64 ff_pw_32_1 = {0x0000000100000001ULL};
> +const union av_intfloat64 ff_pw_32_4 = {0x0000000400000004ULL};
> +const union av_intfloat64 ff_pw_32_64 = {0x0000004000000040ULL};
> +const union av_intfloat64 ff_pb_1 = {0x0101010101010101ULL};
> +const union av_intfloat64 ff_pb_3 = {0x0303030303030303ULL};
> +const union av_intfloat64 ff_pb_80 = {0x8080808080808080ULL};
> +const union av_intfloat64 ff_pb_A1 = {0xA1A1A1A1A1A1A1A1ULL};
> +const union av_intfloat64 ff_pb_FE = {0xFEFEFEFEFEFEFEFEULL};
> +const union av_intfloat64 ff_rnd = {0x0004000400040004ULL};
> +const union av_intfloat64 ff_rnd2 = {0x0040004000400040ULL};
> +const union av_intfloat64 ff_rnd3 = {0x0020002000200020ULL};
> +const union av_intfloat64 ff_wm1010 = {0xFFFF0000FFFF0000ULL};
> +const union av_intfloat64 ff_d40000 = {0x0000000000040000ULL};
> diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
> index 2604559..df54b30 100644
> --- a/libavcodec/mips/constants.h
> +++ b/libavcodec/mips/constants.h
> @@ -22,50 +22,48 @@
> #ifndef AVCODEC_MIPS_CONSTANTS_H
> #define AVCODEC_MIPS_CONSTANTS_H
>
> -#include <stdint.h>
> -
> -extern const uint64_t ff_pw_1;
> -extern const uint64_t ff_pw_2;
> -extern const uint64_t ff_pw_3;
> -extern const uint64_t ff_pw_4;
> -extern const uint64_t ff_pw_5;
> -extern const uint64_t ff_pw_6;
> -extern const uint64_t ff_pw_8;
> -extern const uint64_t ff_pw_9;
> -extern const uint64_t ff_pw_10;
> -extern const uint64_t ff_pw_12;
> -extern const uint64_t ff_pw_15;
> -extern const uint64_t ff_pw_16;
> -extern const uint64_t ff_pw_17;
> -extern const uint64_t ff_pw_18;
> -extern const uint64_t ff_pw_20;
> -extern const uint64_t ff_pw_22;
> -extern const uint64_t ff_pw_28;
> -extern const uint64_t ff_pw_32;
> -extern const uint64_t ff_pw_53;
> -extern const uint64_t ff_pw_64;
> -extern const uint64_t ff_pw_128;
> -extern const uint64_t ff_pw_512;
> -extern const uint64_t ff_pw_m8tom5;
> -extern const uint64_t ff_pw_m4tom1;
> -extern const uint64_t ff_pw_1to4;
> -extern const uint64_t ff_pw_5to8;
> -extern const uint64_t ff_pw_0to3;
> -extern const uint64_t ff_pw_4to7;
> -extern const uint64_t ff_pw_8tob;
> -extern const uint64_t ff_pw_ctof;
> -
> -extern const uint64_t ff_pb_1;
> -extern const uint64_t ff_pb_3;
> -extern const uint64_t ff_pb_80;
> -extern const uint64_t ff_pb_A1;
> -extern const uint64_t ff_pb_FE;
> -
> -extern const uint64_t ff_rnd;
> -extern const uint64_t ff_rnd2;
> -extern const uint64_t ff_rnd3;
> -
> -extern const uint64_t ff_wm1010;
> -extern const uint64_t ff_d40000;
> +extern const union av_intfloat64 ff_pw_1;
> +extern const union av_intfloat64 ff_pw_2;
> +extern const union av_intfloat64 ff_pw_3;
> +extern const union av_intfloat64 ff_pw_4;
> +extern const union av_intfloat64 ff_pw_5;
> +extern const union av_intfloat64 ff_pw_6;
> +extern const union av_intfloat64 ff_pw_8;
> +extern const union av_intfloat64 ff_pw_9;
> +extern const union av_intfloat64 ff_pw_10;
> +extern const union av_intfloat64 ff_pw_12;
> +extern const union av_intfloat64 ff_pw_15;
> +extern const union av_intfloat64 ff_pw_16;
> +extern const union av_intfloat64 ff_pw_17;
> +extern const union av_intfloat64 ff_pw_18;
> +extern const union av_intfloat64 ff_pw_20;
> +extern const union av_intfloat64 ff_pw_22;
> +extern const union av_intfloat64 ff_pw_28;
> +extern const union av_intfloat64 ff_pw_32;
> +extern const union av_intfloat64 ff_pw_53;
> +extern const union av_intfloat64 ff_pw_64;
> +extern const union av_intfloat64 ff_pw_128;
> +extern const union av_intfloat64 ff_pw_512;
> +extern const union av_intfloat64 ff_pw_m8tom5;
> +extern const union av_intfloat64 ff_pw_m4tom1;
> +extern const union av_intfloat64 ff_pw_1to4;
> +extern const union av_intfloat64 ff_pw_5to8;
> +extern const union av_intfloat64 ff_pw_0to3;
> +extern const union av_intfloat64 ff_pw_4to7;
> +extern const union av_intfloat64 ff_pw_8tob;
> +extern const union av_intfloat64 ff_pw_ctof;
> +extern const union av_intfloat64 ff_pw_32_1;
> +extern const union av_intfloat64 ff_pw_32_4;
> +extern const union av_intfloat64 ff_pw_32_64;
> +extern const union av_intfloat64 ff_pb_1;
> +extern const union av_intfloat64 ff_pb_3;
> +extern const union av_intfloat64 ff_pb_80;
> +extern const union av_intfloat64 ff_pb_A1;
> +extern const union av_intfloat64 ff_pb_FE;
> +extern const union av_intfloat64 ff_rnd;
> +extern const union av_intfloat64 ff_rnd2;
> +extern const union av_intfloat64 ff_rnd3;
> +extern const union av_intfloat64 ff_wm1010;
> +extern const union av_intfloat64 ff_d40000;
>
> #endif /* AVCODEC_MIPS_CONSTANTS_H */
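(The av_intfloat64 union replacing the raw uint64_t constants comes
from libavutil/intfloat.h, now included by constants.c. Its first
member is the integer view, so the braced initializers above still
fill in the bit pattern directly, while .f reinterprets the same 64
bits as a double for 'f'-constraint operands:

    /* libavutil/intfloat.h, paraphrased: */
    union av_intfloat64 {
        uint64_t i;
        double   f;
    };

    /* {0x0001000100010001ULL} initializes .i, the first member;
     * ff_pw_1.f is the identical bit pattern typed as double. */

A nice side effect shows up later in h264pred_mmi.c: the same kind of
constant can be passed as "f"(ff_pw_0to3.f) or as "r"(ff_pw_4to7.i),
picking whichever union member matches the constraint.)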
> diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
> index dbcba10..cc2d7cb 100644
> --- a/libavcodec/mips/h264chroma_mmi.c
> +++ b/libavcodec/mips/h264chroma_mmi.c
> @@ -29,12 +29,12 @@
> void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> int h, int x, int y)
> {
> - int A = 64, B, C, D, E;
> double ftmp[12];
> - uint64_t tmp[1];
> + union mmi_intfloat64 A, B, C, D, E;
> + A.i = 64;
>
> if (!(x || y)) {
> - /* x=0, y=0, A=64 */
> + /* x=0, y=0, A.i=64 */
> __asm__ volatile (
> "1: \n\t"
> MMI_ULDC1(%[ftmp0], %[src], 0x00)
> @@ -66,14 +66,13 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> );
> } else if (x && y) {
> /* x!=0, y!=0 */
> - D = x * y;
> - B = (x << 3) - D;
> - C = (y << 3) - D;
> - A = 64 - D - B - C;
> + D.i = x * y;
> + B.i = (x << 3) - D.i;
> + C.i = (y << 3) - D.i;
> + A.i = 64 - D.i - B.i - C.i;
>
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[B], %[B], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp9] \n\t"
> @@ -158,22 +157,21 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
> [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
> - [tmp0]"=&r"(tmp[0]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> - : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D)
> + : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [tmp0]"r"(0x06)
> : "memory"
> );
> } else if (x) {
> /* x!=0, y==0 */
> - E = x << 3;
> - A = 64 - E;
> + E.i = x << 3;
> + A.i = 64 - E.i;
>
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[E], %[E], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp7] \n\t"
> @@ -207,22 +205,20 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> - [tmp0]"=&r"(tmp[0]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [E]"f"(E)
> + [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
> + [A]"f"(A.f), [E]"f"(E.f)
> : "memory"
> );
> } else {
> /* x==0, y!=0 */
> - E = y << 3;
> - A = 64 - E;
> + E.i = y << 3;
> + A.i = 64 - E.i;
>
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[E], %[E], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp7] \n\t"
> @@ -276,12 +272,12 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> - [ftmp8]"=&f"(ftmp[8]), [tmp0]"=&r"(tmp[0]),
> + [ftmp8]"=&f"(ftmp[8]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [E]"f"(E)
> + [ff_pw_32]"f"(ff_pw_32.f), [A]"f"(A.f),
> + [E]"f"(E.f), [tmp0]"r"(0x06)
> : "memory"
> );
> }
> @@ -290,12 +286,12 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> int h, int x, int y)
> {
> - int A = 64, B, C, D, E;
> double ftmp[10];
> - uint64_t tmp[1];
> + union mmi_intfloat64 A, B, C, D, E;
> + A.i = 64;
>
> if(!(x || y)){
> - /* x=0, y=0, A=64 */
> + /* x=0, y=0, A.i=64 */
> __asm__ volatile (
> "1: \n\t"
> MMI_ULDC1(%[ftmp0], %[src], 0x00)
> @@ -323,13 +319,12 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> );
> } else if (x && y) {
> /* x!=0, y!=0 */
> - D = x * y;
> - B = (x << 3) - D;
> - C = (y << 3) - D;
> - A = 64 - D - B - C;
> + D.i = x * y;
> + B.i = (x << 3) - D.i;
> + C.i = (y << 3) - D.i;
> + A.i = 64 - D.i - B.i - C.i;
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[B], %[B], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp9] \n\t"
> @@ -383,21 +378,20 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
> - [tmp0]"=&r"(tmp[0]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> - : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D)
> + : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [tmp0]"r"(0x06)
> : "memory"
> );
> } else if (x) {
> /* x!=0, y==0 */
> - E = x << 3;
> - A = 64 - E;
> + E.i = x << 3;
> + A.i = 64 - E.i;
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[E], %[E], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp7] \n\t"
> @@ -433,21 +427,19 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> - [tmp0]"=&r"(tmp[0]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [E]"f"(E)
> + [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
> + [A]"f"(A.f), [E]"f"(E.f)
> : "memory"
> );
> } else {
> /* x==0, y!=0 */
> - E = y << 3;
> - A = 64 - E;
> + E.i = y << 3;
> + A.i = 64 - E.i;
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[E], %[E], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp7] \n\t"
> @@ -469,8 +461,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> "pmullh %[ftmp6], %[ftmp6], %[E] \n\t"
> "paddh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
>
> - "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
> - "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t"
> + "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
> + "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t"
> "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
> "psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
> "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
> @@ -483,12 +475,11 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> - [tmp0]"=&r"(tmp[0]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [E]"f"(E)
> + [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
> + [A]"f"(A.f), [E]"f"(E.f)
> : "memory"
> );
> }
> @@ -497,20 +488,19 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> int h, int x, int y)
> {
> - const int A = (8 - x) * (8 - y);
> - const int B = x * (8 - y);
> - const int C = (8 - x) * y;
> - const int D = x * y;
> - const int E = B + C;
> double ftmp[8];
> - uint64_t tmp[1];
> mips_reg addr[1];
> + union mmi_intfloat64 A, B, C, D, E;
> DECLARE_VAR_LOW32;
> + A.i = (8 - x) * (8 - y);
> + B.i = x * (8 - y);
> + C.i = (8 - x) * y;
> + D.i = x * y;
> + E.i = B.i + C.i;
>
> - if (D) {
> + if (D.i) {
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[B], %[B], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp7] \n\t"
> @@ -547,20 +537,19 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> - [tmp0]"=&r"(tmp[0]),
> RESTRICT_ASM_LOW32
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> - : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D)
> + : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [tmp0]"r"(0x06)
> : "memory"
> );
> - } else if (E) {
> - const int step = C ? stride : 1;
> + } else if (E.i) {
> + const int step = C.i ? stride : 1;
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[E], %[E], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp5] \n\t"
> @@ -585,14 +574,13 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> - [tmp0]"=&r"(tmp[0]),
> RESTRICT_ASM_LOW32
> [addr0]"=&r"(addr[0]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
> - [ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [E]"f"(E)
> + [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
> + [A]"f"(A.f), [E]"f"(E.f)
> : "memory"
> );
> } else {
> @@ -621,20 +609,19 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> int h, int x, int y)
> {
> - const int A = (8 - x) *(8 - y);
> - const int B = x * (8 - y);
> - const int C = (8 - x) * y;
> - const int D = x * y;
> - const int E = B + C;
> double ftmp[8];
> - uint64_t tmp[1];
> mips_reg addr[1];
> + union mmi_intfloat64 A, B, C, D, E;
> DECLARE_VAR_LOW32;
> + A.i = (8 - x) *(8 - y);
> + B.i = x * (8 - y);
> + C.i = (8 - x) * y;
> + D.i = x * y;
> + E.i = B.i + C.i;
>
> - if (D) {
> + if (D.i) {
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[B], %[B], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp7] \n\t"
> @@ -673,20 +660,19 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
> - [tmp0]"=&r"(tmp[0]),
> RESTRICT_ASM_LOW32
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> - : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D)
> + : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [tmp0]"r"(0x06)
> : "memory"
> );
> - } else if (E) {
> - const int step = C ? stride : 1;
> + } else if (E.i) {
> + const int step = C.i ? stride : 1;
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> - "dli %[tmp0], 0x06 \n\t"
> "pshufh %[A], %[A], %[ftmp0] \n\t"
> "pshufh %[E], %[E], %[ftmp0] \n\t"
> "mtc1 %[tmp0], %[ftmp5] \n\t"
> @@ -713,14 +699,13 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
> [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
> [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
> - [tmp0]"=&r"(tmp[0]),
> RESTRICT_ASM_LOW32
> [addr0]"=&r"(addr[0]),
> [dst]"+&r"(dst), [src]"+&r"(src),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
> - [ff_pw_32]"f"(ff_pw_32),
> - [A]"f"(A), [E]"f"(E)
> + [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
> + [A]"f"(A.f), [E]"f"(E.f)
> : "memory"
> );
> } else {
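(Besides the .f suffixes, this file drops the "dli %[tmp0], 0x06"
instructions: the shift count is now supplied as a plain 'r' input
operand instead of being materialized into a scratch output register,
which also lets the uint64_t tmp[1] locals go away. A reduced sketch
of the before/after, with hypothetical variable names:

    double ftmp7;

    /* Before: a scratch GPR is written inside the asm block. */
    uint64_t tmp0;
    __asm__ volatile (
        "dli  %[tmp0], 0x06      \n\t"
        "mtc1 %[tmp0], %[ftmp7]  \n\t"
        : [tmp0]"=&r"(tmp0), [ftmp7]"=&f"(ftmp7)
    );

    /* After: the constant arrives pre-loaded via an 'r' input. */
    __asm__ volatile (
        "mtc1 %[tmp0], %[ftmp7]  \n\t"
        : [ftmp7]"=&f"(ftmp7)
        : [tmp0]"r"(0x06)
    );

Letting the compiler load the immediate frees one register name and
removes an output operand from each block.)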
> diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
> index fe12b28..6e77995 100644
> --- a/libavcodec/mips/h264dsp_mmi.c
> +++ b/libavcodec/mips/h264dsp_mmi.c
> @@ -162,7 +162,7 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
> RESTRICT_ASM_ADDRT
> [tmp0]"=&r"(tmp[0])
> : [dst]"r"(dst), [block]"r"(block),
> - [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32)
> + [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32.f)
> : "memory"
> );
>
> @@ -1078,7 +1078,7 @@ void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
> RESTRICT_ASM_ALL64
> [output]"+&r"(output), [input]"+&r"(input),
> [qmul]"+&r"(qmul)
> - : [ff_pw_1]"f"(ff_pw_1)
> + : [ff_pw_1]"f"(ff_pw_1.f)
> : "memory"
> );
> }
> @@ -1556,8 +1556,8 @@ void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int bet
> [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
> : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
> [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
> - [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
> - [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
> + [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
> + [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
> : "memory"
> );
> }
> @@ -1866,8 +1866,8 @@ void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
> [addr0]"=&r"(addr[0])
> : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
> [alpha]"r"(alpha), [beta]"r"(beta),
> - [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
> - [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
> + [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
> + [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
> : "memory"
> );
> }
> @@ -1945,7 +1945,7 @@ void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
> [addr0]"=&r"(addr[0])
> : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
> [alpha]"r"(alpha), [beta]"r"(beta),
> - [ff_pb_1]"f"(ff_pb_1)
> + [ff_pb_1]"f"(ff_pb_1.f)
> : "memory"
> );
> }
> @@ -2084,8 +2084,8 @@ void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int be
> [pix]"+&r"(pix)
> : [alpha]"r"(alpha), [beta]"r"(beta),
> [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
> - [ff_pb_1]"f"(ff_pb_1), [ff_pb_3]"f"(ff_pb_3),
> - [ff_pb_A1]"f"(ff_pb_A1)
> + [ff_pb_1]"f"(ff_pb_1.f), [ff_pb_3]"f"(ff_pb_3.f),
> + [ff_pb_A1]"f"(ff_pb_A1.f)
> : "memory"
> );
> }
> @@ -2218,7 +2218,7 @@ void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
> [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
> [pix]"+&r"(pix)
> : [alpha]"r"(alpha), [beta]"r"(beta),
> - [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1)
> + [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1.f)
> : "memory"
> );
> }
> diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
> index f8947a0..480411f 100644
> --- a/libavcodec/mips/h264pred_mmi.c
> +++ b/libavcodec/mips/h264pred_mmi.c
> @@ -155,9 +155,9 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
> void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
> int has_topright, ptrdiff_t stride)
> {
> - uint32_t dc;
> double ftmp[11];
> mips_reg tmp[3];
> + union av_intfloat64 dc;
> DECLARE_VAR_ALL64;
> DECLARE_VAR_ADDRT;
>
> @@ -209,12 +209,12 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
> [ftmp10]"=&f"(ftmp[10]),
> [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
> RESTRICT_ASM_ALL64
> - [dc]"=r"(dc)
> + [dc]"=r"(dc.i)
> : [srcA]"r"((mips_reg)(src-stride-1)),
> [src0]"r"((mips_reg)(src-stride)),
> [src1]"r"((mips_reg)(src-stride+1)),
> [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
> - [ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2)
> + [ff_pb_1]"r"(ff_pb_1.i), [ff_pw_2]"f"(ff_pw_2.f)
> : "memory"
> );
>
> @@ -238,7 +238,7 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
> RESTRICT_ASM_ALL64
> RESTRICT_ASM_ADDRT
> [src]"+&r"(src)
> - : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
> + : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride)
> : "memory"
> );
> }
> @@ -246,9 +246,10 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
> void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
> ptrdiff_t stride)
> {
> - uint32_t dc, dc1, dc2;
> + uint32_t dc1, dc2;
> double ftmp[14];
> mips_reg tmp[1];
> + union av_intfloat64 dc;
>
> const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
> const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
> @@ -322,7 +323,7 @@ void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
> );
>
> dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
> - dc = ((dc1+dc2+8)>>4)*0x01010101U;
> + dc.i = ((dc1+dc2+8)>>4)*0x01010101U;
>
> __asm__ volatile (
> "dli %[tmp0], 0x02 \n\t"
> @@ -344,7 +345,7 @@ void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
> RESTRICT_ASM_ALL64
> RESTRICT_ASM_ADDRT
> [src]"+&r"(src)
> - : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
> + : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride)
> : "memory"
> );
> }
> @@ -965,10 +966,10 @@ static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
> [addr0]"=&r"(addr[0])
> : [src]"r"(src), [stride]"r"((mips_reg)stride),
> [svq3]"r"(svq3), [rv40]"r"(rv40),
> - [ff_pw_m8tom5]"f"(ff_pw_m8tom5), [ff_pw_m4tom1]"f"(ff_pw_m4tom1),
> - [ff_pw_1to4]"f"(ff_pw_1to4), [ff_pw_5to8]"f"(ff_pw_5to8),
> - [ff_pw_0to3]"f"(ff_pw_0to3), [ff_pw_4to7]"r"(ff_pw_4to7),
> - [ff_pw_8tob]"r"(ff_pw_8tob), [ff_pw_ctof]"r"(ff_pw_ctof)
> + [ff_pw_m8tom5]"f"(ff_pw_m8tom5.f),[ff_pw_m4tom1]"f"(ff_pw_m4tom1.f),
> + [ff_pw_1to4]"f"(ff_pw_1to4.f), [ff_pw_5to8]"f"(ff_pw_5to8.f),
> + [ff_pw_0to3]"f"(ff_pw_0to3.f), [ff_pw_4to7]"r"(ff_pw_4to7.i),
> + [ff_pw_8tob]"r"(ff_pw_8tob.i), [ff_pw_ctof]"r"(ff_pw_ctof.i)
> : "memory"
> );
> }
> diff --git a/libavcodec/mips/h264qpel_mmi.c b/libavcodec/mips/h264qpel_mmi.c
> index 72362d3..3482956 100644
> --- a/libavcodec/mips/h264qpel_mmi.c
> +++ b/libavcodec/mips/h264qpel_mmi.c
> @@ -155,8 +155,8 @@ static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [dst]"+&r"(dst), [src]"+&r"(src)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
> - [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
> + [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> @@ -225,8 +225,8 @@ static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [dst]"+&r"(dst), [src]"+&r"(src)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
> - [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
> + [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> @@ -293,8 +293,8 @@ static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [dst]"+&r"(dst), [src]"+&r"(src)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
> - [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
> + [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> @@ -365,8 +365,8 @@ static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [dst]"+&r"(dst), [src]"+&r"(src)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
> - [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
> + [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> @@ -486,7 +486,7 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [dst]"+&r"(dst), [src]"+&r"(src)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> @@ -780,7 +780,7 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [h]"+&r"(h)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
>
> @@ -909,7 +909,7 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [src]"+&r"(src), [dst]"+&r"(dst)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> @@ -1235,7 +1235,7 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [h]"+&r"(h)
> : [dstStride]"r"((mips_reg)dstStride),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
>
> @@ -1306,7 +1306,7 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [tmp]"+&r"(tmp), [src]"+&r"(src)
> : [tmpStride]"r"(8),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
> + [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
> : "memory"
> );
>
> @@ -1567,7 +1567,7 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
> [src]"+&r"(src)
> : [tmp]"r"(tmp), [size]"r"(size),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
>
> @@ -1742,7 +1742,7 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
> [src2]"+&r"(src2), [h]"+&r"(h)
> : [src2Stride]"r"((mips_reg)src2Stride),
> [dstStride]"r"((mips_reg)dstStride),
> - [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> @@ -1870,7 +1870,7 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
> [tmp]"+&r"(tmp), [src]"+&r"(src)
> : [tmpStride]"r"(8),
> [srcStride]"r"((mips_reg)srcStride),
> - [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
> + [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
> : "memory"
> );
>
> @@ -2065,7 +2065,7 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
> [src2]"+&r"(src2)
> : [dstStride]"r"((mips_reg)dstStride),
> [src2Stride]"r"((mips_reg)src2Stride),
> - [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
> + [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
> : "memory"
> );
> }
> diff --git a/libavcodec/mips/hevcdsp_mmi.c b/libavcodec/mips/hevcdsp_mmi.c
> index e89d37e..87fc255 100644
> --- a/libavcodec/mips/hevcdsp_mmi.c
> +++ b/libavcodec/mips/hevcdsp_mmi.c
> @@ -32,7 +32,7 @@ void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src, \
> int x, y; \
> pixel *src = (pixel*)_src - 3; \
> ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
> - uint64_t ftmp[15]; \
> + double ftmp[15]; \
> uint64_t rtmp[1]; \
> const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \
> \
> @@ -132,7 +132,7 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src, \
> ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
> int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
> int16_t *tmp = tmp_array; \
> - uint64_t ftmp[15]; \
> + double ftmp[15]; \
> uint64_t rtmp[1]; \
> \
> src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
> @@ -329,10 +329,12 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst, \
> pixel *dst = (pixel *)_dst; \
> ptrdiff_t dststride = _dststride / sizeof(pixel); \
> const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \
> - uint64_t ftmp[20]; \
> + double ftmp[20]; \
> uint64_t rtmp[1]; \
> - int shift = 7; \
> - int offset = 64; \
> + union av_intfloat64 shift; \
> + union av_intfloat64 offset; \
> + shift.i = 7; \
> + offset.i = 64; \
> \
> x = width >> 2; \
> y = height; \
> @@ -430,9 +432,9 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst, \
> [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \
> [ftmp12]"=&f"(ftmp[12]), [src2]"+&r"(src2), \
> [dst]"+&r"(dst), [src]"+&r"(src), [y]"+&r"(y), [x]"=&r"(x), \
> - [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
> + [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
> : [src_stride]"r"(srcstride), [dst_stride]"r"(dststride), \
> - [filter]"r"(filter), [shift]"f"(shift) \
> + [filter]"r"(filter), [shift]"f"(shift.f) \
> : "memory" \
> ); \
> }
> @@ -463,10 +465,12 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst, \
> ptrdiff_t dststride = _dststride / sizeof(pixel); \
> int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
> int16_t *tmp = tmp_array; \
> - uint64_t ftmp[20]; \
> + double ftmp[20]; \
> uint64_t rtmp[1]; \
> - int shift = 7; \
> - int offset = 64; \
> + union av_intfloat64 shift; \
> + union av_intfloat64 offset; \
> + shift.i = 7; \
> + offset.i = 64; \
> \
> src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
> filter = ff_hevc_qpel_filters[mx - 1]; \
> @@ -659,9 +663,9 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst, \
> [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \
> [ftmp14]"=&f"(ftmp[14]), [src2]"+&r"(src2), \
> [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
> - [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
> + [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
> : [filter]"r"(filter), [stride]"r"(dststride), \
> - [shift]"f"(shift) \
> + [shift]"f"(shift.f) \
> : "memory" \
> ); \
> }
> @@ -692,10 +696,12 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst, \
> const int8_t *filter = ff_hevc_epel_filters[mx - 1]; \
> int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; \
> int16_t *tmp = tmp_array; \
> - uint64_t ftmp[12]; \
> + double ftmp[12]; \
> uint64_t rtmp[1]; \
> - int shift = 7; \
> - int offset = 64; \
> + union av_intfloat64 shift; \
> + union av_intfloat64 offset; \
> + shift.i = 7; \
> + offset.i = 64; \
> \
> src -= (EPEL_EXTRA_BEFORE * srcstride + 1); \
> x = width >> 2; \
> @@ -847,9 +853,9 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst, \
> [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \
> [ftmp10]"=&f"(ftmp[10]), [src2]"+&r"(src2), \
> [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
> - [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
> + [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
> : [filter]"r"(filter), [stride]"r"(dststride), \
> - [shift]"f"(shift) \
> + [shift]"f"(shift.f) \
> : "memory" \
> ); \
> }
> @@ -875,9 +881,10 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst, \
> ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
> pixel *dst = (pixel *)_dst; \
> ptrdiff_t dststride = _dststride / sizeof(pixel); \
> - uint64_t ftmp[12]; \
> + double ftmp[12]; \
> uint64_t rtmp[1]; \
> - int shift = 7; \
> + union av_intfloat64 shift; \
> + shift.i = 7; \
> \
> y = height; \
> x = width >> 3; \
> @@ -959,7 +966,7 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst, \
> [ftmp10]"=&f"(ftmp[10]), [offset]"=&f"(ftmp[11]), \
> [src2]"+&r"(src2), [dst]"+&r"(dst), [src]"+&r"(src), \
> [x]"+&r"(x), [y]"+&r"(y), [rtmp0]"=&r"(rtmp[0]) \
> - : [dststride]"r"(dststride), [shift]"f"(shift), \
> + : [dststride]"r"(dststride), [shift]"f"(shift.f), \
> [srcstride]"r"(srcstride) \
> : "memory" \
> ); \
> @@ -989,10 +996,12 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst, \
> ptrdiff_t dststride = _dststride / sizeof(pixel); \
> int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
> int16_t *tmp = tmp_array; \
> - uint64_t ftmp[20]; \
> + double ftmp[20]; \
> uint64_t rtmp[1]; \
> - int shift = 6; \
> - int offset = 32; \
> + union av_intfloat64 shift; \
> + union av_intfloat64 offset; \
> + shift.i = 6; \
> + offset.i = 32; \
> \
> src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
> filter = ff_hevc_qpel_filters[mx - 1]; \
> @@ -1166,9 +1175,9 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst, \
> [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \
> [ftmp14]"=&f"(ftmp[14]), \
> [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
> - [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
> + [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
> : [filter]"r"(filter), [stride]"r"(dststride), \
> - [shift]"f"(shift) \
> + [shift]"f"(shift.f) \
> : "memory" \
> ); \
> }
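(The hevcdsp changes additionally retype the ftmp scratch arrays from
uint64_t to double, for the same reason as the constants: an "=&f"
output ties the lvalue to an FPU register, so clang requires a
floating-point type. Reduced to its essence:

    double ftmp[1];   /* was: uint64_t ftmp[1]; */
    __asm__ volatile (
        "pxor %[ftmp0], %[ftmp0], %[ftmp0]  \n\t"
        : [ftmp0]"=&f"(ftmp[0])
    );

The shift/offset pairs get the union treatment instead because they
are initialized as integers, and offset is even read-write, as in
"+&f"(offset.f).)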
> diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
> index 0047aef..d22e5ee 100644
> --- a/libavcodec/mips/idctdsp_mmi.c
> +++ b/libavcodec/mips/idctdsp_mmi.c
> @@ -142,7 +142,7 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
> [pixels]"+&r"(pixels)
> : [block]"r"(block),
> [line_size]"r"((mips_reg)line_size),
> - [ff_pb_80]"f"(ff_pb_80)
> + [ff_pb_80]"f"(ff_pb_80.f)
> : "memory"
> );
> }
> diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
> index edaa839..3d5b5e2 100644
> --- a/libavcodec/mips/mpegvideo_mmi.c
> +++ b/libavcodec/mips/mpegvideo_mmi.c
> @@ -28,12 +28,13 @@
> void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
> int n, int qscale)
> {
> - int64_t level, qmul, qadd, nCoeffs;
> + int64_t level, nCoeffs;
> double ftmp[6];
> mips_reg addr[1];
> + union mmi_intfloat64 qmul_u, qadd_u;
> DECLARE_VAR_ALL64;
>
> - qmul = qscale << 1;
> + qmul_u.i = qscale << 1;
> av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>
> if (!s->h263_aic) {
> @@ -41,9 +42,9 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
> level = block[0] * s->y_dc_scale;
> else
> level = block[0] * s->c_dc_scale;
> - qadd = (qscale-1) | 1;
> + qadd_u.i = (qscale-1) | 1;
> } else {
> - qadd = 0;
> + qadd_u.i = 0;
> level = block[0];
> }
>
> @@ -93,7 +94,7 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
> [addr0]"=&r"(addr[0])
> : [block]"r"((mips_reg)(block+nCoeffs)),
> [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
> - [qmul]"f"(qmul), [qadd]"f"(qadd)
> + [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
> : "memory"
> );
>
> @@ -103,13 +104,14 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
> void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
> int n, int qscale)
> {
> - int64_t qmul, qadd, nCoeffs;
> + int64_t nCoeffs;
> double ftmp[6];
> mips_reg addr[1];
> + union mmi_intfloat64 qmul_u, qadd_u;
> DECLARE_VAR_ALL64;
>
> - qmul = qscale << 1;
> - qadd = (qscale - 1) | 1;
> + qmul_u.i = qscale << 1;
> + qadd_u.i = (qscale - 1) | 1;
> av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
> nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
>
> @@ -153,7 +155,7 @@ void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
> [addr0]"=&r"(addr[0])
> : [block]"r"((mips_reg)(block+nCoeffs)),
> [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
> - [qmul]"f"(qmul), [qadd]"f"(qadd)
> + [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
> : "memory"
> );
> }
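(The qmul_u/qadd_u pattern here is the same union trick. The helper
union itself is added to libavutil/mips/asmdefs.h -- 8 lines in the
diffstat, with the hunk below the portion quoted here -- presumably
along these lines; the exact layout is my assumption:

    /* Sketch of the asmdefs.h addition; not quoted above. */
    union mmi_intfloat32 {
        int32_t i;
        float   f;
    };

    union mmi_intfloat64 {
        int64_t i;
        double  f;
    };

Unlike av_intfloat64, the integer members would be signed here, which
matches assignments like qadd_u.i = (qscale - 1) | 1 above.)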
> diff --git a/libavcodec/mips/vc1dsp_mmi.c b/libavcodec/mips/vc1dsp_mmi.c
> index a8ab3f6..27a3c81 100644
> --- a/libavcodec/mips/vc1dsp_mmi.c
> +++ b/libavcodec/mips/vc1dsp_mmi.c
> @@ -129,9 +129,11 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> double ftmp[9];
> mips_reg addr[1];
> int count;
> + union mmi_intfloat64 dc_u;
>
> dc = (3 * dc + 1) >> 1;
> dc = (3 * dc + 16) >> 5;
> + dc_u.i = dc;
>
> __asm__ volatile(
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> @@ -189,7 +191,7 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> [addr0]"=&r"(addr[0]),
> [count]"=&r"(count), [dest]"+&r"(dest)
> : [linesize]"r"((mips_reg)linesize),
> - [dc]"f"(dc)
> + [dc]"f"(dc_u.f)
> : "memory"
> );
> }
> @@ -198,9 +200,6 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
> {
> DECLARE_ALIGNED(16, int16_t, temp[64]);
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
> double ftmp[23];
> uint64_t tmp[1];
>
> @@ -407,8 +406,8 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
> [ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
> [ftmp22]"=&f"(ftmp[22]),
> [tmp0]"=&r"(tmp[0])
> - : [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local),
> - [ff_pw_4]"f"(ff_pw_4_local), [block]"r"(block),
> + : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
> + [ff_pw_4]"f"(ff_pw_32_4.f), [block]"r"(block),
> [temp]"r"(temp)
> : "memory"
> );
> @@ -420,9 +419,11 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> {
> int dc = block[0];
> double ftmp[9];
> + union mmi_intfloat64 dc_u;
>
> dc = ( 3 * dc + 1) >> 1;
> dc = (17 * dc + 64) >> 7;
> + dc_u.i = dc;
>
> __asm__ volatile(
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> @@ -467,7 +468,7 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> [ftmp8]"=&f"(ftmp[8])
> : [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
> [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
> - [dc]"f"(dc)
> + [dc]"f"(dc_u.f)
> : "memory"
> );
> }
> @@ -480,8 +481,6 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> double ftmp[16];
> uint32_t tmp[1];
> int16_t count = 4;
> - DECLARE_ALIGNED(16, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
> - DECLARE_ALIGNED(16, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
> int16_t coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4,
> 12, 15, 6, -4, -12, -16, -16, -9,
> 12, 9, -6, -16, -12, 4, 16, 15,
> @@ -591,7 +590,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
> [ftmp14]"=&f"(ftmp[14]), [tmp0]"=&r"(tmp[0]),
> [src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count)
> - : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
> + : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
> : "memory"
> );
>
> @@ -859,7 +858,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
> [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
> [tmp0]"=&r"(tmp[0])
> - : [ff_pw_64]"f"(ff_pw_64_local),
> + : [ff_pw_64]"f"(ff_pw_32_64.f),
> [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
> :"memory"
> );
> @@ -871,10 +870,12 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> {
> int dc = block[0];
> double ftmp[9];
> + union mmi_intfloat64 dc_u;
> DECLARE_VAR_LOW32;
>
> dc = (17 * dc + 4) >> 3;
> dc = (12 * dc + 64) >> 7;
> + dc_u.i = dc;
>
> __asm__ volatile(
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> @@ -934,7 +935,7 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
> [dest4]"r"(dest+4*linesize), [dest5]"r"(dest+5*linesize),
> [dest6]"r"(dest+6*linesize), [dest7]"r"(dest+7*linesize),
> - [dc]"f"(dc)
> + [dc]"f"(dc_u.f)
> : "memory"
> );
> }
> @@ -945,14 +946,11 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> int16_t *src = block;
> int16_t *dst = block;
> double ftmp[23];
> - uint32_t count = 8, tmp[1];
> + uint64_t count = 8, tmp[1];
> int16_t coeff[16] = {17, 22, 17, 10,
> 17, 10,-17,-22,
> 17,-10,-17, 22,
> 17,-22, 17,-10};
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
>
> // 1st loop
> __asm__ volatile (
> @@ -998,7 +996,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
> [tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
> [src]"+&r"(src), [dst]"+&r"(dst)
> - : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
> + : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
> : "memory"
> );
>
> @@ -1115,7 +1113,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> [ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
> [ftmp22]"=&f"(ftmp[22]),
> [tmp0]"=&r"(tmp[0])
> - : [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local),
> + : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
> [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
> : "memory"
> );
> @@ -1127,10 +1125,12 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> {
> int dc = block[0];
> double ftmp[5];
> + union mmi_intfloat64 dc_u;
> DECLARE_VAR_LOW32;
>
> dc = (17 * dc + 4) >> 3;
> dc = (17 * dc + 64) >> 7;
> + dc_u.i = dc;
>
> __asm__ volatile(
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> @@ -1166,7 +1166,7 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
> [ftmp4]"=&f"(ftmp[4])
> : [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
> [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
> - [dc]"f"(dc)
> + [dc]"f"(dc_u.f)
> : "memory"
> );
> }
> @@ -1181,8 +1181,6 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> 17, 10,-17,-22,
> 17,-10,-17, 22,
> 17,-22, 17,-10};
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
> - DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
> // 1st loop
> __asm__ volatile (
>
> @@ -1226,7 +1224,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
> [tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
> [src]"+&r"(src), [dst]"+&r"(dst)
> - : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
> + : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
> : "memory"
> );
>
> @@ -1370,7 +1368,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
> [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
> [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
> [tmp0]"=&r"(tmp[0])
> - : [ff_pw_64]"f"(ff_pw_64_local),
> + : [ff_pw_64]"f"(ff_pw_32_64.f),
> [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
> :"memory"
> );
> @@ -1660,14 +1658,15 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
> const uint8_t *src, mips_reg stride,
> int rnd, int64_t shift)
> {
> + union mmi_intfloat64 shift_u;
> DECLARE_VAR_LOW32;
> DECLARE_VAR_ADDRT;
> + shift_u.i = shift;
>
> __asm__ volatile(
> "pxor $f0, $f0, $f0 \n\t"
> "li $8, 0x03 \n\t"
> LOAD_ROUNDER_MMI("%[rnd]")
> - "ldc1 $f12, %[ff_pw_9] \n\t"
> "1: \n\t"
> MMI_ULWC1($f4, %[src], 0x00)
> PTR_ADDU "%[src], %[src], %[stride] \n\t"
> @@ -1689,9 +1688,9 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
> : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT
> [src]"+r"(src), [dst]"+r"(dst)
> : [stride]"r"(stride), [stride1]"r"(-2*stride),
> - [shift]"f"(shift), [rnd]"m"(rnd),
> - [stride2]"r"(9*stride-4), [ff_pw_9]"m"(ff_pw_9)
> - : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12",
> + [shift]"f"(shift_u.f), [rnd]"m"(rnd),
> + [stride2]"r"(9*stride-4)
> + : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",
> "$f14", "$f16", "memory"
> );
> }
> @@ -1713,8 +1712,6 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
> \
> __asm__ volatile( \
> LOAD_ROUNDER_MMI("%[rnd]") \
> - "ldc1 $f12, %[ff_pw_128] \n\t" \
> - "ldc1 $f10, %[ff_pw_9] \n\t" \
> "1: \n\t" \
> MMI_ULDC1($f2, %[src], 0x00) \
> MMI_ULDC1($f4, %[src], 0x08) \
> @@ -1728,16 +1725,16 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
> "paddh $f6, $f6, $f0 \n\t" \
> MMI_ULDC1($f0, %[src], 0x0b) \
> "paddh $f8, $f8, $f0 \n\t" \
> - "pmullh $f6, $f6, $f10 \n\t" \
> - "pmullh $f8, $f8, $f10 \n\t" \
> + "pmullh $f6, $f6, %[ff_pw_9] \n\t" \
> + "pmullh $f8, $f8, %[ff_pw_9] \n\t" \
> "psubh $f6, $f6, $f2 \n\t" \
> "psubh $f8, $f8, $f4 \n\t" \
> "li $8, 0x07 \n\t" \
> "mtc1 $8, $f16 \n\t" \
> NORMALIZE_MMI("$f16") \
> /* Remove bias */ \
> - "paddh $f6, $f6, $f12 \n\t" \
> - "paddh $f8, $f8, $f12 \n\t" \
> + "paddh $f6, $f6, %[ff_pw_128] \n\t" \
> + "paddh $f8, $f8, %[ff_pw_128] \n\t" \
> TRANSFER_DO_PACK(OP) \
> "addiu %[h], %[h], -0x01 \n\t" \
> PTR_ADDIU "%[src], %[src], 0x18 \n\t" \
> @@ -1747,8 +1744,8 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
> [h]"+r"(h), \
> [src]"+r"(src), [dst]"+r"(dst) \
> : [stride]"r"(stride), [rnd]"m"(rnd), \
> - [ff_pw_9]"m"(ff_pw_9), [ff_pw_128]"m"(ff_pw_128) \
> - : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", \
> + [ff_pw_9]"f"(ff_pw_9.f), [ff_pw_128]"f"(ff_pw_128.f) \
> + : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f14", \
> "$f16", "memory" \
> ); \
> }
> @@ -1774,7 +1771,6 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
> "pxor $f0, $f0, $f0 \n\t" \
> "li $10, 0x08 \n\t" \
> LOAD_ROUNDER_MMI("%[rnd]") \
> - "ldc1 $f12, %[ff_pw_9] \n\t" \
> "1: \n\t" \
> MMI_ULWC1($f6, %[src], 0x00) \
> MMI_ULWC1($f8, %[src], 0x04) \
> @@ -1791,8 +1787,8 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
> PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \
> MMI_ULWC1($f2, $9, 0x00) \
> MMI_ULWC1($f4, $9, 0x04) \
> - "pmullh $f6, $f6, $f12 \n\t" /* 0,9,9,0*/ \
> - "pmullh $f8, $f8, $f12 \n\t" /* 0,9,9,0*/ \
> + "pmullh $f6, $f6, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
> + "pmullh $f8, $f8, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
> "punpcklbh $f2, $f2, $f0 \n\t" \
> "punpcklbh $f4, $f4, $f0 \n\t" \
> "psubh $f6, $f6, $f2 \n\t" /*-1,9,9,0*/ \
> @@ -1819,9 +1815,9 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
> : [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \
> [stride]"r"(stride), [rnd]"m"(rnd), \
> [stride1]"r"(stride-offset), \
> - [ff_pw_9]"m"(ff_pw_9) \
> + [ff_pw_9]"f"(ff_pw_9.f) \
> : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \
> - "$f12", "$f14", "$f16", "memory" \
> + "$f14", "$f16", "memory" \
> ); \
> }
>
> @@ -1852,8 +1848,8 @@ VC1_SHIFT2(OP_AVG, avg_)
> LOAD($f8, $9, M*4) \
> UNPACK("$f6") \
> UNPACK("$f8") \
> - "pmullh $f6, $f6, $f12 \n\t" /* *18 */ \
> - "pmullh $f8, $f8, $f12 \n\t" /* *18 */ \
> + "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
> + "pmullh $f8, $f8, %[ff_pw_18] \n\t" /* *18 */ \
> "psubh $f6, $f6, $f2 \n\t" /* *18, -3 */ \
> "psubh $f8, $f8, $f4 \n\t" /* *18, -3 */ \
> PTR_ADDU "$9, %[src], "#A4" \n\t" \
> @@ -1872,8 +1868,8 @@ VC1_SHIFT2(OP_AVG, avg_)
> LOAD($f4, $9, M*4) \
> UNPACK("$f2") \
> UNPACK("$f4") \
> - "pmullh $f2, $f2, $f10 \n\t" /* *53 */ \
> - "pmullh $f4, $f4, $f10 \n\t" /* *53 */ \
> + "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
> + "pmullh $f4, $f4, %[ff_pw_53] \n\t" /* *53 */ \
> "paddh $f6, $f6, $f2 \n\t" /* 4,53,18,-3 */ \
> "paddh $f8, $f8, $f4 \n\t" /* 4,53,18,-3 */
>
> @@ -1892,16 +1888,16 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
> int rnd, int64_t shift) \
> { \
> int h = 8; \
> + union mmi_intfloat64 shift_u; \
> DECLARE_VAR_LOW32; \
> DECLARE_VAR_ADDRT; \
> + shift_u.i = shift; \
> \
> src -= src_stride; \
> \
> __asm__ volatile( \
> "pxor $f0, $f0, $f0 \n\t" \
> LOAD_ROUNDER_MMI("%[rnd]") \
> - "ldc1 $f10, %[ff_pw_53] \n\t" \
> - "ldc1 $f12, %[ff_pw_18] \n\t" \
> ".p2align 3 \n\t" \
> "1: \n\t" \
> MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
> @@ -1917,12 +1913,12 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
> PTR_ADDU "$9, %[src], "#A2" \n\t" \
> MMI_ULWC1($f6, $9, 0x08) \
> DO_UNPACK("$f6") \
> - "pmullh $f6, $f6, $f12 \n\t" /* *18 */ \
> + "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
> "psubh $f6, $f6, $f2 \n\t" /* *18,-3 */ \
> PTR_ADDU "$9, %[src], "#A3" \n\t" \
> MMI_ULWC1($f2, $9, 0x08) \
> DO_UNPACK("$f2") \
> - "pmullh $f2, $f2, $f10 \n\t" /* *53 */ \
> + "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
> "paddh $f6, $f6, $f2 \n\t" /* *53,18,-3 */ \
> PTR_ADDU "$9, %[src], "#A4" \n\t" \
> MMI_ULWC1($f2, $9, 0x08) \
> @@ -1945,10 +1941,10 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
> [src]"+r"(src), [dst]"+r"(dst) \
> : [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \
> [stride_x3]"r"(3*src_stride), \
> - [rnd]"m"(rnd), [shift]"f"(shift), \
> - [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
> - [ff_pw_3]"f"(ff_pw_3) \
> - : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
> + [rnd]"m"(rnd), [shift]"f"(shift_u.f), \
> + [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
> + [ff_pw_3]"f"(ff_pw_3.f) \
> + : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
> "$f14", "$f16", "memory" \
> ); \
> }
> @@ -1975,8 +1971,6 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
> __asm__ volatile( \
> "pxor $f0, $f0, $f0 \n\t" \
> LOAD_ROUNDER_MMI("%[rnd]") \
> - "ldc1 $f10, %[ff_pw_53] \n\t" \
> - "ldc1 $f12, %[ff_pw_18] \n\t" \
> ".p2align 3 \n\t" \
> "1: \n\t" \
> MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \
> @@ -1995,9 +1989,9 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
> [h]"+r"(h), \
> [src]"+r"(src), [dst]"+r"(dst) \
> : [stride]"r"(stride), [rnd]"m"(rnd), \
> - [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
> - [ff_pw_3]"f"(ff_pw_3), [ff_pw_128]"f"(ff_pw_128) \
> - : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
> + [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
> + [ff_pw_3]"f"(ff_pw_3.f), [ff_pw_128]"f"(ff_pw_128.f) \
> + : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
> "$f14", "$f16", "memory" \
> ); \
> }
> @@ -2025,8 +2019,6 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
> __asm__ volatile ( \
> "pxor $f0, $f0, $f0 \n\t" \
> LOAD_ROUNDER_MMI("%[rnd]") \
> - "ldc1 $f10, %[ff_pw_53] \n\t" \
> - "ldc1 $f12, %[ff_pw_18] \n\t" \
> ".p2align 3 \n\t" \
> "1: \n\t" \
> MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
> @@ -2044,9 +2036,9 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
> : [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \
> [offset_x3]"r"(3*offset), [stride]"r"(stride), \
> [rnd]"m"(rnd), \
> - [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
> - [ff_pw_3]"f"(ff_pw_3) \
> - : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
> + [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
> + [ff_pw_3]"f"(ff_pw_3.f) \
> + : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
> "$f14", "$f16", "memory" \
> ); \
> }
> @@ -2246,14 +2238,15 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
> uint8_t *src /* align 1 */,
> ptrdiff_t stride, int h, int x, int y)
> {
> - const int A = (8 - x) * (8 - y);
> - const int B = (x) * (8 - y);
> - const int C = (8 - x) * (y);
> - const int D = (x) * (y);
> + union mmi_intfloat64 A, B, C, D;
> double ftmp[10];
> uint32_t tmp[1];
> DECLARE_VAR_ALL64;
> DECLARE_VAR_ADDRT;
> + A.i = (8 - x) * (8 - y);
> + B.i = (x) * (8 - y);
> + C.i = (8 - x) * (y);
> + D.i = (x) * (y);
>
> av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
>
> @@ -2290,9 +2283,9 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
> [src]"+&r"(src), [dst]"+&r"(dst),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D),
> - [ff_pw_28]"f"(ff_pw_28)
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [ff_pw_28]"f"(ff_pw_28.f)
> : "memory"
> );
> }
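
Same pattern for the chroma weights: A..D become unions so the "f" operands
see double-typed expressions. The initializers move below
DECLARE_VAR_ALL64/DECLARE_VAR_ADDRT because those macros expand to
declarations and FFmpeg's style keeps declarations ahead of statements.
Reduced sketch (hypothetical helper, no asm):

    #include <stdint.h>

    union mmi_intfloat64 { int64_t i; double f; };

    static void chroma_weights(int x, int y, double out[4])
    {
        union mmi_intfloat64 A, B, C, D;   /* declarations first ...    */
        A.i = (8 - x) * (8 - y);           /* ... statements afterwards */
        B.i = x * (8 - y);
        C.i = (8 - x) * y;
        D.i = x * y;
        /* only the 64-bit pattern matters to the pmullh/paddh code */
        out[0] = A.f; out[1] = B.f;
        out[2] = C.f; out[3] = D.f;
    }
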
> @@ -2301,14 +2294,15 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
> uint8_t *src /* align 1 */,
> ptrdiff_t stride, int h, int x, int y)
> {
> - const int A = (8 - x) * (8 - y);
> - const int B = (x) * (8 - y);
> - const int C = (8 - x) * (y);
> - const int D = (x) * (y);
> + union mmi_intfloat64 A, B, C, D;
> double ftmp[6];
> uint32_t tmp[1];
> DECLARE_VAR_LOW32;
> DECLARE_VAR_ADDRT;
> + A.i = (8 - x) * (8 - y);
> + B.i = (x) * (8 - y);
> + C.i = (8 - x) * (y);
> + D.i = (x) * (y);
>
> av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
>
> @@ -2343,9 +2337,9 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
> [src]"+&r"(src), [dst]"+&r"(dst),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D),
> - [ff_pw_28]"f"(ff_pw_28)
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [ff_pw_28]"f"(ff_pw_28.f)
> : "memory"
> );
> }
> @@ -2354,14 +2348,15 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
> uint8_t *src /* align 1 */,
> ptrdiff_t stride, int h, int x, int y)
> {
> - const int A = (8 - x) * (8 - y);
> - const int B = (x) * (8 - y);
> - const int C = (8 - x) * (y);
> - const int D = (x) * (y);
> + union mmi_intfloat64 A, B, C, D;
> double ftmp[10];
> uint32_t tmp[1];
> DECLARE_VAR_ALL64;
> DECLARE_VAR_ADDRT;
> + A.i = (8 - x) * (8 - y);
> + B.i = (x) * (8 - y);
> + C.i = (8 - x) * (y);
> + D.i = (x) * (y);
>
> av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
>
> @@ -2401,9 +2396,9 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
> [src]"+&r"(src), [dst]"+&r"(dst),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D),
> - [ff_pw_28]"f"(ff_pw_28)
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [ff_pw_28]"f"(ff_pw_28.f)
> : "memory"
> );
> }
> @@ -2412,14 +2407,15 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
> uint8_t *src /* align 1 */,
> ptrdiff_t stride, int h, int x, int y)
> {
> - const int A = (8 - x) * (8 - y);
> - const int B = ( x) * (8 - y);
> - const int C = (8 - x) * ( y);
> - const int D = ( x) * ( y);
> + union mmi_intfloat64 A, B, C, D;
> double ftmp[6];
> uint32_t tmp[1];
> DECLARE_VAR_LOW32;
> DECLARE_VAR_ADDRT;
> + A.i = (8 - x) * (8 - y);
> + B.i = (x) * (8 - y);
> + C.i = (8 - x) * (y);
> + D.i = (x) * (y);
>
> av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
>
> @@ -2457,9 +2453,9 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
> [src]"+&r"(src), [dst]"+&r"(dst),
> [h]"+&r"(h)
> : [stride]"r"((mips_reg)stride),
> - [A]"f"(A), [B]"f"(B),
> - [C]"f"(C), [D]"f"(D),
> - [ff_pw_28]"f"(ff_pw_28)
> + [A]"f"(A.f), [B]"f"(B.f),
> + [C]"f"(C.f), [D]"f"(D.f),
> + [ff_pw_28]"f"(ff_pw_28.f)
> : "memory"
> );
> }
> diff --git a/libavcodec/mips/vp8dsp_mmi.c b/libavcodec/mips/vp8dsp_mmi.c
> index b352906..327eaf5 100644
> --- a/libavcodec/mips/vp8dsp_mmi.c
> +++ b/libavcodec/mips/vp8dsp_mmi.c
> @@ -1128,12 +1128,14 @@ void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
> void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
> {
> #if 1
> - DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = {0x4e7b4e7b4e7b4e7bULL};
> - DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = {0x22a322a322a322a3ULL};
> double ftmp[12];
> uint32_t tmp[1];
> + union av_intfloat64 ff_ph_4e7b_u;
> + union av_intfloat64 ff_ph_22a3_u;
> DECLARE_VAR_LOW32;
> DECLARE_VAR_ALL64;
> + ff_ph_4e7b_u.i = 0x4e7b4e7b4e7b4e7bULL;
> + ff_ph_22a3_u.i = 0x22a322a322a322a3ULL;
>
> __asm__ volatile (
> "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
> @@ -1253,8 +1255,8 @@ void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
> [tmp0]"=&r"(tmp[0])
> : [dst0]"r"(dst), [dst1]"r"(dst+stride),
> [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
> - [block]"r"(block), [ff_pw_4]"f"(ff_pw_4),
> - [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_22a3]"f"(ff_ph_22a3)
> + [block]"r"(block), [ff_pw_4]"f"(ff_pw_4.f),
> + [ff_ph_4e7b]"f"(ff_ph_4e7b_u.f), [ff_ph_22a3]"f"(ff_ph_22a3_u.f)
> : "memory"
> );
> #else
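
Worth noting: vp8dsp_mmi.c reuses union av_intfloat64 from
libavutil/intfloat.h (uint64_t plus double) for these local constants,
while the vc1 code above uses the new mmi_intfloat64 (int64_t plus double);
either way the point is a double-typed alias of the same 64 bits. A
standalone illustration of how the two locals get their values (assuming
only the standard union layout):

    #include <stdint.h>

    union av_intfloat64 { uint64_t i; double f; };  /* as in intfloat.h */

    /* replicate a 16-bit lane value into all four halfword lanes,
     * e.g. 0x4e7b -> 0x4e7b4e7b4e7b4e7b (ff_ph_4e7b_u above) */
    static double splat16(uint16_t v)
    {
        union av_intfloat64 u;
        u.i = 0x0001000100010001ULL * v;
        return u.f;
    }
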
> @@ -1595,8 +1597,16 @@ void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> const uint64_t *filter = fourtap_subpel_filters[mx - 1];
> double ftmp[9];
> uint32_t tmp[1];
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> mips_reg src1, dst1;
> DECLARE_VAR_ALL64;
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
>
> /*
> dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
> @@ -1644,11 +1654,11 @@ void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [dst1]"=&r"(dst1), [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
> - [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
> + [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
> + [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
> : "memory"
> );
> #else
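
The epel changes are mechanical: each packed uint64_t coefficient word from
fourtap_subpel_filters is copied into a union so the operand list can stay
on "f". Sketch of the conversion step (hypothetical helper; the real code
just writes filterN.i = filter[N] inline):

    #include <stdint.h>

    union av_intfloat64 { uint64_t i; double f; };

    static void filter_operands(const uint64_t *filter, double out[4])
    {
        int k;
        for (k = 0; k < 4; k++) {
            union av_intfloat64 u;
            u.i = filter[k + 1];   /* filter[1]..filter[4] */
            out[k] = u.f;          /* double view for the "f" inputs */
        }
    }
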
> @@ -1672,7 +1682,16 @@ void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> const uint64_t *filter = fourtap_subpel_filters[mx - 1];
> double ftmp[9];
> uint32_t tmp[1];
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> DECLARE_VAR_ALL64;
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
> +
>
> /*
> dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
> @@ -1705,11 +1724,11 @@ void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> RESTRICT_ASM_ALL64
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
> - [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
> + [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
> + [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
> : "memory"
> );
> #else
> @@ -1733,7 +1752,15 @@ void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> const uint64_t *filter = fourtap_subpel_filters[mx - 1];
> double ftmp[6];
> uint32_t tmp[1];
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> DECLARE_VAR_LOW32;
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
>
> /*
> dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
> @@ -1760,11 +1787,11 @@ void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> RESTRICT_ASM_LOW32
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
> - [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
> + [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
> + [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
> : "memory"
> );
> #else
> @@ -1789,7 +1816,19 @@ void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> double ftmp[9];
> uint32_t tmp[1];
> mips_reg src1, dst1;
> + union av_intfloat64 filter0;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> + union av_intfloat64 filter5;
> DECLARE_VAR_ALL64;
> + filter0.i = filter[0];
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
> + filter5.i = filter[5];
>
> /*
> dst[ 0] = cm[(filter[2]*src[ 0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[ 1] - filter[4]*src[ 2] + filter[5]*src[ 3] + 64) >> 7];
> @@ -1837,12 +1876,12 @@ void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [dst1]"=&r"(dst1), [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
> - [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
> - [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
> + [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
> + [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
> + [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
> : "memory"
> );
> #else
> @@ -1866,7 +1905,19 @@ void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> const uint64_t *filter = fourtap_subpel_filters[mx - 1];
> double ftmp[9];
> uint32_t tmp[1];
> + union av_intfloat64 filter0;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> + union av_intfloat64 filter5;
> DECLARE_VAR_ALL64;
> + filter0.i = filter[0];
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
> + filter5.i = filter[5];
>
> /*
> dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
> @@ -1899,12 +1950,12 @@ void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> RESTRICT_ASM_ALL64
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
> - [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
> - [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
> + [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
> + [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
> + [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
> : "memory"
> );
> #else
> @@ -1928,7 +1979,19 @@ void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> const uint64_t *filter = fourtap_subpel_filters[mx - 1];
> double ftmp[6];
> uint32_t tmp[1];
> + union av_intfloat64 filter0;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> + union av_intfloat64 filter5;
> DECLARE_VAR_LOW32;
> + filter0.i = filter[0];
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
> + filter5.i = filter[5];
>
> /*
> dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
> @@ -1955,12 +2018,12 @@ void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> RESTRICT_ASM_LOW32
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
> - [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
> - [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
> + [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
> + [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
> + [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
> : "memory"
> );
> #else
> @@ -1985,7 +2048,15 @@ void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> double ftmp[9];
> uint32_t tmp[1];
> mips_reg src0, src1, dst0;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> DECLARE_VAR_ALL64;
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
>
> /*
> dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
> @@ -2034,11 +2105,11 @@ void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
> - [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
> + [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
> + [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
> : "memory"
> );
> #else
> @@ -2063,7 +2134,15 @@ void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> double ftmp[9];
> uint32_t tmp[1];
> mips_reg src1;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> DECLARE_VAR_ALL64;
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
>
> /*
> dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
> @@ -2097,11 +2176,11 @@ void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
> - [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
> + [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
> + [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
> : "memory"
> );
> #else
> @@ -2126,7 +2205,15 @@ void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> double ftmp[6];
> uint32_t tmp[1];
> mips_reg src1;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> DECLARE_VAR_LOW32;
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
>
> /*
> dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
> @@ -2154,11 +2241,11 @@ void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
> - [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
> + [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
> + [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
> : "memory"
> );
> #else
> @@ -2183,7 +2270,19 @@ void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> double ftmp[9];
> uint32_t tmp[1];
> mips_reg src0, src1, dst0;
> + union av_intfloat64 filter0;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> + union av_intfloat64 filter5;
> DECLARE_VAR_ALL64;
> + filter0.i = filter[0];
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
> + filter5.i = filter[5];
>
> /*
> dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
> @@ -2232,12 +2331,12 @@ void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
> - [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
> - [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
> + [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
> + [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
> + [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
> : "memory"
> );
> #else
> @@ -2262,7 +2361,19 @@ void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> double ftmp[9];
> uint32_t tmp[1];
> mips_reg src1;
> + union av_intfloat64 filter0;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> + union av_intfloat64 filter5;
> DECLARE_VAR_ALL64;
> + filter0.i = filter[0];
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
> + filter5.i = filter[5];
>
> /*
> dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
> @@ -2296,12 +2407,12 @@ void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
> - [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
> - [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
> + [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
> + [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
> + [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
> : "memory"
> );
> #else
> @@ -2326,7 +2437,19 @@ void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> double ftmp[6];
> uint32_t tmp[1];
> mips_reg src1;
> + union av_intfloat64 filter0;
> + union av_intfloat64 filter1;
> + union av_intfloat64 filter2;
> + union av_intfloat64 filter3;
> + union av_intfloat64 filter4;
> + union av_intfloat64 filter5;
> DECLARE_VAR_LOW32;
> + filter0.i = filter[0];
> + filter1.i = filter[1];
> + filter2.i = filter[2];
> + filter3.i = filter[3];
> + filter4.i = filter[4];
> + filter5.i = filter[5];
>
> /*
> dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
> @@ -2354,12 +2477,12 @@ void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src)
> - : [ff_pw_64]"f"(ff_pw_64),
> + : [ff_pw_64]"f"(ff_pw_64.f),
> [srcstride]"r"((mips_reg)srcstride),
> [dststride]"r"((mips_reg)dststride),
> - [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
> - [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
> - [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
> + [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
> + [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
> + [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
> : "memory"
> );
> #else
> @@ -2847,11 +2970,13 @@ void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> ptrdiff_t sstride, int h, int mx, int my)
> {
> #if 1
> - int a = 8 - mx, b = mx;
> + union mmi_intfloat64 a, b;
> double ftmp[7];
> uint32_t tmp[1];
> mips_reg dst0, src0;
> DECLARE_VAR_ALL64;
> + a.i = 8 - mx;
> + b.i = mx;
>
> /*
> dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
> @@ -2900,10 +3025,10 @@ void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> [dst0]"=&r"(dst0), [src0]"=&r"(src0),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src),
> - [a]"+&f"(a), [b]"+&f"(b)
> + [a]"+&f"(a.f), [b]"+&f"(b.f)
> : [sstride]"r"((mips_reg)sstride),
> [dstride]"r"((mips_reg)dstride),
> - [ff_pw_4]"f"(ff_pw_4)
> + [ff_pw_4]"f"(ff_pw_4.f)
> : "memory"
> );
> #else
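
One subtlety in the bilinear functions: a and b are read-write operands
("+&f"), so the operand must be an lvalue; a.f qualifies, and the integer
view is only touched during setup. Sketch, with a placeholder no-op asm
(assumes MIPS):

    #include <stdint.h>

    union mmi_intfloat64 { int64_t i; double f; };

    static void bilinear_setup(int mx, union mmi_intfloat64 *a,
                               union mmi_intfloat64 *b)
    {
        a->i = 8 - mx;                    /* integer view for setup */
        b->i = mx;
    #if defined(__mips__)
        /* a->f is an lvalue of type double, so "+f" is accepted;
         * the real code expands a/b into vector lanes at this point */
        __asm__ volatile("mov.d %0, %0" : "+f"(a->f));
        __asm__ volatile("mov.d %0, %0" : "+f"(b->f));
    #endif
    }
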
> @@ -2923,11 +3048,13 @@ void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> ptrdiff_t sstride, int h, int mx, int my)
> {
> #if 1
> - int c = 8 - my, d = my;
> + union mmi_intfloat64 c, d;
> double ftmp[7];
> uint32_t tmp[1];
> mips_reg src0, src1, dst0;
> DECLARE_VAR_ALL64;
> + c.i = 8 - my;
> + d.i = my;
>
> /*
> dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
> @@ -2968,10 +3095,10 @@ void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src),
> - [c]"+&f"(c), [d]"+&f"(d)
> + [c]"+&f"(c.f), [d]"+&f"(d.f)
> : [sstride]"r"((mips_reg)sstride),
> [dstride]"r"((mips_reg)dstride),
> - [ff_pw_4]"f"(ff_pw_4)
> + [ff_pw_4]"f"(ff_pw_4.f)
> : "memory"
> );
> #else
> @@ -3025,10 +3152,12 @@ void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> ptrdiff_t sstride, int h, int mx, int my)
> {
> #if 1
> - int a = 8 - mx, b = mx;
> + union mmi_intfloat64 a, b;
> double ftmp[7];
> uint32_t tmp[1];
> DECLARE_VAR_ALL64;
> + a.i = 8 - mx;
> + b.i = mx;
>
> /*
> dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
> @@ -3062,10 +3191,10 @@ void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> RESTRICT_ASM_ALL64
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src),
> - [a]"+&f"(a), [b]"+&f"(b)
> + [a]"+&f"(a.f), [b]"+&f"(b.f)
> : [sstride]"r"((mips_reg)sstride),
> [dstride]"r"((mips_reg)dstride),
> - [ff_pw_4]"f"(ff_pw_4)
> + [ff_pw_4]"f"(ff_pw_4.f)
> : "memory"
> );
> #else
> @@ -3085,11 +3214,13 @@ void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> ptrdiff_t sstride, int h, int mx, int my)
> {
> #if 1
> - int c = 8 - my, d = my;
> + union mmi_intfloat64 c, d;
> double ftmp[7];
> uint32_t tmp[1];
> mips_reg src1;
> DECLARE_VAR_ALL64;
> + c.i = 8 - my;
> + d.i = my;
>
> /*
> dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
> @@ -3124,10 +3255,10 @@ void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src),
> - [c]"+&f"(c), [d]"+&f"(d)
> + [c]"+&f"(c.f), [d]"+&f"(d.f)
> : [sstride]"r"((mips_reg)sstride),
> [dstride]"r"((mips_reg)dstride),
> - [ff_pw_4]"f"(ff_pw_4)
> + [ff_pw_4]"f"(ff_pw_4.f)
> : "memory"
> );
> #else
> @@ -3181,11 +3312,13 @@ void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> ptrdiff_t sstride, int h, int mx, int my)
> {
> #if 1
> - int a = 8 - mx, b = mx;
> + union mmi_intfloat64 a, b;
> double ftmp[5];
> uint32_t tmp[1];
> DECLARE_VAR_LOW32;
> DECLARE_VAR_ALL64;
> + a.i = 8 - mx;
> + b.i = mx;
>
> /*
> dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
> @@ -3215,10 +3348,10 @@ void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> RESTRICT_ASM_ALL64
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src),
> - [a]"+&f"(a), [b]"+&f"(b)
> + [a]"+&f"(a.f), [b]"+&f"(b.f)
> : [sstride]"r"((mips_reg)sstride),
> [dstride]"r"((mips_reg)dstride),
> - [ff_pw_4]"f"(ff_pw_4)
> + [ff_pw_4]"f"(ff_pw_4.f)
> : "memory"
> );
> #else
> @@ -3238,12 +3371,14 @@ void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> ptrdiff_t sstride, int h, int mx, int my)
> {
> #if 1
> - int c = 8 - my, d = my;
> + union mmi_intfloat64 c, d;
> double ftmp[7];
> uint32_t tmp[1];
> mips_reg src1;
> DECLARE_VAR_LOW32;
> DECLARE_VAR_ALL64;
> + c.i = 8 - my;
> + d.i = my;
>
> /*
> dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
> @@ -3274,10 +3409,10 @@ void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
> [src1]"=&r"(src1),
> [h]"+&r"(h),
> [dst]"+&r"(dst), [src]"+&r"(src),
> - [c]"+&f"(c), [d]"+&f"(d)
> + [c]"+&f"(c.f), [d]"+&f"(d.f)
> : [sstride]"r"((mips_reg)sstride),
> [dstride]"r"((mips_reg)dstride),
> - [ff_pw_4]"f"(ff_pw_4)
> + [ff_pw_4]"f"(ff_pw_4.f)
> : "memory"
> );
> #else
> diff --git a/libavutil/mips/asmdefs.h b/libavutil/mips/asmdefs.h
> index 76bb2b9..659342b 100644
> --- a/libavutil/mips/asmdefs.h
> +++ b/libavutil/mips/asmdefs.h
> @@ -27,6 +27,8 @@
> #ifndef AVUTIL_MIPS_ASMDEFS_H
> #define AVUTIL_MIPS_ASMDEFS_H
>
> +#include <stdint.h>
> +
> #if defined(_ABI64) && _MIPS_SIM == _ABI64
> # define mips_reg int64_t
> # define PTRSIZE " 8 "
> @@ -97,4 +99,10 @@ __asm__(".macro parse_r var r\n\t"
> ".endif\n\t"
> ".endm");
>
> +/* General union structure for clang adaptation */
> +union mmi_intfloat64 {
> + int64_t i;
> + double f;
> +};
> +
> #endif /* AVUTIL_MIPS_ASMDEFS_H */
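
On the new union itself: writing one member and reading the other is
well-defined type punning in C (C99 TC3 / C11, §6.5.2.3), so this is the
portable way to hand the FPR-loading "f" constraint an integer bit
pattern. Tiny self-contained check (host-neutral):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    union mmi_intfloat64 { int64_t i; double f; };

    int main(void)
    {
        union mmi_intfloat64 u;
        u.i = 0x0004000400040004LL;   /* ff_pw_4's bit pattern */
        /* an "f" operand on u.f simply ldc1's these 8 raw bytes */
        printf("bits %016" PRIX64 " as double %a\n",
               (uint64_t)u.i, u.f);
        return 0;
    }
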
> --
> 2.1.0
>
LGTM.