[FFmpeg-devel] [PATCH v3 2/4] avcodec/mips: Use MMI macros to replace Loongson3 instructions

Wed Jul 28 04:46:58 EEST 2021

> 2021年7月23日 下午1:53，Jiaxun Yang <jiaxun.yang at flygoat.com> 写道：
> 
> Loongson3's extension instructions (prefixed with gs) are widely used
> in our MMI codebase. However, these instructions are not available on
> Loongson-2E/F, while MMI code should work on these processors.
> 
> Previously we introduced mmiutils macros to provide backward compatibility,
> but newly committed code didn't follow that. In this patch I revised the
> codebase and converted all these instructions into MMI macros to get
> Loongson2 supported again.
> 
> Signed-off-by: Jiaxun Yang <jiaxun.yang at flygoat.com>
> Reviewed-by: Shiyou Yin <yinshiyou-hf at loongson.cn>
> ---
> libavcodec/mips/h264chroma_mmi.c  |  28 +++-
> libavcodec/mips/h264dsp_mmi.c     |   8 +-
> libavcodec/mips/hevcdsp_mmi.c     | 251 ++++++++++++------------------
> libavcodec/mips/hpeldsp_mmi.c     |   1 +
> libavcodec/mips/simple_idct_mmi.c |  49 +++---
> libavcodec/mips/vp3dsp_idct_mmi.c |  11 +-
> libavcodec/mips/vp8dsp_mmi.c      | 100 +++++-------
> libavcodec/mips/vp9_mc_mmi.c      | 128 ++++++---------
> 8 files changed, 247 insertions(+), 329 deletions(-)
> 
> diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
> index cc2d7cb7e9..ec35c5a72e 100644
> --- a/libavcodec/mips/h264chroma_mmi.c
> +++ b/libavcodec/mips/h264chroma_mmi.c
> @@ -31,6 +31,8 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> {
>     double ftmp[12];
>     union mmi_intfloat64 A, B, C, D, E;
> +    DECLARE_VAR_ALL64;
> +
>     A.i = 64;
> 
>     if (!(x || y)) {
> @@ -57,7 +59,8 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             MMI_SDC1(%[ftmp3], %[dst], 0x00)
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
>             "bnez       %[h],       1b                                 \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [dst]"+&r"(dst),              [src]"+&r"(src),
>               [h]"+&r"(h)
> @@ -151,7 +154,8 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             MMI_SDC1(%[ftmp3], %[dst], 0x00)
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
>             "bnez       %[h],       1b                                 \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
>               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
> @@ -201,7 +205,8 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             MMI_SDC1(%[ftmp1], %[dst], 0x00)
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
>             "bnez       %[h],       1b                                 \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
>               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
> @@ -268,7 +273,8 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             MMI_SDC1(%[ftmp2], %[dst], 0x00)
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
>             "bnez       %[h],       1b                                 \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
>               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
> @@ -288,6 +294,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
> {
>     double ftmp[10];
>     union mmi_intfloat64 A, B, C, D, E;
> +    DECLARE_VAR_ALL64;
> +
>     A.i = 64;
> 
>     if(!(x || y)){
> @@ -310,7 +318,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
>             "addi       %[h],       %[h],           -0x02               \n\t"
>             "bnez       %[h],       1b                                  \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [dst]"+&r"(dst),              [src]"+&r"(src),
>               [h]"+&r"(h)
> @@ -373,7 +382,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             MMI_SDC1(%[ftmp1], %[dst], 0x00)
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
>             "bnez       %[h],       1b                             \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
>               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
> @@ -423,7 +433,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             MMI_SDC1(%[ftmp1], %[dst], 0x00)
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
>             "bnez       %[h],       1b                             \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
>               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
> @@ -471,7 +482,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
>             MMI_SDC1(%[ftmp1], %[dst], 0x00)
>             PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
>             "bnez       %[h],       1b                             \n\t"
> -            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
> +            : RESTRICT_ASM_ALL64
> +              [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
>               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
>               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
>               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
> diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
> index 6e77995523..b5ab07c863 100644
> --- a/libavcodec/mips/h264dsp_mmi.c
> +++ b/libavcodec/mips/h264dsp_mmi.c
> @@ -40,8 +40,8 @@ void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
>         MMI_LDC1(%[ftmp3], %[src], 0x10)
>         MMI_LDC1(%[ftmp4], %[src], 0x18)
>         /* memset(src, 0, 32); */
> -        "gssqc1     %[ftmp0],   %[ftmp0],       0x00(%[src])            \n\t"
> -        "gssqc1     %[ftmp0],   %[ftmp0],       0x10(%[src])            \n\t"
> +        MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x00)
> +        MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x10)
>         MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
>         MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
>         MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
> @@ -90,8 +90,8 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
>         MMI_LDC1(%[ftmp3], %[block], 0x18)
>         /* memset(block, 0, 32) */
>         "pxor       %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
> -        "gssqc1     %[ftmp4],   %[ftmp4],       0x00(%[block])          \n\t"
> -        "gssqc1     %[ftmp4],   %[ftmp4],       0x10(%[block])          \n\t"
> +        MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x00)
> +        MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x10)
>         "dli        %[tmp0],    0x01                                    \n\t"
>         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
>         "dli        %[tmp0],    0x06                                    \n\t"
> diff --git a/libavcodec/mips/hevcdsp_mmi.c b/libavcodec/mips/hevcdsp_mmi.c
> index 87fc2555a4..6583bef5da 100644
> --- a/libavcodec/mips/hevcdsp_mmi.c
> +++ b/libavcodec/mips/hevcdsp_mmi.c
> @@ -35,6 +35,7 @@ void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src,     \
>     double ftmp[15];                                                     \
>     uint64_t rtmp[1];                                                    \
>     const int8_t *filter = ff_hevc_qpel_filters[mx - 1];                 \
> +    DECLARE_VAR_ALL64;                                                   \
>                                                                          \
>     x = x_step;                                                          \
>     y = height;                                                          \
> @@ -50,14 +51,10 @@ void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src,     \
>                                                                          \
>         "1:                                                     \n\t"    \
>         "2:                                                     \n\t"    \
> -        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"    \
> -        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"    \
> -        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"    \
> -        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"    \
> +        MMI_ULDC1(%[ftmp3], %[src], 0x00)                                \
> +        MMI_ULDC1(%[ftmp4], %[src], 0x01)                                \
> +        MMI_ULDC1(%[ftmp5], %[src], 0x02)                                \
> +        MMI_ULDC1(%[ftmp6], %[src], 0x03)                                \
>         "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"    \
>         "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"    \
>         "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
> @@ -83,8 +80,7 @@ void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src,     \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
>         "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
> -        "gssdlc1      %[ftmp3],      0x07(%[dst])               \n\t"    \
> -        "gssdrc1      %[ftmp3],      0x00(%[dst])               \n\t"    \
> +        MMI_USDC1(%[ftmp3], %[dst], 0x00)                                \
>                                                                          \
>         "daddi        %[x],          %[x],         -0x01        \n\t"    \
>         PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"    \
> @@ -98,7 +94,8 @@ void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src,     \
>         PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"    \
>         PTR_ADDIU    "%[dst],        %[dst],        0x80        \n\t"    \
>         "bnez         %[y],          1b                         \n\t"    \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
> +        : RESTRICT_ASM_ALL64                                             \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
> @@ -134,6 +131,7 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
>     int16_t *tmp = tmp_array;                                            \
>     double ftmp[15];                                                     \
>     uint64_t rtmp[1];                                                    \
> +    DECLARE_VAR_ALL64;                                                   \
>                                                                          \
>     src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                        \
>     filter = ff_hevc_qpel_filters[mx - 1];                               \
> @@ -151,14 +149,10 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
>                                                                          \
>         "1:                                                     \n\t"    \
>         "2:                                                     \n\t"    \
> -        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"    \
> -        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"    \
> -        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"    \
> -        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"    \
> -        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"    \
> +        MMI_ULDC1(%[ftmp3], %[src], 0x00)                                \
> +        MMI_ULDC1(%[ftmp4], %[src], 0x01)                                \
> +        MMI_ULDC1(%[ftmp5], %[src], 0x02)                                \
> +        MMI_ULDC1(%[ftmp6], %[src], 0x03)                                \
>         "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"    \
>         "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"    \
>         "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
> @@ -184,8 +178,7 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
>         "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
> -        "gssdlc1      %[ftmp3],      0x07(%[tmp])               \n\t"    \
> -        "gssdrc1      %[ftmp3],      0x00(%[tmp])               \n\t"    \
> +        MMI_USDC1(%[ftmp3], %[tmp], 0x00)                                \
>                                                                          \
>         "daddi        %[x],          %[x],         -0x01        \n\t"    \
>         PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"    \
> @@ -199,7 +192,8 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
>         PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"    \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
>         "bnez         %[y],          1b                         \n\t"    \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
> +        : RESTRICT_ASM_ALL64                                             \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
> @@ -228,29 +222,21 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
>                                                                          \
>         "1:                                                     \n\t"    \
>         "2:                                                     \n\t"    \
> -        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                                \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
> -        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                                \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
> -        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                                \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
> -        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                                \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
> -        "gsldlc1      %[ftmp7],      0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp7],      0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp7], %[tmp], 0x00)                                \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
> -        "gsldlc1      %[ftmp8],      0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp8],      0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp8], %[tmp], 0x00)                                \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
> -        "gsldlc1      %[ftmp9],      0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp9],      0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp9], %[tmp], 0x00)                                \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
> -        "gsldlc1      %[ftmp10],     0x07(%[tmp])               \n\t"    \
> -        "gsldrc1      %[ftmp10],     0x00(%[tmp])               \n\t"    \
> +        MMI_ULDC1(%[ftmp10], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"    \
>         TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
>                      %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])         \
> @@ -275,8 +261,7 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
>         "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
>         "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"    \
>         "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
> -        "gssdlc1      %[ftmp3],      0x07(%[dst])               \n\t"    \
> -        "gssdrc1      %[ftmp3],      0x00(%[dst])               \n\t"    \
> +        MMI_USDC1(%[ftmp3], %[dst], 0x00)                               \
>                                                                          \
>         "daddi        %[x],          %[x],         -0x01        \n\t"    \
>         PTR_ADDIU    "%[dst],        %[dst],        0x08        \n\t"    \
> @@ -290,7 +275,8 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
>         PTR_ADDIU    "%[dst],        %[dst],        0x80        \n\t"    \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
>         "bnez         %[y],          1b                         \n\t"    \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
> +        : RESTRICT_ASM_ALL64                                             \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
> @@ -333,6 +319,8 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
>     uint64_t rtmp[1];                                                   \
>     union av_intfloat64 shift;                                          \
>     union av_intfloat64 offset;                                         \
> +    DECLARE_VAR_ALL64;                                                  \
> +    DECLARE_VAR_LOW32;                                                  \
>     shift.i = 7;                                                        \
>     offset.i = 64;                                                      \
>                                                                         \
> @@ -353,14 +341,10 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
>         "1:                                                     \n\t"   \
>         "li           %[x],        " #x_step "                  \n\t"   \
>         "2:                                                     \n\t"   \
> -        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"   \
> +        MMI_ULDC1(%[ftmp3], %[src], 0x00)                               \
> +        MMI_ULDC1(%[ftmp4], %[src], 0x01)                               \
> +        MMI_ULDC1(%[ftmp5], %[src], 0x02)                               \
> +        MMI_ULDC1(%[ftmp6], %[src], 0x03)                               \
>         "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
>         "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
>         "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
> @@ -387,8 +371,7 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
>         "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
>         "paddh        %[ftmp3],      %[ftmp3],      %[offset]   \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x07(%[src2])              \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x00(%[src2])              \n\t"   \
> +        MMI_ULDC1(%[ftmp4], %[src2], 0x00)                              \
>         "li           %[rtmp0],      0x10                       \n\t"   \
>         "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
>         "punpcklhw    %[ftmp5],      %[ftmp0],      %[ftmp3]    \n\t"   \
> @@ -407,8 +390,7 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
>         "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
>         "pand         %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
>         "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
> -        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   \
> -        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
> +        MMI_USWC1(%[ftmp3], %[dst], 0x00)                               \
>                                                                         \
>         "daddi        %[x],          %[x],         -0x01        \n\t"   \
>         PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
> @@ -424,7 +406,8 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
>         PTR_ADDU     "%[dst],        %[dst],    %[dst_stride]   \n\t"   \
>         PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"   \
>         "bnez         %[y],          1b                         \n\t"   \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
> +        : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32                         \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
> @@ -469,6 +452,8 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>     uint64_t rtmp[1];                                                   \
>     union av_intfloat64 shift;                                          \
>     union av_intfloat64 offset;                                         \
> +    DECLARE_VAR_ALL64;                                                  \
> +    DECLARE_VAR_LOW32;                                                  \
>     shift.i = 7;                                                        \
>     offset.i = 64;                                                      \
>                                                                         \
> @@ -488,14 +473,10 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>                                                                         \
>         "1:                                                     \n\t"   \
>         "2:                                                     \n\t"   \
> -        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"   \
> +        MMI_ULDC1(%[ftmp3], %[src], 0x00)                               \
> +        MMI_ULDC1(%[ftmp4], %[src], 0x01)                               \
> +        MMI_ULDC1(%[ftmp5], %[src], 0x02)                               \
> +        MMI_ULDC1(%[ftmp6], %[src], 0x03)                               \
>         "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
>         "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
>         "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
> @@ -521,8 +502,7 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
>         "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
> -        "gssdlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
> -        "gssdrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
> +        MMI_USDC1(%[ftmp3], %[tmp], 0x00)                               \
>                                                                         \
>         "daddi        %[x],          %[x],         -0x01        \n\t"   \
>         PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
> @@ -536,7 +516,8 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
>         "bnez         %[y],          1b                         \n\t"   \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
> +        : RESTRICT_ASM_ALL64                                            \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
> @@ -567,29 +548,21 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "1:                                                     \n\t"   \
>         "li           %[x],        " #x_step "                  \n\t"   \
>         "2:                                                     \n\t"   \
> -        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp7],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp7],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp7], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp8],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp8],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp8], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp9],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp9],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp9], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp10],     0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp10],     0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp10], %[tmp], 0x00)                              \
>         PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"   \
>         TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
>                      %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
> @@ -614,8 +587,7 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
>         "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
>         "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x07(%[src2])              \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x00(%[src2])              \n\t"   \
> +        MMI_ULDC1(%[ftmp4], %[src2], 0x00)                              \
>         "pxor         %[ftmp7],      %[ftmp7],      %[ftmp7]    \n\t"   \
>         "li           %[rtmp0],      0x10                       \n\t"   \
>         "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
> @@ -637,8 +609,7 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp7]    \n\t"   \
>         "pand         %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
>         "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
> -        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   \
> -        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
> +        MMI_USWC1(%[ftmp3], %[dst], 0x00)                               \
>                                                                         \
>         "daddi        %[x],          %[x],         -0x01        \n\t"   \
>         PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
> @@ -654,7 +625,8 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
>         "bnez         %[y],          1b                         \n\t"   \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
> +        : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32                         \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
> @@ -700,6 +672,8 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>     uint64_t rtmp[1];                                                   \
>     union av_intfloat64 shift;                                          \
>     union av_intfloat64 offset;                                         \
> +    DECLARE_VAR_ALL64;                                                  \
> +    DECLARE_VAR_LOW32;                                                  \
>     shift.i = 7;                                                        \
>     offset.i = 64;                                                      \
>                                                                         \
> @@ -716,14 +690,10 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>                                                                         \
>         "1:                                                     \n\t"   \
>         "2:                                                     \n\t"   \
> -        "gslwlc1      %[ftmp2],      0x03(%[src])               \n\t"   \
> -        "gslwrc1      %[ftmp2],      0x00(%[src])               \n\t"   \
> -        "gslwlc1      %[ftmp3],      0x04(%[src])               \n\t"   \
> -        "gslwrc1      %[ftmp3],      0x01(%[src])               \n\t"   \
> -        "gslwlc1      %[ftmp4],      0x05(%[src])               \n\t"   \
> -        "gslwrc1      %[ftmp4],      0x02(%[src])               \n\t"   \
> -        "gslwlc1      %[ftmp5],      0x06(%[src])               \n\t"   \
> -        "gslwrc1      %[ftmp5],      0x03(%[src])               \n\t"   \
> +        MMI_ULWC1(%[ftmp2], %[src], 0x00)                               \
> +        MMI_ULWC1(%[ftmp3], %[src], 0x01)                               \
> +        MMI_ULWC1(%[ftmp4], %[src], 0x02)                               \
> +        MMI_ULWC1(%[ftmp5], %[src], 0x03)                               \
>         "punpcklbh    %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
>         "pmullh       %[ftmp2],      %[ftmp2],      %[ftmp1]    \n\t"   \
>         "punpcklbh    %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
> @@ -737,8 +707,7 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "paddh        %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"   \
>         "paddh        %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"   \
>         "paddh        %[ftmp2],      %[ftmp2],      %[ftmp4]    \n\t"   \
> -        "gssdlc1      %[ftmp2],      0x07(%[tmp])               \n\t"   \
> -        "gssdrc1      %[ftmp2],      0x00(%[tmp])               \n\t"   \
> +        MMI_USDC1(%[ftmp2], %[tmp], 0x00)                               \
>                                                                         \
>         "daddi        %[x],          %[x],         -0x01        \n\t"   \
>         PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
> @@ -752,7 +721,8 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
>         "bnez         %[y],          1b                         \n\t"   \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
> +        : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32                         \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
> @@ -782,17 +752,13 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "1:                                                     \n\t"   \
>         "li           %[x],        " #x_step "                  \n\t"   \
>         "2:                                                     \n\t"   \
> -        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],       -0x180       \n\t"   \
>         TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
>                      %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
> @@ -807,8 +773,7 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
>         "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
>         "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x07(%[src2])              \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x00(%[src2])              \n\t"   \
> +        MMI_ULDC1(%[ftmp4], %[src2], 0x00)                              \
>         "li           %[rtmp0],      0x10                       \n\t"   \
>         "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
>         "punpcklhw    %[ftmp5],      %[ftmp2],      %[ftmp3]    \n\t"   \
> @@ -829,8 +794,7 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp2]    \n\t"   \
>         "pand         %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
>         "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
> -        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   \
> -        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
> +        MMI_USWC1(%[ftmp3], %[dst], 0x0)                                \
>                                                                         \
>         "daddi        %[x],          %[x],         -0x01        \n\t"   \
>         PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
> @@ -846,7 +810,8 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
>         PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
>         "bnez         %[y],          1b                         \n\t"   \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
> +        : RESTRICT_ASM_LOW32 RESTRICT_ASM_ALL64                         \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
> @@ -884,6 +849,7 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst,             \
>     double  ftmp[12];                                                     \
>     uint64_t rtmp[1];                                                     \
>     union av_intfloat64 shift;                                            \
> +    DECLARE_VAR_ALL64;                                                    \
>     shift.i = 7;                                                          \
>                                                                           \
>     y = height;                                                           \
> @@ -901,12 +867,9 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst,             \
>                                                                           \
>         "1:                                                     \n\t"     \
>         "2:                                                     \n\t"     \
> -        "gsldlc1      %[ftmp5],      0x07(%[src])               \n\t"     \
> -        "gsldrc1      %[ftmp5],      0x00(%[src])               \n\t"     \
> -        "gsldlc1      %[ftmp2],      0x07(%[src2])              \n\t"     \
> -        "gsldrc1      %[ftmp2],      0x00(%[src2])              \n\t"     \
> -        "gsldlc1      %[ftmp3],      0x0f(%[src2])              \n\t"     \
> -        "gsldrc1      %[ftmp3],      0x08(%[src2])              \n\t"     \
> +        MMI_ULDC1(%[ftmp5], %[src], 0x00)                                 \
> +        MMI_ULDC1(%[ftmp2], %[src2], 0x00)                                \
> +        MMI_ULDC1(%[ftmp3], %[src2], 0x08)                                \
>         "punpcklbh    %[ftmp4],      %[ftmp5],      %[ftmp0]    \n\t"     \
>         "punpckhbh    %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"     \
>         "psllh        %[ftmp4],      %[ftmp4],      %[ftmp1]    \n\t"     \
> @@ -940,8 +903,7 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst,             \
>         "pand         %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"     \
>         "pand         %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"     \
>         "packushb     %[ftmp2],      %[ftmp2],      %[ftmp4]    \n\t"     \
> -        "gssdlc1      %[ftmp2],      0x07(%[dst])               \n\t"     \
> -        "gssdrc1      %[ftmp2],      0x00(%[dst])               \n\t"     \
> +        MMI_USDC1(%[ftmp2], %[dst], 0x0)                                  \
>                                                                           \
>         "daddi        %[x],          %[x],         -0x01        \n\t"     \
>         PTR_ADDIU    "%[src],        %[src],        0x08        \n\t"     \
> @@ -958,7 +920,8 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst,             \
>         PTR_ADDU     "%[dst],        %[dst],       %[dststride] \n\t"     \
>         PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"     \
>         "bnez         %[y],          1b                         \n\t"     \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                   \
> +        : RESTRICT_ASM_ALL64                                              \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                   \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                   \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                   \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                   \
> @@ -1000,6 +963,8 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
>     uint64_t rtmp[1];                                                   \
>     union av_intfloat64 shift;                                          \
>     union av_intfloat64 offset;                                         \
> +    DECLARE_VAR_ALL64;                                                  \
> +    DECLARE_VAR_LOW32;                                                  \
>     shift.i = 6;                                                        \
>     offset.i = 32;                                                      \
>                                                                         \
> @@ -1019,14 +984,10 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
>                                                                         \
>         "1:                                                     \n\t"   \
>         "2:                                                     \n\t"   \
> -        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"   \
> -        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"   \
> -        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"   \
> +        MMI_ULDC1(%[ftmp3], %[src], 0x00)                               \
> +        MMI_ULDC1(%[ftmp4], %[src], 0x01)                               \
> +        MMI_ULDC1(%[ftmp5], %[src], 0x02)                               \
> +        MMI_ULDC1(%[ftmp6], %[src], 0x03)                               \
>         "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
>         "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
>         "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
> @@ -1052,8 +1013,7 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
>         "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
>         "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
> -        "gssdlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
> -        "gssdrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
> +        MMI_USDC1(%[ftmp3], %[tmp], 0x0)                                \
>                                                                         \
>         "daddi        %[x],          %[x],         -0x01        \n\t"   \
>         PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
> @@ -1067,7 +1027,8 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
>         PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
>         "bnez         %[y],          1b                         \n\t"   \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
> +        : RESTRICT_ASM_ALL64                                            \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
> @@ -1099,29 +1060,21 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
>         "1:                                                     \n\t"   \
>         "li           %[x],        " #x_step "                  \n\t"   \
>         "2:                                                     \n\t"   \
> -        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp3], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp4], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp5], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp6], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp7],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp7],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp7], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp8],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp8],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp8], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp9],      0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp9],      0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp9], %[tmp], 0x00)                               \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
> -        "gsldlc1      %[ftmp10],     0x07(%[tmp])               \n\t"   \
> -        "gsldrc1      %[ftmp10],     0x00(%[tmp])               \n\t"   \
> +        MMI_ULDC1(%[ftmp10], %[tmp], 0x00)                              \
>         PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"   \
>         TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
>                      %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
> @@ -1152,8 +1105,7 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
>         "pcmpgth      %[ftmp7],      %[ftmp3],      %[ftmp7]    \n\t"   \
>         "pand         %[ftmp3],      %[ftmp3],      %[ftmp7]    \n\t"   \
>         "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
> -        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   \
> -        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
> +        MMI_USWC1(%[ftmp3], %[dst], 0x00)                               \
>                                                                         \
>         "daddi        %[x],          %[x],         -0x01        \n\t"   \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
> @@ -1166,7 +1118,8 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
>         PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
>         PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
>         "bnez         %[y],          1b                         \n\t"   \
> -        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
> +        : RESTRICT_ASM_ALL64 RESTRICT_ASM_LOW32                         \
> +          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
>           [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
>           [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
>           [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
> diff --git a/libavcodec/mips/hpeldsp_mmi.c b/libavcodec/mips/hpeldsp_mmi.c
> index bf3e4636aa..8e9c0fa821 100644
> --- a/libavcodec/mips/hpeldsp_mmi.c
> +++ b/libavcodec/mips/hpeldsp_mmi.c
> @@ -307,6 +307,7 @@ inline void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
>     double ftmp[4];
>     mips_reg addr[5];
>     DECLARE_VAR_LOW32;
> +    DECLARE_VAR_ADDRT;
> 
>     __asm__ volatile (
>         "1:                                                             \n\t"
> diff --git a/libavcodec/mips/simple_idct_mmi.c b/libavcodec/mips/simple_idct_mmi.c
> index ad068a8251..4680520edc 100644
> --- a/libavcodec/mips/simple_idct_mmi.c
> +++ b/libavcodec/mips/simple_idct_mmi.c
> @@ -56,6 +56,8 @@ DECLARE_ALIGNED(16, const int16_t, W_arr)[46] = {
> 
> void ff_simple_idct_8_mmi(int16_t *block)
> {
> +    DECLARE_VAR_ALL64;
> +
>     BACKUP_REG
>     __asm__ volatile (
> 
> @@ -142,20 +144,20 @@ void ff_simple_idct_8_mmi(int16_t *block)
>         /* idctRowCondDC row0~8 */
> 
>         /* load W */
> -        "gslqc1       $f19,      $f18,      0x00(%[w_arr])      \n\t"
> -        "gslqc1       $f21,      $f20,      0x10(%[w_arr])      \n\t"
> -        "gslqc1       $f23,      $f22,      0x20(%[w_arr])      \n\t"
> -        "gslqc1       $f25,      $f24,      0x30(%[w_arr])      \n\t"
> -        "gslqc1       $f17,      $f16,      0x40(%[w_arr])      \n\t"
> +        MMI_LQC1($f19, $f18, %[w_arr], 0x00)
> +        MMI_LQC1($f21, $f20, %[w_arr], 0x10)
> +        MMI_LQC1($f23, $f22, %[w_arr], 0x20)
> +        MMI_LQC1($f25, $f24, %[w_arr], 0x30)
> +        MMI_LQC1($f17, $f16, %[w_arr], 0x40)
>         /* load source in block */
> -        "gslqc1       $f1,       $f0,       0x00(%[block])      \n\t"
> -        "gslqc1       $f3,       $f2,       0x10(%[block])      \n\t"
> -        "gslqc1       $f5,       $f4,       0x20(%[block])      \n\t"
> -        "gslqc1       $f7,       $f6,       0x30(%[block])      \n\t"
> -        "gslqc1       $f9,       $f8,       0x40(%[block])      \n\t"
> -        "gslqc1       $f11,      $f10,      0x50(%[block])      \n\t"
> -        "gslqc1       $f13,      $f12,      0x60(%[block])      \n\t"
> -        "gslqc1       $f15,      $f14,      0x70(%[block])      \n\t"
> +        MMI_LQC1($f1, $f0, %[block], 0x00)
> +        MMI_LQC1($f3, $f2, %[block], 0x10)
> +        MMI_LQC1($f5, $f4, %[block], 0x20)
> +        MMI_LQC1($f7, $f6, %[block], 0x30)
> +        MMI_LQC1($f9, $f8, %[block], 0x40)
> +        MMI_LQC1($f11, $f10, %[block], 0x50)
> +        MMI_LQC1($f13, $f12, %[block], 0x60)
> +        MMI_LQC1($f15, $f14, %[block], 0x70)
> 
>         /* $9: mask ; $f17: ROW_SHIFT */
>         "dmfc1        $9,        $f17                           \n\t"
> @@ -253,8 +255,7 @@ void ff_simple_idct_8_mmi(int16_t *block)
>         /* idctSparseCol col0~3 */
> 
>         /* $f17: ff_p16_32; $f16: COL_SHIFT-16 */
> -        "gsldlc1      $f17,      0x57(%[w_arr])                 \n\t"
> -        "gsldrc1      $f17,      0x50(%[w_arr])                 \n\t"
> +        MMI_ULDC1($f17, %[w_arr], 0x50)
>         "li           $10,       4                              \n\t"
>         "dmtc1        $10,       $f16                           \n\t"
>         "paddh        $f0,       $f0,       $f17                \n\t"
> @@ -395,16 +396,16 @@ void ff_simple_idct_8_mmi(int16_t *block)
>         "punpcklwd    $f11,      $f27,      $f29                \n\t"
>         "punpckhwd    $f15,      $f27,      $f29                \n\t"
>         /* Store */
> -        "gssqc1       $f1,       $f0,       0x00(%[block])      \n\t"
> -        "gssqc1       $f5,       $f4,       0x10(%[block])      \n\t"
> -        "gssqc1       $f9,       $f8,       0x20(%[block])      \n\t"
> -        "gssqc1       $f13,      $f12,      0x30(%[block])      \n\t"
> -        "gssqc1       $f3,       $f2,       0x40(%[block])      \n\t"
> -        "gssqc1       $f7,       $f6,       0x50(%[block])      \n\t"
> -        "gssqc1       $f11,      $f10,      0x60(%[block])      \n\t"
> -        "gssqc1       $f15,      $f14,      0x70(%[block])      \n\t"
> +        MMI_SQC1($f1, $f0, %[block], 0x00)
> +        MMI_SQC1($f5, $f4, %[block], 0x10)
> +        MMI_SQC1($f9, $f8, %[block], 0x20)
> +        MMI_SQC1($f13, $f12, %[block], 0x30)
> +        MMI_SQC1($f3, $f2, %[block], 0x40)
> +        MMI_SQC1($f7, $f6, %[block], 0x50)
> +        MMI_SQC1($f11, $f10, %[block], 0x60)
> +        MMI_SQC1($f15, $f14, %[block], 0x70)
> 
> -        : [block]"+&r"(block)
> +        : RESTRICT_ASM_ALL64 [block]"+&r"(block)
>         : [w_arr]"r"(W_arr)
>         : "memory"
>     );
> diff --git a/libavcodec/mips/vp3dsp_idct_mmi.c b/libavcodec/mips/vp3dsp_idct_mmi.c
> index 0d4cba19ec..f658affd36 100644
> --- a/libavcodec/mips/vp3dsp_idct_mmi.c
> +++ b/libavcodec/mips/vp3dsp_idct_mmi.c
> @@ -722,6 +722,8 @@ void ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1,
>     if (h == 8) {
>         double ftmp[6];
>         uint64_t tmp[2];
> +        DECLARE_VAR_ALL64;
> +    
>         __asm__ volatile (
>             "li          %[tmp0],        0x08                            \n\t"
>             "li          %[tmp1],        0xfefefefe                      \n\t"
> @@ -730,10 +732,8 @@ void ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1,
>             "li          %[tmp1],        0x01                            \n\t"
>             "dmtc1       %[tmp1],        %[ftmp5]                        \n\t"
>             "1:                                                          \n\t"
> -            "gsldlc1     %[ftmp1],       0x07(%[src1])                   \n\t"
> -            "gsldrc1     %[ftmp1],       0x00(%[src1])                   \n\t"
> -            "gsldlc1     %[ftmp2],       0x07(%[src2])                   \n\t"
> -            "gsldrc1     %[ftmp2],       0x00(%[src2])                   \n\t"
> +            MMI_ULDC1(%[ftmp1], %[src1], 0x0)
> +            MMI_ULDC1(%[ftmp2], %[src2], 0x0)
>             "pxor        %[ftmp3],       %[ftmp1],             %[ftmp2]  \n\t"
>             "pand        %[ftmp3],       %[ftmp3],             %[ftmp4]  \n\t"
>             "psrlw       %[ftmp3],       %[ftmp3],             %[ftmp5]  \n\t"
> @@ -745,7 +745,8 @@ void ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1,
>             PTR_ADDU    "%[dst],         %[dst],               %[stride] \n\t"
>             PTR_ADDIU   "%[tmp0],        %[tmp0],              -0x01     \n\t"
>             "bnez        %[tmp0],        1b                              \n\t"
> -            : [dst]"+&r"(dst), [src1]"+&r"(src1), [src2]"+&r"(src2),
> +            : RESTRICT_ASM_ALL64
> +              [dst]"+&r"(dst), [src1]"+&r"(src1), [src2]"+&r"(src2),
>               [ftmp1]"=&f"(ftmp[0]), [ftmp2]"=&f"(ftmp[1]), [ftmp3]"=&f"(ftmp[2]),
>               [ftmp4]"=&f"(ftmp[3]), [ftmp5]"=&f"(ftmp[4]), [ftmp6]"=&f"(ftmp[5]),
>               [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1])
> diff --git a/libavcodec/mips/vp8dsp_mmi.c b/libavcodec/mips/vp8dsp_mmi.c
> index 327eaf561e..bc774aa365 100644
> --- a/libavcodec/mips/vp8dsp_mmi.c
> +++ b/libavcodec/mips/vp8dsp_mmi.c
> @@ -791,51 +791,40 @@ static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst,
>     DECLARE_DOUBLE_1;
>     DECLARE_DOUBLE_2;
>     DECLARE_UINT32_T;
> +    DECLARE_VAR_ALL64;
> +
>     __asm__ volatile(
>         /* Get data from dst */
> -        "gsldlc1    %[q0],      0x07(%[dst])                      \n\t"
> -        "gsldrc1    %[q0],      0x00(%[dst])                      \n\t"
> +        MMI_ULDC1(%[q0], %[dst], 0x0)
>         PTR_SUBU    "%[tmp0],   %[dst],         %[stride]         \n\t"
> -        "gsldlc1    %[p0],      0x07(%[tmp0])                     \n\t"
> -        "gsldrc1    %[p0],      0x00(%[tmp0])                     \n\t"
> +        MMI_ULDC1(%[p0], %[tmp0], 0x0)
>         PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gsldlc1    %[p1],      0x07(%[tmp0])                     \n\t"
> -        "gsldrc1    %[p1],      0x00(%[tmp0])                     \n\t"
> +        MMI_ULDC1(%[p1], %[tmp0], 0x0)
>         PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gsldlc1    %[p2],      0x07(%[tmp0])                     \n\t"
> -        "gsldrc1    %[p2],      0x00(%[tmp0])                     \n\t"
> +        MMI_ULDC1(%[p2], %[tmp0], 0x0)
>         PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gsldlc1    %[p3],      0x07(%[tmp0])                     \n\t"
> -        "gsldrc1    %[p3],      0x00(%[tmp0])                     \n\t"
> +        MMI_ULDC1(%[p3], %[tmp0], 0x0)
>         PTR_ADDU    "%[tmp0],   %[dst],         %[stride]         \n\t"
> -        "gsldlc1    %[q1],      0x07(%[tmp0])                     \n\t"
> -        "gsldrc1    %[q1],      0x00(%[tmp0])                     \n\t"
> +        MMI_ULDC1(%[q1], %[tmp0], 0x0)
>         PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gsldlc1    %[q2],      0x07(%[tmp0])                     \n\t"
> -        "gsldrc1    %[q2],      0x00(%[tmp0])                     \n\t"
> +        MMI_ULDC1(%[q2], %[tmp0], 0x0)
>         PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gsldlc1    %[q3],      0x07(%[tmp0])                     \n\t"
> -        "gsldrc1    %[q3],      0x00(%[tmp0])                     \n\t"
> +        MMI_ULDC1(%[q3], %[tmp0], 0x0)
>         MMI_VP8_LOOP_FILTER
>         /* Move to dst */
> -        "gssdlc1    %[q0],      0x07(%[dst])                      \n\t"
> -        "gssdrc1    %[q0],      0x00(%[dst])                      \n\t"
> +        MMI_USDC1(%[q0], %[dst], 0x0)
>         PTR_SUBU    "%[tmp0],   %[dst],         %[stride]         \n\t"
> -        "gssdlc1    %[p0],      0x07(%[tmp0])                     \n\t"
> -        "gssdrc1    %[p0],      0x00(%[tmp0])                     \n\t"
> +        MMI_USDC1(%[p0], %[tmp0], 0x0)
>         PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gssdlc1    %[p1],      0x07(%[tmp0])                     \n\t"
> -        "gssdrc1    %[p1],      0x00(%[tmp0])                     \n\t"
> +        MMI_USDC1(%[p1], %[tmp0], 0x0)
>         PTR_SUBU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gssdlc1    %[p2],      0x07(%[tmp0])                     \n\t"
> -        "gssdrc1    %[p2],      0x00(%[tmp0])                     \n\t"
> +        MMI_USDC1(%[p2], %[tmp0], 0x0)
>         PTR_ADDU    "%[tmp0],   %[dst],         %[stride]         \n\t"
> -        "gssdlc1    %[q1],      0x07(%[tmp0])                     \n\t"
> -        "gssdrc1    %[q1],      0x00(%[tmp0])                     \n\t"
> +        MMI_USDC1(%[q1], %[tmp0], 0x0)
>         PTR_ADDU    "%[tmp0],   %[tmp0],        %[stride]         \n\t"
> -        "gssdlc1    %[q2],      0x07(%[tmp0])                     \n\t"
> -        "gssdrc1    %[q2],      0x00(%[tmp0])                     \n\t"
> -        : [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
> +        MMI_USDC1(%[q2], %[tmp0], 0x0)
> +        : RESTRICT_ASM_ALL64
> +          [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
>           [p1]"=&f"(ftmp[2]),       [p0]"=&f"(ftmp[3]),
>           [q0]"=&f"(ftmp[4]),       [q1]"=&f"(ftmp[5]),
>           [q2]"=&f"(ftmp[6]),       [q3]"=&f"(ftmp[7]),
> @@ -876,31 +865,25 @@ static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst,
>     DECLARE_DOUBLE_1;
>     DECLARE_DOUBLE_2;
>     DECLARE_UINT32_T;
> +    DECLARE_VAR_ALL64;
> +
>     __asm__ volatile(
>         /* Get data from dst */
> -        "gsldlc1    %[p3],        0x03(%[dst])                    \n\t"
> -        "gsldrc1    %[p3],        -0x04(%[dst])                   \n\t"
> +        MMI_ULDC1(%[p3], %[dst], -0x04)
>         PTR_ADDU    "%[tmp0],     %[dst],           %[stride]     \n\t"
> -        "gsldlc1    %[p2],        0x03(%[tmp0])                   \n\t"
> -        "gsldrc1    %[p2],        -0x04(%[tmp0])                  \n\t"
> +        MMI_ULDC1(%[p2], %[tmp0], -0x04)
>         PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
> -        "gsldlc1    %[p1],        0x03(%[tmp0])                   \n\t"
> -        "gsldrc1    %[p1],        -0x04(%[tmp0])                  \n\t"
> +        MMI_ULDC1(%[p1], %[tmp0], -0x04)
>         PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
> -        "gsldlc1    %[p0],        0x03(%[tmp0])                   \n\t"
> -        "gsldrc1    %[p0],        -0x04(%[tmp0])                  \n\t"
> +        MMI_ULDC1(%[p0], %[tmp0], -0x04)
>         PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
> -        "gsldlc1    %[q0],        0x03(%[tmp0])                   \n\t"
> -        "gsldrc1    %[q0],        -0x04(%[tmp0])                  \n\t"
> +        MMI_ULDC1(%[q0], %[tmp0], -0x04)
>         PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
> -        "gsldlc1    %[q1],        0x03(%[tmp0])                   \n\t"
> -        "gsldrc1    %[q1],        -0x04(%[tmp0])                  \n\t"
> +        MMI_ULDC1(%[q1], %[tmp0], -0x04)
>         PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
> -        "gsldlc1    %[q2],        0x03(%[tmp0])                   \n\t"
> -        "gsldrc1    %[q2],        -0x04(%[tmp0])                  \n\t"
> +        MMI_ULDC1(%[q2], %[tmp0], -0x04)
>         PTR_ADDU    "%[tmp0],     %[tmp0],          %[stride]     \n\t"
> -        "gsldlc1    %[q3],        0x03(%[tmp0])                   \n\t"
> -        "gsldrc1    %[q3],        -0x04(%[tmp0])                  \n\t"
> +        MMI_ULDC1(%[q3], %[tmp0], -0x04)
>         /* Matrix transpose */
>         TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0],
>                      %[q0], %[q1], %[q2], %[q3],
> @@ -911,30 +894,23 @@ static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst,
>                      %[q0], %[q1], %[q2], %[q3],
>                      %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
>         /* Move to dst */
> -        "gssdlc1    %[p3],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[p3],        -0x04(%[dst])                   \n\t"
> +        MMI_USDC1(%[p3], %[dst], -0x04)
>         PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
> -        "gssdlc1    %[p2],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[p2],        -0x04(%[dst])                   \n\t"
> +        MMI_USDC1(%[p2], %[dst], -0x04)
>         PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
> -        "gssdlc1    %[p1],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[p1],        -0x04(%[dst])                   \n\t"
> +        MMI_USDC1(%[p1], %[dst], -0x04)
>         PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
> -        "gssdlc1    %[p0],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[p0],        -0x04(%[dst])                   \n\t"
> +        MMI_USDC1(%[p0], %[dst], -0x04)
>         PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
> -        "gssdlc1    %[q0],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[q0],        -0x04(%[dst])                   \n\t"
> +        MMI_USDC1(%[q0], %[dst], -0x04)
>         PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
> -        "gssdlc1    %[q1],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[q1],        -0x04(%[dst])                   \n\t"
> +        MMI_USDC1(%[q1], %[dst], -0x04)
>         PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
> -        "gssdlc1    %[q2],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[q2],        -0x04(%[dst])                   \n\t"
> +        MMI_USDC1(%[q2], %[dst], -0x04)
>         PTR_ADDU    "%[dst],      %[dst],           %[stride]     \n\t"
> -        "gssdlc1    %[q3],        0x03(%[dst])                    \n\t"
> -        "gssdrc1    %[q3],        -0x04(%[dst])                   \n\t"
> -        : [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
> +        MMI_USDC1(%[q3], %[dst], -0x04)
> +        : RESTRICT_ASM_ALL64
> +          [p3]"=&f"(ftmp[0]),       [p2]"=&f"(ftmp[1]),
>           [p1]"=&f"(ftmp[2]),       [p0]"=&f"(ftmp[3]),
>           [q0]"=&f"(ftmp[4]),       [q1]"=&f"(ftmp[5]),
>           [q2]"=&f"(ftmp[6]),       [q3]"=&f"(ftmp[7]),
> diff --git a/libavcodec/mips/vp9_mc_mmi.c b/libavcodec/mips/vp9_mc_mmi.c
> index 812f7a6994..495cac3d0b 100644
> --- a/libavcodec/mips/vp9_mc_mmi.c
> +++ b/libavcodec/mips/vp9_mc_mmi.c
> @@ -77,29 +77,24 @@ static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
> {
>     double ftmp[15];
>     uint32_t tmp[2];
> +    DECLARE_VAR_ALL64;
>     src -= 3;
>     src_stride -= w;
>     dst_stride -= w;
>     __asm__ volatile (
>         "move       %[tmp1],    %[width]                   \n\t"
>         "pxor       %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
> -        "gsldlc1    %[filter1], 0x07(%[filter])            \n\t"
> -        "gsldrc1    %[filter1], 0x00(%[filter])            \n\t"
> -        "gsldlc1    %[filter2], 0x0f(%[filter])            \n\t"
> -        "gsldrc1    %[filter2], 0x08(%[filter])            \n\t"
> +        MMI_ULDC1(%[filter1], %[filter], 0x00)
> +        MMI_ULDC1(%[filter2], %[filter], 0x08)
>         "li         %[tmp0],    0x07                       \n\t"
>         "dmtc1      %[tmp0],    %[ftmp13]                  \n\t"
>         "punpcklwd  %[ftmp13],  %[ftmp13],   %[ftmp13]     \n\t"
>         "1:                                                \n\t"
>         /* Get 8 data per row */
> -        "gsldlc1    %[ftmp5],   0x07(%[src])               \n\t"
> -        "gsldrc1    %[ftmp5],   0x00(%[src])               \n\t"
> -        "gsldlc1    %[ftmp7],   0x08(%[src])               \n\t"
> -        "gsldrc1    %[ftmp7],   0x01(%[src])               \n\t"
> -        "gsldlc1    %[ftmp9],   0x09(%[src])               \n\t"
> -        "gsldrc1    %[ftmp9],   0x02(%[src])               \n\t"
> -        "gsldlc1    %[ftmp11],  0x0A(%[src])               \n\t"
> -        "gsldrc1    %[ftmp11],  0x03(%[src])               \n\t"
> +        MMI_ULDC1(%[ftmp5], %[src], 0x00)
> +        MMI_ULDC1(%[ftmp7], %[src], 0x01)
> +        MMI_ULDC1(%[ftmp9], %[src], 0x02)
> +        MMI_ULDC1(%[ftmp11], %[src], 0x03)
>         "punpcklbh  %[ftmp4],   %[ftmp5],    %[ftmp0]      \n\t"
>         "punpckhbh  %[ftmp5],   %[ftmp5],    %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp6],   %[ftmp7],    %[ftmp0]      \n\t"
> @@ -127,7 +122,8 @@ static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
>         PTR_ADDU   "%[dst],     %[dst],      %[dst_stride] \n\t"
>         PTR_ADDIU  "%[height],  %[height],   -0x01         \n\t"
>         "bnez       %[height],  1b                         \n\t"
> -        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
> +        : RESTRICT_ASM_ALL64
> +          [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
>           [filter1]"=&f"(ftmp[2]),  [filter2]"=&f"(ftmp[3]),
>           [ftmp0]"=&f"(ftmp[4]),    [ftmp4]"=&f"(ftmp[5]),
>           [ftmp5]"=&f"(ftmp[6]),    [ftmp6]"=&f"(ftmp[7]),
> @@ -153,15 +149,14 @@ static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
>     double ftmp[17];
>     uint32_t tmp[1];
>     ptrdiff_t addr = src_stride;
> +    DECLARE_VAR_ALL64;
>     src_stride -= w;
>     dst_stride -= w;
> 
>     __asm__ volatile (
>         "pxor       %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t"
> -        "gsldlc1    %[ftmp4],    0x07(%[filter])           \n\t"
> -        "gsldrc1    %[ftmp4],    0x00(%[filter])           \n\t"
> -        "gsldlc1    %[ftmp5],    0x0f(%[filter])           \n\t"
> -        "gsldrc1    %[ftmp5],    0x08(%[filter])           \n\t"
> +        MMI_ULDC1(%[ftmp4], %[filter], 0x00)
> +        MMI_ULDC1(%[ftmp5], %[filter], 0x08)
>         "punpcklwd  %[filter10], %[ftmp4],   %[ftmp4]      \n\t"
>         "punpckhwd  %[filter32], %[ftmp4],   %[ftmp4]      \n\t"
>         "punpcklwd  %[filter54], %[ftmp5],   %[ftmp5]      \n\t"
> @@ -171,29 +166,21 @@ static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
>         "punpcklwd  %[ftmp13],   %[ftmp13],  %[ftmp13]     \n\t"
>         "1:                                                \n\t"
>         /* Get 8 data per column */
> -        "gsldlc1    %[ftmp4],    0x07(%[src])              \n\t"
> -        "gsldrc1    %[ftmp4],    0x00(%[src])              \n\t"
> +        MMI_ULDC1(%[ftmp4], %[src], 0x0)
>         PTR_ADDU   "%[tmp0],     %[src],     %[addr]       \n\t"
> -        "gsldlc1    %[ftmp5],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp5],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp5], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp6],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp6],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp6], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp7],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp7],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp7], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp8],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp8],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp8], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp9],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp9],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp9], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp10],   0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp10],   0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp10], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp11],   0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp11],   0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp11], %[tmp0], 0x0)
>         "punpcklbh  %[ftmp4],    %[ftmp4],   %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp5],    %[ftmp5],   %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp6],    %[ftmp6],   %[ftmp0]      \n\t"
> @@ -221,7 +208,8 @@ static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
>         PTR_ADDU   "%[dst],      %[dst],     %[dst_stride] \n\t"
>         PTR_ADDIU  "%[height],   %[height],  -0x01         \n\t"
>         "bnez       %[height],   1b                        \n\t"
> -        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
> +        : RESTRICT_ASM_ALL64
> +          [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
>           [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
>           [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
>           [ftmp0]"=&f"(ftmp[6]),    [ftmp4]"=&f"(ftmp[7]),
> @@ -247,6 +235,7 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
> {
>     double ftmp[15];
>     uint32_t tmp[2];
> +    DECLARE_VAR_ALL64;
>     src -= 3;
>     src_stride -= w;
>     dst_stride -= w;
> @@ -254,23 +243,17 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
>     __asm__ volatile (
>         "move       %[tmp1],    %[width]                   \n\t"
>         "pxor       %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
> -        "gsldlc1    %[filter1], 0x07(%[filter])            \n\t"
> -        "gsldrc1    %[filter1], 0x00(%[filter])            \n\t"
> -        "gsldlc1    %[filter2], 0x0f(%[filter])            \n\t"
> -        "gsldrc1    %[filter2], 0x08(%[filter])            \n\t"
> +        MMI_ULDC1(%[filter1], %[filter], 0x00)
> +        MMI_ULDC1(%[filter2], %[filter], 0x08)
>         "li         %[tmp0],    0x07                       \n\t"
>         "dmtc1      %[tmp0],    %[ftmp13]                  \n\t"
>         "punpcklwd  %[ftmp13],  %[ftmp13],   %[ftmp13]     \n\t"
>         "1:                                                \n\t"
>         /* Get 8 data per row */
> -        "gsldlc1    %[ftmp5],   0x07(%[src])               \n\t"
> -        "gsldrc1    %[ftmp5],   0x00(%[src])               \n\t"
> -        "gsldlc1    %[ftmp7],   0x08(%[src])               \n\t"
> -        "gsldrc1    %[ftmp7],   0x01(%[src])               \n\t"
> -        "gsldlc1    %[ftmp9],   0x09(%[src])               \n\t"
> -        "gsldrc1    %[ftmp9],   0x02(%[src])               \n\t"
> -        "gsldlc1    %[ftmp11],  0x0A(%[src])               \n\t"
> -        "gsldrc1    %[ftmp11],  0x03(%[src])               \n\t"
> +        MMI_ULDC1(%[ftmp5], %[src], 0x00)
> +        MMI_ULDC1(%[ftmp7], %[src], 0x01)
> +        MMI_ULDC1(%[ftmp9], %[src], 0x02)
> +        MMI_ULDC1(%[ftmp11], %[src], 0x03)
>         "punpcklbh  %[ftmp4],   %[ftmp5],    %[ftmp0]      \n\t"
>         "punpckhbh  %[ftmp5],   %[ftmp5],    %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp6],   %[ftmp7],    %[ftmp0]      \n\t"
> @@ -289,8 +272,7 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
>         "packsswh   %[srcl],    %[srcl],     %[srch]       \n\t"
>         "packushb   %[ftmp12],  %[srcl],     %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp12],  %[ftmp12],   %[ftmp0]      \n\t"
> -        "gsldlc1    %[ftmp4],   0x07(%[dst])               \n\t"
> -        "gsldrc1    %[ftmp4],   0x00(%[dst])               \n\t"
> +        MMI_ULDC1(%[ftmp4], %[dst], 0x0)
>         "punpcklbh  %[ftmp4],   %[ftmp4],    %[ftmp0]      \n\t"
>         "paddh      %[ftmp12],  %[ftmp12],   %[ftmp4]      \n\t"
>         "li         %[tmp0],    0x10001                    \n\t"
> @@ -309,7 +291,8 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
>         PTR_ADDU   "%[dst],     %[dst],      %[dst_stride] \n\t"
>         PTR_ADDIU  "%[height],  %[height],   -0x01         \n\t"
>         "bnez       %[height],  1b                         \n\t"
> -        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
> +        : RESTRICT_ASM_ALL64
> +          [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
>           [filter1]"=&f"(ftmp[2]),  [filter2]"=&f"(ftmp[3]),
>           [ftmp0]"=&f"(ftmp[4]),    [ftmp4]"=&f"(ftmp[5]),
>           [ftmp5]"=&f"(ftmp[6]),    [ftmp6]"=&f"(ftmp[7]),
> @@ -335,15 +318,14 @@ static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
>     double ftmp[17];
>     uint32_t tmp[1];
>     ptrdiff_t addr = src_stride;
> +    DECLARE_VAR_ALL64;
>     src_stride -= w;
>     dst_stride -= w;
> 
>     __asm__ volatile (
>         "pxor       %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t"
> -        "gsldlc1    %[ftmp4],    0x07(%[filter])           \n\t"
> -        "gsldrc1    %[ftmp4],    0x00(%[filter])           \n\t"
> -        "gsldlc1    %[ftmp5],    0x0f(%[filter])           \n\t"
> -        "gsldrc1    %[ftmp5],    0x08(%[filter])           \n\t"
> +        MMI_ULDC1(%[ftmp4], %[filter], 0x00)
> +        MMI_ULDC1(%[ftmp5], %[filter], 0x08)
>         "punpcklwd  %[filter10], %[ftmp4],   %[ftmp4]      \n\t"
>         "punpckhwd  %[filter32], %[ftmp4],   %[ftmp4]      \n\t"
>         "punpcklwd  %[filter54], %[ftmp5],   %[ftmp5]      \n\t"
> @@ -353,29 +335,21 @@ static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
>         "punpcklwd  %[ftmp13],   %[ftmp13],  %[ftmp13]     \n\t"
>         "1:                                                \n\t"
>         /* Get 8 data per column */
> -        "gsldlc1    %[ftmp4],    0x07(%[src])              \n\t"
> -        "gsldrc1    %[ftmp4],    0x00(%[src])              \n\t"
> +        MMI_ULDC1(%[ftmp4], %[src], 0x0)
>         PTR_ADDU   "%[tmp0],     %[src],     %[addr]       \n\t"
> -        "gsldlc1    %[ftmp5],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp5],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp5], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp6],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp6],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp6], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp7],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp7],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp7], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp8],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp8],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp8], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp9],    0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp9],    0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp9], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp10],   0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp10],   0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp10], %[tmp0], 0x0)
>         PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
> -        "gsldlc1    %[ftmp11],   0x07(%[tmp0])             \n\t"
> -        "gsldrc1    %[ftmp11],   0x00(%[tmp0])             \n\t"
> +        MMI_ULDC1(%[ftmp11], %[tmp0], 0x0)
>         "punpcklbh  %[ftmp4],    %[ftmp4],   %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp5],    %[ftmp5],   %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp6],    %[ftmp6],   %[ftmp0]      \n\t"
> @@ -394,8 +368,7 @@ static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
>         "packsswh   %[srcl],     %[srcl],    %[srch]       \n\t"
>         "packushb   %[ftmp12],   %[srcl],    %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp12],   %[ftmp12],  %[ftmp0]      \n\t"
> -        "gsldlc1    %[ftmp4],    0x07(%[dst])              \n\t"
> -        "gsldrc1    %[ftmp4],    0x00(%[dst])              \n\t"
> +        MMI_ULDC1(%[ftmp4], %[dst], 0x00)
>         "punpcklbh  %[ftmp4],    %[ftmp4],   %[ftmp0]      \n\t"
>         "paddh      %[ftmp12],   %[ftmp12],  %[ftmp4]      \n\t"
>         "li         %[tmp0],     0x10001                   \n\t"
> @@ -414,7 +387,8 @@ static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
>         PTR_ADDU   "%[dst],      %[dst],     %[dst_stride] \n\t"
>         PTR_ADDIU  "%[height],   %[height],  -0x01         \n\t"
>         "bnez       %[height],   1b                        \n\t"
> -        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
> +        : RESTRICT_ASM_ALL64
> +          [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
>           [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
>           [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
>           [ftmp0]"=&f"(ftmp[6]),    [ftmp4]"=&f"(ftmp[7]),
> @@ -439,6 +413,7 @@ static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
> {
>     double ftmp[4];
>     uint32_t tmp[2];
> +    DECLARE_VAR_ALL64;
>     src_stride -= w;
>     dst_stride -= w;
> 
> @@ -449,10 +424,8 @@ static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
>         "dmtc1      %[tmp0],    %[ftmp3]                  \n\t"
>         "punpcklhw  %[ftmp3],   %[ftmp3],   %[ftmp3]      \n\t"
>         "1:                                               \n\t"
> -        "gslwlc1    %[ftmp1],   0x07(%[src])              \n\t"
> -        "gslwrc1    %[ftmp1],   0x00(%[src])              \n\t"
> -        "gslwlc1    %[ftmp2],   0x07(%[dst])              \n\t"
> -        "gslwrc1    %[ftmp2],   0x00(%[dst])              \n\t"
> +        MMI_ULDC1(%[ftmp1], %[src], 0x00)
> +        MMI_ULDC1(%[ftmp2], %[dst], 0x00)
>         "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]      \n\t"
>         "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]      \n\t"
>         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t"
> @@ -469,7 +442,8 @@ static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
>         PTR_ADDU   "%[src],     %[src],     %[src_stride] \n\t"
>         PTR_ADDIU  "%[height],  %[height],  -0x01         \n\t"
>         "bnez       %[height],  1b                        \n\t"
> -        : [ftmp0]"=&f"(ftmp[0]),  [ftmp1]"=&f"(ftmp[1]),
> +        : RESTRICT_ASM_ALL64
> +          [ftmp0]"=&f"(ftmp[0]),  [ftmp1]"=&f"(ftmp[1]),
>           [ftmp2]"=&f"(ftmp[2]),  [ftmp3]"=&f"(ftmp[3]),
>           [tmp0]"=&r"(tmp[0]),    [tmp1]"=&r"(tmp[1]),
>           [src]"+&r"(src),        [dst]"+&r"(dst),
> -- 
> 2.32.0

LGTM