[FFmpeg-cvslog] avcodec: loongson relocate constants of idctdsp and h264pred

Mon Jul 20 14:37:03 CEST 2015

ffmpeg | branch: master | 周晓勇 <zhouxiaoyong at loongson.cn> | Mon Jul 20 13:45:49 2015 +0800| [fdac5ff682f0c8070e5be39a44dd0f29bd0fa435] | committer: Michael Niedermayer

avcodec: loongson relocate constants of idctdsp and h264pred

Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fdac5ff682f0c8070e5be39a44dd0f29bd0fa435
---

 libavcodec/mips/constants.c    |    5 ++++
 libavcodec/mips/constants.h    |    5 ++++
 libavcodec/mips/h264pred_mmi.c |   61 +++++++++++++++-------------------------
 libavcodec/mips/idctdsp_mmi.c  |    8 ++----
 4 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
index 84841c2..a25fd24 100644
--- a/libavcodec/mips/constants.c
+++ b/libavcodec/mips/constants.c
@@ -42,6 +42,8 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) =    {0x0004000300020001ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) =    {0x0008000700060005ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) =    {0x0003000200010000ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) =    {0x0007000600050004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) =    {0x000b000a00090008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) =    {0x000f000e000d000cULL};
 
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) =       {0x0101010101010101ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) =       {0x0303030303030303ULL};
@@ -51,3 +53,6 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) =      {0xA1A1A1A1A1A1A1A1ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd) =        {0x0004000400040004ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) =       {0x0040004000400040ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) =       {0x0020002000200020ULL};
+
+DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) =     {0xFFFF0000FFFF0000ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_d40000) =     {0x0000000000040000ULL};
diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
index 8f5292e..571002f 100644
--- a/libavcodec/mips/constants.h
+++ b/libavcodec/mips/constants.h
@@ -43,6 +43,8 @@ extern const uint64_t ff_pw_1to4;
 extern const uint64_t ff_pw_5to8;
 extern const uint64_t ff_pw_0to3;
 extern const uint64_t ff_pw_4to7;
+extern const uint64_t ff_pw_8tob;
+extern const uint64_t ff_pw_ctof;
 
 extern const uint64_t ff_pb_1;
 extern const uint64_t ff_pb_3;
@@ -53,4 +55,7 @@ extern const uint64_t ff_rnd;
 extern const uint64_t ff_rnd2;
 extern const uint64_t ff_rnd3;
 
+extern const uint64_t ff_wm1010;
+extern const uint64_t ff_d40000;
+
 #endif /* AVCODEC_MIPS_CONSTANTS_H */
diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
index b8c0676..c5ae796 100644
--- a/libavcodec/mips/h264pred_mmi.c
+++ b/libavcodec/mips/h264pred_mmi.c
@@ -23,6 +23,7 @@
  */
 
 #include "h264pred_mips.h"
+#include "constants.h"
 
 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
@@ -50,14 +51,12 @@ void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
     __asm__ volatile (
-        ".set arch=loongson3a               \r\n"
         "daddiu $2, %0, -1                  \r\n"
         "daddu $3, %0, $0                   \r\n"
         "dli $6, 0x10                       \r\n"
-        "dli $7, 0x0101010101010101         \r\n"
         "1:                                 \r\n"
         "lbu $4, 0($2)                      \r\n"
-        "dmul $5, $4, $7                    \r\n"
+        "dmul $5, $4, %2                    \r\n"
         "sdl $5, 7($3)                      \r\n"
         "sdr $5, 0($3)                      \r\n"
         "sdl $5, 15($3)                     \r\n"
@@ -66,7 +65,7 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
         "daddu $3, %1                       \r\n"
         "daddiu $6, -1                      \r\n"
         "bnez $6, 1b                        \r\n"
-        ::"r"(src),"r"(stride)
+        ::"r"(src),"r"(stride),"r"(ff_pb_1)
         : "$2","$3","$4","$5","$6","memory"
     );
 }
@@ -74,7 +73,6 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
     __asm__ volatile (
-        ".set arch=loongson3a               \r\n"
         "daddiu $2, %0, -1                  \r\n"
         "dli $6, 0x10                       \r\n"
         "xor $8, $8, $8                     \r\n"
@@ -93,10 +91,9 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
         "daddiu $2, $2, 1                   \r\n"
         "daddiu $6, $6, -1                  \r\n"
         "bnez $6, 2b                        \r\n"
-        "dli $7, 0x0101010101010101         \r\n"
         "daddiu $8, $8, 0x10                \r\n"
         "dsra $8, 5                         \r\n"
-        "dmul $5, $8, $7                    \r\n"
+        "dmul $5, $8, %2                    \r\n"
         "daddu $2, %0, $0                   \r\n"
         "dli $6, 0x10                       \r\n"
         "3:                                 \r\n"
@@ -107,8 +104,8 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
         "daddu $2, $2, %1                   \r\n"
         "daddiu $6, $6, -1                  \r\n"
         "bnez $6, 3b                        \r\n"
-        ::"r"(src),"r"(stride)
-        : "$2","$3","$4","$5","$6","$7","$8","memory"
+        ::"r"(src),"r"(stride),"r"(ff_pb_1)
+        : "$2","$3","$4","$5","$6","$8","memory"
     );
 }
 
@@ -348,10 +345,8 @@ void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
                  + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
 
     __asm__ volatile (
-        ".set arch=loongson3a               \r\n"
-        "dli $4, 0x01010101010101           \r\n"
         "daddu $2, %2, $0                   \r\n"
-        "dmul $3, $2, $4                    \r\n"
+        "dmul $3, $2, %3                    \r\n"
         "xor $4, $4, $4                     \r\n"
         "gsswx $3, 0(%0,$4)                 \r\n"
         "daddu $4, %1                       \r\n"
@@ -360,7 +355,7 @@ void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
         "gsswx $3, 0(%0,$4)                 \r\n"
         "daddu $4, %1                       \r\n"
         "gsswx $3, 0(%0,$4)                 \r\n"
-        ::"r"(src),"r"(stride),"r"(dc)
+        ::"r"(src),"r"(stride),"r"(dc),"r"(ff_pb_1)
         : "$2","$3","$4","memory"
     );
 }
@@ -387,21 +382,19 @@ void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
     __asm__ volatile (
-        ".set arch=loongson3a               \r\n"
         "daddiu $2, %0, -1                  \r\n"
         "daddu $3, %0, $0                   \r\n"
         "dli $6, 0x8                        \r\n"
-        "dli $7, 0x0101010101010101         \r\n"
         "1:                                 \r\n"
         "lbu $4, 0($2)                      \r\n"
-        "dmul $5, $4, $7                    \r\n"
+        "dmul $5, $4, %2                    \r\n"
         "sdl $5, 7($3)                      \r\n"
         "sdr $5, 0($3)                      \r\n"
         "daddu $2, %1                       \r\n"
         "daddu $3, %1                       \r\n"
         "daddiu $6, -1                      \r\n"
         "bnez $6, 1b                        \r\n"
-        ::"r"(src),"r"(stride)
+        ::"r"(src),"r"(stride),"r"(ff_pb_1)
         : "$2","$3","$4","$5","$6","memory"
     );
 }
@@ -425,14 +418,10 @@ static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
         "punpcklbh $f2, $f2, $f8            \r\n"
         "punpcklbh $f4, $f4, $f8            \r\n"
         "punpcklbh $f6, $f6, $f8            \r\n"
-        "dli $4, 0xfffbfffafff9fff8         \r\n"
-        "dmtc1 $4, $f20                     \r\n"
-        "dli $4, 0xfffffffefffdfffc         \r\n"
-        "dmtc1 $4, $f22                     \r\n"
-        "dli $4, 0x0004000300020001         \r\n"
-        "dmtc1 $4, $f24                     \r\n"
-        "dli $4, 0x0008000700060005         \r\n"
-        "dmtc1 $4, $f26                     \r\n"
+        "dmtc1 %4, $f20                     \r\n"
+        "dmtc1 %5, $f22                     \r\n"
+        "dmtc1 %6, $f24                     \r\n"
+        "dmtc1 %7, $f26                     \r\n"
         "pmullh $f0, $f0, $f20              \r\n"
         "pmullh $f2, $f2, $f22              \r\n"
         "pmullh $f4, $f4, $f24              \r\n"
@@ -576,17 +565,13 @@ static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
         "pshufh $f12, $f12, $f8             \r\n"
         "dli $4, 5                          \r\n"
         "dmtc1 $4, $f14                     \r\n"
-        "dli $4, 0x0003000200010000         \r\n"
-        "dmtc1 $4, $f2                      \r\n"
+        "dmtc1 %8, $f2                      \r\n"
         "pmullh $f2, $f2, $f0               \r\n"
-        "dli $4, 0x0007000600050004         \r\n"
-        "dmtc1 $4, $f4                      \r\n"
+        "dmtc1 %9, $f4                      \r\n"
         "pmullh $f4, $f4, $f0               \r\n"
-        "dli $4, 0x000b000a00090008         \r\n"
-        "dmtc1 $4, $f6                      \r\n"
+        "dmtc1 %10, $f6                      \r\n"
         "pmullh $f6, $f6, $f0               \r\n"
-        "dli $4, 0x000f000e000d000c         \r\n"
-        "dmtc1 $4, $f8                      \r\n"
+        "dmtc1 %11, $f8                      \r\n"
         "pmullh $f8, $f8, $f0               \r\n"
         "daddu $3, %0, $0                   \r\n"
         "dli $2, 16                         \r\n"
@@ -609,7 +594,9 @@ static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
         "daddu $3, %1                       \r\n"
         "daddiu $2, -1                      \r\n"
         "bnez $2, 1b                        \r\n"
-        ::"r"(src),"r"(stride),"r"(svq3),"r"(rv40)
+        ::"r"(src),"r"(stride),"r"(svq3),"r"(rv40),
+          "r"(ff_pw_m8tom5),"r"(ff_pw_m4tom1),"r"(ff_pw_1to4),"r"(ff_pw_5to8),
+          "r"(ff_pw_0to3),"r"(ff_pw_4to7),"r"(ff_pw_8tob),"r"(ff_pw_ctof)
         : "$2","$3","$4","$5","$6","$7","$8","memory"
     );
 }
@@ -784,21 +771,19 @@ void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
     __asm__ volatile (
-        ".set arch=loongson3a               \r\n"
         "daddiu $2, %0, -1                  \r\n"
         "daddu $3, %0, $0                   \r\n"
         "dli $6, 0x10                       \r\n"
-        "dli $7, 0x0101010101010101         \r\n"
         "1:                                 \r\n"
         "lbu $4, 0($2)                      \r\n"
-        "dmul $5, $4, $7                    \r\n"
+        "dmul $5, $4, %2                    \r\n"
         "sdl $5, 7($3)                      \r\n"
         "sdr $5, 0($3)                      \r\n"
         "daddu $2, %1                       \r\n"
         "daddu $3, %1                       \r\n"
         "daddiu $6, -1                      \r\n"
         "bnez $6, 1b                        \r\n"
-        ::"r"(src),"r"(stride)
+        ::"r"(src),"r"(stride),"r"(ff_pb_1)
         : "$2","$3","$4","$5","$6","memory"
     );
 }
diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
index 015032f..83afb8a 100644
--- a/libavcodec/mips/idctdsp_mmi.c
+++ b/libavcodec/mips/idctdsp_mmi.c
@@ -36,8 +36,6 @@
 #define ROW_SHIFT 11
 #define COL_SHIFT 20
 
-DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;
 DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
     1<<(ROW_SHIFT-1),   0, 1<<(ROW_SHIFT-1),   0,
     1<<(ROW_SHIFT-1),   1, 1<<(ROW_SHIFT-1),   0,
@@ -815,7 +813,7 @@ static void simple_idct_mmi(int16_t *block)
         //IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
 
         "9:                             \n\t"
-        ::"r"(block),"r"(temp),"r"(coeffs),"m"(wm1010),"m"(d40000)
+        ::"r"(block),"r"(temp),"r"(coeffs),"m"(ff_wm1010),"m"(ff_d40000)
         : "$10","$11"
     );
 }
@@ -886,7 +884,7 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
     int64_t line_skip3;
 
     __asm__ volatile (
-        "ldc1 $f0, %4                   \n\t"
+        "dmtc1 %4, $f0                  \n\t"
         "daddu %1, %3, %3               \n\t"
         "ldc1 $f2, 0(%2)                \n\t"
         "ldc1 $f10, 8(%2)               \n\t"
@@ -933,7 +931,7 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
         "gssdxc1 $f6, 0(%0, $10)        \n\t"
         "gssdxc1 $f8, 0(%0, %1)         \n\t"
         : "+&r"(pixels),"=&r"(line_skip3)
-        : "r"(block),"r"(line_skip),"m"(ff_pb_80)
+        : "r"(block),"r"(line_skip),"r"(ff_pb_80)
         : "$10","memory"
     );
 }