[FFmpeg-devel] [PATCH] avcodec/mips: simplified code in vp3dsp_idct_msa.c.
gxw
guxiwei-hf at loongson.cn
Sun Sep 15 13:35:57 EEST 2019
Use the ADD8 macro to replace runs of consecutive vector addition operations.
---
libavcodec/mips/vp3dsp_idct_msa.c | 80 ++++++++-----------------------------
libavutil/mips/generic_macros_msa.h | 6 +++
2 files changed, 22 insertions(+), 64 deletions(-)
diff --git a/libavcodec/mips/vp3dsp_idct_msa.c b/libavcodec/mips/vp3dsp_idct_msa.c
index 90c578f..e4cd377 100644
--- a/libavcodec/mips/vp3dsp_idct_msa.c
+++ b/libavcodec/mips/vp3dsp_idct_msa.c
@@ -178,14 +178,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
c0, c1, c2, c3);
ILVR_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
c4, c5, c6, c7);
- A += c0;
- B += c7;
- C += c1;
- D += c2;
- E += c3;
- F += c4;
- G += c5;
- H += c6;
+ ADD8(A, c0, B, c7, C, c1, D, c2, E, c3, F, c4, G, c5, H, c6,
+ A, B, C, D, E, F, G, H);
}
CLIP_SW8_0_255(A, B, C, D, E, F, G, H);
sign_l = __msa_or_v((v16u8)r1_r, (v16u8)r2_r);
@@ -208,14 +202,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
Gd = Bdd;
Hd = Bdd;
} else {
- Ad = Add + c0;
- Bd = Add + c1;
- Cd = Add + c2;
- Dd = Add + c3;
- Ed = Add + c4;
- Fd = Add + c5;
- Gd = Add + c6;
- Hd = Add + c7;
+ ADD8(Add, c0, Add, c1, Add, c2, Add, c3, Add, c4, Add, c5, Add, c6,
+ Add, c7, Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
CLIP_SW8_0_255(Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
}
Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
@@ -235,14 +223,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
- r0_r = Ad + A;
- r1_r = Bd + C;
- r2_r = Cd + D;
- r3_r = Dd + E;
- r0_l = Ed + F;
- r1_l = Fd + G;
- r2_l = Gd + H;
- r3_l = Hd + B;
+ ADD8(Ad, A, Bd, C, Cd, D, Dd, E, Ed, F, Fd, G, Gd, H, Hd, B,
+ r0_r, r1_r, r2_r, r3_r, r0_l, r1_l, r2_l, r3_l);
/* Row 4 to 7 */
TRANSPOSE4x4_SW_SW(r4_r, r5_r, r6_r, r7_r,
@@ -286,14 +268,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
c0, c1, c2, c3);
ILVL_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
c4, c5, c6, c7);
- A += c0;
- B += c7;
- C += c1;
- D += c2;
- E += c3;
- F += c4;
- G += c5;
- H += c6;
+ ADD8(A, c0, B, c7, C, c1, D, c2, E, c3, F, c4, G, c5, H, c6,
+ A, B, C, D, E, F, G, H);
}
CLIP_SW8_0_255(A, B, C, D, E, F, G, H);
sign_l = __msa_or_v((v16u8)r5_r, (v16u8)r6_r);
@@ -316,14 +292,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
Gd = Bdd;
Hd = Bdd;
} else {
- Ad = Add + c0;
- Bd = Add + c1;
- Cd = Add + c2;
- Dd = Add + c3;
- Ed = Add + c4;
- Fd = Add + c5;
- Gd = Add + c6;
- Hd = Add + c7;
+ ADD8(Add, c0, Add, c1, Add, c2, Add, c3, Add, c4, Add, c5, Add, c6,
+ Add, c7, Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
CLIP_SW8_0_255(Ad, Bd, Cd, Dd, Ed, Fd, Gd, Hd);
}
Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
@@ -343,14 +313,8 @@ static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
- r4_r = Ad + A;
- r5_r = Bd + C;
- r6_r = Cd + D;
- r7_r = Dd + E;
- r4_l = Ed + F;
- r5_l = Fd + G;
- r6_l = Gd + H;
- r7_l = Hd + B;
+ ADD8(Ad, A, Bd, C, Cd, D, Dd, E, Ed, F, Fd, G, Gd, H, Hd, B,
+ r4_r, r5_r, r6_r, r7_r, r4_l, r5_l, r6_l, r7_l);
VSHF_B2_SB(r0_r, r4_r, r1_r, r5_r, mask, mask, d0, d1);
VSHF_B2_SB(r2_r, r6_r, r3_r, r7_r, mask, mask, d2, d3);
VSHF_B2_SB(r0_l, r4_l, r1_l, r5_l, mask, mask, d4, d5);
@@ -400,14 +364,8 @@ void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
e0, e1, e2, e3);
ILVR_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
e4, e5, e6, e7);
- e0 += dc;
- e1 += dc;
- e2 += dc;
- e3 += dc;
- e4 += dc;
- e5 += dc;
- e6 += dc;
- e7 += dc;
+ ADD8(e0, dc, e1, dc, e2, dc, e3, dc, e4, dc, e5, dc, e6, dc, e7, dc,
+ e0, e1, e2, e3, e4, e5, e6, e7);
CLIP_SW8_0_255(e0, e1, e2, e3, e4, e5, e6, e7);
/* Left part */
@@ -415,14 +373,8 @@ void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
r0, r1, r2, r3);
ILVL_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
r4, r5, r6, r7);
- r0 += dc;
- r1 += dc;
- r2 += dc;
- r3 += dc;
- r4 += dc;
- r5 += dc;
- r6 += dc;
- r7 += dc;
+ ADD8(r0, dc, r1, dc, r2, dc, r3, dc, r4, dc, r5, dc, r6, dc, r7, dc,
+ r0, r1, r2, r3, r4, r5, r6, r7);
CLIP_SW8_0_255(r0, r1, r2, r3, r4, r5, r6, r7);
VSHF_B2_SB(e0, r0, e1, r1, mask, mask, d0, d1);
VSHF_B2_SB(e2, r2, e3, r3, mask, mask, d2, d3);
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c085d58..3d892ce 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -2153,6 +2153,12 @@
ADD2(in0, in1, in2, in3, out0, out1); \
ADD2(in4, in5, in6, in7, out2, out3); \
}
+#define ADD8(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, \
+ in13, in14, in15, out0, out1, out2, out3, out4, out5, out6, out7) \
+{ \
+ ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3); \
+ ADD4(in8, in9, in10, in11, in12, in13, in14, in15, out4, out5, out6, out7); \
+}
/* Description : Subtraction of 2 pairs of vectors
Arguments : Inputs - in0, in1, in2, in3
--
2.1.0
More information about the ffmpeg-devel
mailing list