[FFmpeg-devel] [PATCH 2/2] avutil/mips: [loongson] simplify macro TRANSPOSE_4H and TRANSPOSE_8B
Michael Niedermayer
michael at niedermayer.cc
Sun Sep 9 04:38:09 EEST 2018
On Fri, Sep 07, 2018 at 11:51:05AM +0800, Shiyou Yin wrote:
> >-----Original Message-----
> >From: ffmpeg-devel-bounces at ffmpeg.org [mailto:ffmpeg-devel-bounces at ffmpeg.org] On Behalf Of
> >Shiyou Yin
> >Sent: Thursday, September 6, 2018 4:11 PM
> >To: ffmpeg-devel at ffmpeg.org
> >Subject: [FFmpeg-devel] [PATCH 2/2] avutil/mips: [loongson] simplify macro TRANSPOSE_4H and
> >TRANSPOSE_8B
> >
> >Simplify macro TRANSPOSE_4H in mmiutils.h and add TRANSPOSE_8B as a common macro.
> >---
> > libavcodec/mips/vc1dsp_mmi.c | 12 +++----
> > libavcodec/mips/vp8dsp_mmi.c | 72 +++++--------------------------------
> > libavutil/mips/mmiutils.h | 84 ++++++++++++++++++++++++++++----------------
> > 3 files changed, 65 insertions(+), 103 deletions(-)
> >
> >diff --git a/libavcodec/mips/vc1dsp_mmi.c b/libavcodec/mips/vc1dsp_mmi.c
> >index a439b40..80778a5 100644
> >--- a/libavcodec/mips/vc1dsp_mmi.c
> >+++ b/libavcodec/mips/vc1dsp_mmi.c
> >@@ -248,8 +248,7 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
> > 0xfff70004, 0xfff0000f, %[ff_pw_4])
> >
> > TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
> >- %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
> >- %[ftmp5], %[tmp0], %[ftmp6], %[ftmp7])
> >+ %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
> >
> > MMI_SDC1(%[ftmp15], %[dst], 0x00)
> > MMI_SDC1(%[ftmp16], %[dst], 0x10)
> >@@ -257,8 +256,7 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
> > MMI_SDC1(%[ftmp18], %[dst], 0x30)
> >
> > TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
> >- %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
> >- %[ftmp5], %[tmp0], %[ftmp6], %[ftmp7])
> >+ %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
> >
> > MMI_SDC1(%[ftmp19], %[dst], 0x08)
> > MMI_SDC1(%[ftmp20], %[dst], 0x18)
> >@@ -301,8 +299,7 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
> > 0xfff70004, 0xfff0000f, %[ff_pw_4])
> >
> > TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
> >- %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
> >- %[ftmp5], %[tmp0], %[ftmp6], %[ftmp7])
> >+ %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
> >
> > MMI_SDC1(%[ftmp15], %[dst], 0x40)
> > MMI_SDC1(%[ftmp16], %[dst], 0x50)
> >@@ -310,8 +307,7 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
> > MMI_SDC1(%[ftmp18], %[dst], 0x70)
> >
> > TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
> >- %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
> >- %[ftmp5], %[tmp0], %[ftmp6], %[ftmp7])
> >+ %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
> >
> > MMI_SDC1(%[ftmp19], %[dst], 0x48)
> > MMI_SDC1(%[ftmp20], %[dst], 0x58)
> >diff --git a/libavcodec/mips/vp8dsp_mmi.c b/libavcodec/mips/vp8dsp_mmi.c
> >index b24a87a..bd80aa1 100644
> >--- a/libavcodec/mips/vp8dsp_mmi.c
> >+++ b/libavcodec/mips/vp8dsp_mmi.c
> >@@ -44,58 +44,6 @@
> > "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \
> > "punpckhbh "#dst_l", "#src", %[db_2] \n\t"
> >
> >-#define MMI_TRANSPOSE8x8_UB_UB(src_0, src_1, src_2, src_3, \
> >- src_4, src_5, src_6, src_7, \
> >- dst_0, dst_1, dst_2, dst_3, \
> >- dst_4, dst_5, dst_6, dst_7) \
> >- "li %[it_1], 0xe4 \n\t" \
> >- "dmtc1 %[it_1], %[db_1] \n\t" \
> >- "pshufh %[db_2], "#src_0", %[db_1] \n\t" \
> >- "punpcklbh "#dst_0", "#src_0", "#src_1" \n\t" \
> >- "punpckhbh "#dst_1", %[db_2], "#src_1" \n\t" \
> >- "pshufh %[db_2], "#src_2", %[db_1] \n\t" \
> >- "punpcklbh "#dst_2", "#src_2", "#src_3" \n\t" \
> >- "punpckhbh "#dst_3", %[db_2], "#src_3" \n\t" \
> >- "pshufh %[db_2], "#src_4", %[db_1] \n\t" \
> >- "punpcklbh "#dst_4", "#src_4", "#src_5" \n\t" \
> >- "punpckhbh "#dst_5", %[db_2], "#src_5" \n\t" \
> >- "pshufh %[db_2], "#src_6", %[db_1] \n\t" \
> >- "punpcklbh "#dst_6", "#src_6", "#src_7" \n\t" \
> >- "punpckhbh "#dst_7", %[db_2], "#src_7" \n\t" \
> >- \
> >- "pshufh %[db_2], "#dst_0", %[db_1] \n\t" \
> >- "punpcklhw "#dst_0", "#dst_0", "#dst_2" \n\t" \
> >- "punpckhhw "#dst_2", %[db_2], "#dst_2" \n\t" \
> >- "pshufh %[db_2], "#dst_1", %[db_1] \n\t" \
> >- "punpcklhw "#dst_1", "#dst_1", "#dst_3" \n\t" \
> >- "punpckhhw "#dst_3", %[db_2], "#dst_3" \n\t" \
> >- "pshufh %[db_2], "#dst_4", %[db_1] \n\t" \
> >- "punpcklhw "#dst_4", "#dst_4", "#dst_6" \n\t" \
> >- "punpckhhw "#dst_6", %[db_2], "#dst_6" \n\t" \
> >- "pshufh %[db_2], "#dst_5", %[db_1] \n\t" \
> >- "punpcklhw "#dst_5", "#dst_5", "#dst_7" \n\t" \
> >- "punpckhhw "#dst_7", %[db_2], "#dst_7" \n\t" \
> >- \
> >- "pshufh %[db_2], "#dst_0", %[db_1] \n\t" \
> >- "punpcklwd "#dst_0", "#dst_0", "#dst_4" \n\t" \
> >- "punpckhwd "#dst_4", %[db_2], "#dst_4" \n\t" \
> >- "pshufh %[db_2], "#dst_1", %[db_1] \n\t" \
> >- "punpcklwd "#dst_1", "#dst_1", "#dst_5" \n\t" \
> >- "punpckhwd "#dst_5", %[db_2], "#dst_5" \n\t" \
> >- "pshufh %[db_2], "#dst_2", %[db_1] \n\t" \
> >- "punpcklwd "#dst_2", "#dst_2", "#dst_6" \n\t" \
> >- "punpckhwd "#dst_6", %[db_2], "#dst_6" \n\t" \
> >- "pshufh %[db_2], "#dst_3", %[db_1] \n\t" \
> >- "punpcklwd "#dst_3", "#dst_3", "#dst_7" \n\t" \
> >- "punpckhwd "#dst_7", %[db_2], "#dst_7" \n\t" \
> >- \
> >- "pshufh %[db_2], "#dst_1", %[db_1] \n\t" \
> >- "pshufh "#dst_1", "#dst_4", %[db_1] \n\t" \
> >- "pshufh "#dst_4", %[db_2], %[db_1] \n\t" \
> >- "pshufh %[db_2], "#dst_3", %[db_1] \n\t" \
> >- "pshufh "#dst_3", "#dst_6", %[db_1] \n\t" \
> >- "pshufh "#dst_6", %[db_2], %[db_1] \n\t"
> >-
> > #define MMI_VP8_LOOP_FILTER \
> > /* Calculation of hev */ \
> > "dmtc1 %[thresh], %[ftmp3] \n\t" \
> >@@ -952,16 +900,14 @@ static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst,
> > "gsldlc1 %[q3], 0x03(%[tmp0]) \n\t"
> > "gsldrc1 %[q3], -0x04(%[tmp0]) \n\t"
> > /* Matrix transpose */
> >- MMI_TRANSPOSE8x8_UB_UB(%[p3], %[p2], %[p1], %[p0],
> >- %[q0], %[q1], %[q2], %[q3],
> >- %[p3], %[p2], %[p1], %[p0],
> >- %[q0], %[q1], %[q2], %[q3])
> >+ TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0],
> >+ %[q0], %[q1], %[q2], %[q3],
> >+ %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
> > MMI_VP8_LOOP_FILTER
> > /* Matrix transpose */
> >- MMI_TRANSPOSE8x8_UB_UB(%[p3], %[p2], %[p1], %[p0],
> >- %[q0], %[q1], %[q2], %[q3],
> >- %[p3], %[p2], %[p1], %[p0],
> >- %[q0], %[q1], %[q2], %[q3])
> >+ TRANSPOSE_8B(%[p3], %[p2], %[p1], %[p0],
> >+ %[q0], %[q1], %[q2], %[q3],
> >+ %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
> > /* Move to dst */
> > "gssdlc1 %[p3], 0x03(%[dst]) \n\t"
> > "gssdrc1 %[p3], -0x04(%[dst]) \n\t"
> >@@ -1233,8 +1179,7 @@ void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
> > MMI_SDC1(%[ftmp0], %[block], 0x18)
> >
> > TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
> >- %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
> >- %[ftmp9], %[tmp0], %[ftmp0], %[ftmp10])
> >+ %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
> >
> > // t[0 4 8 12]
> > "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
> >@@ -1269,8 +1214,7 @@ void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
> > "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
> >
> > TRANSPOSE_4H(%[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4],
> >- %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8],
> >- %[ftmp9], %[tmp0], %[ftmp0], %[ftmp10])
> >+ %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
> >
> > MMI_LWC1(%[ftmp5], %[dst0], 0x00)
> > MMI_LWC1(%[ftmp6], %[dst1], 0x00)
> >diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h
> >index b16edc4..76b1199 100644
> >--- a/libavutil/mips/mmiutils.h
> >+++ b/libavutil/mips/mmiutils.h
> >@@ -250,30 +250,53 @@
> > : "memory" \
> > );
> >
> >-#define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \
> >- "li "#r1", 0x93 \n\t" \
> >- "xor "#zero","#zero","#zero" \n\t" \
> >- "mtc1 "#r1", "#shift" \n\t" \
> >- "punpcklhw "#t1", "#m1", "#zero" \n\t" \
> >- "punpcklhw "#t5", "#m2", "#zero" \n\t" \
> >- "pshufh "#t5", "#t5", "#shift" \n\t" \
> >- "or "#t1", "#t1", "#t5" \n\t" \
> >- "punpckhhw "#t2", "#m1", "#zero" \n\t" \
> >- "punpckhhw "#t5", "#m2", "#zero" \n\t" \
> >- "pshufh "#t5", "#t5", "#shift" \n\t" \
> >- "or "#t2", "#t2", "#t5" \n\t" \
> >- "punpcklhw "#t3", "#m3", "#zero" \n\t" \
> >- "punpcklhw "#t5", "#m4", "#zero" \n\t" \
> >- "pshufh "#t5", "#t5", "#shift" \n\t" \
> >- "or "#t3", "#t3", "#t5" \n\t" \
> >- "punpckhhw "#t4", "#m3", "#zero" \n\t" \
> >- "punpckhhw "#t5", "#m4", "#zero" \n\t" \
> >- "pshufh "#t5", "#t5", "#shift" \n\t" \
> >- "or "#t4", "#t4", "#t5" \n\t" \
> >- "punpcklwd "#m1", "#t1", "#t3" \n\t" \
> >- "punpckhwd "#m2", "#t1", "#t3" \n\t" \
> >- "punpcklwd "#m3", "#t2", "#t4" \n\t" \
> >- "punpckhwd "#m4", "#t2", "#t4" \n\t"
> >+/**
> >+ * brief: Transpose 4X4 half word packaged data.
> >+ * fr_i0, fr_i1, fr_i2, fr_i3: src & dst
> >+ * fr_t0, fr_t1, fr_t2, fr_t3: temporary register
> >+ */
> >+#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, \
> >+ fr_t0, fr_t1, fr_t2, fr_t3) \
> >+ "punpcklhw "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
> >+ "punpckhhw "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
> >+ "punpcklhw "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
> >+ "punpckhhw "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
> >+ "punpcklwd "#fr_i0", "#fr_t0", "#fr_t2" \n\t" \
> >+ "punpckhwd "#fr_i1", "#fr_t0", "#fr_t2" \n\t" \
> >+ "punpcklwd "#fr_i2", "#fr_t1", "#fr_t3" \n\t" \
> >+ "punpckhwd "#fr_i3", "#fr_t1", "#fr_t3" \n\t"
> >+
> >+/**
> >+ * brief: Transpose 8x8 byte packaged data.
> >+ * fr_i0~i7: src & dst
> >+ * fr_t0~t3: temporary register
> >+ */
> >+#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, \
> >+ fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3) \
> >+ "punpcklbh "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
> >+ "punpckhbh "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
> >+ "punpcklbh "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
> >+ "punpckhbh "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
> >+ "punpcklbh "#fr_i0", "#fr_i4", "#fr_i5" \n\t" \
> >+ "punpckhbh "#fr_i1", "#fr_i4", "#fr_i5" \n\t" \
> >+ "punpcklbh "#fr_i2", "#fr_i6", "#fr_i7" \n\t" \
> >+ "punpckhbh "#fr_i3", "#fr_i6", "#fr_i7" \n\t" \
> >+ "punpcklhw "#fr_i4", "#fr_t0", "#fr_t2" \n\t" \
> >+ "punpckhhw "#fr_i5", "#fr_t0", "#fr_t2" \n\t" \
> >+ "punpcklhw "#fr_i6", "#fr_t1", "#fr_t3" \n\t" \
> >+ "punpckhhw "#fr_i7", "#fr_t1", "#fr_t3" \n\t" \
> >+ "punpcklhw "#fr_t0", "#fr_i0", "#fr_i2" \n\t" \
> >+ "punpckhhw "#fr_t1", "#fr_i0", "#fr_i2" \n\t" \
> >+ "punpcklhw "#fr_t2", "#fr_i1", "#fr_i3" \n\t" \
> >+ "punpckhhw "#fr_t3", "#fr_i1", "#fr_i3" \n\t" \
> >+ "punpcklwd "#fr_i0", "#fr_i4", "#fr_t0" \n\t" \
> >+ "punpckhwd "#fr_i1", "#fr_i4", "#fr_t0" \n\t" \
> >+ "punpcklwd "#fr_i2", "#fr_i5", "#fr_t1" \n\t" \
> >+ "punpckhwd "#fr_i3", "#fr_i5", "#fr_t1" \n\t" \
> >+ "punpcklwd "#fr_i4", "#fr_i6", "#fr_t2" \n\t" \
> >+ "punpckhwd "#fr_i5", "#fr_i6", "#fr_t2" \n\t" \
> >+ "punpcklwd "#fr_i6", "#fr_i7", "#fr_t3" \n\t" \
> >+ "punpckhwd "#fr_i7", "#fr_i7", "#fr_t3" \n\t"
> >
> > /**
> > * brief: Parallel SRA for 8 byte packaged data.
> >@@ -303,15 +326,14 @@
> > "psrlh "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
> > "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"
> >
> >-
> >-#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
> >- "psrah "#fp1", "#fp1", "#shift" \n\t" \
> >- "psrah "#fp2", "#fp2", "#shift" \n\t" \
> >- "psrah "#fp3", "#fp3", "#shift" \n\t" \
> >+#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
> >+ "psrah "#fp1", "#fp1", "#shift" \n\t" \
> >+ "psrah "#fp2", "#fp2", "#shift" \n\t" \
> >+ "psrah "#fp3", "#fp3", "#shift" \n\t" \
> > "psrah "#fp4", "#fp4", "#shift" \n\t"
> >
> >-#define PSRAH_8_MMI(fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, shift) \
> >- PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
> >+#define PSRAH_8_MMI(fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, shift) \
> >+ PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
> > PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift)
> >
> >
> >--
> >2.1.0
>
> Hi Michael, could you please help review this patch?
> BTW, this patch is based on the previous patch "[PATCH 2/2] avcodec/mips: [loongson] optimize vp8 decoding in vp8dsp.",
> so please merge that one first. Thank you very much.
Will apply both.
Thanks.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
The real ebay dictionary, page 2
"100% positive feedback" - "All either got their money back or didn't complain"
"Best seller ever, very honest" - "Seller refunded buyer after failed scam"
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20180909/c2d6f201/attachment.sig>
More information about the ffmpeg-devel
mailing list