[FFmpeg-devel] [PATCH 12/34] aarch64: vp9itxfm: Use the right lane sizes in 8x8 for improved readability
Martin Storsjö
martin at martin.st
Wed Mar 8 12:00:52 EET 2017
This is cherry-picked from libav commit
3dd7827258ddaa2e51085d0c677d6f3b1be3572f.
---
libavcodec/aarch64/vp9itxfm_neon.S | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S
index e42cc2d..3b34749 100644
--- a/libavcodec/aarch64/vp9itxfm_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_neon.S
@@ -385,10 +385,10 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
.endif
ld1 {v0.8h}, [x4]
- movi v2.16b, #0
- movi v3.16b, #0
- movi v4.16b, #0
- movi v5.16b, #0
+ movi v2.8h, #0
+ movi v3.8h, #0
+ movi v4.8h, #0
+ movi v5.8h, #0
.ifc \txfm1\()_\txfm2,idct_idct
cmp w3, #1
@@ -411,11 +411,11 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
b 2f
.endif
1:
- ld1 {v16.16b,v17.16b,v18.16b,v19.16b}, [x2], #64
- ld1 {v20.16b,v21.16b,v22.16b,v23.16b}, [x2], #64
+ ld1 {v16.8h,v17.8h,v18.8h,v19.8h}, [x2], #64
+ ld1 {v20.8h,v21.8h,v22.8h,v23.8h}, [x2], #64
sub x2, x2, #128
- st1 {v2.16b,v3.16b,v4.16b,v5.16b}, [x2], #64
- st1 {v2.16b,v3.16b,v4.16b,v5.16b}, [x2], #64
+ st1 {v2.8h,v3.8h,v4.8h,v5.8h}, [x2], #64
+ st1 {v2.8h,v3.8h,v4.8h,v5.8h}, [x2], #64
\txfm1\()8
--
2.7.4
More information about the ffmpeg-devel mailing list