[FFmpeg-cvslog] lavc/aarch64: Move non-neon vp9 copy functions out of neon source file.
    Carl Eugen Hoyos 
    git at videolan.org
       
    Wed Mar 11 15:17:04 EET 2020
    
    
  
ffmpeg | branch: master | Carl Eugen Hoyos <ceffmpeg at gmail.com> | Wed Mar 11 13:01:02 2020 +0100| [9a217549043ff25a37973555f71122f4725ba54e] | committer: Carl Eugen Hoyos
lavc/aarch64: Move non-neon vp9 copy functions out of neon source file.
Fixes part of ticket #8565.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9a217549043ff25a37973555f71122f4725ba54e
---
 libavcodec/aarch64/Makefile           |  1 +
 libavcodec/aarch64/vp9mc_16bpp_neon.S | 25 -----------
 libavcodec/aarch64/vp9mc_aarch64.S    | 81 +++++++++++++++++++++++++++++++++++
 libavcodec/aarch64/vp9mc_neon.S       | 30 -------------
 4 files changed, 82 insertions(+), 55 deletions(-)
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 00f93bf59f..90e7210ee0 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -21,6 +21,7 @@ OBJS-$(CONFIG_VC1DSP)                   += aarch64/vc1dsp_init_aarch64.o
 OBJS-$(CONFIG_VORBIS_DECODER)           += aarch64/vorbisdsp_init.o
 OBJS-$(CONFIG_VP9_DECODER)              += aarch64/vp9dsp_init_10bpp_aarch64.o \
                                            aarch64/vp9dsp_init_12bpp_aarch64.o \
+                                           aarch64/vp9mc_aarch64.o             \
                                            aarch64/vp9dsp_init_aarch64.o
 
 # ARMv8 optimizations
diff --git a/libavcodec/aarch64/vp9mc_16bpp_neon.S b/libavcodec/aarch64/vp9mc_16bpp_neon.S
index cac6428709..53b372c262 100644
--- a/libavcodec/aarch64/vp9mc_16bpp_neon.S
+++ b/libavcodec/aarch64/vp9mc_16bpp_neon.S
@@ -25,31 +25,6 @@
 //                            const uint8_t *ref, ptrdiff_t ref_stride,
 //                            int h, int mx, int my);
 
-function ff_vp9_copy128_aarch64, export=1
-1:
-        ldp             x5,  x6,  [x2]
-        ldp             x7,  x8,  [x2, #16]
-        stp             x5,  x6,  [x0]
-        ldp             x9,  x10, [x2, #32]
-        stp             x7,  x8,  [x0, #16]
-        subs            w4,  w4,  #1
-        ldp             x11, x12, [x2, #48]
-        stp             x9,  x10, [x0, #32]
-        stp             x11, x12, [x0, #48]
-        ldp             x5,  x6,  [x2, #64]
-        ldp             x7,  x8,  [x2, #80]
-        stp             x5,  x6,  [x0, #64]
-        ldp             x9,  x10, [x2, #96]
-        stp             x7,  x8,  [x0, #80]
-        ldp             x11, x12, [x2, #112]
-        stp             x9,  x10, [x0, #96]
-        stp             x11, x12, [x0, #112]
-        add             x2,  x2,  x3
-        add             x0,  x0,  x1
-        b.ne            1b
-        ret
-endfunc
-
 function ff_vp9_avg64_16_neon, export=1
         mov             x5,  x0
         sub             x1,  x1,  #64
diff --git a/libavcodec/aarch64/vp9mc_aarch64.S b/libavcodec/aarch64/vp9mc_aarch64.S
new file mode 100644
index 0000000000..f17a8cf04a
--- /dev/null
+++ b/libavcodec/aarch64/vp9mc_aarch64.S
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// All public functions in this file have the following signature:
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+//                            const uint8_t *ref, ptrdiff_t ref_stride,
+//                            int h, int mx, int my);
+
+function ff_vp9_copy128_aarch64, export=1
+1:
+        ldp             x5,  x6,  [x2]
+        ldp             x7,  x8,  [x2, #16]
+        stp             x5,  x6,  [x0]
+        ldp             x9,  x10, [x2, #32]
+        stp             x7,  x8,  [x0, #16]
+        subs            w4,  w4,  #1
+        ldp             x11, x12, [x2, #48]
+        stp             x9,  x10, [x0, #32]
+        stp             x11, x12, [x0, #48]
+        ldp             x5,  x6,  [x2, #64]
+        ldp             x7,  x8,  [x2, #80]
+        stp             x5,  x6,  [x0, #64]
+        ldp             x9,  x10, [x2, #96]
+        stp             x7,  x8,  [x0, #80]
+        ldp             x11, x12, [x2, #112]
+        stp             x9,  x10, [x0, #96]
+        stp             x11, x12, [x0, #112]
+        add             x2,  x2,  x3
+        add             x0,  x0,  x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_copy64_aarch64, export=1
+1:
+        ldp             x5,  x6,  [x2]
+        ldp             x7,  x8,  [x2, #16]
+        stp             x5,  x6,  [x0]
+        ldp             x9,  x10, [x2, #32]
+        stp             x7,  x8,  [x0, #16]
+        subs            w4,  w4,  #1
+        ldp             x11, x12, [x2, #48]
+        stp             x9,  x10, [x0, #32]
+        stp             x11, x12, [x0, #48]
+        add             x2,  x2,  x3
+        add             x0,  x0,  x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_vp9_copy32_aarch64, export=1
+1:
+        ldp             x5,  x6,  [x2]
+        ldp             x7,  x8,  [x2, #16]
+        stp             x5,  x6,  [x0]
+        subs            w4,  w4,  #1
+        stp             x7,  x8,  [x0, #16]
+        add             x2,  x2,  x3
+        add             x0,  x0,  x1
+        b.ne            1b
+        ret
+endfunc
diff --git a/libavcodec/aarch64/vp9mc_neon.S b/libavcodec/aarch64/vp9mc_neon.S
index f67624ca04..abf2bae9db 100644
--- a/libavcodec/aarch64/vp9mc_neon.S
+++ b/libavcodec/aarch64/vp9mc_neon.S
@@ -25,23 +25,6 @@
 //                            const uint8_t *ref, ptrdiff_t ref_stride,
 //                            int h, int mx, int my);
 
-function ff_vp9_copy64_aarch64, export=1
-1:
-        ldp             x5,  x6,  [x2]
-        ldp             x7,  x8,  [x2, #16]
-        stp             x5,  x6,  [x0]
-        ldp             x9,  x10, [x2, #32]
-        stp             x7,  x8,  [x0, #16]
-        subs            w4,  w4,  #1
-        ldp             x11, x12, [x2, #48]
-        stp             x9,  x10, [x0, #32]
-        stp             x11, x12, [x0, #48]
-        add             x2,  x2,  x3
-        add             x0,  x0,  x1
-        b.ne            1b
-        ret
-endfunc
-
 function ff_vp9_avg64_neon, export=1
         mov             x5,  x0
 1:
@@ -64,19 +47,6 @@ function ff_vp9_avg64_neon, export=1
         ret
 endfunc
 
-function ff_vp9_copy32_aarch64, export=1
-1:
-        ldp             x5,  x6,  [x2]
-        ldp             x7,  x8,  [x2, #16]
-        stp             x5,  x6,  [x0]
-        subs            w4,  w4,  #1
-        stp             x7,  x8,  [x0, #16]
-        add             x2,  x2,  x3
-        add             x0,  x0,  x1
-        b.ne            1b
-        ret
-endfunc
-
 function ff_vp9_avg32_neon, export=1
 1:
         ld1             {v2.16b, v3.16b},  [x2], x3
    
    
More information about the ffmpeg-cvslog
mailing list