[FFmpeg-devel] [PATCH 1/3] lavc/aarch64: add clip N macro
J. Dekker
jdek at itanimul.li
Wed Mar 22 02:07:08 EET 2023
Signed-off-by: J. Dekker <jdek at itanimul.li>
---
libavcodec/aarch64/hevcdsp_idct_neon.S | 19 +++++--------------
libavcodec/aarch64/neon.S | 11 +++++++++++
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
index 467cb0f48a..3e59dd20bb 100644
--- a/libavcodec/aarch64/hevcdsp_idct_neon.S
+++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
@@ -5,7 +5,7 @@
*
* Ported from arm/hevcdsp_idct_neon.S by
* Copyright (c) 2020 Reimar Döffinger
- * Copyright (c) 2020 J. Dekker
+ * Copyright (c) 2023 J. Dekker <jdek at itanimul.li>
*
* This file is part of FFmpeg.
*
@@ -38,13 +38,6 @@ const trans, align=4
.short 31, 22, 13, 4
endconst
-.macro clip2 in1, in2, min, max
- smax \in1, \in1, \min
- smax \in2, \in2, \min
- smin \in1, \in1, \max
- smin \in2, \in2, \max
-.endm
-
function ff_hevc_add_residual_4x4_8_neon, export=1
ld1 {v0.8h-v1.8h}, [x1]
ld1 {v2.s}[0], [x0], x2
@@ -182,7 +175,7 @@ function hevc_add_residual_4x4_16_neon, export=0
ld1 {v3.d}[1], [x12], x2
movi v4.8h, #0
sqadd v1.8h, v1.8h, v3.8h
- clip2 v0.8h, v1.8h, v4.8h, v21.8h
+ clip v4.8h, v21.8h, v0.8h, v1.8h
st1 {v0.d}[0], [x0], x2
st1 {v0.d}[1], [x0], x2
st1 {v1.d}[0], [x0], x2
@@ -201,7 +194,7 @@ function hevc_add_residual_8x8_16_neon, export=0
sqadd v0.8h, v0.8h, v2.8h
ld1 {v3.8h}, [x12]
sqadd v1.8h, v1.8h, v3.8h
- clip2 v0.8h, v1.8h, v4.8h, v21.8h
+ clip v4.8h, v21.8h, v0.8h, v1.8h
st1 {v0.8h}, [x0], x2
st1 {v1.8h}, [x12], x2
bne 1b
@@ -221,8 +214,7 @@ function hevc_add_residual_16x16_16_neon, export=0
sqadd v1.8h, v1.8h, v17.8h
sqadd v2.8h, v2.8h, v18.8h
sqadd v3.8h, v3.8h, v19.8h
- clip2 v0.8h, v1.8h, v20.8h, v21.8h
- clip2 v2.8h, v3.8h, v20.8h, v21.8h
+ clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
st1 {v0.8h-v1.8h}, [x0], x2
st1 {v2.8h-v3.8h}, [x12], x2
bne 1b
@@ -239,8 +231,7 @@ function hevc_add_residual_32x32_16_neon, export=0
sqadd v1.8h, v1.8h, v17.8h
sqadd v2.8h, v2.8h, v18.8h
sqadd v3.8h, v3.8h, v19.8h
- clip2 v0.8h, v1.8h, v20.8h, v21.8h
- clip2 v2.8h, v3.8h, v20.8h, v21.8h
+ clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
st1 {v0.8h-v3.8h}, [x0], x2
bne 1b
ret
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index 1ad32c359d..bc105e4861 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -1,6 +1,8 @@
/*
* This file is part of FFmpeg.
*
+ * Copyright (c) 2023 J. Dekker <jdek at itanimul.li>
+ *
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
@@ -16,6 +18,15 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+.macro clip min, max, regs:vararg // clamp every register listed in regs to [min, max] in place, using signed per-lane compares
+.irp x, \regs // first pass: visit each listed register
+ smax \x, \x, \min // signed max against min raises each lane to at least the lower bound
+.endr
+.irp x, \regs // second pass over the same list, run only after all smax are emitted (presumably to interleave independent instructions)
+ smin \x, \x, \max // signed min against max lowers each lane to at most the upper bound
+.endr
+.endm // min and max are only read; callers pass them with an arrangement suffix, e.g. v4.8h
+
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
trn1 \r8\().8B, \r0\().8B, \r1\().8B
trn2 \r9\().8B, \r0\().8B, \r1\().8B
--
2.39.2
More information about the ffmpeg-devel
mailing list