[FFmpeg-cvslog] avfilter/x86/vf_hflip : merge hflip byte and hflip short to one macro
Martin Vignali
git at videolan.org
Tue Dec 19 22:12:00 EET 2017
ffmpeg | branch: master | Martin Vignali <martin.vignali at gmail.com> | Tue Dec 19 21:04:29 2017 +0100| [a4a4179e83e61a36cf6900a1ca2a61b9efb03350] | committer: Martin Vignali
avfilter/x86/vf_hflip : merge hflip byte and hflip short to one macro
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a4a4179e83e61a36cf6900a1ca2a61b9efb03350
---
libavfilter/x86/vf_hflip.asm | 61 ++++++++++++--------------------------------
1 file changed, 17 insertions(+), 44 deletions(-)
diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm
index 89d6d0c69b..82e1154d21 100644
--- a/libavfilter/x86/vf_hflip.asm
+++ b/libavfilter/x86/vf_hflip.asm
@@ -29,11 +29,16 @@ pb_flip_short: db 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1
SECTION .text
-INIT_XMM ssse3
-cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x
- mova m0, [pb_flip_byte]
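+; %1 = byte or short (suffix of the function and shuffle-mask names), %2 = b or w (size suffix of the scratch register r), %3 = element size in bytes (1 for byte, 2 for short)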
+%macro HFLIP 3
+cglobal hflip_%1, 3, 5, 3, src, dst, w, r, x
+ mova m0, [pb_flip_%1]
xor xq, xq
+%if %3 == 1
movsxdifnidn wq, wd
+%else ; short
+ add wd, wd
+%endif
mov rq, wq
and rq, 2 * mmsize - 1
cmp wq, 2 * mmsize
@@ -42,8 +47,8 @@ cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x
.loop0:
neg xq
- movu m1, [srcq + xq - mmsize + 1]
- movu m2, [srcq + xq - 2 * mmsize + 1]
+ movu m1, [srcq + xq - mmsize + %3]
+ movu m2, [srcq + xq - 2 * mmsize + %3]
pshufb m1, m0
pshufb m2, m0
neg xq
@@ -59,49 +64,17 @@ cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x
.loop1:
neg xq
- mov rb, [srcq + xq]
+ mov r%2, [srcq + xq]
neg xq
- mov [dstq + xq], rb
- add xq, 1
+ mov [dstq + xq], r%2
+ add xq, %3
cmp xq, wq
jl .loop1
.end:
RET
+%endmacro
-cglobal hflip_short, 3, 5, 3, src, dst, w, r, x
- mova m0, [pb_flip_short]
- xor xq, xq
- add wd, wd
- mov rq, wq
- and rq, 2 * mmsize - 1
- cmp wq, 2 * mmsize
- jl .loop1
- sub wq, rq
-
- .loop0:
- neg xq
- movu m1, [srcq + xq - mmsize + 2]
- movu m2, [srcq + xq - 2 * mmsize + 2]
- pshufb m1, m0
- pshufb m2, m0
- neg xq
- movu [dstq + xq ], m1
- movu [dstq + xq + mmsize], m2
- add xq, mmsize * 2
- cmp xq, wq
- jl .loop0
-
- cmp rq, 0
- je .end
- add wq, rq
+INIT_XMM ssse3
+HFLIP byte, b, 1
+HFLIP short, w, 2
- .loop1:
- neg xq
- mov rw, [srcq + xq]
- neg xq
- mov [dstq + xq], rw
- add xq, 2
- cmp xq, wq
- jl .loop1
- .end:
-RET
More information about the ffmpeg-cvslog mailing list