[FFmpeg-devel] [PATCH 1/2] pixblockdsp: x86: Condense diff_pixels_* to a shared macro
Timothy Gu
timothygu99 at gmail.com
Sun Nov 1 17:59:42 CET 2015
---
libavcodec/x86/pixblockdsp.asm | 66 ++++++++++++++++++++----------------------
1 file changed, 31 insertions(+), 35 deletions(-)
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 7c5377b..a7d9816 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
mova [r0+0x70], m3
RET
-INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
; int stride);
-cglobal diff_pixels, 4,5
- movsxdifnidn r3, r3d
- pxor m7, m7
- add r0, 128
- mov r4, -128
-.loop:
- mova m0, [r1]
- mova m2, [r2]
- mova m1, m0
- mova m3, m2
- punpcklbw m0, m7
- punpckhbw m1, m7
- punpcklbw m2, m7
- punpckhbw m3, m7
- psubw m0, m2
- psubw m1, m3
- mova [r0+r4+0], m0
- mova [r0+r4+8], m1
- add r1, r3
- add r2, r3
- add r4, 16
- jne .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal diff_pixels, 4, 5, 5
+%macro DIFF_PIXELS 0
+cglobal diff_pixels, 4,5,5
movsxdifnidn r3, r3d
pxor m4, m4
add r0, 128
mov r4, -128
.loop:
- movh m0, [r1]
- movh m2, [r2]
- movh m1, [r1+r3]
- movh m3, [r2+r3]
+ movq m0, [r1]
+ movq m2, [r2]
+%if mmsize == 8
+ movq m1, m0
+ movq m3, m2
+ punpcklbw m0, m4
+ punpckhbw m1, m4
+ punpcklbw m2, m4
+ punpckhbw m3, m4
+%else
+ movq m1, [r1+r3]
+ movq m3, [r2+r3]
punpcklbw m0, m4
punpcklbw m1, m4
punpcklbw m2, m4
punpcklbw m3, m4
+%endif
psubw m0, m2
psubw m1, m3
- mova [r0+r4+0 ], m0
- mova [r0+r4+16], m1
+ mova [r0+r4+0], m0
+ mova [r0+r4+mmsize], m1
+%if mmsize == 8
+ add r1, r3
+ add r2, r3
+%else
lea r1, [r1+r3*2]
lea r2, [r2+r3*2]
- add r4, 32
+%endif
+ add r4, 2 * mmsize
jne .loop
- RET
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+DIFF_PIXELS
+
+INIT_XMM sse2
+DIFF_PIXELS
--
2.1.4
More information about the ffmpeg-devel mailing list