[FFmpeg-devel] [PATCH 3/4] huffyuvencdsp: Add ff_diff_bytes_sse2
Timothy Gu
timothygu99 at gmail.com
Mon Oct 19 22:00:45 CEST 2015
The SSE2 version is 4% to 35% faster, depending on the width.
---
libavcodec/x86/huffyuvencdsp.asm | 31 ++++++++++++++++++++-----------
libavcodec/x86/huffyuvencdsp_mmx.c | 8 +++++++-
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
index 97de7e9..9625fbe 100644
--- a/libavcodec/x86/huffyuvencdsp.asm
+++ b/libavcodec/x86/huffyuvencdsp.asm
@@ -27,27 +27,27 @@
section .text
-INIT_MMX mmx
; void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
; intptr_t w);
-cglobal diff_bytes, 4,6,0, dst, src1, src2, w, i
+%macro DIFF_BYTES 0
+cglobal diff_bytes, 4,6,2, dst, src1, src2, w, i
xor iq, iq
- cmp wq, 16
+ cmp wq, mmsize * 2
jb .loop2
- sub wq, 15
+ sub wq, mmsize * 2 - 1
.loop:
- mova m0, [src2q + iq]
- mova m1, [src1q + iq]
+ movu m0, [src2q + iq]
+ movu m1, [src1q + iq]
psubb m1, m0
mova [iq + dstq], m1
- mova m0, [src2q + iq + 8]
- mova m1, [src1q + iq + 8]
+ movu m0, [src2q + iq + mmsize]
+ movu m1, [src1q + iq + mmsize]
psubb m1, m0
- mova [8 + iq + dstq], m1
- add iq, 16
+ mova [mmsize + iq + dstq], m1
+ add iq, mmsize * 2
cmp iq, wq
jb .loop
- add wq, 15
+ add wq, mmsize * 2 - 1
.loop2:
mov r6b, byte [src1q + iq]
sub r6b, byte [src2q + iq]
@@ -56,3 +56,12 @@ cglobal diff_bytes, 4,6,0, dst, src1, src2, w, i
cmp iq, wq
jb .loop2
REP_RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+DIFF_BYTES
+%endif
+
+INIT_XMM sse2
+DIFF_BYTES
diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c
index c5f81c8..9af5305 100644
--- a/libavcodec/x86/huffyuvencdsp_mmx.c
+++ b/libavcodec/x86/huffyuvencdsp_mmx.c
@@ -31,6 +31,8 @@
void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);
+void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+ intptr_t w);
#if HAVE_INLINE_ASM
@@ -80,11 +82,15 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
{
av_unused int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_MMX(cpu_flags)) {
+ if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_mmx;
}
if (INLINE_MMXEXT(cpu_flags)) {
c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext;
}
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ c->diff_bytes = ff_diff_bytes_sse2;
+ }
}
--
1.9.1
More information about the ffmpeg-devel
mailing list