[FFmpeg-devel] [PATCH 2/3] x86/vf_blend: make all functions work on x86_32
James Almer
jamrial at gmail.com
Thu Dec 24 07:02:27 CET 2015
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavfilter/x86/vf_blend.asm | 103 +++++++++++++++++++---------------------
libavfilter/x86/vf_blend_init.c | 4 +-
2 files changed, 52 insertions(+), 55 deletions(-)
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index d079b79..5f0271a 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -22,7 +22,6 @@
%include "libavutil/x86/x86util.asm"
-%if ARCH_X86_64
SECTION_RODATA
pw_128: times 8 dw 128
@@ -34,11 +33,19 @@ pb_255: times 16 db 255
SECTION .text
%macro BLEND_INIT 2
-cglobal blend_%1, 9, 11, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
+%if ARCH_X86_64
+cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
+ mov widthd, dword widthm
+%else
+cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
+%define dst_linesizeq r5mp
+%define widthq r6mp
+%endif
+ mov endd, dword r8m
add topq, widthq
add bottomq, widthq
add dstq, widthq
- sub endq, startq
+ sub endd, dword r7m ; start
neg widthq
%endmacro
@@ -54,15 +61,14 @@ REP_RET
%macro BLEND_SIMPLE 2
BLEND_INIT %1, 2
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
+ movu m0, [topq + xq]
+ movu m1, [bottomq + xq]
p%2 m0, m1
- mova [dstq + x], m0
- add r10q, mmsize
+ mova [dstq + xq], m0
+ add xq, mmsize
jl .loop
BLEND_END
%endmacro
@@ -80,38 +86,36 @@ BLEND_INIT difference128, 4
pxor m2, m2
mova m3, [pw_128]
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movh m0, [topq + x]
- movh m1, [bottomq + x]
+ movh m0, [topq + xq]
+ movh m1, [bottomq + xq]
punpcklbw m0, m2
punpcklbw m1, m2
paddw m0, m3
psubw m0, m1
packuswb m0, m0
- movh [dstq + x], m0
- add r10q, mmsize / 2
+ movh [dstq + xq], m0
+ add xq, mmsize / 2
jl .loop
BLEND_END
BLEND_INIT average, 3
pxor m2, m2
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movh m0, [topq + x]
- movh m1, [bottomq + x]
+ movh m0, [topq + xq]
+ movh m1, [bottomq + xq]
punpcklbw m0, m2
punpcklbw m1, m2
paddw m0, m1
psrlw m0, 1
packuswb m0, m0
- movh [dstq + x], m0
- add r10q, mmsize / 2
+ movh [dstq + xq], m0
+ add xq, mmsize / 2
jl .loop
BLEND_END
@@ -119,19 +123,18 @@ BLEND_INIT addition128, 4
pxor m2, m2
mova m3, [pw_128]
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movh m0, [topq + x]
- movh m1, [bottomq + x]
+ movh m0, [topq + xq]
+ movh m1, [bottomq + xq]
punpcklbw m0, m2
punpcklbw m1, m2
paddw m0, m1
psubw m0, m3
packuswb m0, m0
- movh [dstq + x], m0
- add r10q, mmsize / 2
+ movh [dstq + xq], m0
+ add xq, mmsize / 2
jl .loop
BLEND_END
@@ -140,38 +143,36 @@ BLEND_INIT hardmix, 5
mova m3, [pb_128]
mova m4, [pb_127]
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
+ movu m0, [topq + xq]
+ movu m1, [bottomq + xq]
pxor m1, m4
pxor m0, m3
pcmpgtb m1, m0
pxor m1, m2
- mova [dstq + x], m1
- add r10q, mmsize
+ mova [dstq + xq], m1
+ add xq, mmsize
jl .loop
BLEND_END
BLEND_INIT phoenix, 4
mova m3, [pb_255]
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movu m0, [topq + x]
- movu m1, [bottomq + x]
+ movu m0, [topq + xq]
+ movu m1, [bottomq + xq]
mova m2, m0
pminub m0, m1
pmaxub m1, m2
mova m2, m3
psubusb m2, m1
paddusb m2, m0
- mova [dstq + x], m2
- add r10q, mmsize
+ mova [dstq + xq], m2
+ add xq, mmsize
jl .loop
BLEND_END
@@ -179,19 +180,18 @@ INIT_XMM ssse3
BLEND_INIT difference, 3
pxor m2, m2
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movh m0, [topq + x]
- movh m1, [bottomq + x]
+ movh m0, [topq + xq]
+ movh m1, [bottomq + xq]
punpcklbw m0, m2
punpcklbw m1, m2
psubw m0, m1
pabsw m0, m0
packuswb m0, m0
- movh [dstq + x], m0
- add r10q, mmsize / 2
+ movh [dstq + xq], m0
+ add xq, mmsize / 2
jl .loop
BLEND_END
@@ -199,12 +199,11 @@ BLEND_INIT negation, 5
pxor m2, m2
mova m4, [pw_255]
.nextrow:
- mov r10q, widthq
- %define x r10q
+ mov xq, widthq
.loop:
- movh m0, [topq + x]
- movh m1, [bottomq + x]
+ movh m0, [topq + xq]
+ movh m1, [bottomq + xq]
punpcklbw m0, m2
punpcklbw m1, m2
mova m3, m4
@@ -214,9 +213,7 @@ BLEND_INIT negation, 5
mova m0, m4
psubw m0, m3
packuswb m0, m0
- movh [dstq + x], m0
- add r10q, mmsize / 2
+ movh [dstq + xq], m0
+ add xq, mmsize / 2
jl .loop
BLEND_END
-
-%endif
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 82b8848..b7d234f 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -49,7 +49,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{
int cpu_flags = av_get_cpu_flags();
- if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
+ if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
switch (param->mode) {
case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break;
@@ -65,7 +65,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
}
}
- if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
+ if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
switch (param->mode) {
case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break;
--
2.6.3
More information about the ffmpeg-devel mailing list