[FFmpeg-cvslog] avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD
Paul B Mahol
git at videolan.org
Wed Oct 7 23:03:05 CEST 2015
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Wed Oct 7 10:12:26 2015 +0200| [0948ba320496d02ad185487c18b249610de1a184] | committer: Paul B Mahol
avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD
Signed-off-by: Paul B Mahol <onemda at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0948ba320496d02ad185487c18b249610de1a184
---
libavfilter/x86/vf_blend.asm | 64 +++++++++++++++++++++++++++++++++++++++
libavfilter/x86/vf_blend_init.c | 14 +++++++++
2 files changed, 78 insertions(+)
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 167e72b..54b5430 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -27,6 +27,8 @@ SECTION_RODATA
pw_128: times 8 dw 128
pw_255: times 8 dw 255
+pb_128: times 16 db 128
+pb_255: times 16 db 255
SECTION .text
@@ -273,6 +275,37 @@ cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, d
jg .nextrow
REP_RET
+cglobal blend_hardmix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end  ; per byte: dst = (top >= 255 - bottom) ? 255 : 0
+ add topq, widthq  ; point top/bottom/dst at the end of the row so the
+ add bottomq, widthq  ; column index can run from -width up to 0
+ add dstq, widthq
+ sub endq, startq  ; end := end - start, used below as the row counter
+ mova m2, [pb_255]  ; m2 = 0xFF in every byte (bitwise-NOT mask)
+ mova m3, [pb_128]  ; m3 = 0x80 in every byte (sign-bit flip for unsigned compare)
+ neg widthq  ; width := -width, the starting column index of each row
+.nextrow:
+ mov r10q, widthq  ; reset the column index (r10q, aliased as x below) to -width
+ %define x r10q
+
+ .loop:
+ movu m0, [topq + x]  ; m0 = mmsize bytes of top (unaligned load)
+ movu m1, [bottomq + x]  ; m1 = mmsize bytes of bottom (unaligned load)
+ pxor m1, m2  ; m1 = ~bottom = 255 - bottom
+ pxor m0, m3  ; flip the sign bit of both operands so that the signed
+ pxor m1, m3  ; pcmpgtb below orders them as unsigned bytes
+ pcmpgtb m1, m0  ; m1 = 0xFF where (255 - bottom) > top, else 0x00
+ pxor m1, m2  ; invert the mask: 0xFF where top >= 255 - bottom
+ mova [dstq + x], m1  ; store the result with the aligned-move form
+ add r10q, mmsize  ; advance one vector; the add sets the flags tested next
+ jl .loop  ; keep going while the column index is still negative
+
+ add topq, top_linesizeq  ; step all three pointers to the next row
+ add bottomq, bottom_linesizeq
+ add dstq, dst_linesizeq
+ sub endd, 1  ; one row done
+ jg .nextrow  ; loop while rows remain
+REP_RET  ; return via the x86inc REP_RET macro
+
cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
add topq, widthq
add bottomq, widthq
@@ -298,6 +331,37 @@ cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize,
jg .nextrow
REP_RET
+cglobal blend_phoenix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end  ; per byte: dst = 255 - max(top, bottom) + min(top, bottom)
+ add topq, widthq  ; point top/bottom/dst at the end of the row so the
+ add bottomq, widthq  ; column index can run from -width up to 0
+ add dstq, widthq
+ sub endq, startq  ; end := end - start, used below as the row counter
+ mova m3, [pb_255]  ; m3 = 0xFF in every byte
+ neg widthq  ; width := -width, the starting column index of each row
+.nextrow:
+ mov r10q, widthq  ; reset the column index (r10q, aliased as x below) to -width
+ %define x r10q
+
+ .loop:
+ movu m0, [topq + x]  ; m0 = mmsize bytes of top (unaligned load)
+ movu m1, [bottomq + x]  ; m1 = mmsize bytes of bottom (unaligned load)
+ mova m2, m0  ; keep a copy of top; pminub overwrites m0
+ pminub m0, m1  ; m0 = min(top, bottom), unsigned
+ pmaxub m1, m2  ; m1 = max(top, bottom), unsigned
+ mova m2, m3  ; m2 = 255
+ psubusb m2, m1  ; m2 = 255 - max (cannot go below 0)
+ paddusb m2, m0  ; m2 = 255 - max + min (min <= max, so never above 255)
+ mova [dstq + x], m2  ; store the result with the aligned-move form
+ add r10q, mmsize  ; advance one vector; the add sets the flags tested next
+ jl .loop  ; keep going while the column index is still negative
+
+ add topq, top_linesizeq  ; step all three pointers to the next row
+ add bottomq, bottom_linesizeq
+ add dstq, dst_linesizeq
+ sub endd, 1  ; one row done
+ jg .nextrow  ; loop while rows remain
+REP_RET  ; return via the x86inc REP_RET macro
+
INIT_XMM ssse3
cglobal blend_difference, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
add topq, widthq
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 61e90f8..454d030 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -59,6 +59,12 @@ void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values);
+void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
+ const uint8_t *bottom, ptrdiff_t bottom_linesize,
+ uint8_t *dst, ptrdiff_t dst_linesize,
+ ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+ struct FilterParams *param, double *values);
+
void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize,
@@ -71,6 +77,12 @@ void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values);
+void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
+ const uint8_t *bottom, ptrdiff_t bottom_linesize,
+ uint8_t *dst, ptrdiff_t dst_linesize,
+ ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+ struct FilterParams *param, double *values);
+
void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize,
@@ -107,8 +119,10 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
case BLEND_AVERAGE: param->blend = ff_blend_average_sse2; break;
case BLEND_DARKEN: param->blend = ff_blend_darken_sse2; break;
case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
+ case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break;
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
case BLEND_OR: param->blend = ff_blend_or_sse2; break;
+ case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
}
More information about the ffmpeg-cvslog
mailing list