[FFmpeg-devel] [PATCH 2/2] x86/videodsp: add emulated_edge_mc_ssse3
James Almer
jamrial at gmail.com
Mon Jun 23 20:51:54 CEST 2014
Signed-off-by: James Almer <jamrial at gmail.com>
---
Not benchmarked.
libavcodec/x86/videodsp.asm | 16 ++++++++++++++++
libavcodec/x86/videodsp_init.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+)
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index ad15af9..761a0b0 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -21,6 +21,10 @@
%include "libavutil/x86/x86util.asm"
+SECTION_RODATA
+
+cextern pb_3
+
SECTION .text
; slow vertical extension loop function. Works with variable-width, and
@@ -96,6 +100,9 @@ cglobal emu_edge_hvar, 5, 6, 2, dst, dst_stride, start_x, n_words, h, w
lea dstq, [dstq+n_wordsq*2]
neg n_wordsq
lea start_xq, [start_xq+n_wordsq*2]
+%if cpuflag(ssse3)
+ mova m1, [pb_3]
+%endif
.y_loop: ; do {
mov wq, n_wordsq ; initialize w
SPLATB_LOAD m0, dstq+start_xq, m1 ; read(1); splat
@@ -120,6 +127,8 @@ hvar_fn
INIT_XMM sse2
hvar_fn
+INIT_XMM ssse3
+hvar_fn
; macro to read/write a horizontal number of pixels (%2) to/from registers
; on sse, - fills xmm0-15 for consecutive sets of 16 pixels
@@ -412,6 +421,9 @@ cglobal emu_edge_hfix %+ %%n, 4, 5, 2, dst, dst_stride, start_x, bh, val
%else
cglobal emu_edge_hfix %+ %%n, 4, 4, 2, dst, dst_stride, start_x, bh
%endif
+%if cpuflag(ssse3)
+ mova m1, [pb_3]
+%endif
.loop_y: ; do {
READ_V_PIXEL %%n, dstq+start_xq ; $variable_regs = read($n)
WRITE_V_PIXEL %%n, dstq ; write($variable_regs, $n)
@@ -436,6 +448,10 @@ H_EXTEND 16, 22
%endif
INIT_XMM sse2
H_EXTEND 16, 22
+INIT_MMX ssse3
+H_EXTEND 4, 14
+INIT_XMM ssse3
+H_EXTEND 16, 22
%macro PREFETCH_FN 1
cglobal prefetch, 3, 3, 0, buf, stride, h
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index bd61ab4..85c541f 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -147,6 +147,23 @@ static emu_edge_hfix_func *hfixtbl_sse2[11] = {
ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2
};
extern emu_edge_hvar_func ff_emu_edge_hvar_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix4_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix6_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix8_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix10_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix12_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix14_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_ssse3;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_ssse3;
+static emu_edge_hfix_func *hfixtbl_ssse3[11] = { /* one entry per hfixN routine, N = 2, 4, ..., 22 */
+ ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_ssse3, ff_emu_edge_hfix6_ssse3, /* the hfix2 slot holds the MMX function; every other slot is SSSE3 */
+ ff_emu_edge_hfix8_ssse3, ff_emu_edge_hfix10_ssse3, ff_emu_edge_hfix12_ssse3,
+ ff_emu_edge_hfix14_ssse3, ff_emu_edge_hfix16_ssse3, ff_emu_edge_hfix18_ssse3,
+ ff_emu_edge_hfix20_ssse3, ff_emu_edge_hfix22_ssse3
+};
+extern emu_edge_hvar_func ff_emu_edge_hvar_ssse3;
static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride,
@@ -268,6 +285,18 @@ static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
}
+
+static av_noinline void emulated_edge_mc_ssse3(uint8_t *buf, const uint8_t *src,
+ ptrdiff_t buf_stride,
+ ptrdiff_t src_stride,
+ int block_w, int block_h,
+ int src_x, int src_y, int w,
+ int h)
+{ /* SSSE3 entry point: forwards every argument unchanged to emulated_edge_mc() */
+ emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+ src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse, /* vertical helpers: same SSE table/function the SSE2 wrapper passes */
+ hfixtbl_ssse3, &ff_emu_edge_hvar_ssse3); /* horizontal helpers: the SSSE3 table and variable-width function */
+}
#endif /* HAVE_YASM */
void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h);
@@ -301,5 +330,8 @@ av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) {
ctx->emulated_edge_mc = emulated_edge_mc_sse2;
}
+ if (EXTERNAL_SSSE3(cpu_flags) && bpc <= 8) {
+ ctx->emulated_edge_mc = emulated_edge_mc_ssse3;
+ }
#endif /* HAVE_YASM */
}
--
1.8.5.5
More information about the ffmpeg-devel
mailing list