[MPlayer-cvslog] r35679 - trunk/libmpcodecs/vf_ass.c
upsuper
subversion at mplayerhq.hu
Fri Dec 14 03:16:36 CET 2012
Author: upsuper
Date: Fri Dec 14 03:16:36 2012
New Revision: 35679
Log:
Reduce register usage in an asm block.
Reduce the number of general-purpose registers required by the asm block
of render_frame_yuv422_sse4 to 4. After this modification, the function is
only ~3.4x faster than render_frame_yuv422.
Modified:
trunk/libmpcodecs/vf_ass.c
Modified: trunk/libmpcodecs/vf_ass.c
==============================================================================
--- trunk/libmpcodecs/vf_ass.c Fri Dec 14 03:16:30 2012 (r35678)
+++ trunk/libmpcodecs/vf_ass.c Fri Dec 14 03:16:36 2012 (r35679)
@@ -274,9 +274,12 @@ static void render_frame_yuv422_sse4(vf_
"psrlw $8, %%xmm3 \n\t"
"packuswb %%xmm7, %%xmm1 \n\t"
"packuswb %%xmm7, %%xmm3 \n\t"
- "movq (%[src_y], %[j], 1), %%xmm4 \n\t"
- "movq (%[src_u], %[j], 1), %%xmm5 \n\t"
- "movq (%[src_v], %[j], 1), %%xmm6 \n\t"
+ "mov %[src_y], %%"REG_S" \n\t"
+ "movq (%%"REG_S", %[j], 1), %%xmm4 \n\t"
+ "mov %[src_u], %%"REG_S" \n\t"
+ "movq (%%"REG_S", %[j], 1), %%xmm5 \n\t"
+ "mov %[src_v], %%"REG_S" \n\t"
+ "movq (%%"REG_S", %[j], 1), %%xmm6 \n\t"
"packuswb %%xmm7, %%xmm5 \n\t"
"packuswb %%xmm7, %%xmm6 \n\t"
"punpcklbw %%xmm6, %%xmm5 \n\t"
@@ -302,12 +305,13 @@ static void render_frame_yuv422_sse4(vf_
: : [dst] "r" (dst + i * stride),
[alpha] "r" (alpha + i * outw),
- [src_y] "r" (src_y + i * outw),
- [src_u] "r" (src_u + i * outw),
- [src_v] "r" (src_v + i * outw),
+ [src_y] "g" (src_y + i * outw),
+ [src_u] "g" (src_u + i * outw),
+ [src_v] "g" (src_v + i * outw),
[j] "r" (xmin),
[xmax] "g" (xmax),
[f] "g" (is_uyvy)
+ : REG_S
);
}
}
More information about the MPlayer-cvslog mailing list