[MPlayer-cvslog] r35679 - trunk/libmpcodecs/vf_ass.c

upsuper subversion at mplayerhq.hu
Fri Dec 14 03:16:36 CET 2012


Author: upsuper
Date: Fri Dec 14 03:16:36 2012
New Revision: 35679

Log:
Reduce register usage in an asm block.

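Reduce the asm block of render_frame_yuv422_sse4 to 4 general-purpose
registers by relaxing the src_y/src_u/src_v operands from "r" to "g" and
loading each of them through REG_S right before it is used.
After this modification, the function is only ~3.4x faster than
render_frame_yuv422.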

Modified:
   trunk/libmpcodecs/vf_ass.c

Modified: trunk/libmpcodecs/vf_ass.c
==============================================================================
--- trunk/libmpcodecs/vf_ass.c	Fri Dec 14 03:16:30 2012	(r35678)
+++ trunk/libmpcodecs/vf_ass.c	Fri Dec 14 03:16:36 2012	(r35679)
@@ -274,9 +274,12 @@ static void render_frame_yuv422_sse4(vf_
                 "psrlw      $8, %%xmm3 \n\t"
                 "packuswb   %%xmm7, %%xmm1 \n\t"
                 "packuswb   %%xmm7, %%xmm3 \n\t"
-                "movq       (%[src_y], %[j], 1),    %%xmm4 \n\t"
-                "movq       (%[src_u], %[j], 1),    %%xmm5 \n\t"
-                "movq       (%[src_v], %[j], 1),    %%xmm6 \n\t"
+                "mov        %[src_y],   %%"REG_S" \n\t"
+                "movq       (%%"REG_S", %[j], 1),   %%xmm4 \n\t"
+                "mov        %[src_u],   %%"REG_S" \n\t"
+                "movq       (%%"REG_S", %[j], 1),   %%xmm5 \n\t"
+                "mov        %[src_v],   %%"REG_S" \n\t"
+                "movq       (%%"REG_S", %[j], 1),   %%xmm6 \n\t"
                 "packuswb   %%xmm7, %%xmm5 \n\t"
                 "packuswb   %%xmm7, %%xmm6 \n\t"
                 "punpcklbw  %%xmm6, %%xmm5 \n\t"
@@ -302,12 +305,13 @@ static void render_frame_yuv422_sse4(vf_
 
                 : : [dst]   "r" (dst + i * stride),
                     [alpha] "r" (alpha + i * outw),
-                    [src_y] "r" (src_y + i * outw),
-                    [src_u] "r" (src_u + i * outw),
-                    [src_v] "r" (src_v + i * outw),
+                    [src_y] "g" (src_y + i * outw),
+                    [src_u] "g" (src_u + i * outw),
+                    [src_v] "g" (src_v + i * outw),
                     [j]     "r" (xmin),
                     [xmax]  "g" (xmax),
                     [f]     "g" (is_uyvy)
+                : REG_S
         );
     }
 }


More information about the MPlayer-cvslog mailing list