[Mplayer-cvslog] CVS: main/libvo yuv2rgb_mmx.c,1.4,1.5
Jürgen Keil
jkeil at mplayer.dev.hu
Thu Jul 12 17:23:28 CEST 2001
Update of /cvsroot/mplayer/main/libvo
In directory mplayer:/var/tmp.root/cvs-serv20197
Modified Files:
yuv2rgb_mmx.c
Log Message:
yuv2rgb_mmx crashes with the ffdivx codec when playing back AVI files whose
frame width is not an exact multiple of 8.
Testcase: 405.avi (356x240). Playing it on an MMX-capable x86 system using the
x11 video-out driver results in a segfault.
The MMX routines convert image data 8 pixels at a time in each loop iteration,
and the inner loop did not terminate when only 1-7 pixels were left on a row,
so it produced too much RGB output.
For now, just ignore the last few pixels on each row to avoid the segfaults.
(This gives a black vertical border on the right if you play a video with
width%8 != 0.) A possible future enhancement would be to add a second loop
that converts the last width%8 pixels to RGB using a byte loop.
Index: yuv2rgb_mmx.c
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/yuv2rgb_mmx.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- yuv2rgb_mmx.c 11 Jun 2001 17:43:15 -0000 1.4
+++ yuv2rgb_mmx.c 12 Jul 2001 15:23:26 -0000 1.5
@@ -76,24 +76,29 @@
int rgb_stride, int y_stride, int uv_stride)
{
int even = 1;
- int x = 0, y = 0;
+ int x, y;
- /* load data for first scan line */
- __asm__ __volatile__ (
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+ __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
- "pxor %%mm4, %%mm4;" /* zero mm4 */
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ for (y = v_size; --y >= 0; ) {
+ uint8_t *_image = image;
+ uint8_t *_py = py;
+ uint8_t *_pu = pu;
+ uint8_t *_pv = pv;
+
+ /* load data for start of next scan line */
+ __asm__ __volatile__ (
+ "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+ "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+ "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- //"movl $0, (%3);" /* cache preload for image */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ : : "r" (_py), "r" (_pu), "r" (_pv));
- do {
- do {
+ for (x = h_size >> 3; --x >= 0; ) {
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
pixels in each iteration */
- __asm__ __volatile__ (".align 8;"
+
+ __asm__ __volatile__ (
/* Do the multiply part of the conversion for even and odd pixels,
register usage:
mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
@@ -199,40 +204,24 @@
"movd 4 (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
MOVNTQ " %%mm5, 8 (%3);" /* store pixel 4-7 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ : : "r" (_py), "r" (_pu), "r" (_pv), "r" (_image));
- py += 8;
- pu += 4;
- pv += 4;
- image += 16;
- x += 8;
- } while (x < h_size);
-
- if (even) {
- pu -= h_size/2;
- pv -= h_size/2;
- } else {
- pu += (uv_stride - h_size/2);
- pv += (uv_stride - h_size/2);
+ _py += 8;
+ _pu += 4;
+ _pv += 4;
+ _image += 16;
}
- py += (y_stride - h_size);
- image += (rgb_stride - 2*h_size);
-
- /* load data for start of next scan line */
- __asm__ __volatile__ (
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 00 u3 u2 u1 u0 */
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 00 v2 v1 v0 */
-
- //"movl $0, (%3);" /* cache preload for image */
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ if (!even) {
+ pu += uv_stride;
+ pv += uv_stride;
+ }
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ py += y_stride;
+ image += rgb_stride;
- x = 0;
- y += 1;
even = (!even);
- } while (y < v_size) ;
+ }
__asm__ __volatile__ (EMMS);
}
@@ -243,25 +232,29 @@
int rgb_stride, int y_stride, int uv_stride)
{
int even = 1;
- int x = 0, y = 0;
+ int x, y;
- __asm__ __volatile__ (
- ".align 8;"
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
- //"movl $0, (%3);" /* cache preload for image */
+ __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
- "pxor %%mm4, %%mm4;" /* zero mm4 */
+ for (y = v_size; --y >= 0; ) {
+ uint8_t *_image = image;
+ uint8_t *_py = py;
+ uint8_t *_pu = pu;
+ uint8_t *_pv = pv;
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ /* load data for start of next scan line */
+ __asm__ __volatile__
+ (
+ "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+ "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+ "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ : : "r" (_py), "r" (_pu), "r" (_pv)
+ );
- do {
- do {
+ for (x = h_size >> 3; --x >= 0; ) {
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
pixels in each iteration */
__asm__ __volatile__ (
- ".align 8;"
/* Do the multiply part of the conversion for even and odd pixels,
register usage:
mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
@@ -379,43 +372,24 @@
"pxor %%mm4, %%mm4;" /* zero mm4 */
"movq 8 (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ : : "r" (_py), "r" (_pu), "r" (_pv), "r" (_image));
- py += 8;
- pu += 4;
- pv += 4;
- image += 32;
- x += 8;
- } while (x < h_size);
-
- if (even) {
- pu -= h_size/2;
- pv -= h_size/2;
- } else {
- pu += (uv_stride - h_size/2);
- pv += (uv_stride - h_size/2);
+ _py += 8;
+ _pu += 4;
+ _pv += 4;
+ _image += 32;
}
- py += (y_stride - h_size);
- image += (rgb_stride - 4*h_size);
-
- /* load data for start of next scan line */
- __asm__ __volatile__
- (
- ".align 8;"
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-
- //"movl $0, (%3);" /* cache preload for image */
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image)
- );
+ if (!even) {
+ pu += uv_stride;
+ pv += uv_stride;
+ }
+ py += y_stride;
+ image += rgb_stride;
- x = 0;
- y += 1;
even = (!even);
- } while ( y < v_size) ;
+ }
__asm__ __volatile__ (EMMS);
}
More information about the MPlayer-cvslog
mailing list