[FFmpeg-devel] [PATCH] swscale alpha channel support
Cédric Schieli
cschieli
Thu Mar 5 23:21:05 CET 2009
2009/3/5 Michael Niedermayer <michaelni at gmx.at>:
> On Thu, Mar 05, 2009 at 03:09:26PM +0100, C?dric Schieli wrote:
>> 2009/3/2 Michael Niedermayer <michaelni at gmx.at>:
>> > On Fri, Feb 27, 2009 at 11:30:25PM +0100, C?dric Schieli wrote:
[...]
>> #9 : swscale-example_use_alpha.patch
>> updated
>> the second hunk in this one needs some explanations :
>> without it, swscale-example will segfault
>> even without any of my patches, simply adding a uint16_t** field to
>> struct SwsContext and av_malloc'ing between 13 and 236 bytes (or 26
>> and 408 on x86_64) will make swscale-example to segfault (see
>> bug.diff)
>> after some debugging, I found that my alpPixBuf data get corrupted somehow
>> disabling MMX code with --disable-mmx prevent this from happening
>> running swscale-example under valgrind (even without bug.diff) will segfault
>> can someone can help on this ?
>
> in swscale_internal.h there are #defines that define the offsets of
> various fields in the struct so the asm can access them, you dont maybe
> mess that up by adding a field?
I carefully added my fields either before the defines or at the end of
the struct
when testing with "bug.diff", moving around the field in the struct
doesn't change anything
>
>
> and if valgrind itself segfaults, that should be reported to the valgrind
> devs ...
no, it's not valgrind that segfaults, but swscale-example run under
valgrind (logfile attached)
when building with --disable-mmx, all runs fine
> #7
> [...]
>> @@ -1191,6 +1254,52 @@
>> ? ? ? ? ?{
>> ? ? ? ? ? ? ?//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
>> ? ? ? ? ? ? ?case PIX_FMT_RGB32:
>> + ? ? ? ? ? ? ? ?if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
>> +#if !ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?*(uint16_t **)(&c->u_temp)=abuf0;
>> + ? ? ? ? ? ? ? ? ? ?*(uint16_t **)(&c->v_temp)=abuf1;
>> +#endif
>> + ? ? ? ? ? ? ? ? ? ?__asm__ volatile(
>> +#if !ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?"mov %%"REG_b", "ESP_OFFSET"(%5) ? ? ? ?\n\t"
>> +#endif
>> + ? ? ? ? ? ? ? ? ? ?"mov ? ? ? ?%4, %%"REG_b" ? ? ? ? ? ? ? \n\t"
>> +#if !ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?"push %%"REG_BP" ? ? ? ? ? ? ? ? ? ? ? ?\n\t"
>> +#endif
>> + ? ? ? ? ? ? ? ? ? ?YSCALEYUV2RGB(%%REGBP, %5)
>> +#if !ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?"push ? ? ? ? ? ? ? ? ? %0 ? ? ? ? ? ? ?\n\t"
>> + ? ? ? ? ? ? ? ? ? ?"push ? ? ? ? ? ? ? ? ? %1 ? ? ? ? ? ? ?\n\t"
>> + ? ? ? ? ? ? ? ? ? ?"mov ? ? ? ? ?"U_TEMP"(%5), %0 ? ? ? ? ?\n\t"
>> + ? ? ? ? ? ? ? ? ? ?"mov ? ? ? ? ?"V_TEMP"(%5), %1 ? ? ? ? ?\n\t"
>> +#endif
>> +#if ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
>> +#else
>> + ? ? ? ? ? ? ? ? ? ?YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
>> +#endif
>> + ? ? ? ? ? ? ? ? ? ?"psraw ? ? ? ? ? ? ? ? ?$3, %%mm1 ? ? ? \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
>> + ? ? ? ? ? ? ? ? ? ?"psraw ? ? ? ? ? ? ? ? ?$3, %%mm7 ? ? ? \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
>> + ? ? ? ? ? ? ? ? ? ?"packuswb ? ? ? ? ? ?%%mm7, %%mm1 ? ? ? \n\t"
>> +#if !ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?"pop ? ? ? ? ? ? ? ? ? ?%1 ? ? ? ? ? ? ?\n\t"
>> + ? ? ? ? ? ? ? ? ? ?"pop ? ? ? ? ? ? ? ? ? ?%0 ? ? ? ? ? ? ?\n\t"
>> +#endif
>> + ? ? ? ? ? ? ? ? ? ?WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
>> +#if !ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?"pop %%"REG_BP" ? ? ? ? ? ? ? ? ? ? ? ? \n\t"
>> + ? ? ? ? ? ? ? ? ? ?"mov "ESP_OFFSET"(%5), %%"REG_b" ? ? ? ?\n\t"
>> +#endif
>> +
>> + ? ? ? ? ? ? ? ? ? ?:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
>> + ? ? ? ? ? ? ? ? ? ?"a" (&c->redDither)
>> +#if ARCH_X86_64
>> + ? ? ? ? ? ? ? ? ? ?,"r" (abuf0), "r" (abuf1)
>> + ? ? ? ? ? ? ? ? ? ?: "%"REG_b, "%"REG_BP
>> +#endif
>> + ? ? ? ? ? ? ? ? ? ?);
>> + ? ? ? ? ? ? ? ?}else{
>> ? ? ? ? ? ? ? ? ?__asm__ volatile(
>> ? ? ? ? ? ? ? ? ?"mov %%"REG_b", "ESP_OFFSET"(%5) ? ? ? ?\n\t"
>> ? ? ? ? ? ? ? ? ?"mov ? ? ? ?%4, %%"REG_b" ? ? ? ? ? ? ? \n\t"
>
> ehm, this looks really messy ...
Yes, I wanted to not duplicate code, but it finally looks ugly
Do you prefer something like this ?
#if ARCH_X86_64
__asm__ volatile(
"mov %4, %%"REG_b" \n\t"
YSCALEYUV2RGB(%%REGBP, %5)
YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
"psraw $3, %%mm1 \n\t" /*
abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7 \n\t" /*
abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1 \n\t"
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2,
%%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r"
(uvbuf1), "m" (dest),
"r" (&c->redDither), "r" (abuf0), "r" (abuf1)
: "%"REG_b, "%"REG_BP
);
#else
*(uint16_t **)(&c->u_temp)=abuf0;
*(uint16_t **)(&c->v_temp)=abuf1;
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5)
"push %0 \n\t"
"push %1 \n\t"
"mov "U_TEMP"(%5), %0 \n\t"
"mov "V_TEMP"(%5), %1 \n\t"
YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
"psraw $3, %%mm1 \n\t" /*
abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7 \n\t" /*
abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1 \n\t"
"pop %1 \n\t"
"pop %0 \n\t"
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2,
%%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D"
(uvbuf1), "m" (dest),
"a" (&c->redDither)
);
#endif
> [...]
>
>> @@ -2144,11 +2293,17 @@
>> ? ? ?}
>> ? ? ?else if (srcFormat==PIX_FMT_RGB32)
>> ? ? ?{
>> + ? ? ? ?if (isAlpha)
>> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src+3, srcW, pal);
>> + ? ? ? ?else
>> ? ? ? ? ?RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
>> ? ? ? ? ?src= formatConvBuffer;
>> ? ? ?}
>> ? ? ?else if (srcFormat==PIX_FMT_RGB32_1)
>> ? ? ?{
>> + ? ? ? ?if (isAlpha)
>> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src, srcW, pal);
>> + ? ? ? ?else
>> ? ? ? ? ?RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
>> ? ? ? ? ?src= formatConvBuffer;
>> ? ? ?}
>> @@ -2169,11 +2324,17 @@
>> ? ? ?}
>> ? ? ?else if (srcFormat==PIX_FMT_BGR32)
>> ? ? ?{
>> + ? ? ? ?if (isAlpha)
>> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src+3, srcW, pal);
>> + ? ? ? ?else
>> ? ? ? ? ?RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
>> ? ? ? ? ?src= formatConvBuffer;
>> ? ? ?}
>> ? ? ?else if (srcFormat==PIX_FMT_BGR32_1)
>> ? ? ?{
>> + ? ? ? ?if (isAlpha)
>> + ? ? ? ? ? ?RENAME(bgr32ToA)(formatConvBuffer, src, srcW, pal);
>> + ? ? ? ?else
>> ? ? ? ? ?RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
>> ? ? ? ? ?src= formatConvBuffer;
>> ? ? ?}
>
> This doesnt look like it would work on big endian systems
>
>
> [...]
>
>>
>> + ? ?if (CONFIG_SWSCALE_ALPHA && (dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
>> + ? ? ? ?memset(dst[3], 255, dstStride[3]*(dstY-lastDstY));
>> +
>
> you cant write outside 0..width per line, also stride can be negative
>
>
> [...]
>
> #8
>> Index: ffmpeg/libswscale/swscale.c
>> ===================================================================
>> --- ffmpeg.orig/libswscale/swscale.c ?2009-03-03 09:31:07.000000000 +0100
>> +++ ffmpeg/libswscale/swscale.c ? ? ? 2009-03-03 10:00:28.000000000 +0100
>> @@ -133,6 +133,7 @@
>> ? ? ?)
>> ?#define isSupportedOut(x) ? ( ? ? ? \
>> ? ? ? ? ? ? (x)==PIX_FMT_YUV420P ? ? \
>> + ? ? ? ?|| (x)==PIX_FMT_YUVA420P ? ?\
>> ? ? ? ? ?|| (x)==PIX_FMT_YUYV422 ? ? \
>> ? ? ? ? ?|| (x)==PIX_FMT_UYVY422 ? ? \
>> ? ? ? ? ?|| (x)==PIX_FMT_YUV444P ? ? \
>> @@ -2053,12 +2054,16 @@
>> ? ? ? ? ? ? ? ? ? ? ? ?int srcSliceH, uint8_t* dst[], int dstStride[])
>> ?{
>> ? ? ?int plane;
>> - ? ?for (plane=0; plane<3; plane++)
>> + ? ?for (plane=0; plane<4; plane++)
>> ? ? ?{
>> - ? ? ? ?int length= plane==0 ? c->srcW ?: -((-c->srcW ?)>>c->chrDstHSubSample);
>> - ? ? ? ?int y= ? ? ?plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
>> - ? ? ? ?int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
>> -
>> + ? ? ? ?int length= (plane==0 || plane==3) ? c->srcW ?: -((-c->srcW ?)>>c->chrDstHSubSample);
>> + ? ? ? ?int y= ? ? ?(plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
>> + ? ? ? ?int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
>> +
>
>> + ? ? ? ?if (((!isALPHA(c->srcFormat)) || (!isALPHA(c->dstFormat))) && plane == 3){
>
> !(isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) && plane == 3
>
>
>> + ? ? ? ? ? ?if (isALPHA(c->dstFormat))
>> + ? ? ? ? ? ? ? ?memset(dst[3], 255, dstStride[3]*height);
>
> as said elewhere writing must be limited to 0..width
>
> [...]
>
>
>
> --
> Michael ? ? GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Dictatorship naturally arises out of democracy, and the most aggravated
> form of tyranny and slavery out of the most extreme liberty. -- Plato
>
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1.4.9 (GNU/Linux)
>
> iD8DBQFJsENVYR7HhwQLD6sRAhtqAJ4xOxLufiuCcAAogdXiuhB2E3V5nQCglVEJ
> WeASGa7xs2H????=
> =Acv0
> -----END PGP SIGNATURE-----
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at mplayerhq.hu
> https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-devel
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: valgrind.log
Type: text/x-log
Size: 7864 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090305/cee8c572/attachment.bin>
More information about the ffmpeg-devel
mailing list