[FFmpeg-devel] [RFC] Alpha support

Mon Jan 19 00:44:37 CET 2009

On Sat, Jan 17, 2009 at 05:41:01PM +0100, C?dric Schieli wrote:
> Hello,
> 
> 2009/1/17 Reimar D?ffinger <Reimar.Doeffinger at stud.uni-karlsruhe.de>
[...]
> >
> > > +static int init(AVFilterContext *ctx, const char *args, void *opaque)
> >
> > av_cold?
> >
> 
> None of the current filters does it. IMHO yes, but I'll wait for a
> consensus.

it should be eventually added to all filters of course.

[...]

> Index: ffmpeg/libswscale/swscale.c
> ===================================================================
> --- ffmpeg.orig/libswscale/swscale.c	2009-01-17 17:23:07.786299639 +0100
> +++ ffmpeg/libswscale/swscale.c	2009-01-17 17:23:14.850327889 +0100
> @@ -586,6 +586,49 @@
>              else if (V<0) V=0;    \
>          }
>  
> +#define YSCALE_YUVA_2_PACKEDX_NOCLIP_C(type) \
> +    for (i=0; i<(dstW>>1); i++){\
> +        int j;\
> +        int Y1 = 1<<18;\
> +        int Y2 = 1<<18;\
> +        int U  = 1<<18;\
> +        int V  = 1<<18;\
> +        int A1 = 1<<18;\
> +        int A2 = 1<<18;\
> +        type av_unused *r, *b, *g;\
> +        const int i2= 2*i;\
> +        \
> +        for (j=0; j<lumFilterSize; j++)\
> +        {\
> +            Y1 += lumSrc[j][i2  ] * lumFilter[j];\
> +            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
> +            A1 += alpSrc[j][i2  ] * lumFilter[j];\
> +            A2 += alpSrc[j][i2+1] * lumFilter[j];\
> +        }\
> +        for (j=0; j<chrFilterSize; j++)\
> +        {\
> +            U += chrSrc[j][i     ] * chrFilter[j];\
> +            V += chrSrc[j][i+VOFW] * chrFilter[j];\
> +        }\
> +        Y1>>=19;\
> +        Y2>>=19;\
> +        U >>=19;\
> +        V >>=19;\
> +        A1>>=19;\
> +        A2>>=19;\
> +
> +#define YSCALE_YUVA_2_PACKEDX_C(type) \
> +        YSCALE_YUVA_2_PACKEDX_NOCLIP_C(type)\
> +        if ((Y1|Y2|U|V|A1|A2)&256)\
> +        {\
> +            Y1=av_clip_uint8(Y1); \
> +            Y2=av_clip_uint8(Y1); \
> +            U =av_clip_uint8(U);  \
> +            V =av_clip_uint8(V);  \
> +            A1=av_clip_uint8(A1); \
> +            A2=av_clip_uint8(A2); \
> +        }
> +
>  #define YSCALE_YUV_2_PACKEDX_FULL_C \
>      for (i=0; i<dstW; i++){\
>          int j;\
> @@ -622,6 +665,45 @@
>              else if (B<0)B=0;   \
>          }\
>  
> +#define YSCALE_YUVA_2_PACKEDX_FULL_C \
> +    for (i=0; i<dstW; i++){\
> +        int j;\
> +        int Y = 0;\
> +        int U = -128<<19;\
> +        int V = -128<<19;\
> +        int A = 0;\
> +        int R,G,B;\
> +        \
> +        for (j=0; j<lumFilterSize; j++){\
> +            Y += lumSrc[j][i     ] * lumFilter[j];\
> +        }\
> +        for (j=0; j<chrFilterSize; j++){\
> +            U += chrSrc[j][i     ] * chrFilter[j];\
> +            V += chrSrc[j][i+VOFW] * chrFilter[j];\
> +        }\
> +        for (j=0; j<lumFilterSize; j++){\
> +            A += alpSrc[j][i     ] * lumFilter[j];\
> +        }\
> +        Y >>=10;\
> +        U >>=10;\
> +        V >>=10;\
> +        A >>=10;\
> +
> +#define YSCALE_YUVA_2_RGBX_FULL_C(rnd) \
> +    YSCALE_YUVA_2_PACKEDX_FULL_C\
> +        Y-= c->yuv2rgb_y_offset;\
> +        Y*= c->yuv2rgb_y_coeff;\
> +        Y+= rnd;\
> +        R= Y + V*c->yuv2rgb_v2r_coeff;\
> +        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
> +        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
> +        if ((R|G|B|A)&(0xC0000000)){\
> +            R=av_clip(R, 0, (256<<22)-1); \
> +            G=av_clip(G, 0, (256<<22)-1); \
> +            B=av_clip(B, 0, (256<<22)-1); \
> +            A=av_clip(A, 0, (256<<22)-1); \
> +        }\
> +
>  
>  #define YSCALE_YUV_2_GRAY16_C \
>      for (i=0; i<(dstW>>1); i++){\

This code is largly duplicate of existing code
Copy&paste + edit extensions are in general never acceptable

[...]
>  static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
>                                      int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
> -                                    uint8_t *dest, int dstW, int y)
> +                                    int16_t **alpSrc, uint8_t *dest, int dstW, int y)
>  {
>      int i;
>      int step= fmt_depth(c->dstFormat)/8;
>      int aidx= 3;
>  
> +    if(needALPHA(c)){
> +        switch(c->dstFormat){
> +        case PIX_FMT_RGB32:
> +            YSCALE_YUVA_2_RGBX_FULL_C(1<<21)
> +                dest[0]= B>>22;
> +                dest[1]= G>>22;
> +                dest[2]= R>>22;
> +                dest[3]= A>>22;
> +                dest+= step;
> +            }
> +            break;
> +        case PIX_FMT_BGR32:
> +            YSCALE_YUVA_2_RGBX_FULL_C(1<<21)
> +                dest[0]= R>>22;
> +                dest[1]= G>>22;
> +                dest[2]= B>>22;
> +                dest[3]= A>>22;
> +                dest+= step;
> +            }
> +            break;
> +        case PIX_FMT_RGB32_1:
> +            YSCALE_YUVA_2_RGBX_FULL_C(1<<21)
> +                dest[0]= A>>22;
> +                dest[1]= R>>22;
> +                dest[2]= G>>22;
> +                dest[3]= B>>22;
> +                dest+= step;
> +            }
> +            break;
> +        case PIX_FMT_BGR32_1:
> +            YSCALE_YUVA_2_RGBX_FULL_C(1<<21)
> +                dest[0]= A>>22;
> +                dest[1]= B>>22;
> +                dest[2]= G>>22;
> +                dest[3]= R>>22;
> +                dest+= step;
> +            }
> +            break;
> +        default:
> +            assert(0);
> +        }
> +        return;
> +    }
> +
>      switch(c->dstFormat){
>      case PIX_FMT_ARGB:
>          dest++;

This looks just wrong (in the it wont work on big endian sense), 
besides being terribly bloated, yeah macros look small but there is alot
of code behind each ...

and i woulr prefer if/else over
+if(){
+   ...
+   return;
+}

[...]

> +#define needALPHA(x) (x->alpPixBuf)

ugly wraper

[...]
> @@ -1165,6 +1261,42 @@
>          {
>              //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
>              case PIX_FMT_RGB32:
> +                if(needALPHA(c))
> +                {
> +                    __asm__ volatile(
> +                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
> +                    "mov        %4, %%"REG_b"               \n\t"
> +                    "push %%"REG_BP"                        \n\t"
> +                    YSCALEYUV2RGB(%%REGBP, %5)
> +                    "push                   %0              \n\t"
> +                    "push                   %1              \n\t"
> +                    "mov       3*"PTR_SIZE"+%6, %0          \n\t" /* compensate 3 pushs */
> +                    "mov       3*"PTR_SIZE"+%7, %1          \n\t" /* compensate 3 pushs */
> +                    "movq  (%0, %%"REG_BP", 2), %%mm6       \n\t" /* abuf0[eax] */
> +                    "movq  (%1, %%"REG_BP", 2), %%mm7       \n\t" /* abuf1[eax] */
> +                    "movq 8(%0, %%"REG_BP", 2), %%mm0       \n\t" /* abuf0[eax] */
> +                    "movq 8(%1, %%"REG_BP", 2), %%mm1       \n\t" /* abuf1[eax] */
> +                    "psubw               %%mm1, %%mm0       \n\t" /* abuf0[eax] - abuf1[eax]*/
> +                    "psubw               %%mm7, %%mm6       \n\t" /* abuf0[eax] - abuf1[eax]*/
> +                    "pmulhw "LUM_MMX_FILTER_OFFSET"+8(%5), %%mm0 \n\t" /* (abuf0[eax] - abuf1[eax])yalpha1>>16*/
> +                    "pmulhw "LUM_MMX_FILTER_OFFSET"+8(%5), %%mm6 \n\t" /* (abuf0[eax] - abuf1[eax])yalpha1>>16*/
> +                    "psraw                  $4, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>4*/
> +                    "psraw                  $4, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>4*/
> +                    "paddw               %%mm0, %%mm1       \n\t" /* abuf0[eax]yalpha1 + abuf1[eax](1-yalpha1) >>16*/
> +                    "paddw               %%mm6, %%mm7       \n\t" /* abuf0[eax]yalpha1 + abuf1[eax](1-yalpha1) >>16*/
> +                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
> +                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
> +                    "packuswb            %%mm1, %%mm7       \n\t"
> +                    "pop                    %1              \n\t"
> +                    "pop                    %0              \n\t"
> +                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
> +                    "pop %%"REG_BP"                         \n\t"
> +                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
> +
> +                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
> +                    "a" (&c->redDither), "m" (abuf0), "m" (abuf1)
> +                    );
> +                }else{
>                  __asm__ volatile(
>                  "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
>                  "mov        %4, %%"REG_b"               \n\t"

this code is not valid
you cant be changing ebp and use "m", gcc can use ebp for addressing in "m"
besides the code should be factorized with the existing code not copy & paste
+ edited, unless this really is not possible but i doubt it, at least a macro
could be used

[...]
> +static inline void RENAME(bgr32_1ToA)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
> +{
> +    int i;
> +    for (i=0; i<width; i++)
> +    {
> +        ((uint32_t *)dst)[i]= ((uint32_t *)src)[i]&0xFFFFFF00;
> +    }
> +}

whatever its doing its not what the function name suggests

except that your patch is a little messy and needs cleanup though i must say,
nice to see someone work on alpha support in swscale!

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Why not whip the teacher when the pupil misbehaves? -- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090119/9cc0da43/attachment.pgp>