[FFmpeg-devel] [PATCH] [BROKEN] swscale: RGBA64 output

Sun May 12 20:54:15 CEST 2013

On Sun, May 12, 2013 at 06:10:48PM +0000, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
>  libswscale/output.c                 | 290 ++++++++++++++++++++++++++++++++++--
>  libswscale/utils.c                  |   4 +-
>  libswscale/yuv2rgb.c                |  86 +++++++++++
>  tests/ref/fate/filter-pixdesc       |   2 +
>  tests/ref/fate/filter-pixfmts-copy  |   2 +
>  tests/ref/fate/filter-pixfmts-field |   2 +
>  tests/ref/fate/filter-pixfmts-hflip |   2 +
>  tests/ref/fate/filter-pixfmts-il    |   2 +
>  tests/ref/fate/filter-pixfmts-null  |   2 +
>  tests/ref/fate/filter-pixfmts-scale |   2 +
>  tests/ref/fate/filter-pixfmts-vflip |   2 +
>  11 files changed, 383 insertions(+), 13 deletions(-)
> 
> diff --git a/libswscale/output.c b/libswscale/output.c
> index a0826d0..c82b04b 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -674,12 +674,248 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
>      }
>  
> +static av_always_inline void // Multi-tap vertical path: sums lumFilterSize luma taps and chrFilterSize chroma taps per pixel pair, then hands 8 values (dest[0..7]) to output_pixel per iteration.
> +yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
> +                       const int32_t **lumSrc, int lumFilterSize,
> +                       const int16_t *chrFilter, const int32_t **chrUSrc,
> +                       const int32_t **chrVSrc, int chrFilterSize,
> +                       const int32_t **alpSrc, uint16_t *dest, int dstW,
> +                       int y, enum AVPixelFormat target, int hasAlpha) // y and target are not referenced directly below; output_pixel / B_R / R_B are not defined in the visible code and may be macros that use target -- TODO confirm
> +{
> +    int i;
> +
> +    for (i = 0; i < ((dstW + 1) >> 1); i++) { // one iteration per pair of output pixels; an odd dstW is rounded up, so index dstW is also read and written
> +        int j, A1 = 0, A2 = 0; // NOTE(review): when hasAlpha is 0 these stay 0 and that 0 is what reaches dest[3]/dest[7] -- confirm this is the intended value for the (presumably alpha) slot
> +        int Y1 = -0x40000000; // bias that is removed again by the "+= 0x10000" after the ">> 14" (0x40000000 >> 14 == 0x10000)
> +        int Y2 = -0x40000000;
> +        int U  = -128 << 23; // 19 -- NOTE(review): left-shifting a negative value is undefined behaviour in ISO C; usual result is -0x40000000, and unlike luma nothing is added back after the shift
> +        int V  = -128 << 23;
> +        int R, G, B;
> +
> +        for (j = 0; j < lumFilterSize; j++) {
> +            Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j]; // the cast makes the multiply-accumulate unsigned, so it wraps instead of overflowing a signed int
> +            Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
> +        }
> +        for (j = 0; j < chrFilterSize; j++) {; // NOTE(review): the ';' after '{' is a stray empty statement
> +            U += chrUSrc[j][i] * (unsigned)chrFilter[j]; // chroma is indexed by i, i.e. one U/V sample is shared by both pixels of the pair
> +            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
> +        }
> +
> +        if (hasAlpha) { // alpha is filtered with the luma coefficients (lumFilter / lumFilterSize)
> +            A1 = -0x40000000;
> +            A2 = -0x40000000;
> +            for (j = 0; j < lumFilterSize; j++) {
> +                A1 += alpSrc[j][i * 2]     * (unsigned)lumFilter[j];
> +                A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
> +            }
> +            A1 >>= 14; // 10
> +            A1 += 0x10000;
> +            A2 >>= 14;
> +            A2 += 0x10000;
> +            A1 -= c->yuv2rgb_y_offset; // NOTE(review): alpha is run through the same y_offset / y_coeff steps as Y1/Y2 below -- confirm that is intended
> +            A2 -= c->yuv2rgb_y_offset;
> +            A1 *= c->yuv2rgb_y_coeff;
> +            A2 *= c->yuv2rgb_y_coeff;
> +            A1 += 1 << 13; // 21
> +            A2 += 1 << 13;
> +        }
> +
> +        // 8bit: 12+15=27; 16-bit: 12+19=31
> +        Y1 >>= 14; // 10
> +        Y1 += 0x10000;
> +        Y2 >>= 14;
> +        Y2 += 0x10000;
> +        U  >>= 14;
> +        V  >>= 14;
> +
> +        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
> +        Y1 -= c->yuv2rgb_y_offset;
> +        Y2 -= c->yuv2rgb_y_offset;
> +        Y1 *= c->yuv2rgb_y_coeff;
> +        Y2 *= c->yuv2rgb_y_coeff;
> +        Y1 += 1 << 13; // 21
> +        Y2 += 1 << 13;
> +        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
> +
> +        R = V * c->yuv2rgb_v2r_coeff; // chroma contributions only; luma (Y1 or Y2) is added per pixel in the output_pixel calls
> +        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> +        B =                            U * c->yuv2rgb_u2b_coeff;
> +
> +        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
> +        output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); // NOTE(review): R and B are assigned above but only B_R / R_B are used; those names are not defined in the visible code -- presumably macros choosing R or B, TODO confirm
> +        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
> +        output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> +        output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
> +        output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); // second pixel of the pair: same R/G/B chroma terms, luma Y2
> +        output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
> +        output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> +        output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
> +        dest += 8; // advance past the 8 uint16_t slots addressed above
> +    }
> +}
> +
> +static av_always_inline void // Two-line blend path: mixes line 0 and line 1 of each plane with weights (4096 - alpha) and alpha, then hands 8 values (dest[0..7]) to output_pixel per iteration.
> +yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
> +                       const int32_t *ubuf[2], const int32_t *vbuf[2],
> +                       const int32_t *abuf[2], uint16_t *dest, int dstW,
> +                       int yalpha, int uvalpha, int y,
> +                       enum AVPixelFormat target, int hasAlpha) // y and target are not referenced directly below; output_pixel / B_R / R_B are not defined in the visible code and may be macros that use target -- TODO confirm
> +{
> +    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
> +                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
> +                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
> +                  *abuf0 = hasAlpha ? abuf[0] : NULL, // abuf is only dereferenced when hasAlpha is set
> +                  *abuf1 = hasAlpha ? abuf[1] : NULL;
> +    int  yalpha1 = 4096 - yalpha; // complementary weight: yalpha1 + yalpha == 4096
> +    int uvalpha1 = 4096 - uvalpha;
> +    int i;
> +
> +    for (i = 0; i < ((dstW + 1) >> 1); i++) { // one iteration per pair of output pixels; an odd dstW is rounded up, so index dstW is also read and written
> +        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14; // weighted sum of the two luma lines
> +        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
> +        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14; // NOTE(review): (-128 << 23) left-shifts a negative value, which is undefined behaviour in ISO C
> +        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
> +        int A1, A2; // NOTE(review): assigned only inside "if (hasAlpha)" but passed to av_clip_uintp2 unconditionally below, so the values are indeterminate when hasAlpha == 0
> +        int R, G, B;
> +
> +        Y1 -= c->yuv2rgb_y_offset;
> +        Y2 -= c->yuv2rgb_y_offset;
> +        Y1 *= c->yuv2rgb_y_coeff;
> +        Y2 *= c->yuv2rgb_y_coeff;
> +        Y1 += 1 << 13;
> +        Y2 += 1 << 13;
> +
> +        R = V * c->yuv2rgb_v2r_coeff; // chroma contributions only; luma (Y1 or Y2) is added per pixel in the output_pixel calls
> +        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> +        B =                            U * c->yuv2rgb_u2b_coeff;
> +
> +        if (hasAlpha) {
> +            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 14; // alpha is blended with the luma weights
> +            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 14;
> +
> +            A1 -= c->yuv2rgb_y_offset; // NOTE(review): alpha is run through the same y_offset / y_coeff steps as Y1/Y2 above -- confirm that is intended
> +            A2 -= c->yuv2rgb_y_offset;
> +            A1 *= c->yuv2rgb_y_coeff;
> +            A2 *= c->yuv2rgb_y_coeff;
> +            A1 += 1 << 13;
> +            A2 += 1 << 13;
> +        }
> +
> +        output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); // NOTE(review): R and B are assigned above but only B_R / R_B are used; those names are not defined in the visible code -- presumably macros choosing R or B, TODO confirm
> +        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
> +        output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> +        output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
> +        output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); // second pixel of the pair: same R/G/B chroma terms, luma Y2
> +        output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
> +        output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> +        output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
> +        dest += 8; // advance past the 8 uint16_t slots addressed above
> +    }
> +}
> +
> +static av_always_inline void // Single-line path: luma/alpha come from buf0/abuf0 alone; chroma is either line 0 alone or the sum of both chroma lines, selected by uvalpha. Hands 8 values (dest[0..7]) to output_pixel per iteration.
> +yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
> +                       const int32_t *ubuf[2], const int32_t *vbuf[2],
> +                       const int32_t *abuf0, uint16_t *dest, int dstW,
> +                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha) // y and target are not referenced directly below; output_pixel / B_R / R_B are not defined in the visible code and may be macros that use target -- TODO confirm
> +{
> +    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
> +    int i;
> +
> +    if (uvalpha < 2048) { // this branch reads only the first chroma line (ubuf0 / vbuf0)
> +        for (i = 0; i < ((dstW + 1) >> 1); i++) { // one iteration per pair of output pixels; an odd dstW is rounded up, so index dstW is also read and written
> +            int Y1 = (buf0[i * 2]    ) >> 2;
> +            int Y2 = (buf0[i * 2 + 1]) >> 2;
> +            int U  = (ubuf0[i] + (-128 << 11)) >> 2; // NOTE(review): (-128 << 11) left-shifts a negative value, which is undefined behaviour in ISO C
> +            int V  = (vbuf0[i] + (-128 << 11)) >> 2;
> +            int R, G, B;
> +            int A1, A2; // NOTE(review): assigned only inside "if (hasAlpha)" but passed to av_clip_uintp2 unconditionally below, so the values are indeterminate when hasAlpha == 0
> +
> +            Y1 -= c->yuv2rgb_y_offset;
> +            Y2 -= c->yuv2rgb_y_offset;
> +            Y1 *= c->yuv2rgb_y_coeff;
> +            Y2 *= c->yuv2rgb_y_coeff;
> +            Y1 += 1 << 13;
> +            Y2 += 1 << 13;
> +
> +            if (hasAlpha) {
> +                A1 = abuf0[i * 2    ] >> 2; // same >> 2 scaling as the luma samples above
> +                A2 = abuf0[i * 2 + 1] >> 2;
> +
> +                A1 -= c->yuv2rgb_y_offset; // NOTE(review): alpha is run through the same y_offset / y_coeff steps as Y1/Y2 above -- confirm that is intended
> +                A2 -= c->yuv2rgb_y_offset;
> +                A1 *= c->yuv2rgb_y_coeff;
> +                A2 *= c->yuv2rgb_y_coeff;
> +                A1 += 1 << 13;
> +                A2 += 1 << 13;
> +            }
> +
> +            R = V * c->yuv2rgb_v2r_coeff; // chroma contributions only; luma (Y1 or Y2) is added per pixel in the output_pixel calls
> +            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> +            B =                            U * c->yuv2rgb_u2b_coeff;
> +
> +            output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); // NOTE(review): R and B are assigned above but only B_R / R_B are used; those names are not defined in the visible code -- presumably macros choosing R or B, TODO confirm
> +            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
> +            output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> +            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
> +            output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); // second pixel of the pair: same R/G/B chroma terms, luma Y2
> +            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
> +            output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> +            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
> +            dest += 8; // advance past the 8 uint16_t slots addressed above
> +        }
> +    } else { // uvalpha >= 2048: both chroma lines are summed
> +        const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
> +        for (i = 0; i < ((dstW + 1) >> 1); i++) { // same pixel-pair iteration as the branch above
> +            int Y1 = (buf0[i * 2]    ) >> 2;
> +            int Y2 = (buf0[i * 2 + 1]) >> 2;
> +            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; // two lines summed, so the offset (<< 12 vs << 11) and the shift (>> 3 vs >> 2) are each doubled relative to the other branch; NOTE(review): left shift of a negative value is undefined behaviour in ISO C
> +            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
> +            int R, G, B;
> +            int A1, A2; // NOTE(review): assigned only inside "if (hasAlpha)" but passed to av_clip_uintp2 unconditionally below, so the values are indeterminate when hasAlpha == 0
> +
> +            Y1 -= c->yuv2rgb_y_offset;
> +            Y2 -= c->yuv2rgb_y_offset;
> +            Y1 *= c->yuv2rgb_y_coeff;
> +            Y2 *= c->yuv2rgb_y_coeff;
> +            Y1 += 1 << 13;
> +            Y2 += 1 << 13;
> +
> +            if (hasAlpha) {
> +                A1 = abuf0[i * 2    ] >> 2; // same >> 2 scaling as the luma samples above
> +                A2 = abuf0[i * 2 + 1] >> 2;
> +
> +                A1 -= c->yuv2rgb_y_offset; // NOTE(review): alpha is run through the same y_offset / y_coeff steps as Y1/Y2 above -- confirm that is intended
> +                A2 -= c->yuv2rgb_y_offset;
> +                A1 *= c->yuv2rgb_y_coeff;
> +                A2 *= c->yuv2rgb_y_coeff;
> +                A1 += 1 << 13;
> +                A2 += 1 << 13;
> +            }
> +
> +            R = V * c->yuv2rgb_v2r_coeff; // chroma contributions only; luma (Y1 or Y2) is added per pixel in the output_pixel calls
> +            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> +            B =                            U * c->yuv2rgb_u2b_coeff;
> +
> +            output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14); // NOTE(review): R and B are assigned above but only B_R / R_B are used; those names are not defined in the visible code -- presumably macros choosing R or B, TODO confirm
> +            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
> +            output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> +            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
> +            output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14); // second pixel of the pair: same R/G/B chroma terms, luma Y2
> +            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
> +            output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> +            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
> +            dest += 8; // advance past the 8 uint16_t slots addressed above
> +        }
> +    }
> +}

Off topic: maybe this can be factored together with the rgb48 code, or the
whole thing could be refactored to be less ugly ...

Back on topic: the patch should be OK once it works.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I have often repented speaking, but never of holding my tongue.
-- Xenocrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130512/0da72168/attachment.asc>