[FFmpeg-devel] [PATCH] [BROKEN] swscale: RGBA64 output
Michael Niedermayer
michaelni at gmx.at
Sun May 12 20:54:15 CEST 2013
On Sun, May 12, 2013 at 06:10:48PM +0000, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
> libswscale/output.c | 290 ++++++++++++++++++++++++++++++++++--
> libswscale/utils.c | 4 +-
> libswscale/yuv2rgb.c | 86 +++++++++++
> tests/ref/fate/filter-pixdesc | 2 +
> tests/ref/fate/filter-pixfmts-copy | 2 +
> tests/ref/fate/filter-pixfmts-field | 2 +
> tests/ref/fate/filter-pixfmts-hflip | 2 +
> tests/ref/fate/filter-pixfmts-il | 2 +
> tests/ref/fate/filter-pixfmts-null | 2 +
> tests/ref/fate/filter-pixfmts-scale | 2 +
> tests/ref/fate/filter-pixfmts-vflip | 2 +
> 11 files changed, 383 insertions(+), 13 deletions(-)
>
> diff --git a/libswscale/output.c b/libswscale/output.c
> index a0826d0..c82b04b 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -674,12 +674,248 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
> }
>
> static av_always_inline void
> +yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
> + const int32_t **lumSrc, int lumFilterSize,
> + const int16_t *chrFilter, const int32_t **chrUSrc,
> + const int32_t **chrVSrc, int chrFilterSize,
> + const int32_t **alpSrc, uint16_t *dest, int dstW,
> + int y, enum AVPixelFormat target, int hasAlpha)
> +{
> + int i;
> +
> + for (i = 0; i < ((dstW + 1) >> 1); i++) {
> + int j, A1 = 0, A2 = 0;
> + int Y1 = -0x40000000;
> + int Y2 = -0x40000000;
> + int U = -128 << 23; // 19
> + int V = -128 << 23;
> + int R, G, B;
> +
> + for (j = 0; j < lumFilterSize; j++) {
> + Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j];
> + Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
> + }
> + for (j = 0; j < chrFilterSize; j++) {;
> + U += chrUSrc[j][i] * (unsigned)chrFilter[j];
> + V += chrVSrc[j][i] * (unsigned)chrFilter[j];
> + }
> +
> + if (hasAlpha) {
> + A1 = -0x40000000;
> + A2 = -0x40000000;
> + for (j = 0; j < lumFilterSize; j++) {
> + A1 += alpSrc[j][i * 2] * (unsigned)lumFilter[j];
> + A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
> + }
> + A1 >>= 14; // 10
> + A1 += 0x10000;
> + A2 >>= 14;
> + A2 += 0x10000;
> + A1 -= c->yuv2rgb_y_offset;
> + A2 -= c->yuv2rgb_y_offset;
> + A1 *= c->yuv2rgb_y_coeff;
> + A2 *= c->yuv2rgb_y_coeff;
> + A1 += 1 << 13; // 21
> + A2 += 1 << 13;
> + }
> +
> + // 8bit: 12+15=27; 16-bit: 12+19=31
> + Y1 >>= 14; // 10
> + Y1 += 0x10000;
> + Y2 >>= 14;
> + Y2 += 0x10000;
> + U >>= 14;
> + V >>= 14;
> +
> + // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
> + Y1 -= c->yuv2rgb_y_offset;
> + Y2 -= c->yuv2rgb_y_offset;
> + Y1 *= c->yuv2rgb_y_coeff;
> + Y2 *= c->yuv2rgb_y_coeff;
> + Y1 += 1 << 13; // 21
> + Y2 += 1 << 13;
> + // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
> +
> + R = V * c->yuv2rgb_v2r_coeff;
> + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> + B = U * c->yuv2rgb_u2b_coeff;
> +
> + // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
> + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
> + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
> + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
> + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
> + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
> + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
> + dest += 8;
> + }
> +}
> +
> +static av_always_inline void
> +yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
> + const int32_t *ubuf[2], const int32_t *vbuf[2],
> + const int32_t *abuf[2], uint16_t *dest, int dstW,
> + int yalpha, int uvalpha, int y,
> + enum AVPixelFormat target, int hasAlpha)
> +{
> + const int32_t *buf0 = buf[0], *buf1 = buf[1],
> + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
> + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
> + *abuf0 = hasAlpha ? abuf[0] : NULL,
> + *abuf1 = hasAlpha ? abuf[1] : NULL;
> + int yalpha1 = 4096 - yalpha;
> + int uvalpha1 = 4096 - uvalpha;
> + int i;
> +
> + for (i = 0; i < ((dstW + 1) >> 1); i++) {
> + int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
> + int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
> + int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
> + int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
> + int A1, A2;
> + int R, G, B;
> +
> + Y1 -= c->yuv2rgb_y_offset;
> + Y2 -= c->yuv2rgb_y_offset;
> + Y1 *= c->yuv2rgb_y_coeff;
> + Y2 *= c->yuv2rgb_y_coeff;
> + Y1 += 1 << 13;
> + Y2 += 1 << 13;
> +
> + R = V * c->yuv2rgb_v2r_coeff;
> + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> + B = U * c->yuv2rgb_u2b_coeff;
> +
> + if (hasAlpha) {
> + A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 14;
> + A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 14;
> +
> + A1 -= c->yuv2rgb_y_offset;
> + A2 -= c->yuv2rgb_y_offset;
> + A1 *= c->yuv2rgb_y_coeff;
> + A2 *= c->yuv2rgb_y_coeff;
> + A1 += 1 << 13;
> + A2 += 1 << 13;
> + }
> +
> + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
> + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
> + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
> + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
> + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
> + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
> + dest += 8;
> + }
> +}
> +
> +static av_always_inline void
> +yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
> + const int32_t *ubuf[2], const int32_t *vbuf[2],
> + const int32_t *abuf0, uint16_t *dest, int dstW,
> + int uvalpha, int y, enum AVPixelFormat target, int hasAlpha)
> +{
> + const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
> + int i;
> +
> + if (uvalpha < 2048) {
> + for (i = 0; i < ((dstW + 1) >> 1); i++) {
> + int Y1 = (buf0[i * 2] ) >> 2;
> + int Y2 = (buf0[i * 2 + 1]) >> 2;
> + int U = (ubuf0[i] + (-128 << 11)) >> 2;
> + int V = (vbuf0[i] + (-128 << 11)) >> 2;
> + int R, G, B;
> + int A1, A2;
> +
> + Y1 -= c->yuv2rgb_y_offset;
> + Y2 -= c->yuv2rgb_y_offset;
> + Y1 *= c->yuv2rgb_y_coeff;
> + Y2 *= c->yuv2rgb_y_coeff;
> + Y1 += 1 << 13;
> + Y2 += 1 << 13;
> +
> + if (hasAlpha) {
> + A1 = abuf0[i * 2 ] >> 2;
> + A2 = abuf0[i * 2 + 1] >> 2;
> +
> + A1 -= c->yuv2rgb_y_offset;
> + A2 -= c->yuv2rgb_y_offset;
> + A1 *= c->yuv2rgb_y_coeff;
> + A2 *= c->yuv2rgb_y_coeff;
> + A1 += 1 << 13;
> + A2 += 1 << 13;
> + }
> +
> + R = V * c->yuv2rgb_v2r_coeff;
> + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> + B = U * c->yuv2rgb_u2b_coeff;
> +
> + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
> + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
> + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
> + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
> + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
> + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
> + dest += 8;
> + }
> + } else {
> + const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
> + for (i = 0; i < ((dstW + 1) >> 1); i++) {
> + int Y1 = (buf0[i * 2] ) >> 2;
> + int Y2 = (buf0[i * 2 + 1]) >> 2;
> + int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
> + int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
> + int R, G, B;
> + int A1, A2;
> +
> + Y1 -= c->yuv2rgb_y_offset;
> + Y2 -= c->yuv2rgb_y_offset;
> + Y1 *= c->yuv2rgb_y_coeff;
> + Y2 *= c->yuv2rgb_y_coeff;
> + Y1 += 1 << 13;
> + Y2 += 1 << 13;
> +
> + if (hasAlpha) {
> + A1 = abuf0[i * 2 ] >> 2;
> + A2 = abuf0[i * 2 + 1] >> 2;
> +
> + A1 -= c->yuv2rgb_y_offset;
> + A2 -= c->yuv2rgb_y_offset;
> + A1 *= c->yuv2rgb_y_coeff;
> + A2 *= c->yuv2rgb_y_coeff;
> + A1 += 1 << 13;
> + A2 += 1 << 13;
> + }
> +
> + R = V * c->yuv2rgb_v2r_coeff;
> + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
> + B = U * c->yuv2rgb_u2b_coeff;
> +
> + output_pixel(&dest[0], av_clip_uintp2(B_R + Y1, 30) >> 14);
> + output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
> + output_pixel(&dest[2], av_clip_uintp2(R_B + Y1, 30) >> 14);
> + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
> + output_pixel(&dest[4], av_clip_uintp2(B_R + Y2, 30) >> 14);
> + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
> + output_pixel(&dest[6], av_clip_uintp2(R_B + Y2, 30) >> 14);
> + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
> + dest += 8;
> + }
> + }
> +}
Off topic: maybe this can be factorized with the rgb48 code, or the
whole thing could be refactored to be less ugly...
Back on topic: the patch should be OK once it works.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
I have often repented speaking, but never of holding my tongue.
-- Xenocrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130512/0da72168/attachment.asc>
More information about the ffmpeg-devel mailing list