[FFmpeg-devel] [PATCH 1/5] sws/output: replace inaccurate yuv2rgb table of X scale

Michael Niedermayer michaelni at gmx.at
Sun Apr 14 12:17:18 CEST 2013


Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
---
 libswscale/output.c |  176 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 158 insertions(+), 18 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index d9745fb..8224620 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1018,6 +1018,127 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
 }
 
 static av_always_inline void
+yuv2rgb_write2(uint8_t *_dest, int i, int Y1, int Y2, // writes the pixel pair at index i (pixels 2*i and 2*i+1)
+              unsigned A1, unsigned A2,
+              int R, int G, int B, int y, // R/G/B are added to each of Y1/Y2; y picks the dither row
+              enum AVPixelFormat target, int hasAlpha, int isBGR) // hasAlpha is not read in this function
+{
+    //XXX: 'target' is always the RGB variant, even for BGR output; isBGR is what selects the channel swap below
+    if (isBGR && target != AV_PIX_FMT_RGB8) // RGB8 handles isBGR itself when packing (see below)
+        FFSWAP(int, R, B);
+    if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_ABGR) { // alpha first: two pixels = 8 bytes
+        _dest[i * 8 + 0] = av_clip_uintp2(A1  , 30) >> 22; // clip to 30 bits, keep the top 8
+        _dest[i * 8 + 1] = av_clip_uintp2(R+Y1, 30) >> 22;
+        _dest[i * 8 + 2] = av_clip_uintp2(G+Y1, 30) >> 22;
+        _dest[i * 8 + 3] = av_clip_uintp2(B+Y1, 30) >> 22;
+        _dest[i * 8 + 4] = av_clip_uintp2(A2  , 30) >> 22;
+        _dest[i * 8 + 5] = av_clip_uintp2(R+Y2, 30) >> 22;
+        _dest[i * 8 + 6] = av_clip_uintp2(G+Y2, 30) >> 22;
+        _dest[i * 8 + 7] = av_clip_uintp2(B+Y2, 30) >> 22;
+    } else if (target == AV_PIX_FMT_RGBA || target == AV_PIX_FMT_BGRA) { // alpha last: two pixels = 8 bytes
+        _dest[i * 8 + 0] = av_clip_uintp2(R+Y1, 30) >> 22;
+        _dest[i * 8 + 1] = av_clip_uintp2(G+Y1, 30) >> 22;
+        _dest[i * 8 + 2] = av_clip_uintp2(B+Y1, 30) >> 22;
+        _dest[i * 8 + 3] = av_clip_uintp2(A1  , 30) >> 22;
+        _dest[i * 8 + 4] = av_clip_uintp2(R+Y2, 30) >> 22;
+        _dest[i * 8 + 5] = av_clip_uintp2(G+Y2, 30) >> 22;
+        _dest[i * 8 + 6] = av_clip_uintp2(B+Y2, 30) >> 22;
+        _dest[i * 8 + 7] = av_clip_uintp2(A2  , 30) >> 22;
+    } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) { // no alpha: two pixels = 6 bytes
+        _dest[i * 6 + 0] = av_clip_uintp2(R+Y1, 30) >> 22;
+        _dest[i * 6 + 1] = av_clip_uintp2(G+Y1, 30) >> 22;
+        _dest[i * 6 + 2] = av_clip_uintp2(B+Y1, 30) >> 22;
+        _dest[i * 6 + 3] = av_clip_uintp2(R+Y2, 30) >> 22;
+        _dest[i * 6 + 4] = av_clip_uintp2(G+Y2, 30) >> 22;
+        _dest[i * 6 + 5] = av_clip_uintp2(B+Y2, 30) >> 22;
+    } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
+               target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
+               target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) { // one 16-bit word per pixel
+        uint16_t *dest = (uint16_t *) _dest;
+        int dr1, dg1, db1, dr2, dg2, db2;
+
+        if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) { // red/blue keep 5 bits (>>25), green 6 bits (>>24)
+            dr1 = av_clip_uintp2((dither_2x2_8[ y & 1     ][0]<<22) + R+Y1, 30) >> 25; // dither is added before clipping
+            dg1 = av_clip_uintp2((dither_2x2_4[ y & 1     ][0]<<22) + G+Y1, 30) >> 24;
+            db1 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][0]<<22) + B+Y1, 30) >> 25; // blue reads the other dither row
+            dr2 = av_clip_uintp2((dither_2x2_8[ y & 1     ][1]<<22) + R+Y2, 30) >> 25;
+            dg2 = av_clip_uintp2((dither_2x2_4[ y & 1     ][1]<<22) + G+Y2, 30) >> 24;
+            db2 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][1]<<22) + B+Y2, 30) >> 25;
+            dest[i * 2 + 0] = db1 + (dg1<<5) + (dr1<<11);
+            dest[i * 2 + 1] = db2 + (dg2<<5) + (dr2<<11);
+        } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) { // 5 bits per channel (>>25)
+            dr1 = av_clip_uintp2((dither_2x2_8[ y & 1     ][0]<<22) + R+Y1, 30) >> 25;
+            dg1 = av_clip_uintp2((dither_2x2_8[ y & 1     ][1]<<22) + G+Y1, 30) >> 25;
+            db1 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][0]<<22) + B+Y1, 30) >> 25;
+            dr2 = av_clip_uintp2((dither_2x2_8[ y & 1     ][1]<<22) + R+Y2, 30) >> 25;
+            dg2 = av_clip_uintp2((dither_2x2_8[ y & 1     ][0]<<22) + G+Y2, 30) >> 25;
+            db2 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][1]<<22) + B+Y2, 30) >> 25;
+            dest[i * 2 + 0] = db1 + (dg1<<5) + (dr1<<10);
+            dest[i * 2 + 1] = db2 + (dg2<<5) + (dr2<<10);
+        } else { // remaining 16-bit case (RGB444/BGR444): 4 bits per channel (>>26)
+            dr1 = av_clip_uintp2((dither_4x4_16[ y & 3     ][0]<<22) + R+Y1, 30) >> 26;
+            dg1 = av_clip_uintp2((dither_4x4_16[ y & 3     ][1]<<22) + G+Y1, 30) >> 26;
+            db1 = av_clip_uintp2((dither_4x4_16[(y & 3) ^ 3][0]<<22) + B+Y1, 30) >> 26;
+            dr2 = av_clip_uintp2((dither_4x4_16[ y & 3     ][1]<<22) + R+Y2, 30) >> 26;
+            dg2 = av_clip_uintp2((dither_4x4_16[ y & 3     ][0]<<22) + G+Y2, 30) >> 26;
+            db2 = av_clip_uintp2((dither_4x4_16[(y & 3) ^ 3][1]<<22) + B+Y2, 30) >> 26;
+            dest[i * 2 + 0] = db1 + (dg1<<4) + (dr1<<8);
+            dest[i * 2 + 1] = db2 + (dg2<<4) + (dr2<<8);
+        }
+    } else /* 8/4-bit */ {
+        uint8_t *dest = (uint8_t *) _dest;
+        int dr1, dg1, db1, dr2, dg2, db2;
+
+        if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) { // red/green keep 3 bits (>>27), blue 2 bits (>>28)
+            const uint8_t * const d64 = dither_8x8_73[y & 7];
+            const uint8_t * const d32 = dither_8x8_32[y & 7];
+            dr1 = R+Y1; dr1 -= dr1>>3; // scale by 7/8 -- presumably headroom for the dither added below; TODO confirm
+            dg1 = G+Y1; dg1 -= dg1>>3;
+            db1 = B+Y1; db1 -= db1>>2; // blue is scaled by 3/4 instead
+            dr2 = R+Y2; dr2 -= dr2>>3;
+            dg2 = G+Y2; dg2 -= dg2>>3;
+            db2 = B+Y2; db2 -= db2>>2;
+            dr1 = av_clip_uintp2((d32[(i * 2 + 0) & 7]<<22) + dr1, 30) >> 27; // dither column is the pixel's x position mod 8
+            dg1 = av_clip_uintp2((d32[(i * 2 + 0) & 7]<<22) + dg1, 30) >> 27;
+            db1 = av_clip_uintp2((d64[(i * 2 + 0) & 7]*64/73<<22) + db1, 30) >> 28; // table entry rescaled by 64/73
+            dr2 = av_clip_uintp2((d32[(i * 2 + 1) & 7]<<22) + dr2, 30) >> 27;
+            dg2 = av_clip_uintp2((d32[(i * 2 + 1) & 7]<<22) + dg2, 30) >> 27;
+            db2 = av_clip_uintp2((d64[(i * 2 + 1) & 7]*64/73<<22) + db2, 30) >> 28;
+            if(isBGR) { // R/B were not swapped above for RGB8, so the bit layout is chosen here
+                dest[i * 2 + 0] = dr1 + (dg1<<3) + (db1<<6);
+                dest[i * 2 + 1] = dr2 + (dg2<<3) + (db2<<6);
+            } else {
+                dest[i * 2 + 0] = db1 + (dg1<<2) + (dr1<<5);
+                dest[i * 2 + 1] = db2 + (dg2<<2) + (dr2<<5);
+            }
+        } else { // every other target: red/blue keep 1 bit (>>29), green 2 bits (>>28)
+            const uint8_t * const d64  = dither_8x8_73 [y & 7];
+            const uint8_t * const d128 = dither_8x8_220[y & 7];
+
+            dr1 = R+Y1;
+            dg1 = G+Y1; dg1 -= dg1>>2; // green is scaled by 3/4; red/blue are halved at the clip below
+            db1 = B+Y1;
+            dr2 = R+Y2;
+            dg2 = G+Y2; dg2 -= dg2>>2;
+            db2 = B+Y2;
+            dr1 = av_clip_uintp2((d128[(i * 2 + 0) & 7]*128/220<<22) + (dr1>>1), 30) >> 29; // table entry rescaled by 128/220
+            dg1 = av_clip_uintp2((d64 [(i * 2 + 0) & 7]*64/73    <<22) + dg1, 30) >> 28;
+            db1 = av_clip_uintp2((d128[(i * 2 + 0) & 7]*128/220<<22) + (db1>>1), 30) >> 29;
+            dr2 = av_clip_uintp2((d128[(i * 2 + 1) & 7]*128/220<<22) + (dr2>>1), 30) >> 29;
+            dg2 = av_clip_uintp2((d64 [(i * 2 + 1) & 7]*64/73    <<22) + dg2, 30) >> 28;
+            db2 = av_clip_uintp2((d128[(i * 2 + 1) & 7]*128/220<<22) + (db2>>1), 30) >> 29;
+            if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) { // both pixels share one byte
+                dest[i] = db1     + (dg1<<1) + (dr1<<3) // first pixel in the low nibble
+                       + (db2<<4) + (dg2<<5) + (dr2<<7); // second pixel in the high nibble
+            } else { // one pixel per byte, in the low nibble
+                dest[i * 2 + 0] = db1     + (dg1<<1) + (dr1<<3);
+                dest[i * 2 + 1] = db2     + (dg2<<1) + (dr2<<3);
+            }
+        }
+    }
+}
+
+static av_always_inline void
 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int16_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -1026,14 +1147,17 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      int y, enum AVPixelFormat target, int hasAlpha)
 {
     int i;
+    int isBGR = c->dstFormat == AV_PIX_FMT_ABGR   || c->dstFormat == AV_PIX_FMT_BGRA   || c->dstFormat == AV_PIX_FMT_BGR24
+             || c->dstFormat == AV_PIX_FMT_BGR565LE || c->dstFormat == AV_PIX_FMT_BGR555LE || c->dstFormat == AV_PIX_FMT_BGR444LE
+             || c->dstFormat == AV_PIX_FMT_BGR565BE || c->dstFormat == AV_PIX_FMT_BGR555BE || c->dstFormat == AV_PIX_FMT_BGR444BE
+             || c->dstFormat == AV_PIX_FMT_BGR8     || c->dstFormat == AV_PIX_FMT_BGR4 || c->dstFormat == AV_PIX_FMT_BGR4_BYTE;
 
     for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int j, A1, A2;
+        int j, A1, A2, R, G, B;
         int Y1 = 1 << 18;
         int Y2 = 1 << 18;
-        int U  = 1 << 18;
-        int V  = 1 << 18;
-        const void *r, *g, *b;
+        int U  = (1 << 18) + (-128 << 19);
+        int V  = (1 << 18) + (-128 << 19);
 
         for (j = 0; j < lumFilterSize; j++) {
             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
@@ -1043,10 +1167,10 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
             U += chrUSrc[j][i] * chrFilter[j];
             V += chrVSrc[j][i] * chrFilter[j];
         }
-        Y1 >>= 19;
-        Y2 >>= 19;
-        U  >>= 19;
-        V  >>= 19;
+        Y1 >>= 10;
+        Y2 >>= 10;
+        U  >>= 10;
+        V  >>= 10;
         if (hasAlpha) {
             A1 = 1 << 18;
             A2 = 1 << 18;
@@ -1054,20 +1178,36 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
             }
-            A1 >>= 19;
-            A2 >>= 19;
-            if ((A1 | A2) & 0x100) {
-                A1 = av_clip_uint8(A1);
-                A2 = av_clip_uint8(A2);
+            if ((A1 | A2) & 0xF8000000) {
+                A1 = av_clip_uintp2(A1, 27);
+                A2 = av_clip_uintp2(A2, 27);
             }
+            A1 <<= 3;
+            A2 <<= 3;
         }
 
-        r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM];
-        g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
-        b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
+        Y1 -= c->yuv2rgb_y_offset;
+        Y2 -= c->yuv2rgb_y_offset;
+        Y1 *= c->yuv2rgb_y_coeff;
+        Y2 *= c->yuv2rgb_y_coeff;
+        Y1 += 1 << 13;
+        Y2 += 1 << 13;
 
-        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
-                      r, g, b, y, target, hasAlpha);
+        R = V * c->yuv2rgb_v2r_coeff;
+        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+        B =                            U * c->yuv2rgb_u2b_coeff;
+
+        yuv2rgb_write2(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+                       R, G, B, y, target, hasAlpha, isBGR);
+    }
+    if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+        uint16_t *dest16 = (uint16_t*)dest;
+        if (6-1 > desc->comp[0].depth_minus1 && desc->comp[0].depth_minus1 > 3-1) {
+            for (i = 0; i < dstW; i++) {
+                dest16[i] = av_bswap16(dest16[i]);
+            }
+        }
     }
 }
 
-- 
1.7.9.5



More information about the ffmpeg-devel mailing list