[FFmpeg-devel] [PATCH 1/5] sws/output: replace inaccurate yuv2rgb table of X scale
Michael Niedermayer
michaelni at gmx.at
Sun Apr 14 12:17:18 CEST 2013
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
---
libswscale/output.c | 176 +++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 158 insertions(+), 18 deletions(-)
diff --git a/libswscale/output.c b/libswscale/output.c
index d9745fb..8224620 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1018,6 +1018,127 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
}
static av_always_inline void /* Store one pair of output pixels (2*i and 2*i+1) into _dest in the layout selected by target. */
+yuv2rgb_write2(uint8_t *_dest, int i, int Y1, int Y2, /* Y1/Y2: luma terms of the first/second pixel of the pair */
+ unsigned A1, unsigned A2, /* A1/A2: alpha of the two pixels; clipped to 30 bits and reduced exactly like the colour channels */
+ int R, int G, int B, int y, /* R/G/B: chroma terms added to both Y1 and Y2; y: row number, only used to pick a dither row */
+ enum AVPixelFormat target, int hasAlpha, int isBGR) /* hasAlpha is not referenced anywhere in this body */
+{
+ // NOTE: target is always the RGB variant, even for BGR output, so isBGR carries the channel order; RGB8 is excluded here because its branch below tests isBGR itself.
+ if (isBGR && target != AV_PIX_FMT_RGB8)
+ FFSWAP(int, R, B);
+ if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_ABGR) { /* 4 bytes per pixel, alpha byte first */
+ _dest[i * 8 + 0] = av_clip_uintp2(A1 , 30) >> 22; /* clip to 30 bits, then >> 22 keeps the top 8 bits */
+ _dest[i * 8 + 1] = av_clip_uintp2(R+Y1, 30) >> 22;
+ _dest[i * 8 + 2] = av_clip_uintp2(G+Y1, 30) >> 22;
+ _dest[i * 8 + 3] = av_clip_uintp2(B+Y1, 30) >> 22;
+ _dest[i * 8 + 4] = av_clip_uintp2(A2 , 30) >> 22;
+ _dest[i * 8 + 5] = av_clip_uintp2(R+Y2, 30) >> 22;
+ _dest[i * 8 + 6] = av_clip_uintp2(G+Y2, 30) >> 22;
+ _dest[i * 8 + 7] = av_clip_uintp2(B+Y2, 30) >> 22;
+ } else if (target == AV_PIX_FMT_RGBA || target == AV_PIX_FMT_BGRA) { /* 4 bytes per pixel, alpha byte last */
+ _dest[i * 8 + 0] = av_clip_uintp2(R+Y1, 30) >> 22;
+ _dest[i * 8 + 1] = av_clip_uintp2(G+Y1, 30) >> 22;
+ _dest[i * 8 + 2] = av_clip_uintp2(B+Y1, 30) >> 22;
+ _dest[i * 8 + 3] = av_clip_uintp2(A1 , 30) >> 22;
+ _dest[i * 8 + 4] = av_clip_uintp2(R+Y2, 30) >> 22;
+ _dest[i * 8 + 5] = av_clip_uintp2(G+Y2, 30) >> 22;
+ _dest[i * 8 + 6] = av_clip_uintp2(B+Y2, 30) >> 22;
+ _dest[i * 8 + 7] = av_clip_uintp2(A2 , 30) >> 22;
+ } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) { /* 3 bytes per pixel, no alpha */
+ _dest[i * 6 + 0] = av_clip_uintp2(R+Y1, 30) >> 22;
+ _dest[i * 6 + 1] = av_clip_uintp2(G+Y1, 30) >> 22;
+ _dest[i * 6 + 2] = av_clip_uintp2(B+Y1, 30) >> 22;
+ _dest[i * 6 + 3] = av_clip_uintp2(R+Y2, 30) >> 22;
+ _dest[i * 6 + 4] = av_clip_uintp2(G+Y2, 30) >> 22;
+ _dest[i * 6 + 5] = av_clip_uintp2(B+Y2, 30) >> 22;
+ } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 || /* 16-bit packed formats, one uint16_t per pixel */
+ target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
+ target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) {
+ uint16_t *dest = (uint16_t *) _dest;
+ int dr1, dg1, db1, dr2, dg2, db2; /* dithered, bit-reduced channel values of pixel 1 and pixel 2 */
+
+ if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) { /* 5/6/5 bits: >> 25 leaves 5 bits, >> 24 leaves 6 */
+ dr1 = av_clip_uintp2((dither_2x2_8[ y & 1 ][0]<<22) + R+Y1, 30) >> 25; /* dither value is shifted by 22 before being added to the 30-bit sum */
+ dg1 = av_clip_uintp2((dither_2x2_4[ y & 1 ][0]<<22) + G+Y1, 30) >> 24;
+ db1 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][0]<<22) + B+Y1, 30) >> 25; /* blue reads the other dither row than red */
+ dr2 = av_clip_uintp2((dither_2x2_8[ y & 1 ][1]<<22) + R+Y2, 30) >> 25;
+ dg2 = av_clip_uintp2((dither_2x2_4[ y & 1 ][1]<<22) + G+Y2, 30) >> 24;
+ db2 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][1]<<22) + B+Y2, 30) >> 25;
+ dest[i * 2 + 0] = db1 + (dg1<<5) + (dr1<<11); /* db in bits 0-4, dg in bits 5-10, dr in bits 11-15 */
+ dest[i * 2 + 1] = db2 + (dg2<<5) + (dr2<<11);
+ } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) { /* 5 bits per channel */
+ dr1 = av_clip_uintp2((dither_2x2_8[ y & 1 ][0]<<22) + R+Y1, 30) >> 25;
+ dg1 = av_clip_uintp2((dither_2x2_8[ y & 1 ][1]<<22) + G+Y1, 30) >> 25; /* green uses the opposite dither column from red */
+ db1 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][0]<<22) + B+Y1, 30) >> 25;
+ dr2 = av_clip_uintp2((dither_2x2_8[ y & 1 ][1]<<22) + R+Y2, 30) >> 25;
+ dg2 = av_clip_uintp2((dither_2x2_8[ y & 1 ][0]<<22) + G+Y2, 30) >> 25;
+ db2 = av_clip_uintp2((dither_2x2_8[(y & 1) ^ 1][1]<<22) + B+Y2, 30) >> 25;
+ dest[i * 2 + 0] = db1 + (dg1<<5) + (dr1<<10); /* db in bits 0-4, dg in bits 5-9, dr in bits 10-14 */
+ dest[i * 2 + 1] = db2 + (dg2<<5) + (dr2<<10);
+ } else { /* remaining 16-bit case (the 444 targets): >> 26 leaves 4 bits per channel */
+ dr1 = av_clip_uintp2((dither_4x4_16[ y & 3 ][0]<<22) + R+Y1, 30) >> 26;
+ dg1 = av_clip_uintp2((dither_4x4_16[ y & 3 ][1]<<22) + G+Y1, 30) >> 26;
+ db1 = av_clip_uintp2((dither_4x4_16[(y & 3) ^ 3][0]<<22) + B+Y1, 30) >> 26;
+ dr2 = av_clip_uintp2((dither_4x4_16[ y & 3 ][1]<<22) + R+Y2, 30) >> 26;
+ dg2 = av_clip_uintp2((dither_4x4_16[ y & 3 ][0]<<22) + G+Y2, 30) >> 26;
+ db2 = av_clip_uintp2((dither_4x4_16[(y & 3) ^ 3][1]<<22) + B+Y2, 30) >> 26;
+ dest[i * 2 + 0] = db1 + (dg1<<4) + (dr1<<8); /* db in bits 0-3, dg in bits 4-7, dr in bits 8-11 */
+ dest[i * 2 + 1] = db2 + (dg2<<4) + (dr2<<8);
+ }
+ } else /* 8/4-bit */ {
+ uint8_t *dest = (uint8_t *) _dest;
+ int dr1, dg1, db1, dr2, dg2, db2; /* dithered, bit-reduced channel values of pixel 1 and pixel 2 */
+
+ if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) { /* 3 bits red, 3 bits green, 2 bits blue */
+ const uint8_t * const d64 = dither_8x8_73[y & 7]; /* dither row used for the 2-bit channel */
+ const uint8_t * const d32 = dither_8x8_32[y & 7]; /* dither row used for the 3-bit channels */
+ dr1 = R+Y1; dr1 -= dr1>>3; /* scale by 7/8 before dithering -- presumably headroom for the dither term, TODO confirm */
+ dg1 = G+Y1; dg1 -= dg1>>3;
+ db1 = B+Y1; db1 -= db1>>2; /* scale by 3/4 before dithering */
+ dr2 = R+Y2; dr2 -= dr2>>3;
+ dg2 = G+Y2; dg2 -= dg2>>3;
+ db2 = B+Y2; db2 -= db2>>2;
+ dr1 = av_clip_uintp2((d32[(i * 2 + 0) & 7]<<22) + dr1, 30) >> 27; /* >> 27 leaves 3 bits; dither column is the pixel x position mod 8 */
+ dg1 = av_clip_uintp2((d32[(i * 2 + 0) & 7]<<22) + dg1, 30) >> 27;
+ db1 = av_clip_uintp2((d64[(i * 2 + 0) & 7]*64/73<<22) + db1, 30) >> 28; /* >> 28 leaves 2 bits; table value is rescaled by 64/73 first */
+ dr2 = av_clip_uintp2((d32[(i * 2 + 1) & 7]<<22) + dr2, 30) >> 27;
+ dg2 = av_clip_uintp2((d32[(i * 2 + 1) & 7]<<22) + dg2, 30) >> 27;
+ db2 = av_clip_uintp2((d64[(i * 2 + 1) & 7]*64/73<<22) + db2, 30) >> 28;
+ if(isBGR) { /* R and B were not swapped above for RGB8, so the order is chosen here */
+ dest[i * 2 + 0] = dr1 + (dg1<<3) + (db1<<6); /* dr in bits 0-2, dg in bits 3-5, db in bits 6-7 */
+ dest[i * 2 + 1] = dr2 + (dg2<<3) + (db2<<6);
+ } else {
+ dest[i * 2 + 0] = db1 + (dg1<<2) + (dr1<<5); /* db in bits 0-1, dg in bits 2-4, dr in bits 5-7 */
+ dest[i * 2 + 1] = db2 + (dg2<<2) + (dr2<<5);
+ }
+ } else { /* all remaining targets: 4 bits per pixel (1 bit red, 2 bits green, 1 bit blue) */
+ const uint8_t * const d64 = dither_8x8_73 [y & 7]; /* dither row for the 2-bit green channel */
+ const uint8_t * const d128 = dither_8x8_220[y & 7]; /* dither row for the 1-bit red and blue channels */
+
+ dr1 = R+Y1;
+ dg1 = G+Y1; dg1 -= dg1>>2; /* scale green by 3/4 before dithering */
+ db1 = B+Y1;
+ dr2 = R+Y2;
+ dg2 = G+Y2; dg2 -= dg2>>2;
+ db2 = B+Y2;
+ dr1 = av_clip_uintp2((d128[(i * 2 + 0) & 7]*128/220<<22) + (dr1>>1), 30) >> 29; /* value halved, dither rescaled by 128/220, >> 29 leaves 1 bit */
+ dg1 = av_clip_uintp2((d64 [(i * 2 + 0) & 7]*64/73 <<22) + dg1, 30) >> 28; /* >> 28 leaves 2 bits */
+ db1 = av_clip_uintp2((d128[(i * 2 + 0) & 7]*128/220<<22) + (db1>>1), 30) >> 29;
+ dr2 = av_clip_uintp2((d128[(i * 2 + 1) & 7]*128/220<<22) + (dr2>>1), 30) >> 29;
+ dg2 = av_clip_uintp2((d64 [(i * 2 + 1) & 7]*64/73 <<22) + dg2, 30) >> 28;
+ db2 = av_clip_uintp2((d128[(i * 2 + 1) & 7]*128/220<<22) + (db2>>1), 30) >> 29;
+ if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) { /* both pixels share one byte */
+ dest[i] = db1 + (dg1<<1) + (dr1<<3) /* first pixel in the low nibble */
+ + (db2<<4) + (dg2<<5) + (dr2<<7); /* second pixel in the high nibble */
+ } else { /* one byte per pixel, value in the low nibble */
+ dest[i * 2 + 0] = db1 + (dg1<<1) + (dr1<<3);
+ dest[i * 2 + 1] = db2 + (dg2<<1) + (dr2<<3);
+ }
+ }
+ }
+}
+
+static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -1026,14 +1147,17 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
int y, enum AVPixelFormat target, int hasAlpha)
{
int i;
+ int isBGR = c->dstFormat == AV_PIX_FMT_ABGR || c->dstFormat == AV_PIX_FMT_BGRA || c->dstFormat == AV_PIX_FMT_BGR24
+ || c->dstFormat == AV_PIX_FMT_BGR565LE || c->dstFormat == AV_PIX_FMT_BGR555LE || c->dstFormat == AV_PIX_FMT_BGR444LE
+ || c->dstFormat == AV_PIX_FMT_BGR565BE || c->dstFormat == AV_PIX_FMT_BGR555BE || c->dstFormat == AV_PIX_FMT_BGR444BE
+ || c->dstFormat == AV_PIX_FMT_BGR8 || c->dstFormat == AV_PIX_FMT_BGR4 || c->dstFormat == AV_PIX_FMT_BGR4_BYTE;
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int j, A1, A2;
+ int j, A1, A2, R, G, B;
int Y1 = 1 << 18;
int Y2 = 1 << 18;
- int U = 1 << 18;
- int V = 1 << 18;
- const void *r, *g, *b;
+ int U = (1 << 18) + (-128 << 19);
+ int V = (1 << 18) + (-128 << 19);
for (j = 0; j < lumFilterSize; j++) {
Y1 += lumSrc[j][i * 2] * lumFilter[j];
@@ -1043,10 +1167,10 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
U += chrUSrc[j][i] * chrFilter[j];
V += chrVSrc[j][i] * chrFilter[j];
}
- Y1 >>= 19;
- Y2 >>= 19;
- U >>= 19;
- V >>= 19;
+ Y1 >>= 10;
+ Y2 >>= 10;
+ U >>= 10;
+ V >>= 10;
if (hasAlpha) {
A1 = 1 << 18;
A2 = 1 << 18;
@@ -1054,20 +1178,36 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
A1 += alpSrc[j][i * 2 ] * lumFilter[j];
A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
}
- A1 >>= 19;
- A2 >>= 19;
- if ((A1 | A2) & 0x100) {
- A1 = av_clip_uint8(A1);
- A2 = av_clip_uint8(A2);
+ if ((A1 | A2) & 0xF8000000) {
+ A1 = av_clip_uintp2(A1, 27);
+ A2 = av_clip_uintp2(A2, 27);
}
+ A1 <<= 3;
+ A2 <<= 3;
}
- r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
- g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
- b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
+ Y1 -= c->yuv2rgb_y_offset;
+ Y2 -= c->yuv2rgb_y_offset;
+ Y1 *= c->yuv2rgb_y_coeff;
+ Y2 *= c->yuv2rgb_y_coeff;
+ Y1 += 1 << 13;
+ Y2 += 1 << 13;
- yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
- r, g, b, y, target, hasAlpha);
+ R = V * c->yuv2rgb_v2r_coeff;
+ G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+ B = U * c->yuv2rgb_u2b_coeff;
+
+ yuv2rgb_write2(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
+ R, G, B, y, target, hasAlpha, isBGR);
+ }
+ if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+ uint16_t *dest16 = (uint16_t*)dest;
+ if (6-1 > desc->comp[0].depth_minus1 && desc->comp[0].depth_minus1 > 3-1) {
+ for (i = 0; i < dstW; i++) {
+ dest16[i] = av_bswap16(dest16[i]);
+ }
+ }
}
}
--
1.7.9.5
More information about the ffmpeg-devel mailing list