[FFmpeg-devel] [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for dst_depth == 8
Mateusz
mateuszb at poczta.onet.pl
Wed Sep 27 13:04:21 EEST 2017
W dniu 2017-09-26 o 13:31, Carl Eugen Hoyos pisze:
> 2017-09-26 1:33 GMT+02:00 Mateusz <mateuszb at poczta.onet.pl>:
>
>> I sent a C code patch on 2017-09-06 (and got no response), so I thought that the
>> problem is with speed. For simplicity I've attached this patch.
>
> You could (wait a day or two and) either add an option to
> select your dithering code or put it under #ifdef so more
> people can test it.
I've attached a patch that does nothing unless you specify
--extra-cflags="-DNEW_DITHER_COPY" or export CFLAGS="-DNEW_DITHER_COPY"
>> In theory it is enough to make only dst = (src + dither)>>shift;
>> -- white in limited range has 0 on bits to remove (235*4 for example)
>> so overflow is impossible. For files with full range not marked as
>> full range overflow is possible (for dither > 0) and white goes
>> to black. tmp - (tmp>>dst_depth) undoes this overflow.
>
> (Not necessarily related, sorry if I misunderstand:)
> Valid limited-range frames can contain some pixels with peak
> values outside of the defined range.
>
> Carl Eugen
OK, so this fight with possible overflow is even more needed.
Mateusz
-------------- next part --------------
From 2c0adb2d9a0fc0fbbffc643d27860fbb779c08fc Mon Sep 17 00:00:00 2001
From: Mateusz <mateuszb at poczta.onet.pl>
Date: Tue, 26 Sep 2017 22:20:10 +0200
Subject: [PATCH] swscale: new precise DITHER_COPY macro (if
"-DNEW_DITHER_COPY" CFLAGS)
---
libswscale/swscale_unscaled.c | 47 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 46 insertions(+), 1 deletion(-)
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..0d41695 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,6 +110,7 @@ DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
{ 112, 16,104, 8,118, 22,110, 14,},
}};
+#ifndef NEW_DITHER_COPY
static const uint16_t dither_scale[15][16]={
{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
@@ -127,7 +128,7 @@ static const uint16_t dither_scale[15][16]={
{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
};
-
+#endif
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
uint8_t val)
@@ -1501,6 +1502,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
return srcSliceH;
}
+#ifndef NEW_DITHER_COPY
#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
@@ -1521,6 +1523,49 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
dst += dstStride;\
src += srcStride;\
}
+#else
+#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)/* uses i, j, height, length, src_depth, dst_depth and shiftonly from the expanding scope */\
+ unsigned shift= src_depth-dst_depth, tmp;/* shift: count of low bits removed from each sample */\
+ if (shiftonly) {/* add dither, shift, then correct the shifted value */\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];/* dither row picked by shift and by line index mod 8 */\
+ for (j = 0; j < length-7; j+=8){/* 8 samples per iteration */\
+ tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp - (tmp>>dst_depth));/* if tmp reached 1<<dst_depth, tmp>>dst_depth is 1 and the result is (1<<dst_depth)-1 */\
+ tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = dbswap(tmp - (tmp>>dst_depth));\
+ }\
+ for (; j < length; j++){/* samples left over after the 8-wide loop */\
+ tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth));\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ } else {/* subtract tmp>>dst_depth from the source sample before adding dither and shifting */\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];\
+ for (j = 0; j < length-7; j+=8){\
+ tmp = bswap(src[j+0]); dst[j+0] = dbswap((tmp - (tmp>>dst_depth) + dither[0])>>shift);\
+ tmp = bswap(src[j+1]); dst[j+1] = dbswap((tmp - (tmp>>dst_depth) + dither[1])>>shift);\
+ tmp = bswap(src[j+2]); dst[j+2] = dbswap((tmp - (tmp>>dst_depth) + dither[2])>>shift);\
+ tmp = bswap(src[j+3]); dst[j+3] = dbswap((tmp - (tmp>>dst_depth) + dither[3])>>shift);\
+ tmp = bswap(src[j+4]); dst[j+4] = dbswap((tmp - (tmp>>dst_depth) + dither[4])>>shift);\
+ tmp = bswap(src[j+5]); dst[j+5] = dbswap((tmp - (tmp>>dst_depth) + dither[5])>>shift);\
+ tmp = bswap(src[j+6]); dst[j+6] = dbswap((tmp - (tmp>>dst_depth) + dither[6])>>shift);\
+ tmp = bswap(src[j+7]); dst[j+7] = dbswap((tmp - (tmp>>dst_depth) + dither[7])>>shift);\
+ }\
+ for (; j < length; j++){\
+ tmp = bswap(src[j]); dst[j] = dbswap((tmp - (tmp>>dst_depth) + dither[j&7])>>shift);\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ }
+#endif
static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
--
2.7.4
More information about the ffmpeg-devel
mailing list