[FFmpeg-devel] [PATCH 4/9] simple_idct10: improve precision
Christophe Gisquet
christophe.gisquet at gmail.com
Mon Oct 12 19:37:45 CEST 2015
omse goes from 0.03060703 (which fails for dct-test) to 0.01663750.
This also actually improves the error, relative to FAANI, of decoding the
sample generated by fate-vsynth3-dnxhd1080i-10bit using simple_idct10, which
goes (when resampled to yuv422p) from:
stddev: 0.06 PSNR: 72.28 MAXDIFF: 1
to identical.
---
libavcodec/simple_idct.c | 9 ++++--
libavcodec/simple_idct_template.c | 45 ++++++++++++++++++---------
tests/ref/fate/dnxhr-444 | 2 +-
tests/ref/vsynth/vsynth1-dnxhd-720p-10bit | 2 +-
tests/ref/vsynth/vsynth2-dnxhd-720p-10bit | 2 +-
tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit | 2 +-
6 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index eeb6279..4d6d20d 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -36,6 +36,11 @@
#define BIT_DEPTH 10
#include "simple_idct_template.c"
+
+#define EXTRA_SHIFT 2
+#include "simple_idct_template.c"
+
+#undef EXTRA_SHIFT
#undef BIT_DEPTH
#define BIT_DEPTH 12
@@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat)
block[i] *= qmat[i];
for (i = 0; i < 8; i++)
- idctRowCondDC_10(block + i*8, 2);
+ idctRowCondDC_extrashift_10(block + i*8, 2);
for (i = 0; i < 8; i++) {
block[i] += 8192;
- idctSparseCol_10(block + i);
+ idctSparseCol_extrashift_10(block + i);
}
}
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 789db8d..0585679 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -66,19 +66,26 @@
#elif BIT_DEPTH == 10 || BIT_DEPTH == 12
-#if BIT_DEPTH == 10
-#define W1 (22725*4) // 90901
-#define W2 (21407*4) // 85627
-#define W3 (19265*4) // 77062
-#define W4 (16384*4) // 65535
-#define W5 (12873*4) // 51491
-#define W6 ( 8867*4) // 35468
-#define W7 ( 4520*4) // 18081
-
-#define ROW_SHIFT 15
-#define COL_SHIFT 20
-#define DC_SHIFT 1
-#else
+# if BIT_DEPTH == 10
+#define W1 22725 // 90901
+#define W2 21407 // 85627
+#define W3 19265 // 77062
+#define W4 16384 // 65535
+#define W5 12873 // 51491
+#define W6 8867 // 35468
+#define W7 4520 // 18081
+
+# ifdef EXTRA_SHIFT
+#define ROW_SHIFT 13
+#define COL_SHIFT 18
+#define DC_SHIFT 1
+# else
+#define ROW_SHIFT 12
+#define COL_SHIFT 19
+#define DC_SHIFT 2
+# endif
+
+# else
#define W1 45451
#define W2 42813
#define W3 38531
@@ -90,7 +97,7 @@
#define ROW_SHIFT 16
#define COL_SHIFT 17
#define DC_SHIFT -1
-#endif
+# endif
#define MUL(a, b) ((a) * (b))
#define MAC(a, b, c) ((a) += (b) * (c))
@@ -101,7 +108,11 @@
#endif
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
+#else
static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
+#endif
{
int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
} \
} while (0)
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
+#else
static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
int16_t *col)
{
@@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
}
static inline void FUNC(idctSparseCol)(int16_t *col)
+#endif
{
int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
col[56] = ((a0 - b0) >> COL_SHIFT);
}
+#ifndef EXTRA_SHIFT
void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
{
pixel *dest = (pixel *)dest_;
@@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block)
for (i = 0; i < 8; i++)
FUNC(idctSparseCol)(block + i);
}
+#endif
diff --git a/tests/ref/fate/dnxhr-444 b/tests/ref/fate/dnxhr-444
index 743067d..f9e73c3 100644
--- a/tests/ref/fate/dnxhr-444
+++ b/tests/ref/fate/dnxhr-444
@@ -1,2 +1,2 @@
#tb 0: 1/24
-0, 0, 0, 1, 9665280, 0x238a023e
+0, 0, 0, 1, 9665280, 0x19ef4057
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
index ab58807..dc808f3 100644
--- a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
-3cc84f9e8d2e704475b410de27dd9951 *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
+87f1f0e074466facd3a9922ecc8311db *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
stddev: 6.23 PSNR: 32.23 MAXDIFF: 64 bytes: 7603200/ 760320
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
index 5c21985..0d2068d 100644
--- a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
-a98c4b69d4d036089a455e147d6922a7 *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo
+1e6e1ef90e5c9b16a80acc17fde596ff *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo
stddev: 1.54 PSNR: 44.36 MAXDIFF: 31 bytes: 7603200/ 760320
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
index 1dcadd8..b9c9e03 100644
--- a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
-2b497215c57558910a605ff8c78430d9 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo
+0e9fcec94aeff70bac5dec02cf2391bc *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo
stddev: 1.33 PSNR: 45.61 MAXDIFF: 22 bytes: 7603200/ 760320
--
2.6.0
More information about the ffmpeg-devel
mailing list