[FFmpeg-devel] [PATCH] avfilter/vf_ssim: improve precision
Paul B Mahol
onemda at gmail.com
Fri Jan 31 11:04:54 EET 2020
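Accumulate the SSIM sums in double precision instead of single-precision float, to reduce rounding error when many values are added together.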
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavfilter/ssim.h | 2 +-
libavfilter/vf_ssim.c | 18 +++++------
libavfilter/x86/vf_ssim.asm | 36 ++++++++++++++--------
libavfilter/x86/vf_ssim_init.c | 2 +-
tests/ref/fate/filter-refcmp-ssim-rgb | 44 +++++++++++++--------------
tests/ref/fate/filter-refcmp-ssim-yuv | 26 ++++++++--------
6 files changed, 70 insertions(+), 58 deletions(-)
diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h
index ac0395a22a..a6a41aabe6 100644
--- a/libavfilter/ssim.h
+++ b/libavfilter/ssim.h
@@ -28,7 +28,7 @@ typedef struct SSIMDSPContext {
void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
int (*sums)[4], int w);
- float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w);
+ double (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w);
} SSIMDSPContext;
void ff_ssim_init_x86(SSIMDSPContext *dsp);
diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c
index c08fbcdcc2..17dce8e8e8 100644
--- a/libavfilter/vf_ssim.c
+++ b/libavfilter/vf_ssim.c
@@ -55,13 +55,13 @@ typedef struct SSIMContext {
uint64_t nb_frames;
double ssim[4], ssim_total;
char comps[4];
- float coefs[4];
+ double coefs[4];
uint8_t rgba_map[4];
int planewidth[4];
int planeheight[4];
int *temp;
int is_rgb;
- float (*ssim_plane)(SSIMDSPContext *dsp,
+ double (*ssim_plane)(SSIMDSPContext *dsp,
uint8_t *main, int main_stride,
uint8_t *ref, int ref_stride,
int width, int height, void *temp,
@@ -206,9 +206,9 @@ static float ssim_endn_16bit(const int64_t (*sum0)[4], const int64_t (*sum1)[4],
return ssim;
}
-static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width)
+static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width)
{
- float ssim = 0.0;
+ double ssim = 0.0;
int i;
for (i = 0; i < width; i++)
@@ -221,14 +221,14 @@ static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int widt
#define SUM_LEN(w) (((w) >> 2) + 3)
-static float ssim_plane_16bit(SSIMDSPContext *dsp,
+static double ssim_plane_16bit(SSIMDSPContext *dsp,
uint8_t *main, int main_stride,
uint8_t *ref, int ref_stride,
int width, int height, void *temp,
int max)
{
int z = 0, y;
- float ssim = 0.0;
+ double ssim = 0.0;
int64_t (*sum0)[4] = temp;
int64_t (*sum1)[4] = sum0 + SUM_LEN(width);
@@ -249,14 +249,14 @@ static float ssim_plane_16bit(SSIMDSPContext *dsp,
return ssim / ((height - 1) * (width - 1));
}
-static float ssim_plane(SSIMDSPContext *dsp,
+static double ssim_plane(SSIMDSPContext *dsp,
uint8_t *main, int main_stride,
uint8_t *ref, int ref_stride,
int width, int height, void *temp,
int max)
{
int z = 0, y;
- float ssim = 0.0;
+ double ssim = 0.0;
int (*sum0)[4] = temp;
int (*sum1)[4] = sum0 + SUM_LEN(width);
@@ -288,7 +288,7 @@ static int do_ssim(FFFrameSync *fs)
SSIMContext *s = ctx->priv;
AVFrame *master, *ref;
AVDictionary **metadata;
- float c[4], ssimv = 0.0;
+ double c[4], ssimv = 0.0;
int ret, i;
ret = ff_framesync_dualinput_get(fs, &master, &ref);
diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm
index 3293e66701..4cd6293b59 100644
--- a/libavfilter/x86/vf_ssim.asm
+++ b/libavfilter/x86/vf_ssim.asm
@@ -169,8 +169,9 @@ SSIM_4X4_LINE 8
%endif
INIT_XMM sse4
-cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
+cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w
pxor m0, m0
+ pxor m6, m6
.loop:
mova m1, [sum0q+mmsize*0]
mova m2, [sum0q+mmsize*1]
@@ -214,34 +215,45 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
mulps m4, m5
mulps m3, m1
divps m4, m3 ; ssim_endl
- addps m0, m4 ; ssim
+ mova m5, m4
+ cvtps2pd m3, m5
+ movhlps m5, m5
+ cvtps2pd m5, m5
+ addpd m0, m3 ; ssim
+ addpd m6, m5 ; ssim
add sum0q, mmsize*4
add sum1q, mmsize*4
sub wd, 4
jg .loop
- ; subps the ones we added too much
+ ; subpd the ones we added too much
test wd, wd
jz .end
add wd, 4
+ test wd, 3
+ jz .skip3
test wd, 2
jz .skip2
- psrldq m4, 8
-.skip2:
test wd, 1
jz .skip1
- psrldq m4, 4
+.skip3:
+ psrldq m5, 8
+ subpd m6, m5
+ jmp .end
+.skip2:
+ psrldq m3, 8
+ subpd m0, m3
+ jmp .end
.skip1:
- subps m0, m4
+ psrldq m5, 8
+ subpd m6, m5
.end:
+ addpd m0, m6
movhlps m4, m0
- addps m0, m4
- movss m4, m0
- shufps m0, m0, 1
- addss m0, m4
+ addpd m0, m4
%if ARCH_X86_32
- movss r0m, m0
+ movsd r0m, m0
fld r0mp
%endif
RET
diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c
index 599c928403..cbaa20ef16 100644
--- a/libavfilter/x86/vf_ssim_init.c
+++ b/libavfilter/x86/vf_ssim_init.c
@@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride,
void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
int (*sums)[4], int w);
-float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
+double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
void ff_ssim_init_x86(SSIMDSPContext *dsp)
{
diff --git a/tests/ref/fate/filter-refcmp-ssim-rgb b/tests/ref/fate/filter-refcmp-ssim-rgb
index 8c23c60b37..cb3b5f2e12 100644
--- a/tests/ref/fate/filter-refcmp-ssim-rgb
+++ b/tests/ref/fate/filter-refcmp-ssim-rgb
@@ -1,30 +1,30 @@
frame:0 pts:0 pts_time:0
+lavfi.ssim.R=0.73
+lavfi.ssim.G=0.77
+lavfi.ssim.B=0.90
+lavfi.ssim.All=0.80
+lavfi.ssim.dB=7.01
+frame:1 pts:1 pts_time:1
lavfi.ssim.R=0.72
lavfi.ssim.G=0.76
-lavfi.ssim.B=0.89
-lavfi.ssim.All=0.79
-lavfi.ssim.dB=6.74
-frame:1 pts:1 pts_time:1
-lavfi.ssim.R=0.70
-lavfi.ssim.G=0.74
-lavfi.ssim.B=0.85
-lavfi.ssim.All=0.77
-lavfi.ssim.dB=6.31
+lavfi.ssim.B=0.86
+lavfi.ssim.All=0.78
+lavfi.ssim.dB=6.56
frame:2 pts:2 pts_time:2
-lavfi.ssim.R=0.71
+lavfi.ssim.R=0.72
+lavfi.ssim.G=0.76
+lavfi.ssim.B=0.85
+lavfi.ssim.All=0.78
+lavfi.ssim.dB=6.53
+frame:3 pts:3 pts_time:3
+lavfi.ssim.R=0.72
lavfi.ssim.G=0.75
lavfi.ssim.B=0.84
+lavfi.ssim.All=0.77
+lavfi.ssim.dB=6.35
+frame:4 pts:4 pts_time:4
+lavfi.ssim.R=0.72
+lavfi.ssim.G=0.75
+lavfi.ssim.B=0.82
lavfi.ssim.All=0.76
lavfi.ssim.dB=6.29
-frame:3 pts:3 pts_time:3
-lavfi.ssim.R=0.70
-lavfi.ssim.G=0.73
-lavfi.ssim.B=0.83
-lavfi.ssim.All=0.76
-lavfi.ssim.dB=6.11
-frame:4 pts:4 pts_time:4
-lavfi.ssim.R=0.71
-lavfi.ssim.G=0.74
-lavfi.ssim.B=0.80
-lavfi.ssim.All=0.75
-lavfi.ssim.dB=6.05
diff --git a/tests/ref/fate/filter-refcmp-ssim-yuv b/tests/ref/fate/filter-refcmp-ssim-yuv
index 5c8ffb9483..209c8bd600 100644
--- a/tests/ref/fate/filter-refcmp-ssim-yuv
+++ b/tests/ref/fate/filter-refcmp-ssim-yuv
@@ -1,30 +1,30 @@
frame:0 pts:0 pts_time:0
-lavfi.ssim.Y=0.80
+lavfi.ssim.Y=0.82
lavfi.ssim.U=0.76
lavfi.ssim.V=0.69
-lavfi.ssim.All=0.76
-lavfi.ssim.dB=6.25
+lavfi.ssim.All=0.77
+lavfi.ssim.dB=6.37
frame:1 pts:1 pts_time:1
-lavfi.ssim.Y=0.80
+lavfi.ssim.Y=0.81
lavfi.ssim.U=0.73
lavfi.ssim.V=0.68
-lavfi.ssim.All=0.75
-lavfi.ssim.dB=6.08
+lavfi.ssim.All=0.76
+lavfi.ssim.dB=6.20
frame:2 pts:2 pts_time:2
-lavfi.ssim.Y=0.80
+lavfi.ssim.Y=0.82
lavfi.ssim.U=0.73
lavfi.ssim.V=0.68
-lavfi.ssim.All=0.75
-lavfi.ssim.dB=6.10
+lavfi.ssim.All=0.76
+lavfi.ssim.dB=6.22
frame:3 pts:3 pts_time:3
-lavfi.ssim.Y=0.79
+lavfi.ssim.Y=0.81
lavfi.ssim.U=0.72
lavfi.ssim.V=0.68
lavfi.ssim.All=0.75
-lavfi.ssim.dB=5.94
+lavfi.ssim.dB=6.06
frame:4 pts:4 pts_time:4
-lavfi.ssim.Y=0.80
+lavfi.ssim.Y=0.81
lavfi.ssim.U=0.72
lavfi.ssim.V=0.68
lavfi.ssim.All=0.75
-lavfi.ssim.dB=5.97
+lavfi.ssim.dB=6.05
--
2.17.1
More information about the ffmpeg-devel
mailing list