[FFmpeg-cvslog] avfilter/x86/vf_gblur: add postscale SIMD
Paul B Mahol
git at videolan.org
Tue Feb 16 22:14:48 EET 2021
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sat Feb 13 12:09:47 2021 +0100| [44cf3a2b16324c2a04545a6b7304acd77e5cf24a] | committer: Paul B Mahol
avfilter/x86/vf_gblur: add postscale SIMD
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=44cf3a2b16324c2a04545a6b7304acd77e5cf24a
---
libavfilter/vf_gblur.c | 13 ++++++-----
libavfilter/x86/vf_gblur.asm | 49 +++++++++++++++++++++++++++++++++++++++++
libavfilter/x86/vf_gblur_init.c | 17 +++++++++++---
3 files changed, 70 insertions(+), 9 deletions(-)
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 70e2a668b4..109a7a95f9 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -171,13 +171,14 @@ static int filter_postscale(AVFilterContext *ctx, void *arg, int jobnr, int nb_j
const float min = s->flt ? -FLT_MAX : 0.f;
const int height = td->height;
const int width = td->width;
- const int64_t numpixels = width * (int64_t)height;
- const int slice_start = (numpixels * jobnr ) / nb_jobs;
- const int slice_end = (numpixels * (jobnr+1)) / nb_jobs;
+ const int awidth = FFALIGN(width, 64);
+ const int slice_start = (height * jobnr ) / nb_jobs;
+ const int slice_end = (height * (jobnr+1)) / nb_jobs;
const float postscale = s->postscale * s->postscaleV;
- float *buffer = s->buffer + slice_start;
+ const int slice_size = slice_end - slice_start;
- s->postscale_slice(buffer, slice_end - slice_start, postscale, min, max);
+ s->postscale_slice(s->buffer + slice_start * awidth,
+ slice_size * awidth, postscale, min, max);
return 0;
}
@@ -251,7 +252,7 @@ static int config_input(AVFilterLink *inlink)
s->nb_planes = av_pix_fmt_count_planes(inlink->format);
- s->buffer = av_malloc_array(FFALIGN(inlink->w, 16), FFALIGN(inlink->h, 16) * sizeof(*s->buffer));
+ s->buffer = av_malloc_array(FFALIGN(inlink->w, 64), FFALIGN(inlink->h, 64) * sizeof(*s->buffer));
if (!s->buffer)
return AVERROR(ENOMEM);
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index a25b1659f5..c29ecba889 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -183,3 +183,52 @@ HORIZ_SLICE
INIT_XMM avx2
HORIZ_SLICE
%endif
+
+%macro POSTSCALE_SLICE 0
+%if UNIX64
+cglobal postscale_slice, 2, 2, 4, ptr, length
+%else
+cglobal postscale_slice, 5, 5, 4, ptr, length, postscale, min, max
+%endif
+ shl lengthd, 2
+ add ptrq, lengthq
+ neg lengthq
+%if WIN64
+ SWAP 0, 2
+ SWAP 1, 3
+ SWAP 2, 4
+%endif
+%if cpuflag(avx2)
+ vbroadcastss m0, xm0
+ vbroadcastss m1, xm1
+ vbroadcastss m2, xm2
+%else
+ shufps xm0, xm0, 0
+ shufps xm1, xm1, 0
+ shufps xm2, xm2, 0
+%endif
+
+ .loop:
+%if cpuflag(avx2)
+ mulps m3, m0, [ptrq + lengthq]
+%else
+ movu m3, [ptrq + lengthq]
+ mulps m3, m0
+%endif
+ maxps m3, m1
+ minps m3, m2
+ movu [ptrq+lengthq], m3
+
+ add lengthq, mmsize
+ jl .loop
+
+ RET
+%endmacro
+
+INIT_XMM sse
+POSTSCALE_SLICE
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+POSTSCALE_SLICE
+%endif
diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
index e63e59fe23..d80fb46fe4 100644
--- a/libavfilter/x86/vf_gblur_init.c
+++ b/libavfilter/x86/vf_gblur_init.c
@@ -27,14 +27,25 @@
void ff_horiz_slice_sse4(float *ptr, int width, int height, int steps, float nu, float bscale);
void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, float bscale);
+void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
+void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
+
av_cold void ff_gblur_init_x86(GBlurContext *s)
{
-#if ARCH_X86_64
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_SSE4(cpu_flags))
+ if (EXTERNAL_SSE(cpu_flags)) {
+ s->postscale_slice = ff_postscale_slice_sse;
+ }
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ s->postscale_slice = ff_postscale_slice_avx2;
+ }
+#if ARCH_X86_64
+ if (EXTERNAL_SSE4(cpu_flags)) {
s->horiz_slice = ff_horiz_slice_sse4;
- if (EXTERNAL_AVX2(cpu_flags))
+ }
+ if (EXTERNAL_AVX2(cpu_flags)) {
s->horiz_slice = ff_horiz_slice_avx2;
+ }
#endif
}
More information about the ffmpeg-cvslog mailing list