[FFmpeg-cvslog] lavu/tx: add fft_inplace_small transforms

Lynne git at videolan.org
Thu Nov 24 16:59:24 EET 2022


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Thu Nov 17 20:10:45 2022 +0100| [68cabf875015610decda7e564dc5697f6c21f707] | committer: Lynne

lavu/tx: add fft_inplace_small transforms

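This is much faster than the loop: for in-place transforms of up to 65536
points, the permuted input is first gathered into a temporary buffer (s->tmp)
and the sub-transform then runs from that buffer into the destination.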

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=68cabf875015610decda7e564dc5697f6c21f707
---

 libavutil/tx_template.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 5274133ec4..747731a06d 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -754,20 +754,34 @@ static av_cold int TX_NAME(ff_tx_fft_init)(AVTXContext *s,
     return 0;
 }
 
+static av_cold int TX_NAME(ff_tx_fft_inplace_small_init)(AVTXContext *s,
+                                                         const FFTXCodelet *cd,
+                                                         uint64_t flags,
+                                                         FFTXCodeletOptions *opts,
+                                                         int len, int inv,
+                                                         const void *scale)
+{
+    if (!(s->tmp = av_malloc(len*sizeof(*s->tmp))))
+        return AVERROR(ENOMEM);
+    flags &= ~AV_TX_INPLACE;
+    return TX_NAME(ff_tx_fft_init)(s, cd, flags, opts, len, inv, scale);
+}
+
 static void TX_NAME(ff_tx_fft)(AVTXContext *s, void *_dst,
                                void *_src, ptrdiff_t stride)
 {
     TXComplex *src = _src;
-    TXComplex *dst = _dst;
+    TXComplex *dst1 = s->flags & AV_TX_INPLACE ? s->tmp : _dst;
+    TXComplex *dst2 = _dst;
     int *map = s->sub[0].map;
     int len = s->len;
 
     /* Compilers can't vectorize this anyway without assuming AVX2, which they
      * generally don't, at least without -march=native -mtune=native */
     for (int i = 0; i < len; i++)
-        dst[i] = src[map[i]];
+        dst1[i] = src[map[i]];
 
-    s->fn[0](&s->sub[0], dst, dst, stride);
+    s->fn[0](&s->sub[0], dst2, dst1, stride);
 }
 
 static void TX_NAME(ff_tx_fft_inplace)(AVTXContext *s, void *_dst,
@@ -807,6 +821,19 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
     .prio       = FF_TX_PRIO_BASE,
 };
 
+static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = {
+    .name       = TX_NAME_STR("fft_inplace_small"),
+    .function   = TX_NAME(ff_tx_fft),
+    .type       = TX_TYPE(FFT),
+    .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
+    .factors[0] = TX_FACTOR_ANY,
+    .min_len    = 2,
+    .max_len    = 65536,
+    .init       = TX_NAME(ff_tx_fft_inplace_small_init),
+    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
+    .prio       = FF_TX_PRIO_BASE - 256,
+};
+
 static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
     .name       = TX_NAME_STR("fft_inplace"),
     .function   = TX_NAME(ff_tx_fft_inplace),
@@ -1638,6 +1665,7 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
     /* Standalone transforms */
     &TX_NAME(ff_tx_fft_def),
     &TX_NAME(ff_tx_fft_inplace_def),
+    &TX_NAME(ff_tx_fft_inplace_small_def),
     &TX_NAME(ff_tx_fft_pfa_3xM_def),
     &TX_NAME(ff_tx_fft_pfa_5xM_def),
     &TX_NAME(ff_tx_fft_pfa_7xM_def),



More information about the ffmpeg-cvslog mailing list