[FFmpeg-cvslog] lavu/tx: generalize MDCTs

Lynne git at videolan.org
Sat Sep 10 03:37:44 EEST 2022


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat Sep 10 02:28:10 2022 +0200| [51172223fd1a5b71b46fc0d398f4fdc9ed081b83] | committer: Lynne

lavu/tx: generalize MDCTs

The same code can perform any-length MDCTs with minimal changes.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=51172223fd1a5b71b46fc0d398f4fdc9ed081b83
---

 libavutil/tx_template.c | 75 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 29 deletions(-)

diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 542c15e480..1d4c4d294b 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -940,12 +940,12 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
     .prio       = FF_TX_PRIO_MIN,
 };
 
-static av_cold int TX_NAME(ff_tx_mdct_sr_init)(AVTXContext *s,
-                                               const FFTXCodelet *cd,
-                                               uint64_t flags,
-                                               FFTXCodeletOptions *opts,
-                                               int len, int inv,
-                                               const void *scale)
+static av_cold int TX_NAME(ff_tx_mdct_init)(AVTXContext *s,
+                                            const FFTXCodelet *cd,
+                                            uint64_t flags,
+                                            FFTXCodeletOptions *opts,
+                                            int len, int inv,
+                                            const void *scale)
 {
     int ret;
     FFTXCodeletOptions sub_opts = { .invert_lookup = inv };
@@ -955,32 +955,49 @@ static av_cold int TX_NAME(ff_tx_mdct_sr_init)(AVTXContext *s,
 
     flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */
     flags |=  AV_TX_INPLACE;      /* in-place */
-    flags |=  FF_TX_PRESHUFFLE;   /* This function handles the permute step */
+    flags |=  FF_TX_PRESHUFFLE;   /* First try with an in-place transform */
 
     if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1,
-                                inv, scale)))
-        return ret;
+                                inv, scale))) {
+        flags &= ~FF_TX_PRESHUFFLE; /* Now try with a generic FFT */
+        if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1,
+                                    inv, scale)))
+            return ret;
+    }
+
+    /* If we need to preshuffle just steal the map from the subcontext */
+    if (s->sub[0].flags & FF_TX_PRESHUFFLE) {
+        s->map = s->sub[0].map;
+        s->sub[0].map = NULL;
+    } else {
+        s->map = av_malloc((len >> 1)*sizeof(*s->map));
+        if (!s->map)
+            return AVERROR(ENOMEM);
+
+        for (int i = 0; i < len >> 1; i++)
+            s->map[i] = i;
+    }
 
-    if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->sub->map : NULL)))
+    if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->map : NULL)))
         return ret;
 
     /* Saves a multiply in a hot path. */
     if (inv)
         for (int i = 0; i < (s->len >> 1); i++)
-            s->sub->map[i] <<= 1;
+            s->map[i] <<= 1;
 
     return 0;
 }
 
-static void TX_NAME(ff_tx_mdct_sr_fwd)(AVTXContext *s, void *_dst, void *_src,
-                                       ptrdiff_t stride)
+static void TX_NAME(ff_tx_mdct_fwd)(AVTXContext *s, void *_dst, void *_src,
+                                    ptrdiff_t stride)
 {
     TXSample *src = _src, *dst = _dst;
     TXComplex *exp = s->exp, tmp, *z = _dst;
     const int len2 = s->len >> 1;
     const int len4 = s->len >> 2;
     const int len3 = len2 * 3;
-    const int *sub_map = s->sub->map;
+    const int *sub_map = s->map;
 
     stride /= sizeof(*dst);
 
@@ -1011,14 +1028,14 @@ static void TX_NAME(ff_tx_mdct_sr_fwd)(AVTXContext *s, void *_dst, void *_src,
     }
 }
 
-static void TX_NAME(ff_tx_mdct_sr_inv)(AVTXContext *s, void *_dst, void *_src,
-                                       ptrdiff_t stride)
+static void TX_NAME(ff_tx_mdct_inv)(AVTXContext *s, void *_dst, void *_src,
+                                    ptrdiff_t stride)
 {
     TXComplex *z = _dst, *exp = s->exp;
     const TXSample *src = _src, *in1, *in2;
     const int len2 = s->len >> 1;
     const int len4 = s->len >> 2;
-    const int *sub_map = s->sub->map;
+    const int *sub_map = s->map;
 
     stride /= sizeof(*src);
     in1 = src;
@@ -1043,28 +1060,28 @@ static void TX_NAME(ff_tx_mdct_sr_inv)(AVTXContext *s, void *_dst, void *_src,
     }
 }
 
-static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_fwd_def) = {
-    .name       = TX_NAME_STR("mdct_sr_fwd"),
-    .function   = TX_NAME(ff_tx_mdct_sr_fwd),
+static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = {
+    .name       = TX_NAME_STR("mdct_fwd"),
+    .function   = TX_NAME(ff_tx_mdct_fwd),
     .type       = TX_TYPE(MDCT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
-    .factors[0] = 2,
+    .factors    = { 2, TX_FACTOR_ANY },
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
-    .init       = TX_NAME(ff_tx_mdct_sr_init),
+    .init       = TX_NAME(ff_tx_mdct_init),
     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
     .prio       = FF_TX_PRIO_BASE,
 };
 
-static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_inv_def) = {
-    .name       = TX_NAME_STR("mdct_sr_inv"),
-    .function   = TX_NAME(ff_tx_mdct_sr_inv),
+static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = {
+    .name       = TX_NAME_STR("mdct_inv"),
+    .function   = TX_NAME(ff_tx_mdct_inv),
     .type       = TX_TYPE(MDCT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
-    .factors[0] = 2,
+    .factors    = { 2, TX_FACTOR_ANY },
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
-    .init       = TX_NAME(ff_tx_mdct_sr_init),
+    .init       = TX_NAME(ff_tx_mdct_init),
     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
     .prio       = FF_TX_PRIO_BASE,
 };
@@ -1477,8 +1494,8 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
     &TX_NAME(ff_tx_fft_pfa_9xM_def),
     &TX_NAME(ff_tx_fft_pfa_15xM_def),
     &TX_NAME(ff_tx_fft_naive_def),
-    &TX_NAME(ff_tx_mdct_sr_fwd_def),
-    &TX_NAME(ff_tx_mdct_sr_inv_def),
+    &TX_NAME(ff_tx_mdct_fwd_def),
+    &TX_NAME(ff_tx_mdct_inv_def),
     &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
     &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
     &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),



More information about the ffmpeg-cvslog mailing list