[FFmpeg-cvslog] lavu/tx: add length decomposition function

Lynne git at videolan.org
Thu Nov 24 16:59:48 EET 2022


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat Nov 19 00:58:13 2022 +0100| [7f019e77586ab62d37856d09071fcdeef880bcd9] | committer: Lynne

lavu/tx: add length decomposition function

Rather than using a list of lengths supported, this goes a step beyond
and uses all registered codelets to come up with a good decomposition.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7f019e77586ab62d37856d09071fcdeef880bcd9
---

 libavutil/tx.c      | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 libavutil/tx_priv.h |   8 +++
 2 files changed, 151 insertions(+)

diff --git a/libavutil/tx.c b/libavutil/tx.c
index 8027e983ba..c692239656 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -17,6 +17,7 @@
  */
 
 #include "avassert.h"
+#include "intmath.h"
 #include "cpu.h"
 #include "qsort.h"
 #include "bprint.h"
@@ -395,6 +396,148 @@ static int get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
     return prio;
 }
 
+typedef struct FFTXLenDecomp {
+    int len;
+    int len2;
+    int prio;
+    const FFTXCodelet *cd;
+} FFTXLenDecomp;
+
+static int cmp_decomp(FFTXLenDecomp *a, FFTXLenDecomp *b)
+{
+    return FFDIFFSIGN(b->prio, a->prio);
+}
+
+int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
+                           int len, int inv)
+{
+    int nb_decomp = 0;
+    FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS];
+    int codelet_list_idx = codelet_list_num;
+
+    const int cpu_flags = av_get_cpu_flags();
+
+    /* Loop through all codelets in all codelet lists to find matches
+     * to the requirements */
+    while (codelet_list_idx--) {
+        const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
+        const FFTXCodelet *cd = NULL;
+
+        while ((cd = *list++)) {
+            int fl = len;
+            int skip = 0, prio;
+            int factors_product = 1, factors_mod = 0;
+
+            if (nb_decomp >= TX_MAX_DECOMPOSITIONS)
+                goto sort;
+
+            /* Check if the type matches */
+            if (cd->type != TX_TYPE_ANY && type != cd->type)
+                continue;
+
+            /* Check direction for non-orthogonal codelets */
+            if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
+                ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+                continue;
+
+            /* Check if the CPU supports the required ISA */
+            if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
+                !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
+                continue;
+
+            for (int i = 0; i < TX_MAX_FACTORS; i++) {
+                if (!cd->factors[i] || (fl == 1))
+                    break;
+
+                if (cd->factors[i] == TX_FACTOR_ANY) {
+                    factors_mod++;
+                    factors_product *= fl;
+                } else if (!(fl % cd->factors[i])) {
+                    factors_mod++;
+                    if (cd->factors[i] == 2) {
+                        int b = ff_ctz(fl);
+                        fl >>= b;
+                        factors_product <<= b;
+                    } else {
+                        do {
+                            fl /= cd->factors[i];
+                            factors_product *= cd->factors[i];
+                        } while (!(fl % cd->factors[i]));
+                    }
+                }
+            }
+
+            /* Disqualify if factor requirements are not satisfied or if trivial */
+            if ((factors_mod < cd->nb_factors) || (len == factors_product))
+                continue;
+
+            if (av_gcd(factors_product, fl) != 1)
+                continue;
+
+            /* Check if length is supported and factorization was successful */
+            if ((factors_product < cd->min_len) ||
+                (cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len)))
+                continue;
+
+            prio = get_codelet_prio(cd, cpu_flags, factors_product) * factors_product;
+
+            /* Check for duplicates */
+            for (int i = 0; i < nb_decomp; i++) {
+                if (factors_product == ld[i].len) {
+                    /* Update priority if new one is higher */
+                    if (prio > ld[i].prio)
+                        ld[i].prio = prio;
+                    skip = 1;
+                    break;
+                }
+            }
+
+            /* Add decomposition if unique */
+            if (!skip) {
+                ld[nb_decomp].cd = cd;
+                ld[nb_decomp].len = factors_product;
+                ld[nb_decomp].len2 = fl;
+                ld[nb_decomp].prio = prio;
+                nb_decomp++;
+            }
+        }
+    }
+
+    if (!nb_decomp)
+        return AVERROR(EINVAL);
+
+sort:
+    AV_QSORT(ld, nb_decomp, FFTXLenDecomp, cmp_decomp);
+
+    for (int i = 0; i < nb_decomp; i++) {
+        if (ld[i].cd->nb_factors > 1)
+            dst[i] = ld[i].len2;
+        else
+            dst[i] = ld[i].len;
+    }
+
+    return nb_decomp;
+}
+
+int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
+{
+    s->map = av_malloc(s->len*sizeof(*s->map));
+    if (!s->map)
+        return AVERROR(ENOMEM);
+
+    s->map[0] = 0; /* DC is always at the start */
+    if (s->inv) /* Reversing the ACs flips the transform direction */
+        for (int i = 1; i < s->len; i++)
+            s->map[i] = s->len - i;
+    else
+        for (int i = 1; i < s->len; i++)
+            s->map[i] = i;
+
+    s->map_dir = FF_TX_MAP_GATHER;
+
+    return 0;
+}
+
 #if !CONFIG_SMALL
 static void print_flags(AVBPrint *bp, uint64_t f)
 {
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index 207f79dfb8..f11061d051 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -183,6 +183,9 @@ typedef struct FFTXCodeletOptions {
 /* Maximum amount of subtransform functions, subtransforms and factors. Arbitrary. */
 #define TX_MAX_SUB 4
 
+/* Maximum number of returned results for ff_tx_decompose_length. Arbitrary. */
+#define TX_MAX_DECOMPOSITIONS 512
+
 typedef struct FFTXCodelet {
     const char    *name;          /* Codelet name, for debugging */
     av_tx_fn       function;      /* Codelet function, != NULL */
@@ -284,6 +287,11 @@ int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
 /* Clear the context by freeing all tables, maps and subtransforms. */
 void ff_tx_clear_ctx(AVTXContext *s);
 
+/* Attempt to factorize a length into 2 integers such that
+ * len / dst1 == dst2, where dst1 and dst2 are coprime. */
+int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
+                           int len, int inv);
+
 /* Generate a default map (0->len or 0, (len-1)->1 for inverse transforms)
  * for a context. */
 int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts);



More information about the ffmpeg-cvslog mailing list