[FFmpeg-cvslog] lavu/tx: allow codelets to specify a minimum number of matching factors

Lynne git at videolan.org
Thu Nov 24 16:59:40 EET 2022


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat Oct  1 12:21:28 2022 +0200| [6ddd10c3e2d63d1ad1ea1034b0e3862107a27063] | committer: Lynne

lavu/tx: allow codelets to specify a minimum number of matching factors

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6ddd10c3e2d63d1ad1ea1034b0e3862107a27063
---

 libavutil/tx.c          | 30 +++++++++++++-----------------
 libavutil/tx_priv.h     | 11 +++++++++--
 libavutil/tx_template.c | 18 ++++++++++++++++++
 3 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/libavutil/tx.c b/libavutil/tx.c
index 13fb54f916..a1173f6137 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -409,42 +409,38 @@ static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
 /* We want all factors to completely cover the length */
 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
 {
-    int all_flag = 0;
+    int matches = 0, any_flag = 0;
 
-    for (int i = 0; i < TX_MAX_SUB; i++) {
+    for (int i = 0; i < TX_MAX_FACTORS; i++) {
         int factor = cd->factors[i];
 
-        /* Conditions satisfied */
-        if (len == 1)
-            return 1;
-
-        /* No more factors */
-        if (!factor) {
-            break;
-        } else if (factor == TX_FACTOR_ANY) {
-            all_flag = 1;
+        if (factor == TX_FACTOR_ANY) {
+            any_flag = 1;
+            matches++;
             continue;
-        }
-
-        if (factor == 2) { /* Fast path */
+        } else if (len <= 1 || !factor) {
+            break;
+        } else if (factor == 2) { /* Fast path */
             int bits_2 = ff_ctz(len);
             if (!bits_2)
-                return 0; /* Factor not supported */
+                continue; /* Factor not supported */
 
             len >>= bits_2;
+            matches++;
         } else {
             int res = len % factor;
             if (res)
-                return 0; /* Factor not supported */
+                continue; /* Factor not supported */
 
             while (!res) {
                 len /= factor;
                 res = len % factor;
             }
+            matches++;
         }
     }
 
-    return all_flag || (len == 1);
+    return (cd->nb_factors <= matches) && (any_flag || len == 1);
 }
 
 av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index d9e38ba19b..80d045f6af 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -71,7 +71,8 @@ typedef void TXComplex;
         .function   = TX_FN_NAME(fn, suffix),                                  \
         .type       = TX_TYPE(tx_type),                                        \
         .flags      = FF_TX_ALIGNED | FF_TX_OUT_OF_PLACE | cd_flags,           \
-        .factors    = { f1, f2 },                                              \
+        .factors    = { (f1), (f2) },                                          \
+        .nb_factors = !!(f1) + !!(f2),                                         \
         .min_len    = len_min,                                                 \
         .max_len    = len_max,                                                 \
         .init       = init_fn,                                                 \
@@ -163,6 +164,9 @@ typedef struct FFTXCodeletOptions {
                               invert the lookup direction for the map generated */
 } FFTXCodeletOptions;
 
+/* Maximum number of factors a codelet may have. Arbitrary. */
+#define TX_MAX_FACTORS 16
+
 /* Maximum amount of subtransform functions, subtransforms and factors. Arbitrary. */
 #define TX_MAX_SUB 4
 
@@ -175,13 +179,16 @@ typedef struct FFTXCodelet {
     uint64_t flags;               /* A combination of AVTXFlags and codelet
                                    * flags that describe its properties. */
 
-    int factors[TX_MAX_SUB];      /* Length factors */
+    int factors[TX_MAX_FACTORS];  /* Length factors. MUST be coprime. */
 #define TX_FACTOR_ANY -1          /* When used alone, signals that the codelet
                                    * supports all factors. Otherwise, if other
                                    * factors are present, it signals that whatever
                                    * remains will be supported, as long as the
                                    * other factors are a component of the length */
 
+    int nb_factors;               /* Minimum number of the listed factors that
+                                   * must divide the length. Must not be 0. */
+
     int min_len;                  /* Minimum length of transform, must be >= 1 */
     int max_len;                  /* Maximum length of transform */
 #define TX_LEN_UNLIMITED -1       /* Special length value to permit all lengths */
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 228209521b..c157719d73 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -518,6 +518,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = {                    \
     .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                         \
                   AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,                          \
     .factors[0] = n,                                                           \
+    .nb_factors = 1,                                                           \
     .min_len    = n,                                                           \
     .max_len    = n,                                                           \
     .init       = TX_NAME(ff_tx_fft_factor_init),                              \
@@ -534,6 +535,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = {                   \
     .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                         \
                   AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY,                        \
     .factors[0] = n,                                                           \
+    .nb_factors = 1,                                                           \
     .min_len    = n,                                                           \
     .max_len    = n,                                                           \
     .init       = TX_NAME(ff_tx_fft_factor_init),                              \
@@ -614,6 +616,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
     .flags      = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE |      \
                   AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,       \
     .factors[0] = 2,                                        \
+    .nb_factors = 1,                                        \
     .min_len    = n,                                        \
     .max_len    = n,                                        \
     .init       = TX_NAME(ff_tx_fft_sr_codelet_init),       \
@@ -814,6 +817,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
     .type       = TX_TYPE(FFT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
     .factors[0] = TX_FACTOR_ANY,
+    .nb_factors = 1,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_fft_init),
@@ -827,6 +831,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = {
     .type       = TX_TYPE(FFT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
     .factors[0] = TX_FACTOR_ANY,
+    .nb_factors = 1,
     .min_len    = 2,
     .max_len    = 65536,
     .init       = TX_NAME(ff_tx_fft_inplace_small_init),
@@ -840,6 +845,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
     .type       = TX_TYPE(FFT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
     .factors[0] = TX_FACTOR_ANY,
+    .nb_factors = 1,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_fft_init),
@@ -927,6 +933,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_naive_small_def) = {
     .type       = TX_TYPE(FFT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
     .factors[0] = TX_FACTOR_ANY,
+    .nb_factors = 1,
     .min_len    = 2,
     .max_len    = 1024,
     .init       = TX_NAME(ff_tx_fft_init_naive_small),
@@ -940,6 +947,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = {
     .type       = TX_TYPE(FFT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
     .factors[0] = TX_FACTOR_ANY,
+    .nb_factors = 1,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = NULL,
@@ -1007,6 +1015,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_##N##xM_def) = {                \
     .type       = TX_TYPE(FFT),                                                \
     .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,        \
     .factors    = { N, TX_FACTOR_ANY },                                        \
+    .nb_factors = 2,                                                           \
     .min_len    = N*2,                                                         \
     .max_len    = TX_LEN_UNLIMITED,                                            \
     .init       = TX_NAME(ff_tx_fft_pfa_init),                                 \
@@ -1089,6 +1098,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = {
     .type       = TX_TYPE(MDCT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
     .factors    = { 2, TX_FACTOR_ANY }, /* MDCTs need an even length */
+    .nb_factors = 2,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_mdct_naive_init),
@@ -1102,6 +1112,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
     .type       = TX_TYPE(MDCT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
     .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_mdct_naive_init),
@@ -1234,6 +1245,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = {
     .type       = TX_TYPE(MDCT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
     .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_mdct_init),
@@ -1247,6 +1259,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = {
     .type       = TX_TYPE(MDCT),
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
     .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_mdct_init),
@@ -1299,6 +1312,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = {
     .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE |
                   FF_TX_OUT_OF_PLACE | AV_TX_FULL_IMDCT,
     .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_mdct_inv_full_init),
@@ -1396,6 +1410,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = {           \
     .type       = TX_TYPE(MDCT),                                               \
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,   \
     .factors    = { N, TX_FACTOR_ANY },                                        \
+    .nb_factors = 2,                                                           \
     .min_len    = N*2,                                                         \
     .max_len    = TX_LEN_UNLIMITED,                                            \
     .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \
@@ -1463,6 +1478,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = {           \
     .type       = TX_TYPE(MDCT),                                               \
     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,   \
     .factors    = { N, TX_FACTOR_ANY },                                        \
+    .nb_factors = 2,                                                           \
     .min_len    = N*2,                                                         \
     .max_len    = TX_LEN_UNLIMITED,                                            \
     .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \
@@ -1583,6 +1599,7 @@ static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
     .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE |
                   FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
     .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_rdft_init),
@@ -1597,6 +1614,7 @@ static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
     .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE |
                   FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
     .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
     .min_len    = 2,
     .max_len    = TX_LEN_UNLIMITED,
     .init       = TX_NAME(ff_tx_rdft_init),



More information about the ffmpeg-cvslog mailing list