[FFmpeg-cvslog] lavu/tx: invert permutation lookups
Lynne
git at videolan.org
Sat Feb 27 05:21:50 EET 2021
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat Feb 27 04:11:04 2021 +0100| [8e94b7cff03539bcb4c360d2550a031a5378df03] | committer: Lynne
lavu/tx: invert permutation lookups
out[lut[i]] = in[i] lookups were 4.04 times(!) slower than
out[i] = in[lut[i]] lookups for an out-of-place FFT of length 4096.
The permutes remain unchanged for everything except the out-of-place
monolithic FFT, as the other transforms benefit quite a lot from the
current order (it means only 1 lookup, added to an offset, is necessary
rather than a full gather).
The code was based around non-power-of-two FFTs, so this wasn't
benchmarked early on.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8e94b7cff03539bcb4c360d2550a031a5378df03
---
libavutil/tx.c | 7 +++++--
libavutil/tx_priv.h | 2 +-
libavutil/tx_template.c | 4 ++--
3 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/libavutil/tx.c b/libavutil/tx.c
index ac67b354be..1161df3285 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -91,7 +91,7 @@ int ff_tx_gen_compound_mapping(AVTXContext *s)
return 0;
}
-int ff_tx_gen_ptwo_revtab(AVTXContext *s)
+int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
{
const int m = s->m, inv = s->inv;
@@ -101,7 +101,10 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s)
/* Default */
for (int i = 0; i < m; i++) {
int k = -split_radix_permutation(i, m, inv) & (m - 1);
- s->revtab[k] = i;
+ if (invert_lookup)
+ s->revtab[i] = k;
+ else
+ s->revtab[k] = i;
}
return 0;
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index e9fba02a35..e2f4314a4f 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -123,7 +123,7 @@ struct AVTXContext {
/* Shared functions */
int ff_tx_type_is_mdct(enum AVTXType type);
int ff_tx_gen_compound_mapping(AVTXContext *s);
-int ff_tx_gen_ptwo_revtab(AVTXContext *s);
+int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup);
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s);
/* Also used by SIMD init */
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 711013c352..0c76e0ed6f 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -410,7 +410,7 @@ static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
} while ((src = *inplace_idx++));
} else {
for (int i = 0; i < m; i++)
- out[s->revtab[i]] = in[i];
+ out[i] = in[s->revtab[i]];
}
fft_dispatch[mb](out);
@@ -738,7 +738,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
if (n != 1)
init_cos_tabs(0);
if (m != 1) {
- if ((err = ff_tx_gen_ptwo_revtab(s)))
+ if ((err = ff_tx_gen_ptwo_revtab(s, n == 1 && !(flags & AV_TX_INPLACE))))
return err;
if (flags & AV_TX_INPLACE) {
if (is_mdct) /* In-place MDCTs are not supported yet */
More information about the ffmpeg-cvslog
mailing list