[FFmpeg-cvslog] lavu/tx: add DCT-III implementation

Lynne git at videolan.org
Thu Nov 24 16:59:59 EET 2022


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sun Nov 20 03:44:29 2022 +0100| [a56d7e0ca3be82cb5155ab0cf8206fc8b8d6861d] | committer: Lynne

lavu/tx: add DCT-III implementation

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a56d7e0ca3be82cb5155ab0cf8206fc8b8d6861d
---

 libavutil/tx_template.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 5d73809b58..1de92b9786 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1737,6 +1737,11 @@ static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
     TXSample *tab;
     SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
 
+    if (inv) {
+        len *= 2;
+        s->len *= 2;
+        rsc *= 0.5;
+    }
 
     if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL, len, inv, &rsc)))
         return ret;
@@ -1752,8 +1757,13 @@ static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
     for (int i = 0; i < len; i++)
         tab[i] = RESCALE(cos(i*freq)*(!inv + 1));
 
-    for (int i = 0; i < len/2; i++)
-        tab[len + i] = RESCALE(cos((len - 2*i - 1)*freq));
+    if (inv) {
+        for (int i = 0; i < len/2; i++)
+            tab[len + i] = RESCALE(0.5 / sin((2*i + 1)*freq));
+    } else {
+        for (int i = 0; i < len/2; i++)
+            tab[len + i] = RESCALE(cos((len - 2*i - 1)*freq));
+    }
 
     return 0;
 }
@@ -1818,6 +1828,49 @@ static void TX_NAME(ff_tx_dctII)(AVTXContext *s, void *_dst,
     dst[1] = next;
 }
 
+static void TX_NAME(ff_tx_dctIII)(AVTXContext *s, void *_dst,
+                                  void *_src, ptrdiff_t stride) /* the stride argument is not read anywhere in this body */
+{ /* Three stages: rewrite src in place, run sub-transform s->fn[0] from src into dst, then recombine mirrored dst pairs. */
+    TXSample *dst = _dst;
+    TXSample *src = _src; /* the input buffer is written to below, it is not treated as read-only */
+    const int len = s->len;
+    const int len2 = len >> 1; /* number of mirrored output pairs processed by the final loop */
+    const TXSample *exp = (void *)s->exp; /* read at exp[i], exp[len - i] in the first loop and exp[len + i] in the last */
+#ifdef TX_INT32
+    int64_t  tmp1, tmp2 = src[len - 1]; /* 64-bit temporaries for the fixed-point build */
+    tmp2 = (2*tmp2 + 0x40000000) >> 31; /* doubled last sample, plus a 2^30 bias, then shifted right by 31 */
+#else
+    TXSample tmp1, tmp2 = 2*src[len - 1]; /* doubled last input sample */
+#endif
+
+    src[len] = tmp2; /* stores at index len; NOTE(review): assumes the caller's buffer has room for src[len] - confirm */
+
+    for (int i = len - 2; i >= 2; i -= 2) { /* steps downward by 2; src[0] and src[1] are never written here */
+        TXSample val1 = src[i - 0];
+        TXSample val2 = src[i - 1] - src[i + 1]; /* src[i + 1] is read before CMUL overwrites it */
+
+        CMUL(src[i + 1], src[i], exp[len - i], exp[i], val1, val2); /* presumably a complex multiply writing src[i + 1] and src[i]; CMUL is defined outside this diff - confirm */
+    }
+
+    s->fn[0](&s->sub[0], dst, src, sizeof(float)); /* sub-transform src -> dst; NOTE(review): stride is sizeof(float) for every TXSample type - confirm the callee ignores it */
+
+    for (int i = 0; i < len2; i++) { /* combine dst[i] with its mirror dst[len - i - 1] */
+        TXSample in1 = dst[i];
+        TXSample in2 = dst[len - i - 1];
+        TXSample c   = exp[len + i]; /* per-pair factor taken from the table region starting at exp[len] */
+
+        tmp1 = in1 + in2;
+        tmp2 = in1 - in2;
+        tmp2 *= c; /* only the difference term is scaled by c */
+#ifdef TX_INT32
+        tmp2 = (tmp2 + 0x40000000) >> 31; /* add 2^30 bias and shift the fixed-point product right by 31 */
+#endif
+
+        dst[i]            = tmp1 + tmp2; /* both inputs were copied to in1/in2 above, so overwriting in place is safe */
+        dst[len - i - 1]  = tmp1 - tmp2;
+    }
+}
+
 static const FFTXCodelet TX_NAME(ff_tx_dctII_def) = {
     .name       = TX_NAME_STR("dctII"),
     .function   = TX_NAME(ff_tx_dctII),
@@ -1832,6 +1885,20 @@ static const FFTXCodelet TX_NAME(ff_tx_dctII_def) = {
     .prio       = FF_TX_PRIO_BASE,
 };
 
+static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = { /* codelet descriptor that exposes ff_tx_dctIII */
+    .name       = TX_NAME_STR("dctIII"),
+    .function   = TX_NAME(ff_tx_dctIII),
+    .type       = TX_TYPE(DCT),
+    .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE |
+                  FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, /* carries the FF_TX_INVERSE_ONLY flag */
+    .factors    = { 2, TX_FACTOR_ANY }, /* NOTE(review): presumably restricts lengths to multiples of 2 - confirm the factors semantics */
+    .min_len    = 2,
+    .max_len    = TX_LEN_UNLIMITED,
+    .init       = TX_NAME(ff_tx_dct_init), /* the init routine whose inv branches are added earlier in this diff */
+    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
+    .prio       = FF_TX_PRIO_BASE,
+};
+
 int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
 {
     int off = 0;
@@ -1920,6 +1987,7 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
     &TX_NAME(ff_tx_rdft_r2c_def),
     &TX_NAME(ff_tx_rdft_c2r_def),
     &TX_NAME(ff_tx_dctII_def),
+    &TX_NAME(ff_tx_dctIII_def),
 
     NULL,
 };



More information about the ffmpeg-cvslog mailing list