[FFmpeg-devel] Subject: [PATCH] avcodec/fft: out of place permutation with av_fft_permute2
Muhammad Faiz
mfcc64 at gmail.com
Mon Oct 12 20:01:57 CEST 2015
-------------- next part --------------
From a403f93a8fa20ce0d7345d9a00d75bc90fe7d73f Mon Sep 17 00:00:00 2001
From: Muhammad Faiz <mfcc64 at gmail.com>
Date: Tue, 13 Oct 2015 00:31:29 +0700
Subject: [PATCH] avcodec/fft: out of place permutation with av_fft_permute2
with optimization (more cache friendly)
also optimize av_fft_permute
machine specific code (unfortunately) is not touched
speedup (at least on my machine, bits = 14):
with av_fft_permute ~ 1.5x - 2x
with av_fft_permute2 ~ 2.5x - 3x
---
libavcodec/avfft.c | 5 +++++
libavcodec/avfft.h | 3 +++
libavcodec/fft.h | 2 ++
libavcodec/fft_template.c | 34 +++++++++++++++++++++++++++++-----
libavcodec/version.h | 2 +-
5 files changed, 40 insertions(+), 6 deletions(-)
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 675d2b9..6b33ab5 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -40,6 +40,11 @@ void av_fft_permute(FFTContext *s, FFTComplex *z)
s->fft_permute(s, z);
}
+void av_fft_permute2(FFTContext *s, FFTComplex *dst, const FFTComplex *src)
+{
+ s->fft_permute2(s, dst, src);
+}
+
void av_fft_calc(FFTContext *s, FFTComplex *z)
{
s->fft_calc(s, z);
diff --git a/libavcodec/avfft.h b/libavcodec/avfft.h
index 0c0f9b8..31d5420 100644
--- a/libavcodec/avfft.h
+++ b/libavcodec/avfft.h
@@ -52,6 +52,9 @@ FFTContext *av_fft_init(int nbits, int inverse);
*/
void av_fft_permute(FFTContext *s, FFTComplex *z);
+/* Out-of-place permutation: writes the permuted src into dst; src and dst must not overlap. */
+void av_fft_permute2(FFTContext *s, FFTComplex *dst, const FFTComplex *src);
+
/**
* Do a complex FFT with the parameters defined in av_fft_init(). The
* input data must be permuted before. No 1.0/sqrt(n) normalization is done.
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 64f0f63..c7f2bdb 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -110,6 +110,8 @@ struct FFTContext {
void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
enum fft_permutation_type fft_permutation;
enum mdct_permutation_type mdct_permutation;
+ /* out-of-place permutation: writes the permuted src into dst (buffers must not overlap) */
+ void (*fft_permute2)(struct FFTContext *s, FFTComplex *dst, const FFTComplex* src);
};
#if CONFIG_HARDCODED_TABLES
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 23ea453..00e652b 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -72,6 +72,8 @@ COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
#endif /* FFT_FIXED_32 */
static void fft_permute_c(FFTContext *s, FFTComplex *z);
+static void fft_permute2_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src);
+static void fft_permute2_wrapper_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src);
static void fft_calc_c(FFTContext *s, FFTComplex *z);
static int split_radix_permutation(int i, int n, int inverse)
@@ -156,6 +158,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
s->fft_permutation = FF_FFT_PERM_DEFAULT;
s->fft_permute = fft_permute_c;
+ s->fft_permute2= fft_permute2_c;
s->fft_calc = fft_calc_c;
#if CONFIG_MDCT
s->imdct_calc = ff_imdct_calc_c;
@@ -197,6 +200,9 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
}
}
+ if (s->fft_permute != fft_permute_c && s->fft_permute2 == fft_permute2_c)
+ s->fft_permute2 = fft_permute2_wrapper_c;
+
return 0;
fail:
av_freep(&s->revtab);
@@ -206,12 +212,30 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
static void fft_permute_c(FFTContext *s, FFTComplex *z)
{
- int j, np;
+ int n = 1 << s->nbits;
+ fft_permute2_c(s, s->tmp_buf, z);
+ memcpy(z, s->tmp_buf, n * sizeof(FFTComplex));
+}
+
+static void fft_permute2_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src)
+{
+ int j, n, q;
const uint16_t *revtab = s->revtab;
- np = 1 << s->nbits;
- /* TODO: handle split-radix permute in a more optimal way, probably in-place */
- for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
- memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
+ n = 1 << s->nbits;
+ q = n >> 2;
+ for (j = 0; j < q; j++) {
+ dst[revtab[j ]] = src[j ];
+ dst[revtab[j+ q]] = src[j+ q];
+ dst[revtab[j+2*q]] = src[j+2*q];
+ dst[revtab[j+3*q]] = src[j+3*q];
+ }
+}
+
+/* fallback when fft_permute is not fft_permute_c: copy src to dst, then permute dst in place */
+static void fft_permute2_wrapper_c(FFTContext *s, FFTComplex *dst, const FFTComplex *src)
+{
+ memcpy(dst, src, (1 << s->nbits) * sizeof(FFTComplex));
+ s->fft_permute(s, dst);
}
av_cold void ff_fft_end(FFTContext *s)
diff --git a/libavcodec/version.h b/libavcodec/version.h
index c7fc1f1..953ff9f 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
#include "libavutil/version.h"
#define LIBAVCODEC_VERSION_MAJOR 57
-#define LIBAVCODEC_VERSION_MINOR 5
+#define LIBAVCODEC_VERSION_MINOR 6
#define LIBAVCODEC_VERSION_MICRO 100
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
--
1.8.3.1
More information about the ffmpeg-devel
mailing list