[FFmpeg-devel] [RFC][PATCH] simple_idct: Template functions to support an input bitdepth parameter
Michael Niedermayer
michael at niedermayer.cc
Wed Dec 27 21:53:49 EET 2017
On Wed, Dec 27, 2017 at 01:11:56AM +0000, Kieran Kunhya wrote:
> For MPEG-4 Simple Studio Profile, I need to be able to support int32_t
> input coefficients to the IDCT functions. I have attempted to implement this
> with the attached patch.
> Any comments would be appreciated, I'm pretty sure it is not optimal as-is.
>
> Regards,
> Kieran Kunhya
> bit_depth_template.c | 17 ++++++++++++++++-
> idctdsp.c | 18 +++++++++---------
> me_cmp.c | 2 +-
> simple_idct.c | 15 ++++++++++++---
> simple_idct.h | 24 ++++++++++++++----------
> simple_idct_template.c | 43 ++++++++++++++++++++++++++++---------------
> vc1.c | 4 ++--
> 7 files changed, 82 insertions(+), 41 deletions(-)
> b69afd1419eafb71e999874a220369b08c01d931 0001-simple_idct-Template-functions-to-support-an-input-b.patch
> From 9675ff0714df15e433dbe78d6e40c2430c21b519 Mon Sep 17 00:00:00 2001
> From: Kieran Kunhya <kieran at kunhya.com>
> Date: Wed, 27 Dec 2017 01:08:39 +0000
> Subject: [PATCH] simple_idct: Template functions to support an input bitdepth
> parameter
>
> ---
> libavcodec/bit_depth_template.c | 17 +++++++++++++++-
> libavcodec/idctdsp.c | 18 ++++++++--------
> libavcodec/me_cmp.c | 2 +-
> libavcodec/simple_idct.c | 15 +++++++++++---
> libavcodec/simple_idct.h | 24 +++++++++++++---------
> libavcodec/simple_idct_template.c | 43 +++++++++++++++++++++++++--------------
> libavcodec/vc1.c | 4 ++--
> 7 files changed, 82 insertions(+), 41 deletions(-)
>
> diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c
> index 8018489..bd7237f 100644
> --- a/libavcodec/bit_depth_template.c
> +++ b/libavcodec/bit_depth_template.c
> @@ -29,6 +29,7 @@
> # undef pixel2
> # undef pixel4
> # undef dctcoef
> +# undef idctin
> # undef INIT_CLIP
> # undef no_rnd_avg_pixel4
> # undef rnd_avg_pixel4
> @@ -53,6 +54,16 @@
> # define pixel4 uint64_t
> # define dctcoef int32_t
>
> +#ifdef IN_IDCT_DEPTH
> +#if IN_IDCT_DEPTH == 32
> +# define idctin int32_t
> +#else
> +# define idctin int16_t
> +#endif
> +#else
> +# define idctin int16_t
> +#endif
> +
> # define INIT_CLIP
> # define no_rnd_avg_pixel4 no_rnd_avg64
> # define rnd_avg_pixel4 rnd_avg64
> @@ -71,6 +82,7 @@
> # define pixel2 uint16_t
> # define pixel4 uint32_t
> # define dctcoef int16_t
> +# define idctin int16_t
>
> # define INIT_CLIP
> # define no_rnd_avg_pixel4 no_rnd_avg32
> @@ -87,7 +99,10 @@
> # define CLIP(a) av_clip_uint8(a)
> #endif
>
> -#define FUNC3(a, b, c) a ## _ ## b ## c
> +#define FUNC3(a, b, c) a ## _ ## b ## c
> #define FUNC2(a, b, c) FUNC3(a, b, c)
> #define FUNC(a) FUNC2(a, BIT_DEPTH,)
> #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
> +#define FUNC4(a, b, c) a ## _ ## b ## _ ## c
> +#define FUNC5(a, b, c) FUNC4(a, b, c)
> +#define FUNC6(a) FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH)
> \ No newline at end of file
> diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
> index 0ff74d8..16703aa 100644
> --- a/libavcodec/idctdsp.c
> +++ b/libavcodec/idctdsp.c
> @@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
> c->perm_type = FF_IDCT_PERM_NONE;
> } else {
> if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
> - c->idct_put = ff_simple_idct_put_10;
> - c->idct_add = ff_simple_idct_add_10;
> - c->idct = ff_simple_idct_10;
> + c->idct_put = ff_simple_idct_put_16_10;
> + c->idct_add = ff_simple_idct_add_16_10;
> + c->idct = ff_simple_idct_16_10;
Please call the functions ff_simple_idct_int16_10bit or something that makes it
clear what the 2 numbers mean.
[...]
> diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
> index f532313..8d60b50 100644
> --- a/libavcodec/simple_idct_template.c
> +++ b/libavcodec/simple_idct_template.c
> @@ -77,6 +77,10 @@
> #define ROW_SHIFT 13
> #define COL_SHIFT 18
> #define DC_SHIFT 1
> +# elif IN_IDCT_DEPTH == 32
> +#define ROW_SHIFT 13
> +#define COL_SHIFT 21
> +#define DC_SHIFT 2
> # else
> #define ROW_SHIFT 12
> #define COL_SHIFT 19
> @@ -109,11 +113,12 @@
> #ifdef EXTRA_SHIFT
> static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
> #else
> -static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
> +static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
> #endif
> {
> SUINT a0, a1, a2, a3, b0, b1, b2, b3;
>
> +#if IN_IDCT_DEPTH == 16
> #if HAVE_FAST_64BIT
> #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
> if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
> @@ -148,6 +153,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
> return;
> }
> #endif
> +#endif
>
> a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
> a1 = a0;
> @@ -168,7 +174,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
> b3 = MUL(W7, row[1]);
> MAC(b3, -W5, row[3]);
>
> +#if IN_IDCT_DEPTH == 32
> + if (1) {
Is that faster than checking rows 4-7 for 0 with sparse matrices as occurring in
video data?
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Answering whether a program halts or runs forever is
On a Turing machine, in general impossible (Turing's halting problem).
On any real computer, always possible as a real computer has a finite number
of states N, and will either halt in less than N cycles or never halt.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20171227/219a269b/attachment.sig>
More information about the ffmpeg-devel
mailing list