[FFmpeg-devel] [RFC][PATCH] simple_idct: Template functions to support an input bitdepth parameter

Wed Dec 27 21:53:49 EET 2017

On Wed, Dec 27, 2017 at 01:11:56AM +0000, Kieran Kunhya wrote:
> For MPEG-4 Simple Studio Profile, I need to be able to support int32_t
> input coefficients to the IDCT functions. I have attempted to implement this
> with the attached patch.
> Any comments would be appreciated, I'm pretty sure it is not optimal as-is.
> 
> Regards,
> Kieran Kunhya

>  bit_depth_template.c   |   17 ++++++++++++++++-
>  idctdsp.c              |   18 +++++++++---------
>  me_cmp.c               |    2 +-
>  simple_idct.c          |   15 ++++++++++++---
>  simple_idct.h          |   24 ++++++++++++++----------
>  simple_idct_template.c |   43 ++++++++++++++++++++++++++++---------------
>  vc1.c                  |    4 ++--
>  7 files changed, 82 insertions(+), 41 deletions(-)
> b69afd1419eafb71e999874a220369b08c01d931  0001-simple_idct-Template-functions-to-support-an-input-b.patch
> From 9675ff0714df15e433dbe78d6e40c2430c21b519 Mon Sep 17 00:00:00 2001
> From: Kieran Kunhya <kieran at kunhya.com>
> Date: Wed, 27 Dec 2017 01:08:39 +0000
> Subject: [PATCH] simple_idct: Template functions to support an input bitdepth
>  parameter
> 
> ---
>  libavcodec/bit_depth_template.c   | 17 +++++++++++++++-
>  libavcodec/idctdsp.c              | 18 ++++++++--------
>  libavcodec/me_cmp.c               |  2 +-
>  libavcodec/simple_idct.c          | 15 +++++++++++---
>  libavcodec/simple_idct.h          | 24 +++++++++++++---------
>  libavcodec/simple_idct_template.c | 43 +++++++++++++++++++++++++--------------
>  libavcodec/vc1.c                  |  4 ++--
>  7 files changed, 82 insertions(+), 41 deletions(-)
> 
> diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c
> index 8018489..bd7237f 100644
> --- a/libavcodec/bit_depth_template.c
> +++ b/libavcodec/bit_depth_template.c
> @@ -29,6 +29,7 @@
>  #   undef pixel2
>  #   undef pixel4
>  #   undef dctcoef
> +#   undef idctin
>  #   undef INIT_CLIP
>  #   undef no_rnd_avg_pixel4
>  #   undef rnd_avg_pixel4
> @@ -53,6 +54,16 @@
>  #   define pixel4 uint64_t
>  #   define dctcoef int32_t
>  
> +#ifdef IN_IDCT_DEPTH
> +#if IN_IDCT_DEPTH == 32
> +#   define idctin int32_t
> +#else
> +#   define idctin int16_t
> +#endif
> +#else
> +#   define idctin int16_t
> +#endif
> +
>  #   define INIT_CLIP
>  #   define no_rnd_avg_pixel4 no_rnd_avg64
>  #   define    rnd_avg_pixel4    rnd_avg64
> @@ -71,6 +82,7 @@
>  #   define pixel2 uint16_t
>  #   define pixel4 uint32_t
>  #   define dctcoef int16_t
> +#   define idctin  int16_t
>  
>  #   define INIT_CLIP
>  #   define no_rnd_avg_pixel4 no_rnd_avg32
> @@ -87,7 +99,10 @@
>  #   define CLIP(a) av_clip_uint8(a)
>  #endif
>  
> -#define FUNC3(a, b, c)  a ## _ ## b ## c
> +#define FUNC3(a, b, c)  a ## _ ## b ##  c
>  #define FUNC2(a, b, c)  FUNC3(a, b, c)
>  #define FUNC(a)  FUNC2(a, BIT_DEPTH,)
>  #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
> +#define FUNC4(a, b, c)  a ## _ ## b ## _ ## c
> +#define FUNC5(a, b, c)  FUNC4(a, b, c)
> +#define FUNC6(a)  FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH)
> \ No newline at end of file
> diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
> index 0ff74d8..16703aa 100644
> --- a/libavcodec/idctdsp.c
> +++ b/libavcodec/idctdsp.c
> @@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
>          c->perm_type = FF_IDCT_PERM_NONE;
>      } else {
>          if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
> -            c->idct_put              = ff_simple_idct_put_10;
> -            c->idct_add              = ff_simple_idct_add_10;
> -            c->idct                  = ff_simple_idct_10;
> +            c->idct_put              = ff_simple_idct_put_16_10;
> +            c->idct_add              = ff_simple_idct_add_16_10;
> +            c->idct                  = ff_simple_idct_16_10;

Please call the functions ff_simple_idct_int16_10bit, or something similar that
makes it clear what the two numbers mean.

[...]
> diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
> index f532313..8d60b50 100644
> --- a/libavcodec/simple_idct_template.c
> +++ b/libavcodec/simple_idct_template.c
> @@ -77,6 +77,10 @@
>  #define ROW_SHIFT 13
>  #define COL_SHIFT 18
>  #define DC_SHIFT  1
> +#   elif IN_IDCT_DEPTH == 32
> +#define ROW_SHIFT 13
> +#define COL_SHIFT 21
> +#define DC_SHIFT  2
>  #   else
>  #define ROW_SHIFT 12
>  #define COL_SHIFT 19
> @@ -109,11 +113,12 @@
>  #ifdef EXTRA_SHIFT
>  static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
>  #else
> -static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
> +static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
>  #endif
>  {
>      SUINT a0, a1, a2, a3, b0, b1, b2, b3;
>  
> +#if IN_IDCT_DEPTH == 16
>  #if HAVE_FAST_64BIT
>  #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
>      if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
> @@ -148,6 +153,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
>          return;
>      }
>  #endif
> +#endif
>  
>      a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
>      a1 = a0;
> @@ -168,7 +174,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
>      b3 = MUL(W7, row[1]);
>      MAC(b3, -W5, row[3]);
>  
> +#if IN_IDCT_DEPTH == 32

> +    if (1) {

Is that faster than checking rows 4-7 for 0, given the sparse matrices that
occur in video data?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Answering whether a program halts or runs forever is
On a Turing machine, in general impossible (Turing's halting problem).
On any real computer, always possible as a real computer has a finite number
of states N, and will either halt in less than N cycles or never halt.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20171227/219a269b/attachment.sig>