[FFmpeg-devel] [PATCH 3/7] proresdec2: use VLC for level instead of EC switch

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Fri Sep 8 12:58:16 EEST 2023


Andreas Rheinhardt:
> Christophe Gisquet:
>> x86/x64: 61/52 -> 55/46
>> Around 7-10% speedup.
>>
>> Run and DC do not lend themselves to such changes, likely because
>> their distribution is less skewed, and need larger average vlc read
>> iterations.
>> ---
>>  libavcodec/proresdec.h  |  1 +
>>  libavcodec/proresdec2.c | 77 ++++++++++++++++++++++++++++++++++-------
>>  2 files changed, 66 insertions(+), 12 deletions(-)
>>
>> diff --git a/libavcodec/proresdec.h b/libavcodec/proresdec.h
>> index 1e48752e6f..7ebacaeb21 100644
>> --- a/libavcodec/proresdec.h
>> +++ b/libavcodec/proresdec.h
>> @@ -22,6 +22,7 @@
>>  #ifndef AVCODEC_PRORESDEC_H
>>  #define AVCODEC_PRORESDEC_H
>>  
>> +#define CACHED_BITSTREAM_READER 1
> 
> This should be in the commit switching to the cached bitstream reader.

Correction: This header is included in videotoolbox.c and there is other
stuff that also includes get_bits.h included in said file (and currently
gets included before proresdec.h). This means that proresdec2.c and
videotoolbox.c will have different opinions on what a GetBitContext is:
It will be the non-cached one in videotoolbox.c and the cached one in
proresdec2.c. This will work in practice, because ProresContext does not
need the complete GetBitContext type at all (it does not contain a
GetBitContext at all), so offsets are not affected. But it is
nevertheless undefined behaviour and could become dangerous when using LTO.

So you should switch the type of the pointer to BitstreamContextBE* in
proresdec2.h. Furthermore, you can either include bitstream.h in
proresdec.h or (IMO better) use a forward declaration and struct
BitstreamContextBE* in the function pointer without including get_bits.h
in the header at all.

> 
>>  #include "get_bits.h"
>>  #include "blockdsp.h"
>>  #include "proresdsp.h"
>> diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c
>> index 65e8b01755..91c689d9ef 100644
>> --- a/libavcodec/proresdec2.c
>> +++ b/libavcodec/proresdec2.c
>> @@ -24,17 +24,17 @@
>>   * Known FOURCCs: 'apch' (HQ), 'apcn' (SD), 'apcs' (LT), 'apco' (Proxy), 'ap4h' (4444), 'ap4x' (4444 XQ)
>>   */
>>  
>> -#define CACHED_BITSTREAM_READER 1
>> +//#define DEBUG
>>  
>>  #include "config_components.h"
>>  
>>  #include "libavutil/internal.h"
>>  #include "libavutil/mem_internal.h"
>> +#include "libavutil/thread.h"
>>  
>>  #include "avcodec.h"
>>  #include "codec_internal.h"
>>  #include "decode.h"
>> -#include "get_bits.h"
>>  #include "hwaccel_internal.h"
>>  #include "hwconfig.h"
>>  #include "idctdsp.h"
>> @@ -129,8 +129,64 @@ static void unpack_alpha_12(GetBitContext *gb, uint16_t *dst, int num_coeffs,
>>      }
>>  }
>>  
>> +#define AC_BITS 12
>> +#define PRORES_LEV_BITS 9
>> +
>> +static const uint8_t ac_info[] = { 0x04, 0x0A, 0x05, 0x06, 0x28, 0x4C };
>> +static VLC ac_vlc[6];
>> +
>> +static av_cold void init_vlcs(void)
>> +{
>> +    int i;
>> +    for (i = 0; i < sizeof(ac_info); i++) {
> 
> FF_ARRAY_ELEMS() is cleaner; also we support and prefer declarations
> inside for-loops: for (int i = 0;
> 
>> +        uint32_t ac_codes[1<<AC_BITS];
>> +        uint8_t ac_bits[1<<AC_BITS];
>> +        unsigned int rice_order, exp_order, switch_bits, switch_val;
>> +        int ac, max_bits = 0, codebook = ac_info[i];
>> +
>> +        /* number of prefix bits to switch between Rice and expGolomb */
>> +        switch_bits = (codebook & 3);
>> +        rice_order  =  codebook >> 5;       /* rice code order */
>> +        exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
>> +
>> +        switch_val  = (switch_bits+1) << rice_order;
>> +
>> +        // Values are actually transformed, but this is more a wrapping
>> +        for (ac = 0; ac <1<<AC_BITS; ac++) {
>> +            int exponent, bits, val = ac;
>> +            unsigned int code;
>> +
>> +            if (val >= switch_val) {
>> +                val += (1 << exp_order) - switch_val;
>> +                exponent = av_log2(val);
>> +                bits = exponent+1+switch_bits-exp_order/*0*/ + exponent+1/*val*/;
>> +                code = val;
>> +            } else if (rice_order) {
>> +                bits = (val >> rice_order)/*0*/ + 1/*1*/ + rice_order/*val*/;
>> +                code = (1 << rice_order) | val;
>> +            } else {
>> +                bits = val/*0*/ + 1/*1*/;
>> +                code = 1;
>> +            }
>> +            if (bits > max_bits) max_bits = bits;
>> +            ac_bits [ac] = bits;
>> +            ac_codes[ac] = code;
>> +        }
>> +
>> +        ff_free_vlc(ac_vlc+i);
> 
> This is unnecessary, as the VLC is initially blank and is not
> initialized multiple times.
> 
>> +
>> +        if (init_vlc(ac_vlc+i, PRORES_LEV_BITS, 1<<AC_BITS,
>> +                     ac_bits, 1, 1, ac_codes, 4, 4, 0) < 0) {
>> +            av_log(NULL, AV_LOG_ERROR, "Error for %d(0x%02X), max bits %d\n",
>> +                   i, codebook, max_bits);
>> +            break; //return AVERROR_BUG;
> 
> This is not how you initialize a static table (you miss the
> INIT_VLC_USE_NEW_STATIC flag and don't set the static store buffer).
> Search for INIT_VLC_STATIC_OVERLONG for an idea of how to do it.
> 
>> +        }
>> +    }
>> +}
>> +
>>  static av_cold int decode_init(AVCodecContext *avctx)
>>  {
>> +    static AVOnce init_static_once = AV_ONCE_INIT;
>>      int ret = 0;
>>      ProresContext *ctx = avctx->priv_data;
>>      uint8_t idct_permutation[64];
>> @@ -184,6 +240,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
>>  
>>      ctx->pix_fmt = AV_PIX_FMT_NONE;
>>  
>> +    // init dc_tables
>> +    ff_thread_once(&init_static_once, init_vlcs);
>> +
>>      if (avctx->bits_per_raw_sample == 10){
>>          ctx->unpack_alpha = unpack_alpha_10;
>>      } else if (avctx->bits_per_raw_sample == 12){
>> @@ -510,7 +569,7 @@ static av_always_inline int decode_dc_coeffs(GetBitContext *gb, int16_t *out,
>>      return 0;
>>  }
>>  
>> -// adaptive codebook switching lut according to previous run/level values
>> +// adaptive codebook switching lut according to previous run values
>>  static const char run_to_cb[16][4] = {
>>      { 2, 0, -1,  1 }, { 2, 0, -1,  1 }, { 1, 0, 0,  0 }, { 1, 0,  0,  0 }, { 0, 0, 1, -1 },
>>      { 1, 1,  1,  0 }, { 1, 1,  1,  0 }, { 1, 1, 1,  0 }, { 1, 1,  1,  0 },
>> @@ -518,12 +577,6 @@ static const char run_to_cb[16][4] = {
>>      { 0, 2,  3, -4 }
>>  };
>>  
>> -static const char lev_to_cb[10][4] = {
>> -    { 0, 0,  1, -1 }, { 2, 0,  0, -1 }, { 1, 0, 0,  0 }, { 2, 0, -1,  1 }, { 0, 0, 1, -1 },
>> -    { 0, 1,  2, -2 }, { 0, 1,  2, -2 }, { 0, 1, 2, -2 }, { 0, 1,  2, -2 },
>> -    { 0, 2,  3, -4 }
>> -};
>> -
>>  static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb,
>>                                               int16_t *out, int blocks_per_slice)
>>  {
>> @@ -540,8 +593,9 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
>>      block_mask = blocks_per_slice - 1;
>>  
>>      for (pos = block_mask;;) {
>> +        static const uint8_t ctx_to_tbl[] = { 0, 1, 2, 3, 0, 4, 4, 4, 4, 5 };
>> +        const VLC* tbl = ac_vlc + ctx_to_tbl[FFMIN(level, 9)];
>>          unsigned int runcb = FFMIN(run,  15);
>> -        unsigned int levcb = FFMIN(level, 9);
>>          bits_rem = get_bits_left(gb);
>>          if (!bits_rem || (bits_rem < 16 && !show_bits(gb, bits_rem)))
>>              break;
>> @@ -554,8 +608,7 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
>>              return AVERROR_INVALIDDATA;
>>          }
>>  
>> -        DECODE_CODEWORD2(level, lev_to_cb[levcb][0], lev_to_cb[levcb][1],
>> -                                lev_to_cb[levcb][2], lev_to_cb[levcb][3]);
>> +        level = get_vlc2(gb, tbl->table, PRORES_LEV_BITS, 3);
>>          level += 1;
>>  
>>          i = pos >> log2_block_count;
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".



More information about the ffmpeg-devel mailing list