[FFmpeg-devel] [PATCH 12/12] WMAPRO: use some type punning in decode_coeffs()

Reimar Döffinger Reimar.Doeffinger
Tue Sep 29 16:03:09 CEST 2009


On Sun, Sep 27, 2009 at 11:49:28AM +0100, Mans Rullgard wrote:
> ---
>  libavcodec/wmaprodec.c |   35 ++++++++++++++++++++++-------------
>  1 files changed, 22 insertions(+), 13 deletions(-)
> 
> diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
> index e483733..2598ff2 100644
> --- a/libavcodec/wmaprodec.c
> +++ b/libavcodec/wmaprodec.c
> @@ -766,6 +766,12 @@ static int decode_channel_transform(WMAProDecodeCtx* s)
>   */
>  static int decode_coeffs(WMAProDecodeCtx *s, int c)
>  {
> +    static const int fval_tab[16] = {
> +        0x00000000, 0x3f800000, 0x40000000, 0x40400000,
> +        0x40800000, 0x40a00000, 0x40c00000, 0x40e00000,
> +        0x41000000, 0x41100000, 0x41200000, 0x41300000,
> +        0x41400000, 0x41500000, 0x41600000, 0x41700000,
> +    };
>      int vlctable;
>      VLC* vlc;
>      WMAProChannelCtx* ci = &s->channel[c];
> @@ -801,29 +807,32 @@ static int decode_coeffs(WMAProDecodeCtx *s, int c)
>              for (i = 0; i < 4; i += 2) {
>                  idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
>                  if (idx == HUFF_VEC2_SIZE - 1) {
> -                    vals[i] = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
> -                    if (vals[i] == HUFF_VEC1_SIZE - 1)
> -                        vals[i] += ff_wma_get_large_val(&s->gb);
> -                    vals[i+1] = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
> -                    if (vals[i+1] == HUFF_VEC1_SIZE - 1)
> -                        vals[i+1] += ff_wma_get_large_val(&s->gb);
> +                    int v0, v1;
> +                    v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
> +                    if (v0 == HUFF_VEC1_SIZE - 1)
> +                        v0 += ff_wma_get_large_val(&s->gb);
> +                    v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
> +                    if (v1 == HUFF_VEC1_SIZE - 1)
> +                        v1 += ff_wma_get_large_val(&s->gb);
> +                    ((float*)vals)[i  ] = v0;
> +                    ((float*)vals)[i+1] = v1;
>                  } else {
> -                    vals[i]   = symbol_to_vec2[idx] >> 4;
> -                    vals[i+1] = symbol_to_vec2[idx] & 0xF;
> +                    vals[i]   = fval_tab[symbol_to_vec2[idx] >> 4 ];
> +                    vals[i+1] = fval_tab[symbol_to_vec2[idx] & 0xF];
>                  }
>              }
>          } else {
> -            vals[0] =  symbol_to_vec4[idx] >> 12;
> -            vals[1] = (symbol_to_vec4[idx] >> 8) & 0xF;
> -            vals[2] = (symbol_to_vec4[idx] >> 4) & 0xF;
> -            vals[3] =  symbol_to_vec4[idx]       & 0xF;
> +            vals[0] = fval_tab[ symbol_to_vec4[idx] >> 12      ];
> +            vals[1] = fval_tab[(symbol_to_vec4[idx] >> 8) & 0xF];
> +            vals[2] = fval_tab[(symbol_to_vec4[idx] >> 4) & 0xF];
> +            vals[3] = fval_tab[ symbol_to_vec4[idx]       & 0xF];
>          }
>  
>          /** decode sign */
>          for (i = 0; i < 4; i++) {
>              if (vals[i]) {
>                  int sign = get_bits1(&s->gb) - 1;
> -                ci->coeffs[cur_coeff] = (vals[i] ^ sign) - sign;
> +                *(uint32_t*)&ci->coeffs[cur_coeff] = vals[i] ^ sign<<31;
>                  num_zeros = 0;

Hmm... Have you tried how much of the speedup is just from using the
table?
I.e. making vals float, fval_tab float,
ci->coeffs[cur_coeff] = get_bits1(&s->gb) ? vals[i] : -vals[i];
?
Would be a bit less ugly and avoid aliasing issues, though since as you
confirmed in the other patch gcc is really stupid it might not make
anything faster...



More information about the ffmpeg-devel mailing list