[FFmpeg-devel] [patch][OpenHEVC]added ASM functions for epel + qpel

Fri Mar 7 13:25:25 CET 2014

Hi,

> diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
[..]
> @@ -41,6 +42,7 @@
>  const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
>  const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
>  const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
> +uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3,
[12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };

const?

> @@ -1078,8 +1080,8 @@ static void luma_mc(HEVCContext *s, int16_t *dst,
ptrdiff_t dststride,
>      int pic_width        = s->sps->width;
>      int pic_height       = s->sps->height;
>
> -    int mx         = mv->x & 3;
> -    int my         = mv->y & 3;
> +    intptr_t mx         = mv->x & 3;
> +    intptr_t my         = mv->y & 3;

(Note that I don't really care whether these are int or intptr_t - as long
as the function prototype in the DSP context has intptr_t.)

> @@ -3125,3 +3124,4 @@ AVCodec ff_hevc_decoder = {
>                               CODEC_CAP_SLICE_THREADS |
CODEC_CAP_FRAME_THREADS,
>      .profiles              = NULL_IF_CONFIG_SMALL(profiles),
>  };
> +

Stray space.

> +DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][4]) = {
> +    { -2, 58, 10, -2},
> +    { -4, 54, 16, -2},
> +    { -6, 46, 28, -4},
> +    { -4, 36, 36, -4},
> +    { -4, 28, 46, -6},
> +    { -2, 16, 54, -4},
> +    { -2, 10, 58, -2},
> +};

Does this still need to be 16-byte aligned? I mean, you're not using it in
SIMD anymore.

> @@ -175,7 +188,7 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int
bit_depth)
>      hevcdsp->hevc_v_loop_filter_luma_c   = FUNC(hevc_v_loop_filter_luma,
depth);   \
>      hevcdsp->hevc_h_loop_filter_chroma_c =
FUNC(hevc_h_loop_filter_chroma, depth); \
>      hevcdsp->hevc_v_loop_filter_chroma_c =
FUNC(hevc_v_loop_filter_chroma, depth);
> -
> +    int i = 0;

Leftover debug code ;) with trailing spaces.

> diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
[..]
> @@ -58,12 +59,11 @@ typedef struct HEVCDSPContext {
>                                 int height, int c_idx, uint8_t vert_edge,
>                                 uint8_t horiz_edge, uint8_t diag_edge);
>
> -    void (*put_hevc_qpel[4][4])(int16_t *dst, ptrdiff_t dststride,
uint8_t *src,
> -                                ptrdiff_t srcstride, int width, int
height,
> -                                int16_t *mcbuffer);
> -    void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride,
uint8_t *src,
> -                                ptrdiff_t srcstride, int width, int
height,
> -                                int mx, int my, int16_t *mcbuffer);
> +    void (*put_hevc_qpel[10][2][2])(int16_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride,
> +                                int height, intptr_t mx, intptr_t my,
int width);
> +
> +    void (*put_hevc_epel[10][2][2])(int16_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride,
> +                                int height, intptr_t mx, intptr_t my,
int width);

Do you still need the width? I mean, you should be able to remove it right?
The advantage is that it removes one function argument preparation in the
caller, which the SIMD doesn't use anyway.

I understand this makes the C code a little bit more tricky, but it should
be macro'able, right?

> diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
[..]
> @@ -1340,3 +1276,4 @@ static void FUNC(hevc_v_loop_filter_luma)(uint8_t
*pix, ptrdiff_t stride,
>  #undef TQ1
>  #undef TQ2
>  #undef TQ3
> +

You tend to add trailing spaces to all files...

> diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm

Yeah! On it.

Ronald