[FFmpeg-devel] [PATCH] yadif: restore speed of the C filtering code

Paul B Mahol onemda at gmail.com
Fri Mar 1 18:31:45 CET 2013


On 3/1/13, James Darnley <james.darnley at gmail.com> wrote:
> Always use the special filter for the first and last 3 columns (only).
>
> The changes made in 64ed397 slowed the filter to just under 3/4 of what
> it was.  This commit restores almost all of that speed while maintaining
> identical output.
>
> For reference, on my Athlon64:
> 1733222 decicycles in old
> 2358563 decicycles in new
> 1740014 decicycles in this
> ---
>  libavfilter/vf_yadif.c          |   93
> +++++++++++++++++++++++---------------
>  libavfilter/x86/vf_yadif_init.c |   12 +----
>  libavfilter/yadif.h             |    4 +-
>  3 files changed, 60 insertions(+), 49 deletions(-)

That commit (64ed397) claimed it fixed overreads.
Have you checked that no overreads happen with this patch?

>
> diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
> index b7c2d80..3bd0d17 100644
> --- a/libavfilter/vf_yadif.c
> +++ b/libavfilter/vf_yadif.c
> @@ -34,9 +34,9 @@
>  #define PERM_RWP AV_PERM_WRITE | AV_PERM_PRESERVE | AV_PERM_REUSE
>
>  #define CHECK(j)\
> -    {   int score = FFABS(cur[mrefs + off_left + (j)] - cur[prefs +
> off_left - (j)])\
> +    {   int score = FFABS(cur[mrefs - 1 + (j)] - cur[prefs - 1 - (j)])\
>                    + FFABS(cur[mrefs  +(j)] - cur[prefs  -(j)])\
> -                  + FFABS(cur[mrefs + off_right + (j)] - cur[prefs +
> off_right - (j)]);\
> +                  + FFABS(cur[mrefs + 1 + (j)] - cur[prefs + 1 - (j)]);\
>          if (score < spatial_score) {\
>              spatial_score= score;\
>              spatial_pred= (cur[mrefs  +(j)] + cur[prefs  -(j)])>>1;\
> @@ -51,15 +51,46 @@
>          int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] -
> e) )>>1; \
>          int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1,
> temporal_diff2); \
>          int spatial_pred = (c+e) >> 1; \
> -        int off_right = (x < w - 1) ? 1 : -1;\
> -        int off_left  = x ? -1 : 1;\
> -        int spatial_score = FFABS(cur[mrefs + off_left]  - cur[prefs +
> off_left]) + FFABS(c-e) \
> -                          + FFABS(cur[mrefs + off_right] - cur[prefs +
> off_right]) - 1; \
> +        int spatial_score = FFABS(cur[mrefs - 1] - cur[prefs - 1]) +
> FFABS(c-e) \
> +                          + FFABS(cur[mrefs + 1] - cur[prefs + 1]) - 1; \
>   \
> -        if (x > 2 && x < w - 3) {\
> -            CHECK(-1) CHECK(-2) }} }} \
> -            CHECK( 1) CHECK( 2) }} }} \
> -        }\
> +        CHECK(-1) CHECK(-2) }} }} \
> +        CHECK( 1) CHECK( 2) }} }} \
> + \
> +        if (mode < 2) { \
> +            int b = (prev2[2 * mrefs] + next2[2 * mrefs])>>1; \
> +            int f = (prev2[2 * prefs] + next2[2 * prefs])>>1; \
> +            int max = FFMAX3(d - e, d - c, FFMIN(b - c, f - e)); \
> +            int min = FFMIN3(d - e, d - c, FFMAX(b - c, f - e)); \
> + \
> +            diff = FFMAX3(diff, min, -max); \
> +        } \
> + \
> +        if (spatial_pred > d + diff) \
> +           spatial_pred = d + diff; \
> +        else if (spatial_pred < d - diff) \
> +           spatial_pred = d - diff; \
> + \
> +        dst[0] = spatial_pred; \
> + \
> +        dst++; \
> +        cur++; \
> +        prev++; \
> +        next++; \
> +        prev2++; \
> +        next2++; \
> +    }
> +
> +#define FILTER_EDGES(start, end) \
> +    for (x = start;  x < end; x++) { \
> +        int c = cur[mrefs]; \
> +        int d = (prev2[0] + next2[0])>>1; \
> +        int e = cur[prefs]; \
> +        int temporal_diff0 = FFABS(prev2[0] - next2[0]); \
> +        int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] -
> e) )>>1; \
> +        int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] -
> e) )>>1; \
> +        int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1,
> temporal_diff2); \
> +        int spatial_pred = (c+e) >> 1; \
>   \
>          if (mode < 2) { \
>              int b = (prev2[2 * mrefs] + next2[2 * mrefs])>>1; \
> @@ -101,8 +132,7 @@ static void filter_line_c(void *dst1,
>  }
>
>  static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
> -                         int w, int prefs, int mrefs, int parity, int
> mode,
> -                         int l_edge)
> +                         int w, int prefs, int mrefs, int parity, int
> mode)
>  {
>      uint8_t *dst  = dst1;
>      uint8_t *prev = prev1;
> @@ -112,7 +142,7 @@ static void filter_edges(void *dst1, void *prev1, void
> *cur1, void *next1,
>      uint8_t *prev2 = parity ? prev : cur ;
>      uint8_t *next2 = parity ? cur  : next;
>
> -    FILTER(0, l_edge)
> +    FILTER_EDGES(0, 3)
>
>      dst  = (uint8_t*)dst1  + w - 3;
>      prev = (uint8_t*)prev1 + w - 3;
> @@ -121,7 +151,7 @@ static void filter_edges(void *dst1, void *prev1, void
> *cur1, void *next1,
>      prev2 = (uint8_t*)(parity ? prev : cur);
>      next2 = (uint8_t*)(parity ? cur  : next);
>
> -    FILTER(w - 3, w)
> +    FILTER_EDGES(w - 3, w)
>  }
>
>
> @@ -144,8 +174,7 @@ static void filter_line_c_16bit(void *dst1,
>  }
>
>  static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void
> *next1,
> -                               int w, int prefs, int mrefs, int parity, int
> mode,
> -                               int l_edge)
> +                               int w, int prefs, int mrefs, int parity, int
> mode)
>  {
>      uint16_t *dst  = dst1;
>      uint16_t *prev = prev1;
> @@ -155,7 +184,7 @@ static void filter_edges_16bit(void *dst1, void *prev1,
> void *cur1, void *next1,
>      uint16_t *prev2 = parity ? prev : cur ;
>      uint16_t *next2 = parity ? cur  : next;
>
> -    FILTER(0, l_edge)
> +    FILTER_EDGES(0, 3)
>
>      dst   = (uint16_t*)dst1  + w - 3;
>      prev  = (uint16_t*)prev1 + w - 3;
> @@ -164,7 +193,7 @@ static void filter_edges_16bit(void *dst1, void *prev1,
> void *cur1, void *next1,
>      prev2 = (uint16_t*)(parity ? prev : cur);
>      next2 = (uint16_t*)(parity ? cur  : next);
>
> -    FILTER(w - 3, w)
> +    FILTER_EDGES(w - 3, w)
>  }
>
>  static void filter(AVFilterContext *ctx, AVFilterBufferRef *dstpic,
> @@ -178,7 +207,7 @@ static void filter(AVFilterContext *ctx,
> AVFilterBufferRef *dstpic,
>          int h = dstpic->video->h;
>          int refs = yadif->cur->linesize[i];
>          int df = (yadif->csp->comp[i].depth_minus1 + 8) / 8;
> -        int l_edge, l_edge_pix;
> +        int pix_3 = 3 * df;
>
>          if (i == 1 || i == 2) {
>          /* Why is this not part of the per-plane description thing? */
> @@ -189,8 +218,6 @@ static void filter(AVFilterContext *ctx,
> AVFilterBufferRef *dstpic,
>          /* filtering reads 3 pixels to the left/right; to avoid invalid
> reads,
>           * we need to call the c variant which avoids this for border
> pixels
>           */
> -        l_edge     = yadif->req_align;
> -        l_edge_pix = l_edge / df;
>
>          for (y = 0; y < h; y++) {
>              if ((y ^ parity) & 1) {
> @@ -199,22 +226,14 @@ static void filter(AVFilterContext *ctx,
> AVFilterBufferRef *dstpic,
>                  uint8_t *next = &yadif->next->data[i][y * refs];
>                  uint8_t *dst  = &dstpic->data[i][y * dstpic->linesize[i]];
>                  int     mode  = y == 1 || y + 2 == h ? 2 : yadif->mode;
> -                if (yadif->req_align) {
> -                    yadif->filter_line(dst + l_edge, prev + l_edge, cur +
> l_edge,
> -                                       next + l_edge, w - l_edge_pix - 3,
> -                                       y + 1 < h ? refs : -refs,
> -                                       y ? -refs : refs,
> -                                       parity ^ tff, mode);
> -                    yadif->filter_edges(dst, prev, cur, next, w,
> -                                         y + 1 < h ? refs : -refs,
> -                                         y ? -refs : refs,
> -                                         parity ^ tff, mode, l_edge_pix);
> -                } else {
> -                    yadif->filter_line(dst, prev, cur, next + l_edge, w,
> -                                       y + 1 < h ? refs : -refs,
> -                                       y ? -refs : refs,
> -                                       parity ^ tff, mode);
> -                }
> +                yadif->filter_line(dst + pix_3, prev + pix_3, cur + pix_3,
> next + pix_3, w - 6,
> +                                    y + 1 < h ? refs : -refs,
> +                                    y ? -refs : refs,
> +                                    parity ^ tff, mode);
> +                yadif->filter_edges(dst, prev, cur, next, w,
> +                                    y + 1 < h ? refs : -refs,
> +                                    y ? -refs : refs,
> +                                    parity ^ tff, mode);
>              } else {
>                  memcpy(&dstpic->data[i][y * dstpic->linesize[i]],
>                         &yadif->cur->data[i][y * refs], w * df);
> diff --git a/libavfilter/x86/vf_yadif_init.c
> b/libavfilter/x86/vf_yadif_init.c
> index 2873744..8d5e768 100644
> --- a/libavfilter/x86/vf_yadif_init.c
> +++ b/libavfilter/x86/vf_yadif_init.c
> @@ -42,18 +42,12 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
>
>  #if HAVE_YASM
>  #if ARCH_X86_32
> -    if (EXTERNAL_MMXEXT(cpu_flags)) {
> +    if (EXTERNAL_MMXEXT(cpu_flags))
>          yadif->filter_line = ff_yadif_filter_line_mmxext;
> -        yadif->req_align   = 8;
> -    }
>  #endif /* ARCH_X86_32 */
> -    if (EXTERNAL_SSE2(cpu_flags)) {
> +    if (EXTERNAL_SSE2(cpu_flags))
>          yadif->filter_line = ff_yadif_filter_line_sse2;
> -        yadif->req_align   = 16;
> -    }
> -    if (EXTERNAL_SSSE3(cpu_flags)) {
> +    if (EXTERNAL_SSSE3(cpu_flags))
>          yadif->filter_line = ff_yadif_filter_line_ssse3;
> -        yadif->req_align   = 16;
> -    }
>  #endif /* HAVE_YASM */
>  }
> diff --git a/libavfilter/yadif.h b/libavfilter/yadif.h
> index 50fc856..2c3f125 100644
> --- a/libavfilter/yadif.h
> +++ b/libavfilter/yadif.h
> @@ -57,13 +57,11 @@ typedef struct YADIFContext {
>      /**
>       * Required alignment for filter_line
>       */
> -    int req_align;
>      void (*filter_line)(void *dst,
>                          void *prev, void *cur, void *next,
>                          int w, int prefs, int mrefs, int parity, int
> mode);
>      void (*filter_edges)(void *dst, void *prev, void *cur, void *next,
> -                         int w, int prefs, int mrefs, int parity, int
> mode,
> -                         int l_edge);
> +                         int w, int prefs, int mrefs, int parity, int
> mode);
>
>      const AVPixFmtDescriptor *csp;
>      int eof;
> --
> 1.7.9
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>


More information about the ffmpeg-devel mailing list