[FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_vp3: Merge into hpeldsp

Wed Sep 6 12:37:10 EEST 2023

Andreas Rheinhardt:
> Once upon a time, 413abbe16465a7b49472ac110e42939e853e24a1
> added versions of some put_no_rnd_pixels functions for use
> in VP3 and Theora (with an explicit check so that they are
> only used for VP3 and Theora). When this was moved to hpeldsp
> (from dsputil) in 3ced55d51c2e65b37e50d500dff88bcd80e01b9c,
> the check was replaced by a check for the bitexact flag
> (and a CONFIG_VP3_DECODER compile-time check), so that
> these functions were now used for other codecs as well.
> 
> Later commit 1dfc3cf89d0eb026af28be46294b85d79499ffb5
> split off the "VP3-specific bits into a separate file",
> yet these bits were not really VP3-specific at all
> any more. (The error was repeated in commit
> 0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553.) The split
> has not been reverted so far, because doing so would have
> made future changes from Libav (where it originated)
> harder; yet Libav is no more, so the present commit
> effectively reverts 1dfc3cf89d0eb026af28be46294b85d79499ffb5.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
> ---
>  libavcodec/x86/Makefile           |  2 -
>  libavcodec/x86/hpeldsp.asm        | 77 ++++++++++++++++++++++++
>  libavcodec/x86/hpeldsp.h          |  4 --
>  libavcodec/x86/hpeldsp_init.c     | 14 +++--
>  libavcodec/x86/hpeldsp_vp3.asm    | 99 -------------------------------
>  libavcodec/x86/hpeldsp_vp3_init.c | 43 --------------
>  6 files changed, 86 insertions(+), 153 deletions(-)
>  delete mode 100644 libavcodec/x86/hpeldsp_vp3.asm
>  delete mode 100644 libavcodec/x86/hpeldsp_vp3_init.c
> 
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index 118daca333..b4cc5e0d08 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -75,7 +75,6 @@ OBJS-$(CONFIG_UTVIDEO_DECODER)         += x86/utvideodsp_init.o
>  OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
>  OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o
>  OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
> -OBJS-$(CONFIG_VP3_DECODER)             += x86/hpeldsp_vp3_init.o
>  OBJS-$(CONFIG_VP6_DECODER)             += x86/vp6dsp_init.o
>  OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o            \
>                                            x86/vp9dsp_init_10bpp.o      \
> @@ -192,7 +191,6 @@ X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER)  += x86/utvideodsp.o
>  X86ASM-OBJS-$(CONFIG_V210_ENCODER)     += x86/v210enc.o
>  X86ASM-OBJS-$(CONFIG_V210_DECODER)     += x86/v210.o
>  X86ASM-OBJS-$(CONFIG_VORBIS_DECODER)   += x86/vorbisdsp.o
> -X86ASM-OBJS-$(CONFIG_VP3_DECODER)      += x86/hpeldsp_vp3.o
>  X86ASM-OBJS-$(CONFIG_VP6_DECODER)      += x86/vp6dsp.o
>  X86ASM-OBJS-$(CONFIG_VP9_DECODER)      += x86/vp9intrapred.o            \
>                                            x86/vp9intrapred_16bpp.o      \
> diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
> index 7a2b7135d8..3bc278618c 100644
> --- a/libavcodec/x86/hpeldsp.asm
> +++ b/libavcodec/x86/hpeldsp.asm
> @@ -165,6 +165,47 @@ cglobal put_no_rnd_pixels8_x2, 4,5
>      RET
>  
>  
> +; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> +INIT_MMX mmxext
> +cglobal put_no_rnd_pixels8_x2_exact, 4,5
> +    lea          r4, [r2*3]
> +    pcmpeqb      m6, m6
> +.loop:
> +    mova         m0, [r1]
> +    mova         m2, [r1+r2]
> +    mova         m1, [r1+1]
> +    mova         m3, [r1+r2+1]
> +    pxor         m0, m6
> +    pxor         m2, m6
> +    pxor         m1, m6
> +    pxor         m3, m6
> +    PAVGB        m0, m1
> +    PAVGB        m2, m3
> +    pxor         m0, m6
> +    pxor         m2, m6
> +    mova       [r0], m0
> +    mova    [r0+r2], m2
> +    mova         m0, [r1+r2*2]
> +    mova         m1, [r1+r2*2+1]
> +    mova         m2, [r1+r4]
> +    mova         m3, [r1+r4+1]
> +    pxor         m0, m6
> +    pxor         m1, m6
> +    pxor         m2, m6
> +    pxor         m3, m6
> +    PAVGB        m0, m1
> +    PAVGB        m2, m3
> +    pxor         m0, m6
> +    pxor         m2, m6
> +    mova  [r0+r2*2], m0
> +    mova    [r0+r4], m2
> +    lea          r1, [r1+r2*4]
> +    lea          r0, [r0+r2*4]
> +    sub         r3d, 4
> +    jg .loop
> +    RET
> +
> +
>  ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
>  %macro PUT_PIXELS8_Y2 0
>  %if cpuflag(sse2)
> @@ -235,6 +276,42 @@ cglobal put_no_rnd_pixels8_y2, 4,5
>      RET
>  
>  
> +; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> +INIT_MMX mmxext
> +cglobal put_no_rnd_pixels8_y2_exact, 4,5
> +    lea          r4, [r2*3]
> +    mova         m0, [r1]
> +    pcmpeqb      m6, m6
> +    add          r1, r2
> +    pxor         m0, m6
> +.loop:
> +    mova         m1, [r1]
> +    mova         m2, [r1+r2]
> +    pxor         m1, m6
> +    pxor         m2, m6
> +    PAVGB        m0, m1
> +    PAVGB        m1, m2
> +    pxor         m0, m6
> +    pxor         m1, m6
> +    mova       [r0], m0
> +    mova    [r0+r2], m1
> +    mova         m1, [r1+r2*2]
> +    mova         m0, [r1+r4]
> +    pxor         m1, m6
> +    pxor         m0, m6
> +    PAVGB        m2, m1
> +    PAVGB        m1, m0
> +    pxor         m2, m6
> +    pxor         m1, m6
> +    mova  [r0+r2*2], m2
> +    mova    [r0+r4], m1
> +    lea          r1, [r1+r2*4]
> +    lea          r0, [r0+r2*4]
> +    sub         r3d, 4
> +    jg .loop
> +    RET
> +
> +
>  ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
>  %macro AVG_PIXELS8_X2 0
>  %if cpuflag(sse2)
> diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
> index fd740da72e..ac7e625fda 100644
> --- a/libavcodec/x86/hpeldsp.h
> +++ b/libavcodec/x86/hpeldsp.h
> @@ -22,8 +22,6 @@
>  #include <stddef.h>
>  #include <stdint.h>
>  
> -#include "libavcodec/hpeldsp.h"
> -
>  void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
>                             ptrdiff_t line_size, int h);
>  
> @@ -50,6 +48,4 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
>  void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
>                                 ptrdiff_t line_size, int h);
>  
> -void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
> -
>  #endif /* AVCODEC_X86_HPELDSP_H */
> diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
> index 09c48c341e..f08c66f5c8 100644
> --- a/libavcodec/x86/hpeldsp_init.c
> +++ b/libavcodec/x86/hpeldsp_init.c
> @@ -22,8 +22,6 @@
>   * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
>   */
>  
> -#include "config_components.h"
> -
>  #include "libavutil/attributes.h"
>  #include "libavutil/cpu.h"
>  #include "libavutil/x86/cpu.h"
> @@ -47,10 +45,16 @@ void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
>                               ptrdiff_t line_size, int h);
>  void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
>                                       ptrdiff_t line_size, int h);
> +void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
> +                                           const uint8_t *pixels,
> +                                           ptrdiff_t line_size, int h);
>  void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
>                                ptrdiff_t line_size, int h);
>  void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
>                                       ptrdiff_t line_size, int h);
> +void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
> +                                           const uint8_t *pixels,
> +                                           ptrdiff_t line_size, int h);
>  void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
>                                ptrdiff_t line_size, int h);
>  void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
> @@ -183,6 +187,9 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
>      c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
>      c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
>  
> +    c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
> +    c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
> +
>      if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
>          c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
>          c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
> @@ -235,7 +242,4 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
>  
>      if (EXTERNAL_SSSE3(cpu_flags))
>          hpeldsp_init_ssse3(c, flags);
> -
> -    if (CONFIG_VP3_DECODER)
> -        ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
>  }
> diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
> deleted file mode 100644
> index e580133e45..0000000000
> --- a/libavcodec/x86/hpeldsp_vp3.asm
> +++ /dev/null
> @@ -1,99 +0,0 @@
> -;******************************************************************************
> -;* SIMD-optimized halfpel functions for VP3
> -;*
> -;* This file is part of FFmpeg.
> -;*
> -;* FFmpeg is free software; you can redistribute it and/or
> -;* modify it under the terms of the GNU Lesser General Public
> -;* License as published by the Free Software Foundation; either
> -;* version 2.1 of the License, or (at your option) any later version.
> -;*
> -;* FFmpeg is distributed in the hope that it will be useful,
> -;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -;* Lesser General Public License for more details.
> -;*
> -;* You should have received a copy of the GNU Lesser General Public
> -;* License along with FFmpeg; if not, write to the Free Software
> -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> -;******************************************************************************
> -
> -%include "libavutil/x86/x86util.asm"
> -
> -SECTION .text
> -
> -; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> -INIT_MMX mmxext
> -cglobal put_no_rnd_pixels8_x2_exact, 4,5
> -    lea          r4, [r2*3]
> -    pcmpeqb      m6, m6
> -.loop:
> -    mova         m0, [r1]
> -    mova         m2, [r1+r2]
> -    mova         m1, [r1+1]
> -    mova         m3, [r1+r2+1]
> -    pxor         m0, m6
> -    pxor         m2, m6
> -    pxor         m1, m6
> -    pxor         m3, m6
> -    PAVGB        m0, m1
> -    PAVGB        m2, m3
> -    pxor         m0, m6
> -    pxor         m2, m6
> -    mova       [r0], m0
> -    mova    [r0+r2], m2
> -    mova         m0, [r1+r2*2]
> -    mova         m1, [r1+r2*2+1]
> -    mova         m2, [r1+r4]
> -    mova         m3, [r1+r4+1]
> -    pxor         m0, m6
> -    pxor         m1, m6
> -    pxor         m2, m6
> -    pxor         m3, m6
> -    PAVGB        m0, m1
> -    PAVGB        m2, m3
> -    pxor         m0, m6
> -    pxor         m2, m6
> -    mova  [r0+r2*2], m0
> -    mova    [r0+r4], m2
> -    lea          r1, [r1+r2*4]
> -    lea          r0, [r0+r2*4]
> -    sub         r3d, 4
> -    jg .loop
> -    RET
> -
> -
> -; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> -INIT_MMX mmxext
> -cglobal put_no_rnd_pixels8_y2_exact, 4,5
> -    lea          r4, [r2*3]
> -    mova         m0, [r1]
> -    pcmpeqb      m6, m6
> -    add          r1, r2
> -    pxor         m0, m6
> -.loop:
> -    mova         m1, [r1]
> -    mova         m2, [r1+r2]
> -    pxor         m1, m6
> -    pxor         m2, m6
> -    PAVGB        m0, m1
> -    PAVGB        m1, m2
> -    pxor         m0, m6
> -    pxor         m1, m6
> -    mova       [r0], m0
> -    mova    [r0+r2], m1
> -    mova         m1, [r1+r2*2]
> -    mova         m0, [r1+r4]
> -    pxor         m1, m6
> -    pxor         m0, m6
> -    PAVGB        m2, m1
> -    PAVGB        m1, m0
> -    pxor         m2, m6
> -    pxor         m1, m6
> -    mova  [r0+r2*2], m2
> -    mova    [r0+r4], m1
> -    lea          r1, [r1+r2*4]
> -    lea          r0, [r0+r2*4]
> -    sub         r3d, 4
> -    jg .loop
> -    RET
> diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
> deleted file mode 100644
> index 1dbd1ba6f9..0000000000
> --- a/libavcodec/x86/hpeldsp_vp3_init.c
> +++ /dev/null
> @@ -1,43 +0,0 @@
> -/*
> - * This file is part of FFmpeg.
> - *
> - * FFmpeg is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU Lesser General Public
> - * License as published by the Free Software Foundation; either
> - * version 2.1 of the License, or (at your option) any later version.
> - *
> - * FFmpeg is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * Lesser General Public License for more details.
> - *
> - * You should have received a copy of the GNU Lesser General Public
> - * License along with FFmpeg; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - */
> -
> -#include "libavutil/attributes.h"
> -#include "libavutil/cpu.h"
> -#include "libavutil/x86/cpu.h"
> -
> -#include "libavcodec/avcodec.h"
> -#include "libavcodec/hpeldsp.h"
> -
> -#include "hpeldsp.h"
> -
> -void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
> -                                           const uint8_t *pixels,
> -                                           ptrdiff_t line_size, int h);
> -void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
> -                                           const uint8_t *pixels,
> -                                           ptrdiff_t line_size, int h);
> -
> -av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
> -{
> -    if (EXTERNAL_MMXEXT(cpu_flags)) {
> -        if (flags & AV_CODEC_FLAG_BITEXACT) {
> -            c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
> -            c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
> -        }
> -    }
> -}

Will apply this tomorrow unless there are objections.

- Andreas