[FFmpeg-devel] [PATCH V4 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Michael Niedermayer
michael at niedermayer.cc
Fri Dec 27 13:38:29 EET 2019
On Thu, Dec 19, 2019 at 11:35:51AM +0800, Ting Fu wrote:
> Tested using this command:
> ./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
> -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null
>
> The fps increase from 151 to 389 on my local machine.
Thats nice but why is there such a difference from changing the way the code
is assembled ?
This should definitly be explained more detailedly in the commit message
>
> Signed-off-by: Ting Fu <ting.fu at intel.com>
> ---
> libswscale/x86/Makefile | 1 +
> libswscale/x86/swscale.c | 16 +-
> libswscale/x86/yuv2rgb.c | 81 +++---
> libswscale/x86/yuv2rgb_template.c | 441 ++++++------------------------
> libswscale/x86/yuv_2_rgb.asm | 270 ++++++++++++++++++
> 5 files changed, 395 insertions(+), 414 deletions(-)
> create mode 100644 libswscale/x86/yuv_2_rgb.asm
>
> diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
> index f317d5dd9b..831d5359aa 100644
> --- a/libswscale/x86/Makefile
> +++ b/libswscale/x86/Makefile
> @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
> x86/output.o \
> x86/scale.o \
> x86/rgb_2_rgb.o \
> + x86/yuv_2_rgb.o \
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index 0eed4f18d5..e9d474a1e8 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -29,6 +29,14 @@
> #include "libavutil/cpu.h"
> #include "libavutil/pixdesc.h"
>
> +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
> + 0x0103010301030103LL,
> + 0x0200020002000200LL,};
> +
> +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
> + 0x0602060206020602LL,
> + 0x0004000400040004LL,};
> +
> #if HAVE_INLINE_ASM
>
> #define DITHER1XBPP
> @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
> DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
> DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
>
> -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
> - 0x0103010301030103LL,
> - 0x0200020002000200LL,};
> -
> -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
> - 0x0602060206020602LL,
> - 0x0004000400040004LL,};
> -
> DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
> DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
> DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
> diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> index 5e2f77c20f..ed9b613cab 100644
> --- a/libswscale/x86/yuv2rgb.c
> +++ b/libswscale/x86/yuv2rgb.c
> @@ -37,7 +37,7 @@
> #include "libavutil/x86/cpu.h"
> #include "libavutil/cpu.h"
>
> -#if HAVE_INLINE_ASM
> +#if HAVE_X86ASM
>
> #define DITHER1XBPP // only for MMX
>
> @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
> DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
>
> //MMX versions
> -#if HAVE_MMX_INLINE && HAVE_6REGS
> -#undef RENAME
> +#if HAVE_MMX
> #undef COMPILE_TEMPLATE_MMXEXT
> #define COMPILE_TEMPLATE_MMXEXT 0
> -#define RENAME(a) a ## _mmx
> -#include "yuv2rgb_template.c"
> -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
> +#endif /* HAVE_MMX */
>
> // MMXEXT versions
> -#if HAVE_MMXEXT_INLINE && HAVE_6REGS
> -#undef RENAME
> +#if HAVE_MMXEXT
> #undef COMPILE_TEMPLATE_MMXEXT
> #define COMPILE_TEMPLATE_MMXEXT 1
> -#define RENAME(a) a ## _mmxext
> -#include "yuv2rgb_template.c"
> -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
> +#endif /* HAVE_MMXEXT */
>
> -#endif /* HAVE_INLINE_ASM */
> +#include "yuv2rgb_template.c"
>
> av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
> {
> -#if HAVE_MMX_INLINE && HAVE_6REGS
> int cpu_flags = av_get_cpu_flags();
>
> -#if HAVE_MMXEXT_INLINE
> - if (INLINE_MMXEXT(cpu_flags)) {
> - switch (c->dstFormat) {
> - case AV_PIX_FMT_RGB24:
> - return yuv420_rgb24_mmxext;
> - case AV_PIX_FMT_BGR24:
> - return yuv420_bgr24_mmxext;
> - }
> - }
> -#endif
> -
> - if (INLINE_MMX(cpu_flags)) {
> + if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
i would expect EXTERNAL_MMXEXT to imply EXTERNAL_MMX
[...]
> + case AV_PIX_FMT_RGB24:
> + return yuv420_rgb24;
[...]
> +static inline int yuv420_rgb24(SwsContext *c, const uint8_t *src[],
> + int srcStride[],
> + int srcSliceY, int srcSliceH,
> + uint8_t *dst[], int dstStride[])
> +{
This looks a bit odd, a inline function which only use is taking a pointer
to it and returning it
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Whats the most studid thing your enemy could do ? Blow himself up
Whats the most studid thing you could do ? Give up your rights and
freedom because your enemy blew himself up.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20191227/7724c40c/attachment.sig>
More information about the ffmpeg-devel
mailing list