[FFmpeg-devel] [PATCH] Make swscale x86 ASM depend less on registers been preserved across blocks
Michael Niedermayer
michaelni at gmx.at
Thu Sep 18 00:47:33 CEST 2014
On Wed, Sep 17, 2014 at 10:55:52PM +0200, Vitor Sessak wrote:
> On Wed, Sep 17, 2014 at 10:31 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> > On Wed, Sep 17, 2014 at 09:21:08PM +0200, Vitor Sessak wrote:
> >> Hi!
> >>
> >> I'm not sure we can count on registers been preserved across ASM
> >> blocks. Moreover, this causes problems with utils that instrument the
> >> code by inserting function calls between lines (like ThreadSanitizer).
> >>
> >> The attached patch fix one instance of this problem in swscale.
> >>
> >> -Vitor
> >
> >> swscale.c | 82 +++++++++++++++++++++++++++++++-------------------------------
> >> 1 file changed, 42 insertions(+), 40 deletions(-)
> >> 7fe9f7a0c1a7c1ed843b891f5f810fbd94a2e03f 0001-swscale-x86-do-not-expect-registers-to-be-preserved-.patch
> >> From f1c59628b2baeb9994bed8947c0a2c228610bf4f Mon Sep 17 00:00:00 2001
> >> From: Vitor Sessak <vsessak at google.com>
> >> Date: Wed, 17 Sep 2014 21:10:16 +0200
> >> Subject: [PATCH] swscale/x86: do not expect registers to be preserved across
> >> inline ASM blocks
> >>
> >> ---
> >> libswscale/x86/swscale.c | 82 +++++++++++++++++++++++++-----------------------
> >> 1 file changed, 42 insertions(+), 40 deletions(-)
> >>
> >> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> >> index c4c0e28..38157c8 100644
> >> --- a/libswscale/x86/swscale.c
> >> +++ b/libswscale/x86/swscale.c
> >> @@ -205,36 +205,19 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
> >> yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
> >> return;
> >> }
> >> - if (offset) {
> >> - __asm__ volatile("movq (%0), %%xmm3\n\t"
> >> - "movdqa %%xmm3, %%xmm4\n\t"
> >> - "psrlq $24, %%xmm3\n\t"
> >> - "psllq $40, %%xmm4\n\t"
> >> - "por %%xmm4, %%xmm3\n\t"
> >> - :: "r"(dither)
> >> - );
> >> - } else {
> >> - __asm__ volatile("movq (%0), %%xmm3\n\t"
> >> - :: "r"(dither)
> >> - );
> >> - }
> >
> >> - filterSize--;
> >
> > not sure i missed it but it seems this is lost in teh new code
>
> Good catch. Fixed!
>
> >> - __asm__ volatile(
> >> - "pxor %%xmm0, %%xmm0\n\t"
> >> - "punpcklbw %%xmm0, %%xmm3\n\t"
> >> - "movd %0, %%xmm1\n\t"
> >> - "punpcklwd %%xmm1, %%xmm1\n\t"
> >> - "punpckldq %%xmm1, %%xmm1\n\t"
> >> - "punpcklqdq %%xmm1, %%xmm1\n\t"
> >> - "psllw $3, %%xmm1\n\t"
> >> - "paddw %%xmm1, %%xmm3\n\t"
> >> - "psraw $4, %%xmm3\n\t"
> >> - ::"m"(filterSize)
> >> - );
> >> - __asm__ volatile(
> >> - "movdqa %%xmm3, %%xmm4\n\t"
> >> - "movdqa %%xmm3, %%xmm7\n\t"
> >> - "movl %3, %%ecx\n\t"
> >> +#define MAIN_FUNCTION \
> >> + "pxor %%xmm0, %%xmm0 \n\t" \
> >> + "punpcklbw %%xmm0, %%xmm3 \n\t" \
> >> + "movd %4, %%xmm1 \n\t" \
> >> + "punpcklwd %%xmm1, %%xmm1 \n\t" \
> >> + "punpckldq %%xmm1, %%xmm1 \n\t" \
> >> + "punpcklqdq %%xmm1, %%xmm1 \n\t" \
> >> + "psllw $3, %%xmm1 \n\t" \
> >> + "paddw %%xmm1, %%xmm3 \n\t" \
> >> + "psraw $4, %%xmm3 \n\t" \
> >> + "movdqa %%xmm3, %%xmm4 \n\t" \
> >> + "movdqa %%xmm3, %%xmm7 \n\t" \
> >> + "movl %3, %%ecx \n\t" \
> >> "mov %0, %%"REG_d" \n\t"\
> >> "mov (%%"REG_d"), %%"REG_S" \n\t"\
> >> ".p2align 4 \n\t" /* FIXME Unroll? */\
> >> @@ -252,20 +235,39 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
> >> " jnz 1b \n\t"\
> >> "psraw $3, %%xmm3 \n\t"\
> >> "psraw $3, %%xmm4 \n\t"\
> >> - "packuswb %%xmm4, %%xmm3 \n\t"
> >> - "movntdq %%xmm3, (%1, %%"REG_c")\n\t"
> >> + "packuswb %%xmm4, %%xmm3 \n\t"\
> >> + "movntdq %%xmm3, (%1, %%"REG_c")\n\t"\
> >> "add $16, %%"REG_c" \n\t"\
> >> "cmp %2, %%"REG_c" \n\t"\
> >> - "movdqa %%xmm7, %%xmm3\n\t"
> >> - "movdqa %%xmm7, %%xmm4\n\t"
> >> + "movdqa %%xmm7, %%xmm3 \n\t" \
> >> + "movdqa %%xmm7, %%xmm4 \n\t" \
> >> "mov %0, %%"REG_d" \n\t"\
> >> "mov (%%"REG_d"), %%"REG_S" \n\t"\
> >> - "jb 1b \n\t"\
> >> - :: "g" (filter),
> >> - "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
> >> - : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
> >> - "%"REG_d, "%"REG_S, "%"REG_c
> >> - );
> >> + "jb 1b \n\t"
> >> +
> >> + if (offset) {
> >> + __asm__ volatile(
> >> + "movq %5, %%xmm3 \n\t"
> >> + "movdqa %%xmm3, %%xmm4 \n\t"
> >> + "psrlq $24, %%xmm3 \n\t"
> >> + "psllq $40, %%xmm4 \n\t"
> >> + "por %%xmm4, %%xmm3 \n\t"
> >> + MAIN_FUNCTION
> >> + :: "g" (filter),
> >> + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
> >> + "m"(filterSize), "m"(((uint64_t *) dither)[0])
> >> + : "%"REG_d, "%"REG_S, "%"REG_c
> >> + );
> >
> > you lost the XMM_CLOBBERS()
>
> True. Restored it.
>
> >> + } else {
> >> + __asm__ volatile(
> >> + "movq %5, %%xmm3 \n\t"
> >> + MAIN_FUNCTION
> >> + :: "g" (filter),
> >> + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
> >> + "m"(filterSize), "m"(((uint64_t *) dither)[0])
> >> + : "%"REG_d, "%"REG_S, "%"REG_c
> >> + );
> >
> > tabs
>
> Why the hell the default emacs profile is so bad :-(
>
> New patch attached.
>
> -Vitor
> swscale.c | 83 ++++++++++++++++++++++++++++++++------------------------------
> 1 file changed, 44 insertions(+), 39 deletions(-)
> 89ceea8040113ac92b4b7420c1b5876275ee9c78 0001-swscale-x86-do-not-expect-registers-to-be-preserved-.patch
> From 19c13aa203f5d5970a125fbeb30c3e36d7ab9899 Mon Sep 17 00:00:00 2001
> From: Vitor Sessak <vsessak at google.com>
> Date: Wed, 17 Sep 2014 21:10:16 +0200
> Subject: [PATCH] swscale/x86: do not expect registers to be preserved across
> inline ASM blocks
applied, hopefully it wont break with old GCC versions, i tested a few
but not many
thanks
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Concerning the gods, I have no means of knowing whether they exist or not
or of what sort they may be, because of the obscurity of the subject, and
the brevity of human life -- Protagoras
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140918/5c03c9c6/attachment.asc>
More information about the ffmpeg-devel
mailing list