[FFmpeg-devel] [PATCH] Higher bit-depth x86 SIMD assembly for yadif
James Darnley
james.darnley at gmail.com
Sun Jan 22 17:53:44 CET 2012
On 2012-01-19 22:44, Michael Niedermayer wrote:
> CC-ing to dark shikari & loren as they might want to review too?
likewise
>> Something else to think about. The source code clarity could be
>> greatly improved by using yasm and its preprocessor. I wonder how
>> much abstraction it would need to roll the source to all three
>> functions together and whether it would save source code size.
>
> if you want to convert it to yasm, that's fine; if not, it's fine too.
> whichever way you prefer
After all this tinkering with inline asm it sounds better than ever.
For a later time anyway.
>> + "paddd "MM"6, "MM"3 \n\t" /* d+diff */\
>> + PMAXSD(MM"2",MM"1",MM"7")\
>> + PMINSD(MM"3",MM"1",MM"7")\
>> + PACK(MM"1")\
>> +\
>> + :\
>> + :[tmpA] "r"(tmpA),\
>> + [prev] "r"(prev),\
>> + [cur] "r"(cur),\
>> + [next] "r"(next),\
>> + [prefs]"r"(prefs),\
>> + [mrefs]"r"(mrefs),\
>> + [mode] "g"(mode)\
>
> this should list the SIMD registers written to on the clobber list
> otherwise with SSE* there may be issues on win64 and in theory also
> elsewhere
>
>
>> + );\
>> + __asm__ volatile(MOVH" "MM"1, %0" :"=m"(*dst));\
>
> I guess it should be ok in reality, but it's not guaranteed that
> SIMD registers don't change between blocks
I've made a solution for these two points and pasted the relevant
changes below. Please give your comments, and when everyone is satisfied
I will update the patches and send them.
> diff --git a/libavfilter/x86/yadif_template_16bit.c b/libavfilter/x86/yadif_template_16bit.c
> index a3a7394..35fc085 100644
> --- a/libavfilter/x86/yadif_template_16bit.c
> +++ b/libavfilter/x86/yadif_template_16bit.c
> @@ -25,12 +25,14 @@
> #define MOVA "movdqa"
> #define MOVU "movdqu"
> #define MMSIZE 16
> + #define CLOBBER_LIST "%xmm0", "%xmm1", "%xmm2", "%xmm3","%xmm4", "%xmm5", "%xmm6", "%xmm7"
> #else
> #define MM "%%mm"
> #define MOVH "movd"
> #define MOVA "movq"
> #define MOVU "movq"
> #define MMSIZE 8
> + #define CLOBBER_LIST "%mm0", "%mm1", "%mm2", "%mm3","%mm4", "%mm5", "%mm6", "%mm7"
> #endif
>
> #define LOAD(mem,dst)\
> @@ -261,6 +263,7 @@ void RENAME(ff_yadif_filter_line_16bit)(uint8_t *dst,
> PMAXSD(MM"2",MM"1",MM"7")\
> PMINSD(MM"3",MM"1",MM"7")\
> PACK(MM"1")\
> + MOVH" "MM"1, (%[dst])"\
> \
> :\
> :[tmpA] "r"(tmpA),\
> @@ -269,9 +272,10 @@ void RENAME(ff_yadif_filter_line_16bit)(uint8_t *dst,
> [next] "r"(next),\
> [prefs]"r"(prefs),\
> [mrefs]"r"(mrefs),\
> - [mode] "g"(mode)\
> + [mode] "g"(mode),\
> + [dst] "r"(dst)\
> + :CLOBBER_LIST\
> );\
> - __asm__ volatile(MOVH" "MM"1, %0" :"=m"(*dst));\
> dst += MMSIZE/2;\
> prev+= MMSIZE/2;\
> cur += MMSIZE/2;\
> @@ -309,3 +313,4 @@ void RENAME(ff_yadif_filter_line_16bit)(uint8_t *dst,
> #undef CHECK1
> #undef CHECK2
> #undef FILTER
> +#undef CLOBBER_LIST
> diff --git a/libavfilter/x86/yadif_template_9_14bit.c b/libavfilter/x86/yadif_template_9_14bit.c
> index 8eeddaa..26b1e93 100644
> --- a/libavfilter/x86/yadif_template_9_14bit.c
> +++ b/libavfilter/x86/yadif_template_9_14bit.c
> @@ -25,12 +25,14 @@
> #define MOVA "movdqa"
> #define MOVU "movdqu"
> #define MMSIZE 16
> + #define CLOBBER_LIST "%xmm0", "%xmm1", "%xmm2", "%xmm3","%xmm4", "%xmm5", "%xmm6", "%xmm7"
> #else
> #define MM "%%mm"
> #define MOVH "movd"
> #define MOVA "movq"
> #define MOVU "movq"
> #define MMSIZE 8
> + #define CLOBBER_LIST "%mm0", "%mm1", "%mm2", "%mm3","%mm4", "%mm5", "%mm6", "%mm7"
> #endif
>
> #define LOAD(mem,dst)\
> @@ -233,6 +235,7 @@ void RENAME(ff_yadif_filter_line_9_14bit)(uint8_t *dst,
> "paddw "MM"6, "MM"3 \n\t" /* d+diff */\
> "pmaxsw "MM"2, "MM"1 \n\t"\
> "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
> + MOVU" "MM"1, (%[dst])"\
> \
> :\
> :[tmpA] "r"(tmpA),\
> @@ -241,9 +244,10 @@ void RENAME(ff_yadif_filter_line_9_14bit)(uint8_t *dst,
> [next] "r"(next),\
> [prefs]"r"((x86_reg)prefs),\
> [mrefs]"r"((x86_reg)mrefs),\
> - [mode] "g"(mode)\
> + [mode] "g"(mode),\
> + [dst] "r"(dst)\
> + :CLOBBER_LIST\
> );\
> - __asm__ volatile(MOVU" "MM"1, %0" :"=m"(*dst));\
> dst += MMSIZE-4;\
> prev+= MMSIZE-4;\
> cur += MMSIZE-4;\
> @@ -281,3 +285,4 @@ void RENAME(ff_yadif_filter_line_9_14bit)(uint8_t *dst,
> #undef CHECK1
> #undef CHECK2
> #undef FILTER
> +#undef CLOBBER_LIST
P.S. The same issues exist in the 8-bit code.
More information about the ffmpeg-devel
mailing list