[FFmpeg-devel] [patch] libpostproc: mmx code uses stack below %esp
Michael Niedermayer
michaelni
Sat Jan 30 13:48:02 CET 2010
On Fri, Jan 29, 2010 at 10:24:11PM +0300, Yuriy Kaminskiy wrote:
> Hello!
> While trying to catch unrelated bug (that finally was not bug, but
> misconfiguration [-lavdopts fast was too fragile for broken mpeg-2 stream]),
> I've run mplayer under valgrind, and got bunch of warnings:
> ==32414==
> ==32414== Invalid write of size 8
> ==32414== at 0x874CF44: postProcess_MMX2 (in /path/to/mplayer)
> ==32414== Address 0xbeffa9d0 is just below the stack ptr. To suppress, use:
> --workaround-gcc296-bugs=yes
> I, of course, don't use gcc-2.96 ;-)
> I've looked into libpostproc/postprocess_template.c, and, indeed, it uses memory
> below %esp:
> === cut ===
> static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
> {
> [...]
> __asm__ volatile(
> "pxor %%mm7, %%mm7 \n\t"
> "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte
> vars
> "and "ALIGN_MASK", %%"REG_c" \n\t" // align
> ...
> }
> [...]
> static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
> [...same...]
> static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int
> stride, PPContext *c){
> [...same...]
> === cut ===
> Not sure if this *must* be fixed, but it feels unsafe, so...
> Patch attached; doVertDefFilter and do_a_deblock changes should not affect
> speed, not sure about dering one.
please put a START/STOP TIMER around things to make sure
> postprocess_template.c | 61 ++++++++++++++++++++++++-------------------------
> 1 file changed, 30 insertions(+), 31 deletions(-)
> ebb4e02086f03dd082ee8025fa50eb34bb456d0a postproc-invalid-stack-3.patch
> Index: MPlayer-20100125+lavc-mt/libpostproc/postprocess_template.c
> ===================================================================
> --- MPlayer-20100125+lavc-mt.orig/libpostproc/postprocess_template.c 2010-01-23 17:23:38.000000000 +0300
> +++ MPlayer-20100125+lavc-mt/libpostproc/postprocess_template.c 2010-01-26 17:58:37.000000000 +0300
> @@ -767,10 +767,10 @@ static inline void RENAME(doVertDefFilte
> */
> #elif HAVE_MMX
> src+= stride*4;
> + {
> + DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
tabs are forbidden in our svn
> __asm__ volatile(
> "pxor %%mm7, %%mm7 \n\t"
> - "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
> - "and "ALIGN_MASK", %%"REG_c" \n\t" // align
> // 0 1 2 3 4 5 6 7
> // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1
> // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1
> @@ -812,8 +812,8 @@ static inline void RENAME(doVertDefFilte
> "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3
> "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
> "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
> - "movq %%mm0, (%%"REG_c") \n\t" // 2L0 - 5L1 + 5L2 - 2L3
> - "movq %%mm1, 8(%%"REG_c") \n\t" // 2H0 - 5H1 + 5H2 - 2H3
> + "movq %%mm0, %3 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
> + "movq %%mm1, 8+%3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
>
> "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
> "movq %%mm0, %%mm1 \n\t"
> @@ -822,8 +822,8 @@ static inline void RENAME(doVertDefFilte
>
> "psubw %%mm0, %%mm2 \n\t" // L3 - L4
> "psubw %%mm1, %%mm3 \n\t" // H3 - H4
> - "movq %%mm2, 16(%%"REG_c") \n\t" // L3 - L4
> - "movq %%mm3, 24(%%"REG_c") \n\t" // H3 - H4
> + "movq %%mm2, 16+%3 \n\t" // L3 - L4
> + "movq %%mm3, 24+%3 \n\t" // H3 - H4
> "paddw %%mm4, %%mm4 \n\t" // 2L2
> "paddw %%mm5, %%mm5 \n\t" // 2H2
> "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4
> @@ -871,8 +871,8 @@ static inline void RENAME(doVertDefFilte
> "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
> "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
>
> - "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
> - "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
> + "movq %3, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
> + "movq 8+%3, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
i have a bad feeling about this, what makes you belive this will work
better than the last time such syntax was tried
also my gcc replaces "o" with 4+%0 happily by 4+(%edx)
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
The worst form of inequality is to try to make unequal things equal.
-- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100130/d1f70224/attachment.pgp>
More information about the ffmpeg-devel
mailing list