[FFmpeg-devel] [PATCH] lavfi/hqdn3d/x86: use full register for lowpass.
Loren Merritt
lorenm at u.washington.edu
Fri Sep 21 05:45:00 CEST 2012
On Fri, 21 Sep 2012, Michael Niedermayer wrote:
> On Wed, Sep 19, 2012 at 05:08:10PM +0200, Nicolas George wrote:
> > LOWPASS starts using the full register for prevsample and cursample,
> > but currently only returns a valid 32-bits register; later calls will
> > reuse the value as full register, leading to possibly incorrect
> > high-order bits.
> >
> > Fix trac ticket #1752.
> >
> > Signed-off-by: Nicolas George <nicolas.george at normalesup.org>
> > ---
> > libavfilter/x86/hqdn3d.asm | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> >
> > I am rather out of my league here. Using gdb, I observed this for the second
> > line of the offending sample:
> >
> > "LOWPASS t1, pixelant, spatial" becomes:
> >
> > sub %r10,%rbx rbx = 0xfffffffffffffff9 = -7
> > sar $0x4,%rbx rbx = 0xffffffffffffffff = -1
> > movswl (%r9,%rbx,2),%ebx rbx = 0xfffffff8 = (uint32_t)-8
> > add %r10d,%ebx rbx = 0xffffffff
> >
> > t1 is r10, pixelant is rbx.
> >
> > Then, a few lines below "LOWPASS t0, t1, temporal" becomes:
> >
> > sub %rbx,%r11 r11 = 0xffffffff00000001
> > sar $0x4,%r11 r11 = 0xfffffffff0000000
> > movswl (%rax,%r11,2),%r11d SEGV
> > add %ebx,%r11d
> >
> > The first subtraction on rbx seems to result in an incorrect sign extension,
> > leading to a segfault when it is used as an index in the "temporal" array.
> >
> >
> > diff --git a/libavfilter/x86/hqdn3d.asm b/libavfilter/x86/hqdn3d.asm
> > index 88b9b0d..324642d 100644
> > --- a/libavfilter/x86/hqdn3d.asm
> > +++ b/libavfilter/x86/hqdn3d.asm
> > @@ -27,8 +27,8 @@ SECTION .text
> > %if lut_bits != 8
> > sar %1q, 8-lut_bits
> > %endif
> > - movsx %1d, word [%3q+%1q*2]
> > - add %1d, %2d
> > + movsx %1q, word [%3q+%1q*2]
> > + add %1q, %2q
> > %endmacro
This value should never be negative.
Different fix that more closely tracks the fact that pixels are unsigned:
---
libavfilter/vf_hqdn3d.c | 7 +++----
libavfilter/x86/hqdn3d.asm | 2 +-
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/libavfilter/vf_hqdn3d.c b/libavfilter/vf_hqdn3d.c
index 71b670d..e3e45f2 100644
--- a/libavfilter/vf_hqdn3d.c
+++ b/libavfilter/vf_hqdn3d.c
@@ -50,10 +50,9 @@ void ff_hqdn3d_row_10_x86(uint8_t *src, uint8_t *dst, uint16_t *line_ant, uint16
void ff_hqdn3d_row_16_x86(uint8_t *src, uint8_t *dst, uint16_t *line_ant, uint16_t *frame_ant, ptrdiff_t w, int16_t *spatial, int16_t *temporal);
#define LUT_BITS (depth==16 ? 8 : 4)
-#define RIGHTSHIFT(a,b) (((a)+(((1<<(b))-1)>>1))>>(b))
-#define LOAD(x) ((depth==8 ? src[x] : AV_RN16A(src+(x)*2)) << (16-depth))
-#define STORE(x,val) (depth==8 ? dst[x] = RIGHTSHIFT(val, 16-depth)\
- : AV_WN16A(dst+(x)*2, RIGHTSHIFT(val, 16-depth)))
+#define LOAD(x) (((depth==8 ? src[x] : AV_RN16A(src+(x)*2)) << (16-depth)) + (((1<<(16-depth))-1)>>1))
+#define STORE(x,val) (depth==8 ? dst[x] = (val) >> (16-depth)\
+ : AV_WN16A(dst+(x)*2, (val) >> (16-depth)))
av_always_inline
static uint32_t lowpass(int prev, int cur, int16_t *coef, int depth)
diff --git a/libavfilter/x86/hqdn3d.asm b/libavfilter/x86/hqdn3d.asm
index 7254194..8554093 100644
--- a/libavfilter/x86/hqdn3d.asm
+++ b/libavfilter/x86/hqdn3d.asm
@@ -39,6 +39,7 @@ SECTION .text
%endif
%if %3 != 16
shl %1, 16-%3
+ add %1, (1<<(15-%3))-1
%endif
%endmacro
@@ -86,7 +87,6 @@ ALIGN 16
mov [frameantq+xq*2], t0w
movifnidn dstq, dstmp
%if %1 != 16
- add t0d, (1<<(15-%1))-1
shr t0d, 16-%1 ; could eliminate this by storing from t0h, but only with some contraints on register allocation
%endif
%if %1 == 8
--
1.7.4.1
More information about the ffmpeg-devel
mailing list