[FFmpeg-devel] [PATCH 04/11] x86: dcadsp: implement SSE lfe_dir
Loren Merritt
lorenm at u.washington.edu
Fri Feb 7 03:12:55 CET 2014
On Thu, 6 Feb 2014, Christophe Gisquet wrote:
> diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
> index 03593ce..4a682be 100644
> --- a/libavcodec/x86/dcadsp.asm
> +++ b/libavcodec/x86/dcadsp.asm
> @@ -88,3 +88,108 @@ INT8X8_FMUL_INT32 3
>
> INIT_XMM sse4
> INT8X8_FMUL_INT32 3
> +
> +; %1=v0/v1 %2=in1 %3=in2
> +%macro FIR_LOOP 2-3
> +.loop%1:
> +%define va m1
> +%define vb m2
> +%if %1
> +%define OFFSET 0
> +%else
> +%define OFFSET NUM_COEF*count
> +%endif
> +; for v0, incrementing and for v1, decrementing
> + mova va, [cf0q + OFFSET]
> + mova vb, [cf0q + OFFSET + 4*NUM_COEF]
> +%if %0 == 3
> + mova m4, [cf0q + OFFSET + mmsize]
> + mova SCALE, [cf0q + OFFSET + 4*NUM_COEF + mmsize]
> +%endif
> + mulps va, %2
> + mulps vb, %2
> +%if %0 == 3
> + mulps m4, %3
> + mulps SCALE, %3
> + addps va, m4
> + addps vb, SCALE
> +%endif
> + ; va = va1 va2 va3 va4
> + ; vb = vb1 vb2 vb3 vb4
> +%if %1
> +%define O1 vb
> +%define O2 va
> +%else
> +%define O1 va
> +%define O2 vb
> +%endif
Can this be simplified with
%if %1
SWAP va, vb
%endif
and no O1, O2 variables?
> + mova m4, O1
> + unpcklps O1, O2 ; va3 vb3 va4 vb4
> + unpckhps m4, O2 ; va1 vb1 va2 vb2
> + addps m4, O1 ; va1+3 vb1+3 va2+4 vb2+4
> + movhlps O2, m4 ; va1+3 vb1+3
> + addps O2, m4 ; va1..4 vb1..4
> +%if %1
> + movh [outq + count], O2
> + sub cf0q, 8*NUM_COEF
> +%else
> + movh [outq + count], O2
The movh store is identical in both branches; factor it out of the %if.
> +%endif
> + add count, 8
> + jl .loop%1
> +%endmacro
--Loren Merritt
More information about the ffmpeg-devel
mailing list