[FFmpeg-devel] [PATCH 4/7] cosmetic alignment
James Darnley
james.darnley at gmail.com
Fri Feb 14 12:40:09 CET 2014
---
libavcodec/x86/flac_dsp_gpl.asm | 74 +++++++++++++++++++-------------------
1 files changed, 37 insertions(+), 37 deletions(-)
diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index 5028e49..ae73d0c 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -37,56 +37,56 @@ INIT_XMM sse4
; to be copied is 32 so we might as well just unroll the loop and do all 32.
%assign iter 0
%rep 32/(mmsize/4)
- movu m0, [smpq+iter]
- movu [resq+iter], m0
+ movu m0, [smpq+iter]
+ movu [resq+iter], m0
%assign iter iter+mmsize
%endrep
-lea resq, [resq+orderq*4]
-lea smpq, [smpq+orderq*4]
-lea coefsq, [coefsq+orderq*4]
-sub lenmp, orderq
-movd m3, shiftmp
-neg orderq
+lea resq, [resq+orderq*4]
+lea smpq, [smpq+orderq*4]
+lea coefsq, [coefsq+orderq*4]
+sub lenmp, orderq
+movd m3, shiftmp
+neg orderq
.looplen:
- pxor m0, m0
- pxor m4, m4
- pxor m6, m6
- mov posj, orderq
- xor negj, negj
+ pxor m0, m0
+ pxor m4, m4
+ pxor m6, m6
+ mov posj, orderq
+ xor negj, negj
.looporder:
- movd m2, [coefsq+posj*4] ; c = coefs[j]
+ movd m2, [coefsq+posj*4] ; c = coefs[j]
SPLATD m2
- movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
- movu m5, [smpq+negj*4-4+mmsize]
- movu m7, [smpq+negj*4-4+mmsize*2]
- pmulld m1, m2
- pmulld m5, m2
- pmulld m7, m2
- paddd m0, m1 ; p += c * s
- paddd m4, m5
- paddd m6, m7
+ movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
+ movu m5, [smpq+negj*4-4+mmsize]
+ movu m7, [smpq+negj*4-4+mmsize*2]
+ pmulld m1, m2
+ pmulld m5, m2
+ pmulld m7, m2
+ paddd m0, m1 ; p += c * s
+ paddd m4, m5
+ paddd m6, m7
- dec negj
- inc posj
+ dec negj
+ inc posj
jnz .looporder
- psrad m0, m3 ; p >>= shift
- psrad m4, m3
- psrad m6, m3
- movu m1, [smpq]
- movu m5, [smpq+mmsize]
- movu m7, [smpq+mmsize*2]
- psubd m1, m0 ; smp[i] - p
- psubd m5, m4
- psubd m7, m6
- movu [resq], m1 ; res[i] = smp[i] - (p >> shift)
+ psrad m0, m3 ; p >>= shift
+ psrad m4, m3
+ psrad m6, m3
+ movu m1, [smpq]
+ movu m5, [smpq+mmsize]
+ movu m7, [smpq+mmsize*2]
+ psubd m1, m0 ; smp[i] - p
+ psubd m5, m4
+ psubd m7, m6
+ movu [resq], m1 ; res[i] = smp[i] - (p >> shift)
movu [resq+mmsize], m5
movu [resq+mmsize*2], m7
- add resq, 3*mmsize
- add smpq, 3*mmsize
+ add resq, 3*mmsize
+ add smpq, 3*mmsize
sub lenmp, (3*mmsize)/4
jg .looplen
RET
--
1.7.9
More information about the ffmpeg-devel mailing list