[FFmpeg-devel] [WIP] [PATCH 4/4] x86: dsputilenc: convert hf_noise*_mmx to yasm
Christophe Gisquet
christophe.gisquet at gmail.com
Mon Jun 2 16:53:22 CEST 2014
2014-06-02 15:38 GMT+02:00 Christophe Gisquet <christophe.gisquet at gmail.com>:
> but unless I'm
> mistaken, the solution I proposed earlier is much simpler.
See the attached patch, which applies on top of Timothy's.
> Also, don't
> hesitate to run objdump -d on the object file just to make sure you
> missed nothing.
Doing so helps to notice the issues with HF_NOISE_PART2.
--
Christophe
-------------- next part --------------
diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 3fdc006..84cb7b3 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -634,9 +634,9 @@ SUM_ABS_DCTELEM 6, 2
%endmacro
; %1-2 = m#
-%macro HF_NOISE_PART2 2
- psubw m0, m4
- psubw m2, m5
+%macro HF_NOISE_PART2 4
+ psubw m%1, m%3
+ psubw m%2, m%4
pxor m3, m3
pxor m1, m1
pcmpgtw m3, m%1
@@ -653,24 +653,20 @@ SUM_ABS_DCTELEM 6, 2
%macro HF_NOISE 1
cglobal hf_noise%1, 3,3,0, pix1, lsize, h
movsxdifnidn lsizeq, lsized
-%if %1 == 16
- push pix1q
- push hq
-%endif
sub hd, 2
pxor m7, m7
pxor m6, m6
HF_NOISE_PART1 %1, 0, 1, 2, 3
add pix1q, lsizeq
HF_NOISE_PART1 %1, 4, 1, 5, 3
- HF_NOISE_PART2 0, 2
+ HF_NOISE_PART2 0, 2, 4, 5
add pix1q, lsizeq
.loop:
HF_NOISE_PART1 %1, 0, 1, 2, 3
- HF_NOISE_PART2 4, 5
+ HF_NOISE_PART2 4, 5, 0, 2
add pix1q, lsizeq
HF_NOISE_PART1 %1, 4, 1, 5, 3
- HF_NOISE_PART2 0, 2
+ HF_NOISE_PART2 0, 2, 4, 5
add pix1q, lsizeq
sub hd, 2
jne .loop
@@ -682,18 +678,8 @@ cglobal hf_noise%1, 3,3,0, pix1, lsize, h
mova m0, m6
psrlq m6, 32
paddd m0, m6
-%if %1 == 16
- movd ebx, m0 ; ebx = result of hf_noise16;
- pop hq ; restore h and pix1
- pop pix1q
- ; lsize is unchanged (except movsxd, which hf_noise8 is going to do anyway)
- add pix1q, 8 ; pix1 = pix1 + 8;
- call hf_noise8 ; eax = hf_noise8_mmx(pix1, lsize, h);
- add eax, ebx ; eax = eax + ebx;
-%else
movd eax, m0 ; eax = result of hf_noise8;
-%endif
- RET ; return eax;
+ REP_RET ; return eax;
%endmacro
INIT_MMX mmx
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index f215347..e180486 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -77,8 +77,8 @@ static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
score1 = c->dsp.sse[0](c, pix1, pix2, line_size, h);
else
score1 = ff_sse16_mmx(c, pix1, pix2, line_size, h);
- score2 = ff_hf_noise16_mmx(pix1, line_size, h) -
- ff_hf_noise16_mmx(pix2, line_size, h);
+ score2 = ff_hf_noise16_mmx(pix1, line_size, h) + ff_hf_noise8_mmx(pix1+8, line_size, h)
+ - ff_hf_noise16_mmx(pix2, line_size, h) - ff_hf_noise8_mmx(pix2+8, line_size, h);
if (c)
return score1 + FFABS(score2) * c->avctx->nsse_weight;
More information about the ffmpeg-devel mailing list