diff options
author | Andy Polyakov <appro@openssl.org> | 2012-07-15 13:29:23 +0000 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2012-07-15 13:29:23 +0000 |
commit | ae007d4d09f2ed9940c6e581bab9290d78615506 (patch) | |
tree | 38506fe833687baee33c61d28803cb18a8ed2cfa /crypto/whrlpool | |
parent | 660164a9ed6b76e0dac440583a5efa0ab473c73e (diff) |
wp-mmx.pl: ~10% performance improvement.
Diffstat (limited to 'crypto/whrlpool')
-rw-r--r-- | crypto/whrlpool/asm/wp-mmx.pl | 42 |
1 file changed, 22 insertions, 20 deletions
```diff
diff --git a/crypto/whrlpool/asm/wp-mmx.pl b/crypto/whrlpool/asm/wp-mmx.pl
index cb2381c22b..c584e5b92b 100644
--- a/crypto/whrlpool/asm/wp-mmx.pl
+++ b/crypto/whrlpool/asm/wp-mmx.pl
@@ -118,34 +118,36 @@ $tbl="ebp";
 	&movq	(@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8));	# rc[r]
 	&mov	("eax",&DWP(0,"esp"));
 	&mov	("ebx",&DWP(4,"esp"));
+	&movz	("ecx",&LB("eax"));
+	&movz	("edx",&HB("eax"));
 for($i=0;$i<8;$i++) {
     my $func = ($i==0)? \&movq : \&pxor;
-	&movb	(&LB("ecx"),&LB("eax"));
-	&movb	(&LB("edx"),&HB("eax"));
+	&shr	("eax",16);
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("eax"));
 	&scale	("edi","edx");
-	&shr	("eax",16);
+	&movz	("edx",&HB("eax"));
 	&pxor	(@mm[0],&QWP(&row(0),$tbl,"esi",8));
 	&$func	(@mm[1],&QWP(&row(1),$tbl,"edi",8));
-	&movb	(&LB("ecx"),&LB("eax"));
-	&movb	(&LB("edx"),&HB("eax"));
 	&mov	("eax",&DWP(($i+1)*8,"esp"));
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("ebx"));
 	&scale	("edi","edx");
+	&movz	("edx",&HB("ebx"));
 	&$func	(@mm[2],&QWP(&row(2),$tbl,"esi",8));
 	&$func	(@mm[3],&QWP(&row(3),$tbl,"edi",8));
-	&movb	(&LB("ecx"),&LB("ebx"));
-	&movb	(&LB("edx"),&HB("ebx"));
+	&shr	("ebx",16);
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("ebx"));
 	&scale	("edi","edx");
-	&shr	("ebx",16);
+	&movz	("edx",&HB("ebx"));
 	&$func	(@mm[4],&QWP(&row(4),$tbl,"esi",8));
 	&$func	(@mm[5],&QWP(&row(5),$tbl,"edi",8));
-	&movb	(&LB("ecx"),&LB("ebx"));
-	&movb	(&LB("edx"),&HB("ebx"));
 	&mov	("ebx",&DWP(($i+1)*8+4,"esp"));
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("eax"));
 	&scale	("edi","edx");
+	&movz	("edx",&HB("eax"));
 	&$func	(@mm[6],&QWP(&row(6),$tbl,"esi",8));
 	&$func	(@mm[7],&QWP(&row(7),$tbl,"edi",8));
     push(@mm,shift(@mm));
@@ -154,32 +156,32 @@ for($i=0;$i<8;$i++) {
     for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); }	# K=L
 
     for($i=0;$i<8;$i++) {
-	&movb	(&LB("ecx"),&LB("eax"));
-	&movb	(&LB("edx"),&HB("eax"));
+	&shr	("eax",16);
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("eax"));
 	&scale	("edi","edx");
-	&shr	("eax",16);
+	&movz	("edx",&HB("eax"));
 	&pxor	(@mm[0],&QWP(&row(0),$tbl,"esi",8));
 	&pxor	(@mm[1],&QWP(&row(1),$tbl,"edi",8));
-	&movb	(&LB("ecx"),&LB("eax"));
-	&movb	(&LB("edx"),&HB("eax"));
 	&mov	("eax",&DWP(64+($i+1)*8,"esp"))	if ($i<7);
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("ebx"));
 	&scale	("edi","edx");
+	&movz	("edx",&HB("ebx"));
 	&pxor	(@mm[2],&QWP(&row(2),$tbl,"esi",8));
 	&pxor	(@mm[3],&QWP(&row(3),$tbl,"edi",8));
-	&movb	(&LB("ecx"),&LB("ebx"));
-	&movb	(&LB("edx"),&HB("ebx"));
+	&shr	("ebx",16);
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("ebx"));
 	&scale	("edi","edx");
-	&shr	("ebx",16);
+	&movz	("edx",&HB("ebx"));
 	&pxor	(@mm[4],&QWP(&row(4),$tbl,"esi",8));
 	&pxor	(@mm[5],&QWP(&row(5),$tbl,"edi",8));
-	&movb	(&LB("ecx"),&LB("ebx"));
-	&movb	(&LB("edx"),&HB("ebx"));
 	&mov	("ebx",&DWP(64+($i+1)*8+4,"esp"))	if ($i<7);
 	&scale	("esi","ecx");
+	&movz	("ecx",&LB("eax"));
 	&scale	("edi","edx");
+	&movz	("edx",&HB("eax"));
 	&pxor	(@mm[6],&QWP(&row(6),$tbl,"esi",8));
 	&pxor	(@mm[7],&QWP(&row(7),$tbl,"edi",8));
     push(@mm,shift(@mm));
```