summary | refs | log | tree | commit | diff | stats
path: root/crypto/md5
diff options
context:
space:
mode:
author Andy Polyakov <appro@openssl.org> 2012-10-14 16:51:27 +0000
committer Andy Polyakov <appro@openssl.org> 2012-10-14 16:51:27 +0000
commit d17b59e49f32ec47be8e2418b439c239a5cd9618 (patch)
tree ce32950fb4099f6b3754e659f1167ff20a3e16c5 /crypto/md5
parent aea4126e4e99e7d7ae10374f30fbb566be6f07ff (diff)
md5-sparcv9.pl: avoid %asi modifications, improve short input performance
by 30-20%.
Diffstat (limited to 'crypto/md5')
-rw-r--r-- crypto/md5/asm/md5-sparcv9.pl 27
1 files changed, 14 insertions, 13 deletions
diff --git a/crypto/md5/asm/md5-sparcv9.pl b/crypto/md5/asm/md5-sparcv9.pl
index 062a0738c4..ef16666cc3 100644
--- a/crypto/md5/asm/md5-sparcv9.pl
+++ b/crypto/md5/asm/md5-sparcv9.pl
@@ -12,7 +12,7 @@
# MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than
# code generated by Sun C 5.2.
-# SPARC T4 MD5 hardware achieves 3.24 cycles per byte, which is 2.1x
+# SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x
# faster than software. Multi-process benchmark saturates at 12x
# single-process result on 8-core processor, or ~11GBps per 2.85GHz
# socket.
@@ -221,15 +221,15 @@ md5_block_asm_data_order:
be .Lsoftware
nop
- rd %asi, %g5
- wr %g0, 0x88, %asi ! ASI_PRIMARY_LITTLE
-
- lda [%o0 + 0x00] %asi, %f0 ! load context
- lda [%o0 + 0x04] %asi, %f1
+ mov 4, %g1
andcc %o1, 0x7, %g0
- lda [%o0 + 0x08] %asi, %f2
+ lda [%o0 + %g0]0x88, %f0 ! load context
+ lda [%o0 + %g1]0x88, %f1
+ add %o0, 8, %o0
+ lda [%o0 + %g0]0x88, %f2
+ lda [%o0 + %g1]0x88, %f3
bne,pn %icc, .Lhwunaligned
- lda [%o0 + 0x0c] %asi, %f3
+ sub %o0, 8, %o0
.Lhw_loop:
ldd [%o1 + 0x00], %f8
@@ -250,12 +250,13 @@ md5_block_asm_data_order:
nop
.Lhwfinish:
- sta %f0, [%o0 + 0x00] %asi ! store context
- sta %f1, [%o0 + 0x04] %asi
- sta %f2, [%o0 + 0x08] %asi
- sta %f3, [%o0 + 0x0c] %asi
+ sta %f0, [%o0 + %g0]0x88 ! store context
+ sta %f1, [%o0 + %g1]0x88
+ add %o0, 8, %o0
+ sta %f2, [%o0 + %g0]0x88
+ sta %f3, [%o0 + %g1]0x88
retl
- wr %g5, 0x0, %asi ! restore %asi
+ nop
.align 8
.Lhwunaligned: