summary | refs | log | tree | commit | diff | stats
path: root/crypto/bn/asm
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2011-10-17 17:39:59 +0000
committer: Andy Polyakov <appro@openssl.org> 2011-10-17 17:39:59 +0000
commit: 3f66f2040aeac30715347572cd2c798018e34a8d (patch)
tree: aa0610d13bc515e3506572ac7d1a8f9c5f34f973 /crypto/bn/asm
parent: 253489187432522e74a2138289ae19ffcd9ca086 (diff)
x86_64-mont.pl: minor optimization.
Diffstat (limited to 'crypto/bn/asm')
-rwxr-xr-x crypto/bn/asm/x86_64-mont.pl 31
1 files changed, 13 insertions, 18 deletions
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index c2a308ddfa..5d79b35e1c 100755
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -817,15 +817,14 @@ bn_sqr4x_mont:
xor $A0[1],$A0[1]
add $A1[0],$A0[0]
- lea 16($j),$j
adc \$0,$A0[1]
mul $a0 # a[5]*a[2]
add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5]
mov $ai,%rax
adc %rdx,$A0[1]
- mov $A0[0],-8($tptr,$j) # t[5]
+ mov $A0[0],8($tptr,$j) # t[5]
- mov ($aptr,$j),$ai # a[6]
+ mov 16($aptr,$j),$ai # a[6]
xor $A1[0],$A1[0]
mul $a1 # a[5]*a[3]
add %rax,$A1[1] # a[5]*a[3]+t[6]
@@ -839,10 +838,10 @@ bn_sqr4x_mont:
add %rax,$A0[1] # a[6]*a[2]+a[5]*a[3]+t[6]
mov $ai,%rax # a[3]
adc %rdx,$A0[0]
- mov $A0[1],($tptr,$j) # t[6]
+ mov $A0[1],16($tptr,$j) # t[6]
- mov 8($aptr,$j),$ai # a[7]
+ mov 24($aptr,$j),$ai # a[7]
xor $A1[1],$A1[1]
mul $a1 # a[6]*a[5]
add %rax,$A1[0] # a[6]*a[5]+t[7]
@@ -851,7 +850,7 @@ bn_sqr4x_mont:
xor $A0[1],$A0[1]
add $A1[0],$A0[0]
- lea 16($j),$j
+ lea 32($j),$j
adc \$0,$A0[1]
mul $a0 # a[7]*a[4]
add %rax,$A0[0] # a[7]*a[4]+a[6]*a[5]+t[6]
@@ -962,7 +961,7 @@ bn_sqr4x_mont:
add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5]
mov $ai,%rax
adc %rdx,$A0[1]
- mov $A0[0],-8($tptr,$j) # t[5]
+ mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below
cmp \$0,$j
jne .Lsqr4x_inner
@@ -974,8 +973,8 @@ bn_sqr4x_mont:
add %rax,$A1[1]
adc %rdx,$A1[0]
- mov $A1[1],($tptr) # t[6]
- mov $A1[0],8($tptr) # t[7]
+ mov $A1[1],($tptr) # t[6], "preloaded t[2]" below
+ mov $A1[0],8($tptr) # t[7], "preloaded t[3]" below
add \$16,$i
jnz .Lsqr4x_outer
@@ -988,16 +987,15 @@ bn_sqr4x_mont:
mov -16($aptr),$ai # a[2]
mov %rax,$a1
- mov -24($tptr),$A0[0] # t[1]
xor $A0[1],$A0[1]
mul $a0 # a[1]*a[0]
- add %rax,$A0[0] # a[1]*a[0]+t[1]
+ add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1]
mov $ai,%rax # a[2]
adc %rdx,$A0[1]
mov $A0[0],-24($tptr) # t[1]
xor $A0[0],$A0[0]
- add -16($tptr),$A0[1] # a[2]*a[0]+t[2]
+ add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2]
adc \$0,$A0[0]
mul $a0 # a[2]*a[0]
add %rax,$A0[1]
@@ -1005,18 +1003,15 @@ bn_sqr4x_mont:
adc %rdx,$A0[0]
mov $A0[1],-16($tptr) # t[2]
- xor $A1[0],$A1[0]
mov -8($aptr),$ai # a[3]
- xor $A1[1],$A1[1]
- add -8($tptr),$A1[0]
- adc \$0,$A1[1]
mul $a1 # a[2]*a[1]
- add %rax,$A1[0] # a[2]*a[1]+t[3]
+ add %rax,$A1[0] # a[2]*a[1]+t[3], preloaded t[3]
mov $ai,%rax
- adc %rdx,$A1[1]
+ adc \$0,%rdx
xor $A0[1],$A0[1]
add $A1[0],$A0[0]
+ mov %rdx,$A1[1]
adc \$0,$A0[1]
mul $a0 # a[3]*a[0]
add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]