summaryrefslogtreecommitdiffstats
path: root/crypto/ec
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2017-06-03 21:08:57 +0200
committerAndy Polyakov <appro@openssl.org>2017-06-05 14:26:38 +0200
commit7fb37eed05059e1b135ef5818fb9f2b3b93b8daa (patch)
tree4348c302e9e5e97a6207616b43e69ee7819394ac /crypto/ec
parentc8ec34109cab8c92685958ddfef0776a4b3b8460 (diff)
ec/asm/ecp_nistz256-x86_64.pl: minor sqr_montx cleanup.
Drop some redundant instructions in reduction in ecp_nistz256_sqr_montx. Reviewed-by: Rich Salz <rsalz@openssl.org> (cherry picked from commit 8fc063dcc9668589fd95533d25932396d60987f9)
Diffstat (limited to 'crypto/ec')
-rwxr-xr-xcrypto/ec/asm/ecp_nistz256-x86_64.pl22
1 files changed, 9 insertions, 13 deletions
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index 16b6639b54..714e852a18 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -1185,19 +1185,18 @@ __ecp_nistz256_sqr_montx:
adox $t1, $acc5
.byte 0x67,0x67
mulx %rdx, $t0, $t4
- mov $acc0, %rdx
+ mov .Lpoly+8*3(%rip), %rdx
adox $t0, $acc6
shlx $a_ptr, $acc0, $t0
adox $t4, $acc7
shrx $a_ptr, $acc0, $t4
- mov .Lpoly+8*3(%rip), $t1
+ mov %rdx,$t1
# reduction step 1
add $t0, $acc1
adc $t4, $acc2
- mulx $t1, $t0, $acc0
- mov $acc1, %rdx
+ mulx $acc0, $t0, $acc0
adc $t0, $acc3
shlx $a_ptr, $acc1, $t0
adc \$0, $acc0
@@ -1207,8 +1206,7 @@ __ecp_nistz256_sqr_montx:
add $t0, $acc2
adc $t4, $acc3
- mulx $t1, $t0, $acc1
- mov $acc2, %rdx
+ mulx $acc1, $t0, $acc1
adc $t0, $acc0
shlx $a_ptr, $acc2, $t0
adc \$0, $acc1
@@ -1218,8 +1216,7 @@ __ecp_nistz256_sqr_montx:
add $t0, $acc3
adc $t4, $acc0
- mulx $t1, $t0, $acc2
- mov $acc3, %rdx
+ mulx $acc2, $t0, $acc2
adc $t0, $acc1
shlx $a_ptr, $acc3, $t0
adc \$0, $acc2
@@ -1229,12 +1226,12 @@ __ecp_nistz256_sqr_montx:
add $t0, $acc0
adc $t4, $acc1
- mulx $t1, $t0, $acc3
+ mulx $acc3, $t0, $acc3
adc $t0, $acc2
adc \$0, $acc3
- xor $t3, $t3 # cf=0
- adc $acc0, $acc4 # accumulate upper half
+ xor $t3, $t3
+ add $acc0, $acc4 # accumulate upper half
mov .Lpoly+8*1(%rip), $a_ptr
adc $acc1, $acc5
mov $acc4, $acc0
@@ -1243,8 +1240,7 @@ __ecp_nistz256_sqr_montx:
mov $acc5, $acc1
adc \$0, $t3
- xor %eax, %eax # cf=0
- sbb \$-1, $acc4 # .Lpoly[0]
+ sub \$-1, $acc4 # .Lpoly[0]
mov $acc6, $acc2
sbb $a_ptr, $acc5 # .Lpoly[1]
sbb \$0, $acc6 # .Lpoly[2]