summary | refs | log | tree | commit | diff | stats
path: root/crypto/poly1305/asm/poly1305-armv8.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/poly1305/asm/poly1305-armv8.pl')
-rwxr-xr-x crypto/poly1305/asm/poly1305-armv8.pl 26
1 file changed, 13 insertions, 13 deletions
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index 79185d2bdd..f1359fd44a 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -791,6 +791,19 @@ poly1305_blocks_neon:
.Lshort_tail:
////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp $ACC3,$ACC3,$ACC3
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp $ACC0,$ACC0,$ACC0
+ ldp d10,d11,[sp,#32]
+ addp $ACC4,$ACC4,$ACC4
+ ldp d12,d13,[sp,#48]
+ addp $ACC1,$ACC1,$ACC1
+ ldp d14,d15,[sp,#64]
+ addp $ACC2,$ACC2,$ACC2
+
+ ////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
ushr $T0.2d,$ACC3,#26
@@ -822,19 +835,6 @@ poly1305_blocks_neon:
add $ACC4,$ACC4,$T1.2d // h3 -> h4
////////////////////////////////////////////////////////////////
- // horizontal add
-
- addp $ACC2,$ACC2,$ACC2
- ldp d8,d9,[sp,#16] // meet ABI requirements
- addp $ACC0,$ACC0,$ACC0
- ldp d10,d11,[sp,#32]
- addp $ACC1,$ACC1,$ACC1
- ldp d12,d13,[sp,#48]
- addp $ACC3,$ACC3,$ACC3
- ldp d14,d15,[sp,#64]
- addp $ACC4,$ACC4,$ACC4
-
- ////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16