summaryrefslogtreecommitdiffstats
path: root/crypto/poly1305
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2019-02-15 09:44:39 +0100
committerRichard Levitte <levitte@openssl.org>2019-02-16 16:59:23 +0100
commit3405db97e5448c784729b56837f3f8c776a01067 (patch)
tree3038d1d73fb0964915fdfd6377bb717722c02ea7 /crypto/poly1305
parentb2b580fe445e064da50c13d3e00f71022da16ece (diff)
ARM assembly pack: make it Windows-friendly.
"Windows friendliness" means a) flipping .thumb and .text directives, b) always generate Thumb-2 code when asked(*); c) Windows-specific references to external OPENSSL_armcap_P. (*) so far *some* modules were compiled as .code 32 even if Thumb-2 was targeted. It works at hardware level because processor can alternate between the modes with no overhead. But clang --target=arm-windows's builtin assembler just refuses to compile .code 32... Reviewed-by: Paul Dale <paul.dale@oracle.com> Reviewed-by: Richard Levitte <levitte@openssl.org> (Merged from https://github.com/openssl/openssl/pull/8252)
Diffstat (limited to 'crypto/poly1305')
-rwxr-xr-xcrypto/poly1305/asm/poly1305-armv4.pl48
1 files changed, 24 insertions, 24 deletions
diff --git a/crypto/poly1305/asm/poly1305-armv4.pl b/crypto/poly1305/asm/poly1305-armv4.pl
index 7003cc7770..38622af1ab 100755
--- a/crypto/poly1305/asm/poly1305-armv4.pl
+++ b/crypto/poly1305/asm/poly1305-armv4.pl
@@ -48,7 +48,6 @@ if ($flavour && $flavour ne "void") {
$code.=<<___;
#include "arm_arch.h"
-.text
#if defined(__thumb2__)
.syntax unified
.thumb
@@ -56,6 +55,8 @@ $code.=<<___;
.code 32
#endif
+.text
+
.globl poly1305_emit
.globl poly1305_blocks
.globl poly1305_init
@@ -100,8 +101,10 @@ poly1305_init:
and r4,r4,r10
#if __ARM_MAX_ARCH__>=7
+# if !defined(_WIN32)
ldr r12,[r11,r12] @ OPENSSL_armcap_P
-# ifdef __APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
# endif
#endif
@@ -116,31 +119,21 @@ poly1305_init:
#if __ARM_MAX_ARCH__>=7
tst r12,#ARMV7_NEON @ check for NEON
-# ifdef __APPLE__
- adr r9,poly1305_blocks_neon
- adr r11,poly1305_blocks
-# ifdef __thumb2__
- it ne
-# endif
+# ifdef __thumb2__
+ adr r9,.Lpoly1305_blocks_neon
+ adr r11,.Lpoly1305_blocks
+ adr r12,.Lpoly1305_emit
+ adr r10,.Lpoly1305_emit_neon
+ itt ne
movne r11,r9
- adr r12,poly1305_emit
- adr r10,poly1305_emit_neon
-# ifdef __thumb2__
- it ne
-# endif
movne r12,r10
+ orr r11,r11,#1 @ thumb-ify address
+ orr r12,r12,#1
# else
-# ifdef __thumb2__
- itete eq
-# endif
- addeq r12,r11,#(poly1305_emit-.Lpoly1305_init)
- addne r12,r11,#(poly1305_emit_neon-.Lpoly1305_init)
- addeq r11,r11,#(poly1305_blocks-.Lpoly1305_init)
- addne r11,r11,#(poly1305_blocks_neon-.Lpoly1305_init)
-# endif
-# ifdef __thumb2__
- orr r12,r12,#1 @ thumb-ify address
- orr r11,r11,#1
+ addeq r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+ addne r12,r11,#(.Lpoly1305_emit_neon-.Lpoly1305_init)
+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
# endif
#endif
ldrb r9,[$inp,#11]
@@ -352,6 +345,7 @@ $code.=<<___;
.type poly1305_emit,%function
.align 5
poly1305_emit:
+.Lpoly1305_emit:
stmdb sp!,{r4-r11}
.Lpoly1305_emit_enter:
@@ -671,6 +665,7 @@ poly1305_init_neon:
.type poly1305_blocks_neon,%function
.align 5
poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
ldr ip,[$ctx,#36] @ is_base2_26
ands $len,$len,#-16
beq .Lno_data_neon
@@ -1157,6 +1152,7 @@ poly1305_blocks_neon:
.type poly1305_emit_neon,%function
.align 5
poly1305_emit_neon:
+.Lpoly1305_emit_neon:
ldr ip,[$ctx,#36] @ is_base2_26
stmdb sp!,{r4-r11}
@@ -1229,7 +1225,11 @@ poly1305_emit_neon:
.Lzeros:
.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.LOPENSSL_armcap:
+# ifdef _WIN32
+.word OPENSSL_armcap_P
+# else
.word OPENSSL_armcap_P-.Lpoly1305_init
+# endif
#endif
___
} }