author     Andy Polyakov <appro@openssl.org>	2015-09-23 18:41:27 +0200
committer  Andy Polyakov <appro@openssl.org>	2015-09-25 13:34:02 +0200
commit     11208dcfb9105e8afa37233185decefd45e89e17 (patch)
tree       1294a09f6ed63951181da9c4a9c6b16835c80f60 /crypto/sha/asm
parent     e7a68985d5b734890489a9d47f82210c384e8c3a (diff)
ARMv4 assembly pack: implement support for Thumb2.
As some ARM processors, more specifically the Cortex-Mx series, are Thumb2-only, we need to support Thumb2-only builds even in assembly.

Reviewed-by: Tim Hudson <tjh@openssl.org>
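
For context, the guard the patch introduces at the top of each module works as follows: __thumb2__ is defined by gcc/clang when generating Thumb-2 code, so such builds switch the assembler to unified syntax and Thumb state, while all other builds keep plain ARM state (the !defined(__APPLE__) exclusion mirrors the hunks below). A minimal sketch in the perlasm style used by these scripts:

# minimal sketch of the mode guard added by this patch; the macro names
# (__thumb2__, __APPLE__) are the ones used in the hunks below
my $code=<<'___';
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax	unified
.thumb
#else
.code	32
#endif
___
print $code;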
Diffstat (limited to 'crypto/sha/asm')
-rw-r--r--  crypto/sha/asm/sha1-armv4-large.pl | 47
-rw-r--r--  crypto/sha/asm/sha256-armv4.pl     | 14
-rw-r--r--  crypto/sha/asm/sha512-armv4.pl     | 16
3 files changed, 52 insertions, 25 deletions
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
index 356b52fc1b..9d34e04f7b 100644
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -181,7 +181,12 @@ $code=<<___;
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
.global sha1_block_data_order
.type sha1_block_data_order,%function
@@ -189,7 +194,8 @@ $code=<<___;
.align 5
sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
- sub r3,pc,#8 @ sha1_block_data_order
+.Lsha1_block:
+ adr r3,.Lsha1_block
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
@@ -216,7 +222,12 @@ for($i=0;$i<5;$i++) {
&BODY_00_15(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp
+#endif
bne .L_00_15 @ [((11+4)*5+2)*3]
sub sp,sp,#25*4
___
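
The mov/teq pairs added in this hunk (and repeated in the next two) work around a Thumb-2 restriction: sp (r13) is not a valid second operand for teq, so the stack pointer is first copied into a scratch register ($t3 in the script), where the comparison is permitted. A sketch with placeholder registers standing in for the script's variables:

# illustrative only; r12 and r4 stand in for the script's $t3 and $Xi
my $code=<<'___';
#if defined(__thumb2__) && !defined(__APPLE__)
	mov	r12,sp		@ Thumb-2 cannot compare against sp directly
	teq	r4,r12
#else
	teq	r4,sp
#endif
___
print $code;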
@@ -235,7 +246,12 @@ for($i=0;$i<5;$i++) {
&BODY_20_39(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp @ preserve carry
+#endif
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
@@ -247,7 +263,12 @@ for($i=0;$i<5;$i++) {
&BODY_40_59(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp
+#endif
bne .L_40_59 @ [+((12+5)*5+2)*4]
ldr $K,.LK_60_79
@@ -283,7 +304,7 @@ $code.=<<___;
.LK_60_79: .word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha1_block_data_order
+.word OPENSSL_armcap_P-.Lsha1_block
#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
@@ -458,6 +479,7 @@ sub Xuplast_80 ()
&teq ($inp,$len);
&sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX
+ &it ("eq");
&subeq ($inp,$inp,64); # reload last block to avoid SEGV
&vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!");
eval(shift(@insns));
@@ -508,12 +530,12 @@ sha1_block_data_order_neon:
@ dmb @ errata #451034 on early Cortex A8
@ vstmdb sp!,{d8-d15} @ ABI specification says so
mov $saved_sp,sp
- sub sp,sp,#64 @ alloca
+ sub $Xfer,sp,#64
adr $K_XX_XX,.LK_00_19
- bic sp,sp,#15 @ align for 128-bit stores
+ bic $Xfer,$Xfer,#15 @ align for 128-bit stores
ldmia $ctx,{$a,$b,$c,$d,$e} @ load context
- mov $Xfer,sp
+ mov sp,$Xfer @ alloca
vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned
veor $zero,$zero,$zero
@@ -560,10 +582,13 @@ $code.=<<___;
add $b,$b,$t0
add $c,$c,$t1
add $d,$d,$Xfer
+ it eq
moveq sp,$saved_sp
add $e,$e,$Ki
+ it ne
ldrne $Ki,[sp]
stmia $ctx,{$a,$b,$c,$d,$e}
+ itt ne
addne $Xfer,sp,#3*16
bne .Loop_neon
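
The it/itt lines added in this hunk are Thumb-2 If-Then instructions: a conditionally executed instruction in Thumb-2 must be covered by a preceding IT block that names the condition and, through the extra "t" letters, how many of the following instructions it applies to. GNU as appears to accept and discard them when assembling for ARM state, which is presumably why they can be inserted without an #ifdef. A minimal illustration with placeholder registers:

# illustrative only; registers and the .Loop label are placeholders
my $code=<<'___';
	it	eq		@ next instruction executes only if EQ
	moveq	r0,r1
	itt	ne		@ next two instructions execute only if NE
	addne	r0,r0,#1
	bne	.Loop
___
print $code;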
@@ -584,6 +609,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
+
+# if defined(__thumb2__) && !defined(__APPLE__)
+# define INST(a,b,c,d) .byte c,d|0xf,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d|0x10
+# endif
+
.type sha1_block_data_order_armv8,%function
.align 5
sha1_block_data_order_armv8:
@@ -677,7 +709,10 @@ ___
# since ARMv7 instructions are always encoded little-endian.
# correct solution is to use .inst directive, but older
# assemblers don't implement it:-(
- sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+
+ # this fix-up provides Thumb encoding in conjunction with INST
+ $word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000);
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
$word&0xff,($word>>8)&0xff,
($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
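
The comment block above explains the approach taken for the ARMv8 SHA instructions: older assemblers lack the .inst directive, so the instructions are emitted as raw bytes, and the new INST macro plus the bit fix-up let the same bytes assemble correctly in Thumb-2 builds (the Thumb-2 variant of INST reorders the halfwords and adjusts the top bits). A standalone restatement of that emission path, with a hypothetical helper name, might look like:

# illustrative only; emit_inst is a hypothetical name, the logic mirrors
# the sprintf and fix-up shown in the hunk above
sub emit_inst {
    my ($word, $mnemonic, $arg) = @_;

    # clear bit 28 when bits 24-27 are 0b0010, as in the patch, so the
    # Thumb-2 variant of INST() reassembles a valid encoding
    $word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000);

    # bytes of the little-endian ARM word; the assembler-time INST()
    # macro keeps them as-is for ARM or swaps the halfwords for Thumb-2
    sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t\@ %s %s",
            $word&0xff, ($word>>8)&0xff,
            ($word>>16)&0xff, ($word>>24)&0xff,
            $mnemonic, $arg;
}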
diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl
index efee1fb1f3..c65073b7f9 100644
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -175,16 +175,12 @@ $code=<<___;
#endif
.text
-#if __ARM_ARCH__<7
-.code 32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
-# if defined(__thumb2__) && !defined(__APPLE__)
-# define adrl adr
.thumb
-# else
+# define adrl adr
+#else
.code 32
-# endif
#endif
.type K256,%object
@@ -218,10 +214,10 @@ K256:
.type sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha256_block_data_order
#else
- adr r3,sha256_block_data_order
+ adr r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
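
The other change in this file is how the routine finds its own address for the OPENSSL_armcap_P lookup: in ARM state pc reads as the address of the current instruction plus 8, so sub r3,pc,#8 recovers the entry point, but that offset does not hold in Thumb state, so the guard now also checks __thumb2__ and the fallback uses adr against the newly added local label. A minimal sketch of the resulting preamble, with the label and guard taken from the hunk above:

# minimal sketch; label and guard as in the hunk above
my $code=<<'___';
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8	@ pc reads as ".+8" only in ARM state
#else
	adr	r3,.Lsha256_block_data_order
#endif
___
print $code;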
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
index 77d6c5eae9..a83d6772c8 100644
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -212,16 +212,12 @@ $code=<<___;
#endif
.text
-#if __ARM_ARCH__<7 || defined(__APPLE__)
-.code 32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
-# ifdef __thumb2__
-# define adrl adr
.thumb
-# else
-.code 32
-# endif
+# define adrl adr
+#else
+.code 32
#endif
.type K512,%object
@@ -280,10 +276,10 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.type sha512_block_data_order,%function
sha512_block_data_order:
.Lsha512_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha512_block_data_order
#else
- adr r3,sha512_block_data_order
+ adr r3,.Lsha512_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap