diff options
author | Andy Polyakov <appro@openssl.org> | 2015-09-23 18:41:27 +0200 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2015-09-25 13:34:02 +0200 |
commit | 11208dcfb9105e8afa37233185decefd45e89e17 (patch) | |
tree | 1294a09f6ed63951181da9c4a9c6b16835c80f60 /crypto/sha/asm | |
parent | e7a68985d5b734890489a9d47f82210c384e8c3a (diff) |
ARMv4 assembly pack: implement support for Thumb2.
As some ARM processors, more specifically the Cortex-Mx series, are
Thumb2-only, we need to support Thumb2-only builds even in assembly.
Reviewed-by: Tim Hudson <tjh@openssl.org>
Diffstat (limited to 'crypto/sha/asm')
-rw-r--r-- | crypto/sha/asm/sha1-armv4-large.pl | 47 | ||||
-rw-r--r-- | crypto/sha/asm/sha256-armv4.pl | 14 | ||||
-rw-r--r-- | crypto/sha/asm/sha512-armv4.pl | 16 |
3 files changed, 52 insertions, 25 deletions
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl index 356b52fc1b..9d34e04f7b 100644 --- a/crypto/sha/asm/sha1-armv4-large.pl +++ b/crypto/sha/asm/sha1-armv4-large.pl @@ -181,7 +181,12 @@ $code=<<___; #include "arm_arch.h" .text +#if defined(__thumb2__) && !defined(__APPLE__) +.syntax unified +.thumb +#else .code 32 +#endif .global sha1_block_data_order .type sha1_block_data_order,%function @@ -189,7 +194,8 @@ $code=<<___; .align 5 sha1_block_data_order: #if __ARM_MAX_ARCH__>=7 - sub r3,pc,#8 @ sha1_block_data_order +.Lsha1_block: + adr r3,.Lsha1_block ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P #ifdef __APPLE__ @@ -216,7 +222,12 @@ for($i=0;$i<5;$i++) { &BODY_00_15(@V); unshift(@V,pop(@V)); } $code.=<<___; +#if defined(__thumb2__) && !defined(__APPLE__) + mov $t3,sp + teq $Xi,$t3 +#else teq $Xi,sp +#endif bne .L_00_15 @ [((11+4)*5+2)*3] sub sp,sp,#25*4 ___ @@ -235,7 +246,12 @@ for($i=0;$i<5;$i++) { &BODY_20_39(@V); unshift(@V,pop(@V)); } $code.=<<___; +#if defined(__thumb2__) && !defined(__APPLE__) + mov $t3,sp + teq $Xi,$t3 +#else teq $Xi,sp @ preserve carry +#endif bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes @@ -247,7 +263,12 @@ for($i=0;$i<5;$i++) { &BODY_40_59(@V); unshift(@V,pop(@V)); } $code.=<<___; +#if defined(__thumb2__) && !defined(__APPLE__) + mov $t3,sp + teq $Xi,$t3 +#else teq $Xi,sp +#endif bne .L_40_59 @ [+((12+5)*5+2)*4] ldr $K,.LK_60_79 @@ -283,7 +304,7 @@ $code.=<<___; .LK_60_79: .word 0xca62c1d6 #if __ARM_MAX_ARCH__>=7 .LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha1_block_data_order +.word OPENSSL_armcap_P-.Lsha1_block #endif .asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .align 5 @@ -458,6 +479,7 @@ sub Xuplast_80 () &teq ($inp,$len); &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX + &it ("eq"); &subeq ($inp,$inp,64); # reload last block to avoid SEGV &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!"); 
eval(shift(@insns)); @@ -508,12 +530,12 @@ sha1_block_data_order_neon: @ dmb @ errata #451034 on early Cortex A8 @ vstmdb sp!,{d8-d15} @ ABI specification says so mov $saved_sp,sp - sub sp,sp,#64 @ alloca + sub $Xfer,sp,#64 adr $K_XX_XX,.LK_00_19 - bic sp,sp,#15 @ align for 128-bit stores + bic $Xfer,$Xfer,#15 @ align for 128-bit stores ldmia $ctx,{$a,$b,$c,$d,$e} @ load context - mov $Xfer,sp + mov sp,$Xfer @ alloca vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned veor $zero,$zero,$zero @@ -560,10 +582,13 @@ $code.=<<___; add $b,$b,$t0 add $c,$c,$t1 add $d,$d,$Xfer + it eq moveq sp,$saved_sp add $e,$e,$Ki + it ne ldrne $Ki,[sp] stmia $ctx,{$a,$b,$c,$d,$e} + itt ne addne $Xfer,sp,#3*16 bne .Loop_neon @@ -584,6 +609,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); $code.=<<___; #if __ARM_MAX_ARCH__>=7 + +# if defined(__thumb2__) && !defined(__APPLE__) +# define INST(a,b,c,d) .byte c,d|0xf,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d|0x10 +# endif + .type sha1_block_data_order_armv8,%function .align 5 sha1_block_data_order_armv8: @@ -677,7 +709,10 @@ ___ # since ARMv7 instructions are always encoded little-endian. 
# correct solution is to use .inst directive, but older # assemblers don't implement it:-( - sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + + # this fix-up provides Thumb encoding in conjunction with INST + $word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000); + sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", $word&0xff,($word>>8)&0xff, ($word>>16)&0xff,($word>>24)&0xff, $mnemonic,$arg; diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl index efee1fb1f3..c65073b7f9 100644 --- a/crypto/sha/asm/sha256-armv4.pl +++ b/crypto/sha/asm/sha256-armv4.pl @@ -175,16 +175,12 @@ $code=<<___; #endif .text -#if __ARM_ARCH__<7 -.code 32 -#else +#if defined(__thumb2__) && !defined(__APPLE__) .syntax unified -# if defined(__thumb2__) && !defined(__APPLE__) -# define adrl adr .thumb -# else +# define adrl adr +#else .code 32 -# endif #endif .type K256,%object @@ -218,10 +214,10 @@ K256: .type sha256_block_data_order,%function sha256_block_data_order: .Lsha256_block_data_order: -#if __ARM_ARCH__<7 +#if __ARM_ARCH__<7 && !defined(__thumb2__) sub r3,pc,#8 @ sha256_block_data_order #else - adr r3,sha256_block_data_order + adr r3,.Lsha256_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl index 77d6c5eae9..a83d6772c8 100644 --- a/crypto/sha/asm/sha512-armv4.pl +++ b/crypto/sha/asm/sha512-armv4.pl @@ -212,16 +212,12 @@ $code=<<___; #endif .text -#if __ARM_ARCH__<7 || defined(__APPLE__) -.code 32 -#else +#if defined(__thumb2__) && !defined(__APPLE__) .syntax unified -# ifdef __thumb2__ -# define adrl adr .thumb -# else -.code 32 -# endif +# define adrl adr +#else +.code 32 #endif .type K512,%object @@ -280,10 +276,10 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .type sha512_block_data_order,%function sha512_block_data_order: .Lsha512_block_data_order: -#if __ARM_ARCH__<7 +#if __ARM_ARCH__<7 && 
!defined(__thumb2__) sub r3,pc,#8 @ sha512_block_data_order #else - adr r3,sha512_block_data_order + adr r3,.Lsha512_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap |