diff options
author | Andy Polyakov <appro@openssl.org> | 2015-04-02 10:17:42 +0200 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2015-04-20 15:06:22 +0200 |
commit | 313e6ec11fb8a7bda1676ce5804bee8755664141 (patch) | |
tree | 48646a0e7df9d05ab25b537a201c26f1bfee0b0b /crypto/sha | |
parent | cc98b998b82c4cf4e83ccaf4c3cc4963d2f9eace (diff) |
Add assembly support for 32-bit iOS.
Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Richard Levitte <levitte@openssl.org>
Diffstat (limited to 'crypto/sha')
-rw-r--r-- | crypto/sha/asm/sha1-armv4-large.pl | 19 | ||||
-rw-r--r-- | crypto/sha/asm/sha256-armv4.pl | 32 | ||||
-rw-r--r-- | crypto/sha/asm/sha512-armv4.pl | 26 |
3 files changed, 63 insertions, 14 deletions
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl index 61307b7c61..356b52fc1b 100644 --- a/crypto/sha/asm/sha1-armv4-large.pl +++ b/crypto/sha/asm/sha1-armv4-large.pl @@ -68,8 +68,20 @@ # # Add ARMv8 code path performing at 2.35 cpb on Apple A7. -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $ctx="r0"; $inp="r1"; @@ -180,6 +192,9 @@ sha1_block_data_order: sub r3,pc,#8 @ sha1_block_data_order ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif tst r12,#ARMV8_SHA1 bne .LARMv8 tst r12,#ARMV7_NEON diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl index fac0533ea6..efee1fb1f3 100644 --- a/crypto/sha/asm/sha256-armv4.pl +++ b/crypto/sha/asm/sha256-armv4.pl @@ -37,8 +37,20 @@ # # Add ARMv8 code path performing at 2.0 cpb on Apple A7. -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $ctx="r0"; $t0="r0"; $inp="r1"; $t4="r1"; @@ -167,7 +179,7 @@ $code=<<___; .code 32 #else .syntax unified -# ifdef __thumb2__ +# if defined(__thumb2__) && !defined(__APPLE__) # define adrl adr .thumb # else @@ -198,13 +210,14 @@ K256: .word 0 @ terminator #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha256_block_data_order +.word OPENSSL_armcap_P-.Lsha256_block_data_order #endif .align 5 .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +.Lsha256_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order #else @@ -213,6 +226,9 @@ sha256_block_data_order: #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif tst r12,#ARMV8_SHA256 bne .LARMv8 tst r12,#ARMV7_NEON @@ -463,7 +479,7 @@ sha256_block_data_order_neon: stmdb sp!,{r4-r12,lr} sub $H,sp,#16*4+16 - adrl $Ktbl,K256 + adr $Ktbl,K256 bic $H,$H,#15 @ align for 128-bit stores mov $t2,sp mov sp,$H @ alloca @@ -583,7 +599,7 @@ my $Ktbl="r3"; $code.=<<___; #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -# ifdef __thumb2__ +# if defined(__thumb2__) && !defined(__APPLE__) # define INST(a,b,c,d) .byte c,d|0xc,a,b # else # define INST(a,b,c,d) .byte a,b,c,d @@ -594,7 +610,9 @@ $code.=<<___; sha256_block_data_order_armv8: .LARMv8: vld1.32 {$ABCD,$EFGH},[$ctx] -# ifdef __thumb2__ +# ifdef __APPLE__ + sub $Ktbl,$Ktbl,#256+32 +# elif defined(__thumb2__) adr $Ktbl,.LARMv8 sub $Ktbl,$Ktbl,#.LARMv8-K256 # else diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl index a2b11a8443..77d6c5eae9 100644 --- a/crypto/sha/asm/sha512-armv4.pl +++ b/crypto/sha/asm/sha512-armv4.pl @@ -50,8 +50,20 @@ $hi="HI"; $lo="LO"; # ==================================================================== -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $ctx="r0"; # parameter block $inp="r1"; @@ -200,7 +212,7 @@ $code=<<___; #endif .text -#if __ARM_ARCH__<7 +#if __ARM_ARCH__<7 || defined(__APPLE__) .code 32 #else .syntax unified @@ -258,7 +270,7 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha512_block_data_order +.word OPENSSL_armcap_P-.Lsha512_block_data_order .skip 32-4 #else .skip 32 @@ -267,6 +279,7 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: +.Lsha512_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha512_block_data_order #else @@ -275,6 +288,9 @@ sha512_block_data_order: #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif tst r12,#1 bne .LNEON #endif @@ -593,8 +609,8 @@ sha512_block_data_order_neon: .LNEON: dmb @ errata #451034 on early Cortex A8 add $len,$inp,$len,lsl#7 @ len to point at the end of inp + adr $Ktbl,K512 VFP_ABI_PUSH - adrl $Ktbl,K512 vldmia $ctx,{$A-$H} @ load context .Loop_neon: ___ |