diff options
author | Andy Polyakov <appro@openssl.org> | 2015-09-23 18:41:27 +0200 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2015-09-25 13:34:02 +0200 |
commit | 11208dcfb9105e8afa37233185decefd45e89e17 (patch) | |
tree | 1294a09f6ed63951181da9c4a9c6b16835c80f60 /crypto/bn/asm/armv4-gf2m.pl | |
parent | e7a68985d5b734890489a9d47f82210c384e8c3a (diff) |
ARMv4 assembly pack: implement support for Thumb2.
As some of ARM processors, more specifically Cortex-Mx series, are
Thumb2-only, we need to support Thumb2-only builds even in assembly.
Reviewed-by: Tim Hudson <tjh@openssl.org>
Diffstat (limited to 'crypto/bn/asm/armv4-gf2m.pl')
-rw-r--r-- | crypto/bn/asm/armv4-gf2m.pl | 36 |
1 files changed, 30 insertions, 6 deletions
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl index a0b018c508..227581e10c 100644 --- a/crypto/bn/asm/armv4-gf2m.pl +++ b/crypto/bn/asm/armv4-gf2m.pl @@ -51,7 +51,12 @@ $code=<<___; #include "arm_arch.h" .text +#if defined(__thumb2__) && !defined(__APPLE__) +.syntax unified +.thumb +#else .code 32 +#endif ___ ################ # private interface to mul_1x1_ialu @@ -132,11 +137,17 @@ mul_1x1_ialu: eor $hi,$hi,$t0,lsr#8 ldr $t0,[sp,$i0] @ tab[b >> 30 ] +#ifdef __thumb2__ + itt ne +#endif eorne $lo,$lo,$b,lsl#30 eorne $hi,$hi,$b,lsr#2 tst $a,#1<<31 eor $lo,$lo,$t1,lsl#27 eor $hi,$hi,$t1,lsr#5 +#ifdef __thumb2__ + itt ne +#endif eorne $lo,$lo,$b,lsl#31 eorne $hi,$hi,$b,lsr#1 eor $lo,$lo,$t0,lsl#30 @@ -156,20 +167,33 @@ $code.=<<___; .align 5 bn_GF2m_mul_2x2: #if __ARM_MAX_ARCH__>=7 + stmdb sp!,{r10,lr} ldr r12,.LOPENSSL_armcap -.Lpic: ldr r12,[pc,r12] - tst r12,#1 + adr r10,.LOPENSSL_armcap + ldr r12,[r12,r10] +#ifdef __APPLE__ + ldr r12,[r12] +#endif + tst r12,#ARMV7_NEON + itt ne + ldrne r10,[sp],#8 bne .LNEON + stmdb sp!,{r4-r9} +#else + stmdb sp!,{r4-r10,lr} #endif ___ $ret="r10"; # reassigned 1st argument $code.=<<___; - stmdb sp!,{r4-r10,lr} mov $ret,r0 @ reassign 1st argument mov $b,r3 @ $b=b1 + sub r7,sp,#36 + mov r8,sp + and r7,r7,#-32 ldr r3,[sp,#32] @ load b0 mov $mask,#7<<2 - sub sp,sp,#32 @ allocate tab[8] + mov sp,r7 @ allocate tab[8] + str r8,[r7,#32] bl mul_1x1_ialu @ a1·b1 str $lo,[$ret,#8] @@ -193,6 +217,7 @@ ___ $code.=<<___; ldmia $ret,{@r[0]-@r[3]} eor $lo,$lo,$hi + ldr sp,[sp,#32] @ destroy tab[8] eor $hi,$hi,@r[1] eor $lo,$lo,@r[0] eor $hi,$hi,@r[2] @@ -200,7 +225,6 @@ $code.=<<___; eor $hi,$hi,@r[3] str $hi,[$ret,#8] eor $lo,$lo,$hi - add sp,sp,#32 @ destroy tab[8] str $lo,[$ret,#4] #if __ARM_ARCH__>=5 @@ -279,7 +303,7 @@ $code.=<<___; #if __ARM_MAX_ARCH__>=7 .align 5 .LOPENSSL_armcap: -.word OPENSSL_armcap_P-(.Lpic+8) +.word OPENSSL_armcap_P-. #endif .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .align 5 |