summary refs log tree commit diff stats
path: root/crypto
diff options
context:
space:
mode:
author Andy Polyakov <appro@openssl.org> 2012-11-28 13:19:10 +0000
committer Andy Polyakov <appro@openssl.org> 2012-11-28 13:19:10 +0000
commit 904732f68bcc6ebd3f8961a9272bc811dc26bcbd (patch)
tree 7ab988b23467d2545e4aa2e50bd176bf6923fc2a /crypto
parent cf5ecc3e1fd112dd8a544bfb26bfb96c96b604c7 (diff)
C64x+ assembly pack: improve EABI support.
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/aes/asm/aes-c64xplus.pl 10
-rw-r--r-- crypto/bn/asm/bn-c64xplus.asm 51
-rw-r--r-- crypto/bn/asm/c64xplus-gf2m.pl 3
-rw-r--r-- crypto/c64xpluscpuid.pl 8
-rw-r--r-- crypto/modes/asm/ghash-c64xplus.pl 9
-rw-r--r-- crypto/sha/asm/sha1-c64xplus.pl 3
-rw-r--r-- crypto/sha/asm/sha256-c64xplus.pl 6
-rw-r--r-- crypto/sha/asm/sha512-c64xplus.pl 5
8 files changed, 86 insertions, 9 deletions
diff --git a/crypto/aes/asm/aes-c64xplus.pl b/crypto/aes/asm/aes-c64xplus.pl
index ad0c15a36f..cc14ae3157 100644
--- a/crypto/aes/asm/aes-c64xplus.pl
+++ b/crypto/aes/asm/aes-c64xplus.pl
@@ -46,6 +46,11 @@ $code=<<___;
.text
.if __TI_EABI__
.nocmp
+ .asg AES_encrypt,_AES_encrypt
+ .asg AES_decrypt,_AES_decrypt
+ .asg AES_set_encrypt_key,_AES_set_encrypt_key
+ .asg AES_set_decrypt_key,_AES_set_decrypt_key
+ .asg AES_ctr32_encrypt,_AES_ctr32_encrypt
.endif
.asg B3,RA
@@ -1021,7 +1026,11 @@ ___
}
# Tables are kept in endian-neutral manner
$code.=<<___;
+ .if __TI_EABI__
+ .sect ".text:aes_asm.const"
+ .else
.sect ".const:aes_asm"
+ .endif
.align 128
AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84
@@ -1359,3 +1368,4 @@ AES_Td4:
___
print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/bn-c64xplus.asm b/crypto/bn/asm/bn-c64xplus.asm
index 161547c3b0..f07b09e439 100644
--- a/crypto/bn/asm/bn-c64xplus.asm
+++ b/crypto/bn/asm/bn-c64xplus.asm
@@ -12,6 +12,18 @@
;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
;;====================================================================
.text
+ .if __TI_EABI__
+ .asg bn_mul_add_words,_bn_mul_add_words
+ .asg bn_mul_words,_bn_mul_words
+ .asg bn_sqr_words,_bn_sqr_words
+ .asg bn_add_words,_bn_add_words
+ .asg bn_sub_words,_bn_sub_words
+ .asg bn_div_words,_bn_div_words
+ .asg bn_sqr_comba8,_bn_sqr_comba8
+ .asg bn_mul_comba8,_bn_mul_comba8
+ .asg bn_sqr_comba4,_bn_sqr_comba4
+ .asg bn_mul_comba4,_bn_mul_comba4
+ .endif
.asg B3,RA
.asg A4,ARG0
@@ -158,14 +170,39 @@ _bn_sub_words:
.endasmfunc
.global _bn_div_words
- .global __divull
_bn_div_words:
.asmfunc
- CALLP __divull,A3 ; jump to rts64plus.lib
-|| MV ARG0,A5
-|| MV ARG1,ARG0
-|| MV ARG2,ARG1
-|| ZERO B5
+ LMBD 1,A6,A0 ; leading zero bits in dv
+ LMBD 1,A4,A1 ; leading zero bits in hi
+|| MVK 32,B0
+ CMPLTU A1,A0,A2
+|| ADD A0,B0,B0
+ [ A2] BNOP RA
+||[ A2] MVK -1,A4 ; return overflow
+||[!A2] MV A4,A3 ; reassign hi
+ [!A2] MV B4,A4 ; reassign lo, will be quotient
+||[!A2] MVC B0,ILC
+ [!A2] SHL A6,A0,A6 ; normalize dv
+|| MVK 1,A1
+
+ [!A2] CMPLTU A3,A6,A1 ; hi<dv?
+||[!A2] SHL A4,1,A5:A4 ; lo<<1
+ [!A1] SUB A3,A6,A3 ; hi-=dv
+||[!A1] OR 1,A4,A4
+ [!A2] SHRU A3,31,A1 ; upper bit
+||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31
+
+ SPLOOP 3
+ [!A1] CMPLTU A3,A6,A1 ; hi<dv?
+||[ A1] ZERO A1
+|| SHL A4,1,A5:A4 ; lo<<1
+ [!A1] SUB A3,A6,A3 ; hi-=dv
+||[!A1] OR 1,A4,A4 ; quotient
+ SHRU A3,31,A1 ; upper bit
+|| ADDAH A5,A3,A3 ; hi<<1|lo>>31
+ SPKERNEL
+
+ BNOP RA,5
.endasmfunc
;;====================================================================
@@ -256,7 +293,7 @@ _bn_mul_comba4:
|| LDW *A5++,B6 ; ap[0]
|| MV A0,A3 ; const A3=M
.else
- ;; This alternative is exercise in fully unrolled Comba
+ ;; This alternative is an exercise in fully unrolled Comba
;; algorithm implementation that operates at n*(n+1)+12, or
;; as little as 32 cycles...
LDW *ARG1[0],B16 ; a[0]
diff --git a/crypto/bn/asm/c64xplus-gf2m.pl b/crypto/bn/asm/c64xplus-gf2m.pl
index cef83942c9..1b3ecc2c94 100644
--- a/crypto/bn/asm/c64xplus-gf2m.pl
+++ b/crypto/bn/asm/c64xplus-gf2m.pl
@@ -107,6 +107,9 @@ ___
}
$code.=<<___;
.text
+ .if __TI_EABI__
+ .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
+ .endif
.global _bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
diff --git a/crypto/c64xpluscpuid.pl b/crypto/c64xpluscpuid.pl
index 067b693d5c..0ee0a4e86f 100644
--- a/crypto/c64xpluscpuid.pl
+++ b/crypto/c64xpluscpuid.pl
@@ -6,6 +6,14 @@ open STDOUT,">$output";
$code.=<<___;
.text
+ .if __TI_EABI__
+ .asg OPENSSL_rdtsc,_OPENSSL_rdtsc
+ .asg OPENSSL_cleanse,_OPENSSL_cleanse
+ .asg OPENSSL_atomic_add,_OPENSSL_atomic_add
+ .asg OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
+ .asg OPENSSL_instrument_bus,_OPENSSL_instrument_bus
+ .asg OPENSSL_instrument_bus2,_OPENSSL_instrument_bus2
+ .endif
.asg B3,RA
diff --git a/crypto/modes/asm/ghash-c64xplus.pl b/crypto/modes/asm/ghash-c64xplus.pl
index 1ac4d927d0..409b0d61b9 100644
--- a/crypto/modes/asm/ghash-c64xplus.pl
+++ b/crypto/modes/asm/ghash-c64xplus.pl
@@ -35,6 +35,11 @@ open STDOUT,">$output";
$code.=<<___;
.text
+ .if __TI_EABI__
+ .asg gcm_gmult_1bit,_gcm_gmult_1bit
+ .asg gcm_gmult_4bit,_gcm_gmult_4bit
+ .asg gcm_ghash_4bit,_gcm_ghash_4bit
+ .endif
.asg B3,RA
@@ -144,7 +149,7 @@ ___
# 8/2 S1 L1x S2 | ....
#####... ................|............
$code.=<<___;
- XORMPY $H0,$xia,$H0x ; 0 ; H·Xi[i]
+ XORMPY $H0,$xia,$H0x ; 0 ; H·(Xi[i]<<1)
|| XORMPY $H01u,$xib,$H01y
|| [A0] LDBU *--${xip},$x0
XORMPY $H1,$xia,$H1x ; 1
@@ -153,7 +158,7 @@ $code.=<<___;
XORMPY $H3,$xia,$H3x ; 3
|| XORMPY $H3u,$xib,$H3y
||[!A0] MVK.D 15,A0 ; *--${xip} counter
- XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·Xi[i]
+ XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·(Xi[i]<<1)
|| [A0] SUB.S A0,1,A0
XOR.L $H1x,$Z1,$Z1 ; 5
|| AND.D $H01y,$FF000000,$H0z
diff --git a/crypto/sha/asm/sha1-c64xplus.pl b/crypto/sha/asm/sha1-c64xplus.pl
index 87000d1e8f..456f80a86e 100644
--- a/crypto/sha/asm/sha1-c64xplus.pl
+++ b/crypto/sha/asm/sha1-c64xplus.pl
@@ -38,6 +38,9 @@ open STDOUT,">$output";
$code=<<___;
.text
+ .if __TI_EABI__
+ .asg sha1_block_data_order,_sha1_block_data_order
+ .endif
.asg B3,RA
.asg A15,FP
diff --git a/crypto/sha/asm/sha256-c64xplus.pl b/crypto/sha/asm/sha256-c64xplus.pl
index 5a057868b4..798f78309b 100644
--- a/crypto/sha/asm/sha256-c64xplus.pl
+++ b/crypto/sha/asm/sha256-c64xplus.pl
@@ -40,6 +40,7 @@ $code.=<<___;
.text
.if __TI_EABI__
.nocmp
+ .asg sha256_block_data_order,_sha256_block_data_order
.endif
.asg B3,RA
@@ -275,7 +276,11 @@ outerloop?:
|| STW $H,*${CTXB}[7]
.endasmfunc
+ .if __TI_EABI__
+ .sect ".text:sha_asm.const"
+ .else
.sect ".const:sha_asm"
+ .endif
.align 128
K256:
.uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
@@ -300,3 +305,4 @@ K256:
___
print $code;
+close STDOUT;
diff --git a/crypto/sha/asm/sha512-c64xplus.pl b/crypto/sha/asm/sha512-c64xplus.pl
index e4e7c042fd..77a62523e5 100644
--- a/crypto/sha/asm/sha512-c64xplus.pl
+++ b/crypto/sha/asm/sha512-c64xplus.pl
@@ -48,6 +48,7 @@ $code.=<<___;
.text
.if __TI_EABI__
.nocmp
+ .asg sha512_block_data_order,_sha512_block_data_order
.endif
.asg B3,RA
@@ -370,7 +371,11 @@ break?:
NOP 2 ; wait till FP is committed
.endasmfunc
+ .if __TI_EABI__
+ .sect ".text:sha_asm.const"
+ .else
.sect ".const:sha_asm"
+ .endif
.align 128
K512:
.uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd