summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Levitte <levitte@openssl.org>2000-09-17 20:37:33 +0000
committerRichard Levitte <levitte@openssl.org>2000-09-17 20:37:33 +0000
commitce6990739eaa9f8a4a232cdb2fa008ce08480fec (patch)
treeaf949a223309952491cd74f126fbeb13a43f1b87
parent42cc16030197d74058b1350052f0c3df0db0b8ce (diff)
The rest of the merge in of the main trunk, a few conflicts resolved.OpenSSL-engine-0_9_6-beta2
-rwxr-xr-xConfigure11
-rw-r--r--README2
-rw-r--r--TABLE68
-rw-r--r--apps/apps.c2
-rw-r--r--apps/crl2p7.c6
-rw-r--r--apps/dgst.c2
-rw-r--r--apps/nseq.c2
-rw-r--r--apps/pkcs12.c10
-rw-r--r--crypto/asn1/a_set.c2
-rw-r--r--crypto/bn/asm/README12
-rw-r--r--crypto/bn/asm/pa-risc2.s2024
-rw-r--r--crypto/dso/dso_lib.c4
-rw-r--r--crypto/opensslv.h4
-rw-r--r--crypto/pkcs12/p12_attr.c12
-rw-r--r--crypto/pkcs12/p12_crt.c6
-rw-r--r--crypto/pkcs12/p12_npas.c2
-rw-r--r--crypto/pkcs7/pk7_doit.c2
-rw-r--r--crypto/pkcs7/pk7_mime.c2
-rw-r--r--crypto/pkcs7/pk7_smime.c4
-rw-r--r--crypto/rand/rand_win.c92
-rw-r--r--crypto/x509v3/v3_akey.c2
-rw-r--r--crypto/x509v3/v3_alt.c6
-rw-r--r--crypto/x509v3/v3_crld.c2
-rw-r--r--crypto/x509v3/v3_extku.c2
-rw-r--r--crypto/x509v3/v3_genn.c2
-rw-r--r--crypto/x509v3/v3_info.c4
-rw-r--r--crypto/x509v3/v3_utl.c2
-rw-r--r--doc/crypto/BIO_s_fd.pod28
-rw-r--r--doc/crypto/BIO_s_file.pod41
-rw-r--r--ssl/ssl_cert.c2
-rw-r--r--ssl/ssl_ciph.c2
-rw-r--r--ssl/ssl_lib.c2
32 files changed, 1866 insertions, 498 deletions
diff --git a/Configure b/Configure
index afe66c440e..0c92260a06 100755
--- a/Configure
+++ b/Configure
@@ -221,12 +221,21 @@ my %table=(
# crypto/sha/sha_lcl.h.
# <appro@fy.chalmers.se>
#
-"hpux-parisc-cc","cc:-Ae +O3 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl",
+#!#"hpux-parisc-cc","cc:-Ae +O3 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl",
# Since there is mention of this in shlib/hpux10-cc.sh
"hpux-parisc-cc-o4","cc:-Ae +O4 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl",
"hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl",
"hpux64-parisc-cc","cc:-Ae +DD64 +O3 +ESlit -z -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldld:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::::::::::dl",
+# More attempts at unified 10.X and 11.X targets for HP C compiler.
+#
+# Chris Ruemmler <ruemmler@cup.hp.com>
+# Kevin Steves <ks@hp.se>
+"hpux-parisc-cc","cc:+O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT:-ldl:MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::::::::::dl",
+"hpux-parisc2-cc","cc:+DA2.0 +DS2.0 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldl:SIXTY_FOUR_BIT MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:asm/pa-risc2.o:::::::::dl",
+"hpux64-parisc2-cc","cc:+DD64 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:asm/pa-risc2W.o:::::::::dl",
+"hpux-parisc1_1-cc","cc:+DA1.1 +DS1.1 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldl:MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::::::::::dl",
+
# HPUX 9.X config.
# Don't use the bundled cc. It is broken. Use HP ANSI C if possible, or
# egcs. gcc 2.8.1 is also broken.
diff --git a/README b/README
index 739dd0d62f..46252db971 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
- OpenSSL 0.9.6-beta1 [engine] 11 Sep 2000
+ OpenSSL 0.9.6-beta2 [engine] 17 Sep 2000
Copyright (c) 1998-2000 The OpenSSL Project
Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson
diff --git a/TABLE b/TABLE
index eb561a69b0..d2d06f3b12 100644
--- a/TABLE
+++ b/TABLE
@@ -1162,11 +1162,11 @@ $shared_cflag =
*** hpux-parisc-cc
$cc = cc
-$cflags = -Ae +O3 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY
+$cflags = +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY
$unistd =
-$thread_cflag =
-$lflags = -ldld
-$bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1
+$thread_cflag = -D_REENTRANT
+$lflags = -ldl
+$bn_ops = MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT
$bn_obj =
$des_obj =
$bf_obj =
@@ -1220,6 +1220,46 @@ $dso_scheme = dl
$shared_target=
$shared_cflag =
+*** hpux-parisc1_1-cc
+$cc = cc
+$cflags = +DA1.1 +DS1.1 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY
+$unistd =
+$thread_cflag = -D_REENTRANT
+$lflags = -ldl
+$bn_ops = MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT
+$bn_obj =
+$des_obj =
+$bf_obj =
+$md5_obj =
+$sha1_obj =
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$dso_scheme = dl
+$shared_target=
+$shared_cflag =
+
+*** hpux-parisc2-cc
+$cc = cc
+$cflags = +DA2.0 +DS2.0 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY
+$unistd =
+$thread_cflag = -D_REENTRANT
+$lflags = -ldl
+$bn_ops = SIXTY_FOUR_BIT MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT
+$bn_obj = asm/pa-risc2.o
+$des_obj =
+$bf_obj =
+$md5_obj =
+$sha1_obj =
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$dso_scheme = dl
+$shared_target=
+$shared_cflag =
+
*** hpux10-brokencc
$cc = cc
$cflags = -DB_ENDIAN -DBN_DIV2W -Ae +ESlit +O2 -z
@@ -1320,6 +1360,26 @@ $dso_scheme = dl
$shared_target=
$shared_cflag =
+*** hpux64-parisc2-cc
+$cc = cc
+$cflags = +DD64 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY
+$unistd =
+$thread_cflag = -D_REENTRANT
+$lflags = -ldl
+$bn_ops = SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT
+$bn_obj = asm/pa-risc2W.o
+$des_obj =
+$bf_obj =
+$md5_obj =
+$sha1_obj =
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$dso_scheme = dl
+$shared_target=
+$shared_cflag =
+
*** irix-cc
$cc = cc
$cflags = -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN
diff --git a/apps/apps.c b/apps/apps.c
index 50ea09baca..167c319ebe 100644
--- a/apps/apps.c
+++ b/apps/apps.c
@@ -662,7 +662,7 @@ STACK_OF(X509) *load_certs(BIO *err, char *file, int format)
if (format == FORMAT_PEM)
{
- othercerts = sk_X509_new(NULL);
+ othercerts = sk_X509_new_null();
if(!othercerts)
{
sk_X509_free(othercerts);
diff --git a/apps/crl2p7.c b/apps/crl2p7.c
index ebf5fe90ec..d02862710d 100644
--- a/apps/crl2p7.c
+++ b/apps/crl2p7.c
@@ -141,7 +141,7 @@ int MAIN(int argc, char **argv)
else if (strcmp(*argv,"-certfile") == 0)
{
if (--argc < 1) goto bad;
- if(!certflst) certflst = sk_new(NULL);
+ if(!certflst) certflst = sk_new_null();
sk_push(certflst,*(++argv));
}
else
@@ -215,7 +215,7 @@ bad:
p7s->contents->type=OBJ_nid2obj(NID_pkcs7_data);
if (!ASN1_INTEGER_set(p7s->version,1)) goto end;
- if ((crl_stack=sk_X509_CRL_new(NULL)) == NULL) goto end;
+ if ((crl_stack=sk_X509_CRL_new_null()) == NULL) goto end;
p7s->crl=crl_stack;
if (crl != NULL)
{
@@ -223,7 +223,7 @@ bad:
crl=NULL; /* now part of p7 for OPENSSL_freeing */
}
- if ((cert_stack=sk_X509_new(NULL)) == NULL) goto end;
+ if ((cert_stack=sk_X509_new_null()) == NULL) goto end;
p7s->cert=cert_stack;
if(certflst) for(i = 0; i < sk_num(certflst); i++) {
diff --git a/apps/dgst.c b/apps/dgst.c
index f0e5f954da..dbfa387f33 100644
--- a/apps/dgst.c
+++ b/apps/dgst.c
@@ -307,7 +307,7 @@ int MAIN(int argc, char **argv)
}
siglen = BIO_read(sigbio, sigbuf, siglen);
BIO_free(sigbio);
- if(siglen <= 0) {
+ if(siglen == 0) {
BIO_printf(bio_err, "Error reading signature file %s\n",
sigfile);
ERR_print_errors(bio_err);
diff --git a/apps/nseq.c b/apps/nseq.c
index cc88d50ceb..7210fbdb5e 100644
--- a/apps/nseq.c
+++ b/apps/nseq.c
@@ -123,7 +123,7 @@ int MAIN(int argc, char **argv)
if (toseq) {
seq = NETSCAPE_CERT_SEQUENCE_new();
- seq->certs = sk_X509_new(NULL);
+ seq->certs = sk_X509_new_null();
while((x509 = PEM_read_bio_X509(in, NULL, NULL, NULL)))
sk_X509_push(seq->certs,x509);
diff --git a/apps/pkcs12.c b/apps/pkcs12.c
index b57cf5f829..2cc9a0caee 100644
--- a/apps/pkcs12.c
+++ b/apps/pkcs12.c
@@ -200,7 +200,7 @@ int MAIN(int argc, char **argv)
} else if (!strcmp (*args, "-caname")) {
if (args[1]) {
args++;
- if (!canames) canames = sk_new(NULL);
+ if (!canames) canames = sk_new_null();
sk_push(canames, *args);
} else badarg = 1;
} else if (!strcmp (*args, "-in")) {
@@ -427,7 +427,7 @@ int MAIN(int argc, char **argv)
CRYPTO_push_info("reading certs from input");
#endif
- certs = sk_X509_new(NULL);
+ certs = sk_X509_new_null();
/* Load in all certs in input file */
if(!cert_load(in, certs)) {
@@ -459,7 +459,7 @@ int MAIN(int argc, char **argv)
CRYPTO_push_info("reading certs from certfile");
#endif
- bags = sk_PKCS12_SAFEBAG_new (NULL);
+ bags = sk_PKCS12_SAFEBAG_new_null ();
/* Add any more certificates asked for */
if (certsin) {
@@ -550,7 +550,7 @@ int MAIN(int argc, char **argv)
goto export_end;
}
- safes = sk_PKCS7_new (NULL);
+ safes = sk_PKCS7_new_null ();
sk_PKCS7_push (safes, authsafe);
#ifdef CRYPTO_MDEBUG
@@ -566,7 +566,7 @@ int MAIN(int argc, char **argv)
p8 = NULL;
if (name) PKCS12_add_friendlyname (bag, name, -1);
PKCS12_add_localkeyid (bag, keyid, keyidlen);
- bags = sk_PKCS12_SAFEBAG_new(NULL);
+ bags = sk_PKCS12_SAFEBAG_new_null();
sk_PKCS12_SAFEBAG_push (bags, bag);
#ifdef CRYPTO_MDEBUG
diff --git a/crypto/asn1/a_set.c b/crypto/asn1/a_set.c
index 1921f5eaa1..caf5a1419c 100644
--- a/crypto/asn1/a_set.c
+++ b/crypto/asn1/a_set.c
@@ -158,7 +158,7 @@ STACK *d2i_ASN1_SET(STACK **a, unsigned char **pp, long length,
STACK *ret=NULL;
if ((a == NULL) || ((*a) == NULL))
- { if ((ret=sk_new(NULL)) == NULL) goto err; }
+ { if ((ret=sk_new_null()) == NULL) goto err; }
else
ret=(*a);
diff --git a/crypto/bn/asm/README b/crypto/bn/asm/README
index 86bf64cfc2..a0fe58a677 100644
--- a/crypto/bn/asm/README
+++ b/crypto/bn/asm/README
@@ -15,9 +15,9 @@ On the 2 alpha C compilers I had access to, it was not possible to do
were 64 bits). So the hand assember gives access to the 128 bit result and
a 2 times speedup :-).
-There are 2 versions of assember for the HP PA-RISC.
-pa-risc.s is the origional one which works fine.
-pa-risc2.s is a new version that often generates warnings but if the
-tests pass, it gives performance that is over 2 times faster than
-pa-risc.s.
-Both were generated using gcc :-)
+There are 3 versions of assember for the HP PA-RISC.
+
+pa-risc.s is the origional one which works fine and generated using gcc :-)
+
+pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations
+by Chris Ruemmler from HP (with some help from the HP C compiler).
diff --git a/crypto/bn/asm/pa-risc2.s b/crypto/bn/asm/pa-risc2.s
index c2725996a4..7239aa2c76 100644
--- a/crypto/bn/asm/pa-risc2.s
+++ b/crypto/bn/asm/pa-risc2.s
@@ -1,416 +1,1618 @@
- .SPACE $PRIVATE$
- .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
- .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
- .SPACE $TEXT$
- .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
- .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
- .IMPORT $global$,DATA
- .IMPORT $$dyncall,MILLICODE
-; gcc_compiled.:
- .SPACE $TEXT$
- .SUBSPA $CODE$
-
- .align 4
- .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
+;
+; PA-RISC 2.0 implementation of bn_asm code, based on the
+; 64-bit version of the code. This code is effectively the
+; same as the 64-bit version except the register model is
+; slightly different given all values must be 32-bit between
+; function calls. Thus the 64-bit return values are returned
+; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
+;
+;
+; This code is approximately 2x faster than the C version
+; for RSA/DSA.
+;
+; See http://devresource.hp.com/ for more details on the PA-RISC
+; architecture. Also see the book "PA-RISC 2.0 Architecture"
+; by Gerry Kane for information on the instruction set architecture.
+;
+; Code written by Chris Ruemmler (with some help from the HP C
+; compiler).
+;
+; The code compiles with HP's assembler
+;
+
+ .level 2.0N
+ .space $TEXT$
+ .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
+
+;
+; Global Register definitions used for the routines.
+;
+; Some information about HP's runtime architecture for 32-bits.
+;
+; "Caller save" means the calling function must save the register
+; if it wants the register to be preserved.
+; "Callee save" means if a function uses the register, it must save
+; the value before using it.
+;
+; For the floating point registers
+;
+; "caller save" registers: fr4-fr11, fr22-fr31
+; "callee save" registers: fr12-fr21
+; "special" registers: fr0-fr3 (status and exception registers)
+;
+; For the integer registers
+; value zero : r0
+; "caller save" registers: r1,r19-r26
+; "callee save" registers: r3-r18
+; return register : r2 (rp)
+; return values ; r28,r29 (ret0,ret1)
+; Stack pointer ; r30 (sp)
+; millicode return ptr ; r31 (also a caller save register)
+
+
+;
+; Arguments to the routines
+;
+r_ptr .reg %r26
+a_ptr .reg %r25
+b_ptr .reg %r24
+num .reg %r24
+n .reg %r23
+
+;
+; Note that the "w" argument for bn_mul_add_words and bn_mul_words
+; is passed on the stack at a delta of -56 from the top of stack
+; as the routine is entered.
+;
+
+;
+; Globals used in some routines
+;
+
+top_overflow .reg %r23
+high_mask .reg %r22 ; value 0xffffffff80000000L
+
+
+;------------------------------------------------------------------------------
+;
+; bn_mul_add_words
+;
+;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
+; int num, BN_ULONG w)
+;
+; arg0 = r_ptr
+; arg1 = a_ptr
+; arg3 = num
+; -56(sp) = w
+;
+; Local register definitions
+;
+
+fm1 .reg %fr22
+fm .reg %fr23
+ht_temp .reg %fr24
+ht_temp_1 .reg %fr25
+lt_temp .reg %fr26
+lt_temp_1 .reg %fr27
+fm1_1 .reg %fr28
+fm_1 .reg %fr29
+
+fw_h .reg %fr7L
+fw_l .reg %fr7R
+fw .reg %fr7
+
+fht_0 .reg %fr8L
+flt_0 .reg %fr8R
+t_float_0 .reg %fr8
+
+fht_1 .reg %fr9L
+flt_1 .reg %fr9R
+t_float_1 .reg %fr9
+
+tmp_0 .reg %r31
+tmp_1 .reg %r21
+m_0 .reg %r20
+m_1 .reg %r19
+ht_0 .reg %r1
+ht_1 .reg %r3
+lt_0 .reg %r4
+lt_1 .reg %r5
+m1_0 .reg %r6
+m1_1 .reg %r7
+rp_val .reg %r8
+rp_val_1 .reg %r9
+
bn_mul_add_words
- .PROC
- .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4
- .ENTRY
- stw %r2,-20(0,%r30)
- stwm %r4,64(0,%r30)
- copy %r24,%r31
- stw %r3,-60(0,%r30)
- ldi 0,%r20
- ldo 12(%r26),%r2
- stw %r23,-16(0,%r30)
- copy %r25,%r3
- ldo 12(%r3),%r1
- fldws -16(0,%r30),%fr8L
-L$0010
- copy %r20,%r25
- ldi 0,%r24
- fldws 0(0,%r3),%fr9L
- ldw 0(0,%r26),%r19
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28
- ldw -12(0,%r30),%r29
- ldi 0,%r22
- add %r23,%r29,%r29
- addc %r22,%r28,%r28
- add %r25,%r29,%r29
- addc %r24,%r28,%r28
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20
- addib,= -1,%r31,L$0011
- stw %r29,0(0,%r26)
- copy %r20,%r25
- ldi 0,%r24
- fldws -8(0,%r1),%fr9L
- ldw -8(0,%r2),%r19
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28
- ldw -12(0,%r30),%r29
- ldi 0,%r22
- add %r23,%r29,%r29
- addc %r22,%r28,%r28
- add %r25,%r29,%r29
- addc %r24,%r28,%r28
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20
- addib,= -1,%r31,L$0011
- stw %r29,-8(0,%r2)
- copy %r20,%r25
- ldi 0,%r24
- fldws -4(0,%r1),%fr9L
- ldw -4(0,%r2),%r19
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28
- ldw -12(0,%r30),%r29
- ldi 0,%r22
- add %r23,%r29,%r29
- addc %r22,%r28,%r28
- add %r25,%r29,%r29
- addc %r24,%r28,%r28
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20
- addib,= -1,%r31,L$0011
- stw %r29,-4(0,%r2)
- copy %r20,%r25
- ldi 0,%r24
- fldws 0(0,%r1),%fr9L
- ldw 0(0,%r2),%r19
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28
- ldw -12(0,%r30),%r29
- ldi 0,%r22
- add %r23,%r29,%r29
- addc %r22,%r28,%r28
- add %r25,%r29,%r29
- addc %r24,%r28,%r28
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20
- addib,= -1,%r31,L$0011
- stw %r29,0(0,%r2)
- ldo 16(%r1),%r1
- ldo 16(%r3),%r3
- ldo 16(%r2),%r2
- bl L$0010,0
- ldo 16(%r26),%r26
-L$0011
- copy %r20,%r28
- ldw -84(0,%r30),%r2
- ldw -60(0,%r30),%r3
- bv 0(%r2)
- ldwm -64(0,%r30),%r4
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
+ .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
+ .proc
+ .callinfo frame=128
+ .entry
+ .align 64
+
+ STD %r3,0(%sp) ; save r3
+ STD %r4,8(%sp) ; save r4
+ NOP ; Needed to make the loop 16-byte aligned
+ NOP ; needed to make the loop 16-byte aligned
+
+ STD %r5,16(%sp) ; save r5
+ NOP
+ STD %r6,24(%sp) ; save r6
+ STD %r7,32(%sp) ; save r7
+
+ STD %r8,40(%sp) ; save r8
+ STD %r9,48(%sp) ; save r9
+ COPY %r0,%ret1 ; return 0 by default
+ DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
+
+ CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
+ LDO 128(%sp),%sp ; bump stack
+
+ ;
+ ; The loop is unrolled twice, so if there is only 1 number
+ ; then go straight to the cleanup code.
+ ;
+ CMPIB,= 1,num,bn_mul_add_words_single_top
+ FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
+
+ ;
+ ; This loop is unrolled 2 times (64-byte aligned as well)
+ ;
+ ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
+ ; two 32-bit mutiplies can be issued per cycle.
+ ;
+bn_mul_add_words_unroll2
+
+ FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
+ FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
+ LDD 0(r_ptr),rp_val ; rp[0]
+ LDD 8(r_ptr),rp_val_1 ; rp[1]
+
+ XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
+ XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
+ FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
+ FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
+
+ XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
+ XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
+ FSTD fm,-8(%sp) ; -8(sp) = m[0]
+ FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
+
+ XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
+ XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
+ FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
+ FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
+
+ XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
+ XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
+ FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
+ FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
+
+ LDD -8(%sp),m_0 ; m[0]
+ LDD -40(%sp),m_1 ; m[1]
+ LDD -16(%sp),m1_0 ; m1[0]
+ LDD -48(%sp),m1_1 ; m1[1]
+
+ LDD -24(%sp),ht_0 ; ht[0]
+ LDD -56(%sp),ht_1 ; ht[1]
+ ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
+ ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
+
+ LDD -32(%sp),lt_0
+ LDD -64(%sp),lt_1
+ CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
+ ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
+
+ CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
+ ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
+ EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
+ DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
+
+ EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
+ DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
+ ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
+ ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
+
+ ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
+ ADD,DC ht_0,%r0,ht_0 ; ht[0]++
+ ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
+ ADD,DC ht_1,%r0,ht_1 ; ht[1]++
+
+ ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c;
+ ADD,DC ht_0,%r0,ht_0 ; ht[0]++
+ ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
+ ADD,DC ht_0,%r0,ht_0 ; ht[0]++
+
+ LDO -2(num),num ; num = num - 2;
+ ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
+ ADD,DC ht_1,%r0,ht_1 ; ht[1]++
+ STD lt_0,0(r_ptr) ; rp[0] = lt[0]
+
+ ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
+ ADD,DC ht_1,%r0,%ret1 ; ht[1]++
+ LDO 16(a_ptr),a_ptr ; a_ptr += 2
+
+ STD lt_1,8(r_ptr) ; rp[1] = lt[1]
+ CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
+ LDO 16(r_ptr),r_ptr ; r_ptr += 2
+
+ CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
+
+ ;
+ ; Top of loop aligned on 64-byte boundary
+ ;
+bn_mul_add_words_single_top
+ FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
+ LDD 0(r_ptr),rp_val ; rp[0]
+ LDO 8(a_ptr),a_ptr ; a_ptr++
+ XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
+ FSTD fm1,-16(%sp) ; -16(sp) = m1
+ XMPYU flt_0,fw_h,fm ; m = lt*fw_h
+ FSTD fm,-8(%sp) ; -8(sp) = m
+ XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
+ FSTD ht_temp,-24(%sp) ; -24(sp) = ht
+ XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
+ FSTD lt_temp,-32(%sp) ; -32(sp) = lt
+
+ LDD -8(%sp),m_0
+ LDD -16(%sp),m1_0 ; m1 = temp1
+ ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
+ LDD -24(%sp),ht_0
+ LDD -32(%sp),lt_0
+
+ CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
+ ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
+
+ EXTRD,U tmp_0,31,32,m_0 ; m>>32
+ DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
+
+ ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
+ ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
+ ADD,DC ht_0,%r0,ht_0 ; ht++
+ ADD %ret1,tmp_0,lt_0 ; lt = lt + c;
+ ADD,DC ht_0,%r0,ht_0 ; ht++
+ ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
+ ADD,DC ht_0,%r0,%ret1 ; ht++
+ STD lt_0,0(r_ptr) ; rp[0] = lt
+
+bn_mul_add_words_exit
+ .EXIT
+
+ EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
+ LDD -80(%sp),%r9 ; restore r9
+ LDD -88(%sp),%r8 ; restore r8
+ LDD -96(%sp),%r7 ; restore r7
+ LDD -104(%sp),%r6 ; restore r6
+ LDD -112(%sp),%r5 ; restore r5
+ LDD -120(%sp),%r4 ; restore r4
+ BVE (%rp)
+ LDD,MB -128(%sp),%r3 ; restore r3
+ .PROCEND ;in=23,24,25,26,29;out=28;
+
+;----------------------------------------------------------------------------
+;
+;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+;
+; arg0 = rp
+; arg1 = ap
+; arg3 = num
+; w on stack at -56(sp)
+
bn_mul_words
- .PROC
- .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3
- .ENTRY
- stw %r2,-20(0,%r30)
- copy %r25,%r2
- stwm %r4,64(0,%r30)
- copy %r24,%r19
- ldi 0,%r28
- stw %r23,-16(0,%r30)
- ldo 12(%r26),%r31
- ldo 12(%r2),%r29
- fldws -16(0,%r30),%fr8L
-L$0026
- fldws 0(0,%r2),%fr9L
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21
- ldi 0,%r20
- ldw -16(0,%r30),%r24
- ldw -12(0,%r30),%r25
- add %r21,%r25,%r25
- addc %r20,%r24,%r24
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28
- addib,= -1,%r19,L$0027
- stw %r25,0(0,%r26)
- fldws -8(0,%r29),%fr9L
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21
- ldi 0,%r20
- ldw -16(0,%r30),%r24
- ldw -12(0,%r30),%r25
- add %r21,%r25,%r25
- addc %r20,%r24,%r24
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28
- addib,= -1,%r19,L$0027
- stw %r25,-8(0,%r31)
- fldws -4(0,%r29),%fr9L
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21
- ldi 0,%r20
- ldw -16(0,%r30),%r24
- ldw -12(0,%r30),%r25
- add %r21,%r25,%r25
- addc %r20,%r24,%r24
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28
- addib,= -1,%r19,L$0027
- stw %r25,-4(0,%r31)
- fldws 0(0,%r29),%fr9L
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21
- ldi 0,%r20
- ldw -16(0,%r30),%r24
- ldw -12(0,%r30),%r25
- add %r21,%r25,%r25
- addc %r20,%r24,%r24
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28
- addib,= -1,%r19,L$0027
- stw %r25,0(0,%r31)
- ldo 16(%r29),%r29
- ldo 16(%r2),%r2
- ldo 16(%r31),%r31
- bl L$0026,0
- ldo 16(%r26),%r26
-L$0027
- ldw -84(0,%r30),%r2
- bv 0(%r2)
- ldwm -64(0,%r30),%r4
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+ .proc
+ .callinfo frame=128
+ .entry
+ .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+ .align 64
+
+ STD %r3,0(%sp) ; save r3
+ STD %r4,8(%sp) ; save r4
+ NOP
+ STD %r5,16(%sp) ; save r5
+
+ STD %r6,24(%sp) ; save r6
+ STD %r7,32(%sp) ; save r7
+ COPY %r0,%ret1 ; return 0 by default
+ DEPDI,Z 1,31,