summaryrefslogtreecommitdiffstats
path: root/crypto/bn
diff options
context:
space:
mode:
authorRalf S. Engelschall <rse@openssl.org>1998-12-21 11:00:56 +0000
committerRalf S. Engelschall <rse@openssl.org>1998-12-21 11:00:56 +0000
commitdfeab0689f69c0b4bd3480ffd37a9cacc2f17d9c (patch)
tree2f74e0cfd76a9e092548a9bf52e579aef984299b /crypto/bn
parent58964a492275ca9a59a0cd9c8155cb2491b4b909 (diff)
Import of old SSLeay release: SSLeay 0.9.1b (unreleased)SSLeay
Diffstat (limited to 'crypto/bn')
-rw-r--r--crypto/bn/DSA2
-rw-r--r--crypto/bn/Makefile.ssl50
-rw-r--r--crypto/bn/alpha.s0
-rw-r--r--crypto/bn/asm/a.outbin0 -> 5795 bytes
-rw-r--r--crypto/bn/asm/alpha.s1846
-rw-r--r--crypto/bn/asm/alpha.s.works533
-rw-r--r--crypto/bn/asm/alpha.works/add.pl119
-rw-r--r--crypto/bn/asm/alpha.works/div.pl144
-rw-r--r--crypto/bn/asm/alpha.works/mul.pl116
-rw-r--r--crypto/bn/asm/alpha.works/mul_add.pl120
-rw-r--r--crypto/bn/asm/alpha.works/mul_c4.pl213
-rw-r--r--crypto/bn/asm/alpha.works/mul_c4.works.pl98
-rw-r--r--crypto/bn/asm/alpha.works/mul_c8.pl177
-rw-r--r--crypto/bn/asm/alpha.works/sqr.pl113
-rw-r--r--crypto/bn/asm/alpha.works/sqr_c4.pl109
-rw-r--r--crypto/bn/asm/alpha.works/sqr_c8.pl132
-rw-r--r--crypto/bn/asm/alpha.works/sub.pl108
-rw-r--r--crypto/bn/asm/alpha/add.pl118
-rw-r--r--crypto/bn/asm/alpha/div.pl144
-rw-r--r--crypto/bn/asm/alpha/mul.pl104
-rw-r--r--crypto/bn/asm/alpha/mul_add.pl123
-rw-r--r--crypto/bn/asm/alpha/mul_c4.pl215
-rw-r--r--crypto/bn/asm/alpha/mul_c4.works.pl98
-rw-r--r--crypto/bn/asm/alpha/mul_c8.pl177
-rw-r--r--crypto/bn/asm/alpha/sqr.pl113
-rw-r--r--crypto/bn/asm/alpha/sqr_c4.pl109
-rw-r--r--crypto/bn/asm/alpha/sqr_c8.pl132
-rw-r--r--crypto/bn/asm/alpha/sub.pl108
-rw-r--r--crypto/bn/asm/bn-586.pl82
-rw-r--r--crypto/bn/asm/bn-alpha.pl571
-rw-r--r--crypto/bn/asm/bn-win32.asm1441
-rw-r--r--crypto/bn/asm/bn86unix.cpp1465
-rw-r--r--crypto/bn/asm/ca.pl33
-rw-r--r--crypto/bn/asm/co-586.pl286
-rw-r--r--crypto/bn/asm/co-alpha.pl116
-rw-r--r--crypto/bn/asm/co86unix.cpp1315
-rw-r--r--crypto/bn/asm/elf.s1269
-rw-r--r--crypto/bn/asm/f500
-rw-r--r--crypto/bn/asm/f.c8
-rw-r--r--crypto/bn/asm/f.elf2149
-rw-r--r--crypto/bn/asm/f.s1773
-rw-r--r--crypto/bn/asm/ff724
-rw-r--r--crypto/bn/asm/mips1.s539
-rw-r--r--crypto/bn/asm/mips3.s544
-rw-r--r--crypto/bn/asm/x86.pl28
-rw-r--r--crypto/bn/asm/x86/add.pl76
-rw-r--r--crypto/bn/asm/x86/comba.pl277
-rw-r--r--crypto/bn/asm/x86/div.pl15
-rw-r--r--crypto/bn/asm/x86/f3
-rw-r--r--crypto/bn/asm/x86/mul.pl77
-rw-r--r--crypto/bn/asm/x86/mul_add.pl87
-rw-r--r--crypto/bn/asm/x86/sqr.pl60
-rw-r--r--crypto/bn/asm/x86/sub.pl76
-rw-r--r--crypto/bn/asm/x86w16.asm6
-rw-r--r--crypto/bn/asm/x86w32.asm34
-rw-r--r--crypto/bn/bn.err17
-rw-r--r--crypto/bn/bn.h193
-rw-r--r--crypto/bn/bn.mul19
-rw-r--r--crypto/bn/bn.org193
-rw-r--r--crypto/bn/bn_add.c192
-rw-r--r--crypto/bn/bn_asm.c829
-rw-r--r--crypto/bn/bn_blind.c12
-rw-r--r--crypto/bn/bn_comba.c349
-rw-r--r--crypto/bn/bn_div.c68
-rw-r--r--crypto/bn/bn_err.c7
-rw-r--r--crypto/bn/bn_exp.c146
-rw-r--r--crypto/bn/bn_exp2.c202
-rw-r--r--crypto/bn/bn_gcd.c39
-rw-r--r--crypto/bn/bn_lcl.h77
-rw-r--r--crypto/bn/bn_lib.c330
-rw-r--r--crypto/bn/bn_mont.c411
-rw-r--r--crypto/bn/bn_mpi.c2
-rw-r--r--crypto/bn/bn_mul.c759
-rw-r--r--crypto/bn/bn_opts.c342
-rw-r--r--crypto/bn/bn_prime.c62
-rw-r--r--crypto/bn/bn_recp.c176
-rw-r--r--crypto/bn/bn_sqr.c189
-rw-r--r--crypto/bn/bn_word.c20
-rw-r--r--crypto/bn/bnspeed.c16
-rw-r--r--crypto/bn/bntest.c307
-rw-r--r--crypto/bn/comba.pl285
-rw-r--r--crypto/bn/d.c72
-rw-r--r--crypto/bn/exp.c60
-rw-r--r--crypto/bn/expspeed.c3
-rw-r--r--crypto/bn/exptest.c15
-rw-r--r--crypto/bn/m.pl32
-rw-r--r--crypto/bn/new23
-rw-r--r--crypto/bn/old/b_sqr.c205
-rw-r--r--crypto/bn/old/bn_com.c90
-rw-r--r--crypto/bn/old/bn_high.c137
-rw-r--r--crypto/bn/old/bn_ka.c578
-rw-r--r--crypto/bn/old/bn_low.c201
-rw-r--r--crypto/bn/old/bn_m.c142
-rw-r--r--crypto/bn/old/bn_mul.c.works219
-rw-r--r--crypto/bn/old/bn_wmul.c181
-rwxr-xr-xcrypto/bn/old/build3
-rw-r--r--crypto/bn/old/info22
-rw-r--r--crypto/bn/old/test.works205
-rw-r--r--crypto/bn/test.c252
-rw-r--r--crypto/bn/todo3
100 files changed, 25588 insertions, 972 deletions
diff --git a/crypto/bn/DSA b/crypto/bn/DSA
new file mode 100644
index 0000000000..83f257c84f
--- /dev/null
+++ b/crypto/bn/DSA
@@ -0,0 +1,2 @@
+DSA wants 64*32 to use word mont mul, but
+RSA wants to use full.
diff --git a/crypto/bn/Makefile.ssl b/crypto/bn/Makefile.ssl
index 9809d26cbc..0a365fca6a 100644
--- a/crypto/bn/Makefile.ssl
+++ b/crypto/bn/Makefile.ssl
@@ -13,9 +13,9 @@ MAKEDEPEND= makedepend -f Makefile.ssl
MAKEFILE= Makefile.ssl
AR= ar r
-BN_MULW= bn_mulw.o
+BN_ASM= bn_asm.o
# or use
-#BN_MULW= bn86-elf.o
+#BN_ASM= bn86-elf.o
CFLAGS= $(INCLUDES) $(CFLAG)
@@ -26,16 +26,15 @@ TEST=bntest.c exptest.c
APPS=
LIB=$(TOP)/libcrypto.a
-LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mod.c bn_mul.c \
- bn_print.c bn_rand.c bn_shift.c bn_sub.c bn_word.c bn_blind.c \
- bn_gcd.c bn_prime.c $(ERRC).c bn_sqr.c bn_mulw.c bn_recp.c bn_mont.c \
- bn_mpi.c
-
-LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mod.o bn_mul.o \
- bn_print.o bn_rand.o bn_shift.o bn_sub.o bn_word.o bn_blind.o \
- bn_gcd.o bn_prime.o $(ERRC).o bn_sqr.o $(BN_MULW) bn_recp.o bn_mont.o \
- bn_mpi.o
+LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mul.c \
+ bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
+ bn_gcd.c bn_prime.c $(ERRC).c bn_sqr.c bn_asm.c bn_recp.c bn_mont.c \
+ bn_mpi.c bn_exp2.c
+LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mul.o \
+ bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
+ bn_gcd.o bn_prime.o $(ERRC).o bn_sqr.o $(BN_ASM) bn_recp.o bn_mont.o \
+ bn_mpi.o bn_exp2.o
SRC= $(LIBSRC)
@@ -65,23 +64,48 @@ lib: $(LIBOBJ)
asm/bn86-elf.o: asm/bn86unix.cpp
$(CPP) -DELF asm/bn86unix.cpp | as -o asm/bn86-elf.o
+asm/co86-elf.o: asm/co86unix.cpp
+ $(CPP) -DELF asm/co86unix.cpp | as -o asm/co86-elf.o
+
# solaris
asm/bn86-sol.o: asm/bn86unix.cpp
$(CC) -E -DSOL asm/bn86unix.cpp | sed 's/^#.*//' > asm/bn86-sol.s
as -o asm/bn86-sol.o asm/bn86-sol.s
rm -f asm/bn86-sol.s
+asm/co86-sol.o: asm/co86unix.cpp
+ $(CC) -E -DSOL asm/co86unix.cpp | sed 's/^#.*//' > asm/co86-sol.s
+ as -o asm/co86-sol.o asm/co86-sol.s
+ rm -f asm/co86-sol.s
+
# a.out
asm/bn86-out.o: asm/bn86unix.cpp
$(CPP) -DOUT asm/bn86unix.cpp | as -o asm/bn86-out.o
+asm/co86-out.o: asm/co86unix.cpp
+ $(CPP) -DOUT asm/co86unix.cpp | as -o asm/co86-out.o
+
# bsdi
asm/bn86bsdi.o: asm/bn86unix.cpp
- $(CPP) -DBSDI asm/bn86unix.cpp | as -o asm/bn86bsdi.o
+ $(CPP) -DBSDI asm/bn86unix.cpp | sed 's/ :/:/' | as -o asm/bn86bsdi.o
+
+asm/co86bsdi.o: asm/co86unix.cpp
+ $(CPP) -DBSDI asm/co86unix.cpp | sed 's/ :/:/' | as -o asm/co86bsdi.o
asm/bn86unix.cpp:
(cd asm; perl bn-586.pl cpp >bn86unix.cpp )
+asm/co86unix.cpp:
+ (cd asm; perl co-586.pl cpp >co86unix.cpp )
+
+# MIPS 64 bit assembler
+asm/mips3.o: asm/mips3.s
+ /usr/bin/as -mips3 -O2 -o asm/mips3.o asm/mips3.s
+
+# MIPS 32 bit assembler
+asm/mips1.o: asm/mips1.s
+ /usr/bin/as -O2 -o asm/mips1.o asm/mips1.s
+
files:
perl $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
@@ -123,7 +147,7 @@ dclean:
mv -f Makefile.new $(MAKEFILE)
clean:
- /bin/rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_mulw.s
+ /bin/rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s
errors:
perl $(TOP)/util/err-ins.pl $(ERR).err $(ERR).org # special case .org
diff --git a/crypto/bn/alpha.s b/crypto/bn/alpha.s
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/crypto/bn/alpha.s
diff --git a/crypto/bn/asm/a.out b/crypto/bn/asm/a.out
new file mode 100644
index 0000000000..cc5094ff45
--- /dev/null
+++ b/crypto/bn/asm/a.out
Binary files differ
diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s
index 1d17b1d619..cf0b69cff9 100644
--- a/crypto/bn/asm/alpha.s
+++ b/crypto/bn/asm/alpha.s
@@ -2,7 +2,13 @@
# The bn_div64 is actually gcc output but the other parts are hand done.
# Thanks to tzeruch@ceddec.com for sending me the gcc output for
# bn_div64.
- .file 1 "bn_mulw.c"
+ # I've gone back and re-done most of the routines.
+ # The key thing to remember for the 164 CPU is that while a
+ # multiply operation takes 8 cycles, another one can only be issued
+ # after 4 cycles have elapsed. I've made modifications to help
+ # improve this. Also, normally, the result of a ld instruction will
+ # not be available for about 3 cycles.
+ .file 1 "bn_asm.c"
.set noat
gcc2_compiled.:
__gnu_compiled_c:
@@ -14,65 +20,91 @@ bn_mul_add_words:
bn_mul_add_words..ng:
.frame $30,0,$26,0
.prologue 0
- subq $18,2,$25 # num=-2
- bis $31,$31,$0
- blt $25,$42
.align 5
-$142:
- subq $18,2,$18 # num-=2
- subq $25,2,$25 # num-=2
-
- ldq $1,0($17) # a[0]
- ldq $2,8($17) # a[1]
-
- mulq $19,$1,$3 # a[0]*w low part r3
- umulh $19,$1,$1 # a[0]*w high part r1
- mulq $19,$2,$4 # a[1]*w low part r4
- umulh $19,$2,$2 # a[1]*w high part r2
-
- ldq $22,0($16) # r[0] r22
- ldq $23,8($16) # r[1] r23
-
- addq $3,$22,$3 # a0 low part + r[0]
- addq $4,$23,$4 # a1 low part + r[1]
- cmpult $3,$22,$5 # overflow?
- cmpult $4,$23,$6 # overflow?
- addq $5,$1,$1 # high part + overflow
- addq $6,$2,$2 # high part + overflow
-
- addq $3,$0,$3 # add c
- cmpult $3,$0,$5 # overflow?
- stq $3,0($16)
- addq $5,$1,$0 # c=high part + overflow
-
- addq $4,$0,$4 # add c
- cmpult $4,$0,$5 # overflow?
- stq $4,8($16)
- addq $5,$2,$0 # c=high part + overflow
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
+ .align 3
+$42:
+ mulq $20,$19,$5 # 1 2 1 ######
+ ldq $21,8($17) # 2 1
+ ldq $2,8($16) # 2 1
+ umulh $20,$19,$20 # 1 2 ######
+ ldq $27,16($17) # 3 1
+ ldq $3,16($16) # 3 1
+ mulq $21,$19,$6 # 2 2 1 ######
+ ldq $28,24($17) # 4 1
+ addq $1,$5,$1 # 1 2 2
+ ldq $4,24($16) # 4 1
+ umulh $21,$19,$21 # 2 2 ######
+ cmpult $1,$5,$22 # 1 2 3 1
+ addq $20,$22,$20 # 1 3 1
+ addq $1,$0,$1 # 1 2 3 1
+ mulq $27,$19,$7 # 3 2 1 ######
+ cmpult $1,$0,$0 # 1 2 3 2
+ addq $2,$6,$2 # 2 2 2
+ addq $20,$0,$0 # 1 3 2
+ cmpult $2,$6,$23 # 2 2 3 1
+ addq $21,$23,$21 # 2 3 1
+ umulh $27,$19,$27 # 3 2 ######
+ addq $2,$0,$2 # 2 2 3 1
+ cmpult $2,$0,$0 # 2 2 3 2
+ subq $18,4,$18
+ mulq $28,$19,$8 # 4 2 1 ######
+ addq $21,$0,$0 # 2 3 2
+ addq $3,$7,$3 # 3 2 2
+ addq $16,32,$16
+ cmpult $3,$7,$24 # 3 2 3 1
+ stq $1,-32($16) # 1 2 4
+ umulh $28,$19,$28 # 4 2 ######
+ addq $27,$24,$27 # 3 3 1
+ addq $3,$0,$3 # 3 2 3 1
+ stq $2,-24($16) # 2 2 4
+ cmpult $3,$0,$0 # 3 2 3 2
+ stq $3,-16($16) # 3 2 4
+ addq $4,$8,$4 # 4 2 2
+ addq $27,$0,$0 # 3 3 2
+ cmpult $4,$8,$25 # 4 2 3 1
+ addq $17,32,$17
+ addq $28,$25,$28 # 4 3 1
+ addq $4,$0,$4 # 4 2 3 1
+ cmpult $4,$0,$0 # 4 2 3 2
+ stq $4,-8($16) # 4 2 4
+ addq $28,$0,$0 # 4 3 2
+ blt $18,$43
- ble $18,$43
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
- addq $16,16,$16
- addq $17,16,$17
- blt $25,$42
+ br $42
- br $31,$142
-$42:
- ldq $1,0($17) # a[0]
- umulh $19,$1,$3 # a[0]*w high part
- mulq $19,$1,$1 # a[0]*w low part
- ldq $2,0($16) # r[0]
- addq $1,$2,$1 # low part + r[0]
- cmpult $1,$2,$4 # overflow?
- addq $4,$3,$3 # high part + overflow
- addq $1,$0,$1 # add c
- cmpult $1,$0,$4 # overflow?
- addq $4,$3,$0 # c=high part + overflow
- stq $1,0($16)
+ .align 4
+$45:
+ ldq $20,0($17) # 4 1
+ ldq $1,0($16) # 4 1
+ mulq $20,$19,$5 # 4 2 1
+ subq $18,1,$18
+ addq $16,8,$16
+ addq $17,8,$17
+ umulh $20,$19,$20 # 4 2
+ addq $1,$5,$1 # 4 2 2
+ cmpult $1,$5,$22 # 4 2 3 1
+ addq $20,$22,$20 # 4 3 1
+ addq $1,$0,$1 # 4 2 3 1
+ cmpult $1,$0,$0 # 4 2 3 2
+ addq $20,$0,$0 # 4 3 2
+ stq $1,-8($16) # 4 2 4
+ bgt $18,$45
+ ret $31,($26),1 # else exit
.align 4
$43:
- ret $31,($26),1
+ addq $18,4,$18
+ bgt $18,$45 # goto tail code
+ ret $31,($26),1 # else exit
+
.end bn_mul_add_words
.align 3
.globl bn_mul_words
@@ -81,49 +113,75 @@ bn_mul_words:
bn_mul_words..ng:
.frame $30,0,$26,0
.prologue 0
- subq $18,2,$25 # num=-2
- bis $31,$31,$0
- blt $25,$242
.align 5
-$342:
- subq $18,2,$18 # num-=2
- subq $25,2,$25 # num-=2
-
- ldq $1,0($17) # a[0]
- ldq $2,8($17) # a[1]
-
- mulq $19,$1,$3 # a[0]*w low part r3
- umulh $19,$1,$1 # a[0]*w high part r1
- mulq $19,$2,$4 # a[1]*w low part r4
- umulh $19,$2,$2 # a[1]*w high part r2
-
- addq $3,$0,$3 # add c
- cmpult $3,$0,$5 # overflow?
- stq $3,0($16)
- addq $5,$1,$0 # c=high part + overflow
-
- addq $4,$0,$4 # add c
- cmpult $4,$0,$5 # overflow?
- stq $4,8($16)
- addq $5,$2,$0 # c=high part + overflow
-
- ble $18,$243
-
- addq $16,16,$16
- addq $17,16,$17
- blt $25,$242
-
- br $31,$342
-$242:
- ldq $1,0($17) # a[0]
- umulh $19,$1,$3 # a[0]*w high part
- mulq $19,$1,$1 # a[0]*w low part
- addq $1,$0,$1 # add c
- cmpult $1,$0,$4 # overflow?
- addq $4,$3,$0 # c=high part + overflow
- stq $1,0($16)
-$243:
- ret $31,($26),1
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ .align 3
+$142:
+
+ mulq $20,$19,$5 # 1 2 1 #####
+ ldq $21,8($17) # 2 1
+ ldq $27,16($17) # 3 1
+ umulh $20,$19,$20 # 1 2 #####
+ ldq $28,24($17) # 4 1
+ mulq $21,$19,$6 # 2 2 1 #####
+ addq $5,$0,$5 # 1 2 3 1
+ subq $18,4,$18
+ cmpult $5,$0,$0 # 1 2 3 2
+ umulh $21,$19,$21 # 2 2 #####
+ addq $20,$0,$0 # 1 3 2
+ addq $17,32,$17
+ addq $6,$0,$6 # 2 2 3 1
+ mulq $27,$19,$7 # 3 2 1 #####
+ cmpult $6,$0,$0 # 2 2 3 2
+ addq $21,$0,$0 # 2 3 2