diff options
Diffstat (limited to 'crypto/bn')
100 files changed, 25588 insertions, 972 deletions
diff --git a/crypto/bn/DSA b/crypto/bn/DSA new file mode 100644 index 0000000000..83f257c84f --- /dev/null +++ b/crypto/bn/DSA @@ -0,0 +1,2 @@ +DSA wants 64*32 to use word mont mul, but +RSA wants to use full. diff --git a/crypto/bn/Makefile.ssl b/crypto/bn/Makefile.ssl index 9809d26cbc..0a365fca6a 100644 --- a/crypto/bn/Makefile.ssl +++ b/crypto/bn/Makefile.ssl @@ -13,9 +13,9 @@ MAKEDEPEND= makedepend -f Makefile.ssl MAKEFILE= Makefile.ssl AR= ar r -BN_MULW= bn_mulw.o +BN_ASM= bn_asm.o # or use -#BN_MULW= bn86-elf.o +#BN_ASM= bn86-elf.o CFLAGS= $(INCLUDES) $(CFLAG) @@ -26,16 +26,15 @@ TEST=bntest.c exptest.c APPS= LIB=$(TOP)/libcrypto.a -LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mod.c bn_mul.c \ - bn_print.c bn_rand.c bn_shift.c bn_sub.c bn_word.c bn_blind.c \ - bn_gcd.c bn_prime.c $(ERRC).c bn_sqr.c bn_mulw.c bn_recp.c bn_mont.c \ - bn_mpi.c - -LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mod.o bn_mul.o \ - bn_print.o bn_rand.o bn_shift.o bn_sub.o bn_word.o bn_blind.o \ - bn_gcd.o bn_prime.o $(ERRC).o bn_sqr.o $(BN_MULW) bn_recp.o bn_mont.o \ - bn_mpi.o +LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mul.c \ + bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ + bn_gcd.c bn_prime.c $(ERRC).c bn_sqr.c bn_asm.c bn_recp.c bn_mont.c \ + bn_mpi.c bn_exp2.c +LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mul.o \ + bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \ + bn_gcd.o bn_prime.o $(ERRC).o bn_sqr.o $(BN_ASM) bn_recp.o bn_mont.o \ + bn_mpi.o bn_exp2.o SRC= $(LIBSRC) @@ -65,23 +64,48 @@ lib: $(LIBOBJ) asm/bn86-elf.o: asm/bn86unix.cpp $(CPP) -DELF asm/bn86unix.cpp | as -o asm/bn86-elf.o +asm/co86-elf.o: asm/co86unix.cpp + $(CPP) -DELF asm/co86unix.cpp | as -o asm/co86-elf.o + # solaris asm/bn86-sol.o: asm/bn86unix.cpp $(CC) -E -DSOL asm/bn86unix.cpp | sed 's/^#.*//' > asm/bn86-sol.s as -o asm/bn86-sol.o asm/bn86-sol.s rm -f asm/bn86-sol.s +asm/co86-sol.o: asm/co86unix.cpp + $(CC) -E -DSOL asm/co86unix.cpp | sed 's/^#.*//' > asm/co86-sol.s + as -o asm/co86-sol.o asm/co86-sol.s + rm -f asm/co86-sol.s + # a.out asm/bn86-out.o: asm/bn86unix.cpp $(CPP) -DOUT asm/bn86unix.cpp | as -o asm/bn86-out.o +asm/co86-out.o: asm/co86unix.cpp + $(CPP) -DOUT asm/co86unix.cpp | as -o asm/co86-out.o + # bsdi asm/bn86bsdi.o: asm/bn86unix.cpp - $(CPP) -DBSDI asm/bn86unix.cpp | as -o asm/bn86bsdi.o + $(CPP) -DBSDI asm/bn86unix.cpp | sed 's/ :/:/' | as -o asm/bn86bsdi.o + +asm/co86bsdi.o: asm/co86unix.cpp + $(CPP) -DBSDI asm/co86unix.cpp | sed 's/ :/:/' | as -o asm/co86bsdi.o asm/bn86unix.cpp: (cd asm; perl bn-586.pl cpp >bn86unix.cpp ) +asm/co86unix.cpp: + (cd asm; perl co-586.pl cpp >co86unix.cpp ) + +# MIPS 64 bit assember +asm/mips3.o: asm/mips3.s + /usr/bin/as -mips3 -O2 -o asm/mips3.o asm/mips3.s + +# MIPS 32 bit assember +asm/mips1.o: asm/mips1.s + /usr/bin/as -O2 -o asm/mips1.o asm/mips1.s + files: perl $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO @@ -123,7 +147,7 @@ dclean: mv -f Makefile.new $(MAKEFILE) clean: - /bin/rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_mulw.s + /bin/rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s errors: perl $(TOP)/util/err-ins.pl $(ERR).err $(ERR).org # special case .org diff --git a/crypto/bn/alpha.s b/crypto/bn/alpha.s new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/crypto/bn/alpha.s diff --git a/crypto/bn/asm/a.out b/crypto/bn/asm/a.out Binary files differnew file mode 100644 index 0000000000..cc5094ff45 --- /dev/null +++ b/crypto/bn/asm/a.out diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s index 1d17b1d619..cf0b69cff9 100644 --- a/crypto/bn/asm/alpha.s +++ b/crypto/bn/asm/alpha.s @@ -2,7 +2,13 @@ # The bn_div64 is actually gcc output but the other parts are hand done. # Thanks to tzeruch@ceddec.com for sending me the gcc output for # bn_div64. - .file 1 "bn_mulw.c" + # I've gone back and re-done most of routines. + # The key thing to remeber for the 164 CPU is that while a + # multiply operation takes 8 cycles, another one can only be issued + # after 4 cycles have elapsed. I've done modification to help + # improve this. Also, normally, a ld instruction will not be available + # for about 3 cycles. + .file 1 "bn_asm.c" .set noat gcc2_compiled.: __gnu_compiled_c: @@ -14,65 +20,91 @@ bn_mul_add_words: bn_mul_add_words..ng: .frame $30,0,$26,0 .prologue 0 - subq $18,2,$25 # num=-2 - bis $31,$31,$0 - blt $25,$42 .align 5 -$142: - subq $18,2,$18 # num-=2 - subq $25,2,$25 # num-=2 - - ldq $1,0($17) # a[0] - ldq $2,8($17) # a[1] - - mulq $19,$1,$3 # a[0]*w low part r3 - umulh $19,$1,$1 # a[0]*w high part r1 - mulq $19,$2,$4 # a[1]*w low part r4 - umulh $19,$2,$2 # a[1]*w high part r2 - - ldq $22,0($16) # r[0] r22 - ldq $23,8($16) # r[1] r23 - - addq $3,$22,$3 # a0 low part + r[0] - addq $4,$23,$4 # a1 low part + r[1] - cmpult $3,$22,$5 # overflow? - cmpult $4,$23,$6 # overflow? - addq $5,$1,$1 # high part + overflow - addq $6,$2,$2 # high part + overflow - - addq $3,$0,$3 # add c - cmpult $3,$0,$5 # overflow? - stq $3,0($16) - addq $5,$1,$0 # c=high part + overflow - - addq $4,$0,$4 # add c - cmpult $4,$0,$5 # overflow? - stq $4,8($16) - addq $5,$2,$0 # c=high part + overflow + subq $18,4,$18 + bis $31,$31,$0 + blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code + ldq $20,0($17) # 1 1 + ldq $1,0($16) # 1 1 + .align 3 +$42: + mulq $20,$19,$5 # 1 2 1 ###### + ldq $21,8($17) # 2 1 + ldq $2,8($16) # 2 1 + umulh $20,$19,$20 # 1 2 ###### + ldq $27,16($17) # 3 1 + ldq $3,16($16) # 3 1 + mulq $21,$19,$6 # 2 2 1 ###### + ldq $28,24($17) # 4 1 + addq $1,$5,$1 # 1 2 2 + ldq $4,24($16) # 4 1 + umulh $21,$19,$21 # 2 2 ###### + cmpult $1,$5,$22 # 1 2 3 1 + addq $20,$22,$20 # 1 3 1 + addq $1,$0,$1 # 1 2 3 1 + mulq $27,$19,$7 # 3 2 1 ###### + cmpult $1,$0,$0 # 1 2 3 2 + addq $2,$6,$2 # 2 2 2 + addq $20,$0,$0 # 1 3 2 + cmpult $2,$6,$23 # 2 2 3 1 + addq $21,$23,$21 # 2 3 1 + umulh $27,$19,$27 # 3 2 ###### + addq $2,$0,$2 # 2 2 3 1 + cmpult $2,$0,$0 # 2 2 3 2 + subq $18,4,$18 + mulq $28,$19,$8 # 4 2 1 ###### + addq $21,$0,$0 # 2 3 2 + addq $3,$7,$3 # 3 2 2 + addq $16,32,$16 + cmpult $3,$7,$24 # 3 2 3 1 + stq $1,-32($16) # 1 2 4 + umulh $28,$19,$28 # 4 2 ###### + addq $27,$24,$27 # 3 3 1 + addq $3,$0,$3 # 3 2 3 1 + stq $2,-24($16) # 2 2 4 + cmpult $3,$0,$0 # 3 2 3 2 + stq $3,-16($16) # 3 2 4 + addq $4,$8,$4 # 4 2 2 + addq $27,$0,$0 # 3 3 2 + cmpult $4,$8,$25 # 4 2 3 1 + addq $17,32,$17 + addq $28,$25,$28 # 4 3 1 + addq $4,$0,$4 # 4 2 3 1 + cmpult $4,$0,$0 # 4 2 3 2 + stq $4,-8($16) # 4 2 4 + addq $28,$0,$0 # 4 3 2 + blt $18,$43 - ble $18,$43 + ldq $20,0($17) # 1 1 + ldq $1,0($16) # 1 1 - addq $16,16,$16 - addq $17,16,$17 - blt $25,$42 + br $42 - br $31,$142 -$42: - ldq $1,0($17) # a[0] - umulh $19,$1,$3 # a[0]*w high part - mulq $19,$1,$1 # a[0]*w low part - ldq $2,0($16) # r[0] - addq $1,$2,$1 # low part + r[0] - cmpult $1,$2,$4 # overflow? - addq $4,$3,$3 # high part + overflow - addq $1,$0,$1 # add c - cmpult $1,$0,$4 # overflow? - addq $4,$3,$0 # c=high part + overflow - stq $1,0($16) + .align 4 +$45: + ldq $20,0($17) # 4 1 + ldq $1,0($16) # 4 1 + mulq $20,$19,$5 # 4 2 1 + subq $18,1,$18 + addq $16,8,$16 + addq $17,8,$17 + umulh $20,$19,$20 # 4 2 + addq $1,$5,$1 # 4 2 2 + cmpult $1,$5,$22 # 4 2 3 1 + addq $20,$22,$20 # 4 3 1 + addq $1,$0,$1 # 4 2 3 1 + cmpult $1,$0,$0 # 4 2 3 2 + addq $20,$0,$0 # 4 3 2 + stq $1,-8($16) # 4 2 4 + bgt $18,$45 + ret $31,($26),1 # else exit .align 4 $43: - ret $31,($26),1 + addq $18,4,$18 + bgt $18,$45 # goto tail code + ret $31,($26),1 # else exit + .end bn_mul_add_words .align 3 .globl bn_mul_words @@ -81,49 +113,75 @@ bn_mul_words: bn_mul_words..ng: .frame $30,0,$26,0 .prologue 0 - subq $18,2,$25 # num=-2 - bis $31,$31,$0 - blt $25,$242 .align 5 -$342: - subq $18,2,$18 # num-=2 - subq $25,2,$25 # num-=2 - - ldq $1,0($17) # a[0] - ldq $2,8($17) # a[1] - - mulq $19,$1,$3 # a[0]*w low part r3 - umulh $19,$1,$1 # a[0]*w high part r1 - mulq $19,$2,$4 # a[1]*w low part r4 - umulh $19,$2,$2 # a[1]*w high part r2 - - addq $3,$0,$3 # add c - cmpult $3,$0,$5 # overflow? - stq $3,0($16) - addq $5,$1,$0 # c=high part + overflow - - addq $4,$0,$4 # add c - cmpult $4,$0,$5 # overflow? - stq $4,8($16) - addq $5,$2,$0 # c=high part + overflow - - ble $18,$243 - - addq $16,16,$16 - addq $17,16,$17 - blt $25,$242 - - br $31,$342 -$242: - ldq $1,0($17) # a[0] - umulh $19,$1,$3 # a[0]*w high part - mulq $19,$1,$1 # a[0]*w low part - addq $1,$0,$1 # add c - cmpult $1,$0,$4 # overflow? - addq $4,$3,$0 # c=high part + overflow - stq $1,0($16) -$243: - ret $31,($26),1 + subq $18,4,$18 + bis $31,$31,$0 + blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code + ldq $20,0($17) # 1 1 + .align 3 +$142: + + mulq $20,$19,$5 # 1 2 1 ##### + ldq $21,8($17) # 2 1 + ldq $27,16($17) # 3 1 + umulh $20,$19,$20 # 1 2 ##### + ldq $28,24($17) # 4 1 + mulq $21,$19,$6 # 2 2 1 ##### + addq $5,$0,$5 # 1 2 3 1 + subq $18,4,$18 + cmpult $5,$0,$0 # 1 2 3 2 + umulh $21,$19,$21 # 2 2 ##### + addq $20,$0,$0 # 1 3 2 + addq $17,32,$17 + addq $6,$0,$6 # 2 2 3 1 + mulq $27,$19,$7 # 3 2 1 ##### + cmpult $6,$0,$0 # 2 2 3 2 + addq $21,$0,$0 # 2 3 2 + addq $16,32,$16 + umulh $27,$19,$27 # 3 2 ##### + stq $5,-32($16) # 1 2 4 + mulq $28,$19,$8 # 4 2 1 ##### + addq $7,$0,$7 # 3 2 3 1 + stq $6,-24($16) # 2 2 4 + cmpult $7,$0,$0 # 3 2 3 2 + umulh $28,$19,$28 # 4 2 ##### + addq $27,$0,$0 # 3 3 2 + stq $7,-16($16) # 3 2 4 + addq $8,$0,$8 # 4 2 3 1 + cmpult $8,$0,$0 # 4 2 3 2 + + addq $28,$0,$0 # 4 3 2 + + stq $8,-8($16) # 4 2 4 + + blt $18,$143 + + ldq $20,0($17) # 1 1 + + br $142 + + .align 4 +$145: + ldq $20,0($17) # 4 1 + mulq $20,$19,$5 # 4 2 1 + subq $18,1,$18 + umulh $20,$19,$20 # 4 2 + addq $5,$0,$5 # 4 2 3 1 + addq $16,8,$16 + cmpult $5,$0,$0 # 4 2 3 2 + addq $17,8,$17 + addq $20,$0,$0 # 4 3 2 |