diff options
author | Ulf Möller <ulf@openssl.org> | 1999-05-13 13:16:42 +0000 |
---|---|---|
committer | Ulf Möller <ulf@openssl.org> | 1999-05-13 13:16:42 +0000 |
commit | bd3576d2ddedb0492f5bd3c1e47c15778e4fbe3c (patch) | |
tree | e64a4b13276c3663c3ad9f6f4235909ecadc8852 /crypto/md5 | |
parent | 7d7d2cbcb02206f3393681f2bce198e11e2e185b (diff) |
Reorganize and speed up MD5.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
Diffstat (limited to 'crypto/md5')
-rw-r--r-- | crypto/md5/Makefile.ssl | 10 | ||||
-rw-r--r-- | crypto/md5/asm/md5-586.pl | 3 | ||||
-rw-r--r-- | crypto/md5/asm/md5-sparcv9.S | 1035 | ||||
-rw-r--r-- | crypto/md5/md5.h | 36 | ||||
-rw-r--r-- | crypto/md5/md5_dgst.c | 365 | ||||
-rw-r--r-- | crypto/md5/md5_locl.h | 153 | ||||
-rw-r--r-- | crypto/md5/md5_one.c | 3 |
7 files changed, 1269 insertions, 336 deletions
diff --git a/crypto/md5/Makefile.ssl b/crypto/md5/Makefile.ssl index e27cfca0b0..f8eaf628b3 100644 --- a/crypto/md5/Makefile.ssl +++ b/crypto/md5/Makefile.ssl @@ -66,6 +66,14 @@ asm/mx86bsdi.o: asm/mx86unix.cpp asm/mx86unix.cpp: asm/md5-586.pl (cd asm; $(PERL) md5-586.pl cpp >mx86unix.cpp) +# works for both SC and gcc +asm/md5-sparcv8plus.o: asm/md5-sparcv9.S + $(CPP) -DULTRASPARC -DMD5_BLOCK_DATA_ORDER asm/md5-sparcv9.S | as -xarch=v8plus /dev/fd/0 -o asm/md5-sparcv8plus.o + +asm/md5-sparcv9.o: asm/md5-sparcv9.S + $(CC) -xarch=v9 -DULTRASPARC -DMD5_BLOCK_DATA_ORDER -c asm/md5-sparcv9.S -o asm/md5-sparcv9.o + + files: $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO @@ -103,5 +111,5 @@ clean: # DO NOT DELETE THIS LINE -- make depend depends on it. md5_dgst.o: ../../include/openssl/md5.h ../../include/openssl/opensslv.h -md5_dgst.o: md5_locl.h +md5_dgst.o: ../md32_common.h md5_locl.h md5_one.o: ../../include/openssl/md5.h md5_locl.h diff --git a/crypto/md5/asm/md5-586.pl b/crypto/md5/asm/md5-586.pl index 0249e100e1..5fc6a205ce 100644 --- a/crypto/md5/asm/md5-586.pl +++ b/crypto/md5/asm/md5-586.pl @@ -29,7 +29,7 @@ $X="esi"; 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 ); -&md5_block("md5_block_x86"); +&md5_block("md5_block_asm_host_order"); &asm_finish(); sub Np @@ -183,6 +183,7 @@ sub md5_block &mov($X, &wparam(1)); # esi &mov($C, &wparam(2)); &push("ebp"); + &shl($C, 6); &push("ebx"); &add($C, $X); # offset we end at &sub($C, 64); diff --git a/crypto/md5/asm/md5-sparcv9.S b/crypto/md5/asm/md5-sparcv9.S new file mode 100644 index 0000000000..955b3680b5 --- /dev/null +++ b/crypto/md5/asm/md5-sparcv9.S @@ -0,0 +1,1035 @@ +.ident "md5-sparcv9.S, Version 1.0" +.ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.file "md5-sparcv9.S" + +/* + * ==================================================================== + * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>. + * + * Rights for redistribution and usage in source and binary forms are + * granted as long as above copyright notices are retained. Warranty + * of any kind is (of course:-) disclaimed. + * ==================================================================== + */ + +/* + * This is my modest contribution to OpenSSL project (see + * http://www.openssl.org/ for more information about it) and is an + * assembler implementation of MD5 block hash function. I've hand-coded + * this for the sole reason to reach UltraSPARC-specific "load in + * little-endian byte order" instruction. This gives up to 15% + * performance improvement for cases when input message is aligned at + * 32 bits boundary. The module was tested under both 32 *and* 64 bit + * kernels. For updates see http://fy.chalmers.se/~appro/hpe/. + * + * To compile with SC4.x/SC5.x: + * + * cc -xarch=v[9|8plus] -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \ + * -c md5-sparcv9.S + * + * and with gcc: + * + * gcc -mcpu=ultrasparc -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \ + * -c md5-sparcv9.S + * + * or if above fails (it does if you have gas): + * + * gcc -E -DULTRASPARC -DMD5_BLOCK_DATA_ORDER md5_block.sparc.S | \ + * as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o + */ + +#define A %o0 +#define B %o1 +#define C %o2 +#define D %o3 +#define T1 %o4 +#define T2 %o5 + +#define R0 %l0 +#define R1 %l1 +#define R2 %l2 +#define R3 %l3 +#define R4 %l4 +#define R5 %l5 +#define R6 %l6 +#define R7 %l7 +#define R8 %i3 +#define R9 %i4 +#define R10 %i5 +#define R11 %g1 +#define R12 %g2 +#define R13 %g3 +#define RX %g4 + +#define Aptr %i0+0 +#define Bptr %i0+4 +#define Cptr %i0+8 +#define Dptr %i0+12 + +#define Aval R5 /* those not used at the end of the last round */ +#define Bval R6 +#define Cval R7 +#define Dval R8 + +#if defined(MD5_BLOCK_DATA_ORDER) +# if defined(ULTRASPARC) +# define LOAD lda +# define X(i) [%i1+i*4]%asi +# define md5_block md5_block_asm_data_order_aligned +# define ASI_PRIMARY_LITTLE 0x88 +# else +# error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!" +# endif +#else +# define LOAD ld +# define X(i) [%i1+i*4] +# define md5_block md5_block_asm_host_order +#endif + +.section ".text",#alloc,#execinstr +#if defined(__SUNPRO_C) && defined(__sparcv9) + /* They've said -xarch=v9 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME -192 +#else +# define FRAME -96 +#endif + +.align 32 + +.global md5_block +md5_block: + save %sp,FRAME,%sp + + ld [Dptr],D +#ifdef ASI_PRIMARY_LITTLE + mov %asi,%o7 ! How dare I? Well, I just do:-) +#else + nop +#endif + ld [Cptr],C +#ifdef ASI_PRIMARY_LITTLE + mov ASI_PRIMARY_LITTLE,%asi +#else + nop +#endif + ld [Bptr],B + nop + ld [Aptr],A + nop + LOAD X(0),R0 + nop + ba .Lmd5_block_loop + nop + +.align 32 +.Lmd5_block_loop: + +!!!!!!!!Round 0 + + xor C,D,T1 + sethi %hi(0xd76aa478),T2 + and T1,B,T1 + or T2,%lo(0xd76aa478),T2 != + xor T1,D,T1 + add T1,R0,T1 + LOAD X(1),R1 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0xe8c7b756),T2 + and T1,A,T1 != + or T2,%lo(0xe8c7b756),T2 + xor T1,C,T1 + LOAD X(2),R2 + add T1,R1,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0x242070db),T2 != + and T1,D,T1 + or T2,%lo(0x242070db),T2 + xor T1,B,T1 + add T1,R2,T1 != + LOAD X(3),R3 + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0xc1bdceee),T2 + and T1,C,T1 + or T2,%lo(0xc1bdceee),T2 + xor T1,A,T1 != + add T1,R3,T1 + LOAD X(4),R4 + add T1,T2,T1 + add B,T1,B != + sll B,22,T2 + srl B,32-22,B + or B,T2,B + xor C,D,T1 != + add B,C,B + + sethi %hi(0xf57c0faf),T2 + and T1,B,T1 + or T2,%lo(0xf57c0faf),T2 != + xor T1,D,T1 + add T1,R4,T1 + LOAD X(5),R5 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0x4787c62a),T2 + and T1,A,T1 != + or T2,%lo(0x4787c62a),T2 + xor T1,C,T1 + LOAD X(6),R6 + add T1,R5,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0xa8304613),T2 != + and T1,D,T1 + or T2,%lo(0xa8304613),T2 + xor T1,B,T1 + add T1,R6,T1 != + LOAD X(7),R7 + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0xfd469501),T2 + and T1,C,T1 + or T2,%lo(0xfd469501),T2 + xor T1,A,T1 != + add T1,R7,T1 + LOAD X(8),R8 + add T1,T2,T1 + add B,T1,B != + sll B,22,T2 + srl B,32-22,B + or B,T2,B + xor C,D,T1 != + add B,C,B + + sethi %hi(0x698098d8),T2 + and T1,B,T1 + or T2,%lo(0x698098d8),T2 != + xor T1,D,T1 + add T1,R8,T1 + LOAD X(9),R9 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0x8b44f7af),T2 + and T1,A,T1 != + or T2,%lo(0x8b44f7af),T2 + xor T1,C,T1 + LOAD X(10),R10 + add T1,R9,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0xffff5bb1),T2 != + and T1,D,T1 + or T2,%lo(0xffff5bb1),T2 + xor T1,B,T1 + add T1,R10,T1 != + LOAD X(11),R11 + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0x895cd7be),T2 + and T1,C,T1 + or T2,%lo(0x895cd7be),T2 + xor T1,A,T1 != + add T1,R11,T1 + LOAD X(12),R12 + add T1,T2,T1 + add B,T1,B != + sll B,22,T2 + srl B,32-22,B + or B,T2,B + xor C,D,T1 != + add B,C,B + + sethi %hi(0x6b901122),T2 + and T1,B,T1 + or T2,%lo(0x6b901122),T2 != + xor T1,D,T1 + add T1,R12,T1 + LOAD X(13),R13 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0xfd987193),T2 + and T1,A,T1 != + or T2,%lo(0xfd987193),T2 + xor T1,C,T1 + LOAD X(14),RX + add T1,R13,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0xa679438e),T2 != + and T1,D,T1 + or T2,%lo(0xa679438e),T2 + xor T1,B,T1 + add T1,RX,T1 != + LOAD X(15),RX + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0x49b40821),T2 + and T1,C,T1 + or T2,%lo(0x49b40821),T2 + xor T1,A,T1 != + add T1,RX,T1 + !pre-LOADed X(1),R1 + add T1,T2,T1 + add B,T1,B + sll B,22,T2 != + srl B,32-22,B + or B,T2,B + add B,C,B + +!!!!!!!!Round 1 + + xor B,C,T1 != + sethi %hi(0xf61e2562),T2 + and T1,D,T1 + or T2,%lo(0xf61e2562),T2 + xor T1,C,T1 != + add T1,R1,T1 + !pre-LOADed X(6),R6 + add T1,T2,T1 + add A,T1,A + sll A,5,T2 != + srl A,32-5,A + or A,T2,A + add A,B,A + + xor A,B,T1 != + sethi %hi(0xc040b340),T2 + and T1,C,T1 + or T2,%lo(0xc040b340),T2 + xor T1,B,T1 != + add T1,R6,T1 + !pre-LOADed X(11),R11 + add T1,T2,T1 + add D,T1,D + sll D,9,T2 != + srl D,32-9,D + or D,T2,D + add D,A,D + + xor D,A,T1 != + sethi %hi(0x265e5a51),T2 + and T1,B,T1 + or T2,%lo(0x265e5a51),T2 + xor T1,A,T1 != + add T1,R11,T1 + !pre-LOADed X(0),R0 + add T1,T2,T1 + add C,T1,C + sll C,14,T2 != + srl C,32-14,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0xe9b6c7aa),T2 + and T1,A,T1 + or T2,%lo(0xe9b6c7aa),T2 + xor T1,D,T1 != + add T1,R0,T1 + !pre-LOADed X(5),R5 + add T1,T2,T1 + add B,T1,B + sll B,20,T2 != + srl B,32-20,B + or B,T2,B + add B,C,B + + xor B,C,T1 != + sethi %hi(0xd62f105d),T2 + and T1,D,T1 + or T2,%lo(0xd62f105d),T2 + xor T1,C,T1 != + add T1,R5,T1 + !pre-LOADed X(10),R10 + add T1,T2,T1 + add A,T1,A + sll A,5,T2 != + srl A,32-5,A + or A,T2,A + add A,B,A + + xor A,B,T1 != + sethi %hi(0x02441453),T2 + and T1,C,T1 + or T2,%lo(0x02441453),T2 + xor T1,B,T1 != + add T1,R10,T1 + LOAD X(15),RX + add T1,T2,T1 + add D,T1,D != + sll D,9,T2 + srl D,32-9,D + or D,T2,D + add D,A,D != + + xor D,A,T1 + sethi %hi(0xd8a1e681),T2 + and T1,B,T1 + or T2,%lo(0xd8a1e681),T2 != + xor T1,A,T1 + add T1,RX,T1 + !pre-LOADed X(4),R4 + add T1,T2,T1 + add C,T1,C != + sll C,14,T2 + srl C,32-14,C + or C,T2,C + add C,D,C != + + xor C,D,T1 + sethi %hi(0xe7d3fbc8),T2 + and T1,A,T1 + or T2,%lo(0xe7d3fbc8),T2 != + xor T1,D,T1 + add T1,R4,T1 + !pre-LOADed X(9),R9 + add T1,T2,T1 + add B,T1,B != + sll B,20,T2 + srl B,32-20,B + or B,T2,B + add B,C,B != + + xor B,C,T1 + sethi %hi(0x21e1cde6),T2 + and T1,D,T1 + or T2,%lo(0x21e1cde6),T2 != + xor T1,C,T1 + add T1,R9,T1 + LOAD X(14),RX + add T1,T2,T1 != + add A,T1,A + sll A,5,T2 + srl A,32-5,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xc33707d6),T2 + and T1,C,T1 != + or T2,%lo(0xc33707d6),T2 + xor T1,B,T1 + add T1,RX,T1 + !pre-LOADed X(3),R3 + add T1,T2,T1 != + add D,T1,D + sll D,9,T2 + srl D,32-9,D + or D,T2,D != + add D,A,D + + xor D,A,T1 + sethi %hi(0xf4d50d87),T2 + and T1,B,T1 != + or T2,%lo(0xf4d50d87),T2 + xor T1,A,T1 + add T1,R3,T1 + !pre-LOADed X(8),R8 + add T1,T2,T1 != + add C,T1,C + sll C,14,T2 + srl C,32-14,C + or C,T2,C != + add C,D,C + + xor C,D,T1 + sethi %hi(0x455a14ed),T2 + and T1,A,T1 != + or T2,%lo(0x455a14ed),T2 + xor T1,D,T1 + add T1,R8,T1 + !pre-LOADed X(13),R13 + add T1,T2,T1 != + add B,T1,B + sll B,20,T2 + srl B,32-20,B + or B,T2,B != + add B,C,B + + xor B,C,T1 + sethi %hi(0xa9e3e905),T2 + and T1,D,T1 != + or T2,%lo(0xa9e3e905),T2 + xor T1,C,T1 + add T1,R13,T1 + !pre-LOADed X(2),R2 + add T1,T2,T1 != + add A,T1,A + sll A,5,T2 + srl A,32-5,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xfcefa3f8),T2 + and T1,C,T1 != + or T2,%lo(0xfcefa3f8),T2 + xor T1,B,T1 + add T1,R2,T1 + !pre-LOADed X(7),R7 + add T1,T2,T1 != + add D,T1,D + sll D,9,T2 + srl D,32-9,D + or D,T2,D != + add D,A,D + + xor D,A,T1 + sethi %hi(0x676f02d9),T2 + and T1,B,T1 != + or T2,%lo(0x676f02d9),T2 + xor T1,A,T1 + add T1,R7,T1 + !pre-LOADed X(12),R12 + add T1,T2,T1 != + add C,T1,C + sll C,14,T2 + srl C,32-14,C + or C,T2,C != + add C,D,C + + xor C,D,T1 + sethi %hi(0x8d2a4c8a),T2 + and T1,A,T1 != + or T2,%lo(0x8d2a4c8a),T2 + xor T1,D,T1 + add T1,R12,T1 + !pre-LOADed X(5),R5 + add T1,T2,T1 != + add B,T1,B + sll B,20,T2 + srl B,32-20,B + or B,T2,B != + add B,C,B + +!!!!!!!!Round 2 + + xor B,C,T1 + sethi %hi(0xfffa3942),T2 + xor T1,D,T1 != + or T2,%lo(0xfffa3942),T2 + add T1,R5,T1 + !pre-LOADed X(8),R8 + add T1,T2,T1 + add A,T1,A != + sll A,4,T2 + srl A,32-4,A + or A,T2,A + add A,B,A != + + xor A,B,T1 + sethi %hi(0x8771f681),T2 + xor T1,C,T1 + or T2,%lo(0x8771f681),T2 != + add T1,R8,T1 + !pre-LOADed X(11),R11 + add T1,T2,T1 + add D,T1,D + sll D,11,T2 != + srl D,32-11,D + or D,T2,D + add D,A,D + + xor D,A,T1 != + sethi %hi(0x6d9d6122),T2 + xor T1,B,T1 + or T2,%lo(0x6d9d6122),T2 + add T1,R11,T1 != + LOAD X(14),RX + add T1,T2,T1 + add C,T1,C + sll C,16,T2 != + srl C,32-16,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0xfde5380c),T2 + xor T1,A,T1 + or T2,%lo(0xfde5380c),T2 + add T1,RX,T1 != + !pre-LOADed X(1),R1 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 + srl B,32-23,B != + or B,T2,B + add B,C,B + + xor B,C,T1 + sethi %hi(0xa4beea44),T2 != + xor T1,D,T1 + or T2,%lo(0xa4beea44),T2 + add T1,R1,T1 + !pre-LOADed X(4),R4 + add T1,T2,T1 != + add A,T1,A + sll A,4,T2 + srl A,32-4,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0x4bdecfa9),T2 + xor T1,C,T1 != + or T2,%lo(0x4bdecfa9),T2 + add T1,R4,T1 + !pre-LOADed X(7),R7 + add T1,T2,T1 + add D,T1,D != + sll D,11,T2 + srl D,32-11,D + or D,T2,D + add D,A,D != + + xor D,A,T1 + sethi %hi(0xf6bb4b60),T2 + xor T1,B,T1 + or T2,%lo(0xf6bb4b60),T2 != + add T1,R7,T1 + !pre-LOADed X(10),R10 + add T1,T2,T1 + add C,T1,C + sll C,16,T2 != + srl C,32-16,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0xbebfbc70),T2 + xor T1,A,T1 + or T2,%lo(0xbebfbc70),T2 + add T1,R10,T1 != + !pre-LOADed X(13),R13 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 + srl B,32-23,B != + or B,T2,B + add B,C,B + + xor B,C,T1 + sethi %hi(0x289b7ec6),T2 != + xor T1,D,T1 + or T2,%lo(0x289b7ec6),T2 + add T1,R13,T1 + !pre-LOADed X(0),R0 + add T1,T2,T1 != + add A,T1,A + sll A,4,T2 + srl A,32-4,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xeaa127fa),T2 + xor T1,C,T1 != + or T2,%lo(0xeaa127fa),T2 + add T1,R0,T1 + !pre-LOADed X(3),R3 + add T1,T2,T1 + add D,T1,D != + sll D,11,T2 + srl D,32-11,D + or D,T2,D + add D,A,D != + + xor D,A,T1 + sethi %hi(0xd4ef3085),T2 + xor T1,B,T1 + or T2,%lo(0xd4ef3085),T2 != + add T1,R3,T1 + !pre-LOADed X(6),R6 + add T1,T2,T1 + add C,T1,C + sll C,16,T2 != + srl C,32-16,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0x04881d05),T2 + xor T1,A,T1 + or T2,%lo(0x04881d05),T2 + add T1,R6,T1 != + !pre-LOADed X(9),R9 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 + srl B,32-23,B != + or B,T2,B + add B,C,B + + xor B,C,T1 + sethi %hi(0xd9d4d039),T2 != + xor T1,D,T1 + or T2,%lo(0xd9d4d039),T2 + add T1,R9,T1 + !pre-LOADed X(12),R12 + add T1,T2,T1 != + add A,T1,A + sll A,4,T2 + srl A,32-4,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xe6db99e5),T2 + xor T1,C,T1 != + or T2,%lo(0xe6db99e5),T2 + add T1,R12,T1 + LOAD X(15),RX + add T1,T2,T1 != + add D,T1,D + sll D,11,T2 + srl D,32-11,D + or D,T2,D != + add D,A,D + + xor D,A,T1 + sethi %hi(0x1fa27cf8),T2 + xor T1,B,T1 != + or T2,%lo(0x1fa27cf8),T2 + add T1,RX,T1 + !pre-LOADed X(2),R2 + add T1,T2,T1 + add C,T1,C != + sll C,16,T2 + srl C,32-16,C + or C,T2,C + add C,D,C != + + xor C,D,T1 + sethi %hi(0xc4ac5665),T2 + xor T1,A,T1 + or T2,%lo(0xc4ac5665),T2 != + add T1,R2,T1 + !pre-LOADed X(0),R0 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 != + srl B,32-23,B + or B,T2,B + add B,C,B + +!!!!!!!!Round 3 + + orn B,D,T1 != + sethi %hi(0xf4292244),T2 + xor T1,C,T1 + or T2,%lo(0xf4292244),T2 + add T1,R0,T1 != + !pre-LOADed X(7),R7 + add T1,T2,T1 + add A,T1,A + sll A,6,T2 + srl A,32-6,A != + or A,T2,A + add A,B,A + + orn A,C,T1 + sethi %hi(0x432aff97),T2 != + xor T1,B,T1 + or T2,%lo(0x432aff97),T2 + LOAD X(14),RX + add T1,R7,T1 != + add T1,T2,T1 + add D,T1,D + sll D,10,T2 + srl D,32-10,D != + or D,T2,D + add D,A,D + + orn D,B,T1 + sethi %hi(0xab9423a7),T2 != + xor T1,A,T1 + or T2,%lo(0xab9423a7),T2 + add T1,RX,T1 + !pre-LOADed X(5),R5 + add T1,T2,T1 != + add C,T1,C + sll C,15,T2 + srl C,32-15,C + or C,T2,C != + add C,D,C + + orn C,A,T1 + sethi %hi(0xfc93a039),T2 + xor T1,D,T1 != + or T2,%lo(0xfc93a039),T2 + add T1,R5,T1 + !pre-LOADed X(12),R12 + add T1,T2,T1 + add B,T1,B != + sll B,21,T2 + srl B,32-21,B + or B,T2,B + add B,C,B != + + orn B,D,T1 + sethi %hi(0x655b59c3),T2 + xor T1,C,T1 + or T2,%lo(0x655b59c3),T2 != + add T1,R12,T1 + !pre-LOADed X(3),R3 + add T1,T2,T1 + add A,T1,A + sll A,6,T2 != + srl A,32-6,A + or A,T2,A + add A,B,A + + orn A,C,T1 != + sethi %hi(0x8f0ccc92),T2 + xor T1,B,T1 + or T2,%lo(0x8f0ccc92),T2 + add T1,R3,T1 != + !pre-LOADed X(10),R10 + add T1,T2,T1 + add D,T1,D + sll D,10,T2 + srl D,32-10,D != + or D,T2,D + add D,A,D + + orn D,B,T1 + sethi %hi(0xffeff47d),T2 != + xor T1,A,T1 + or T2,%lo(0xffeff47d),T2 + add T1,R10,T1 + !pre-LOADed X(1),R1 + add T1,T2,T1 != + add C,T1,C + sll C,15,T2 + srl C,32-15,C + or C,T2,C != + add C,D,C + + orn C,A,T1 + sethi %hi(0x85845dd1),T2 + xor T1,D,T1 != + or T2,%lo(0x85845dd1),T2 + add T1,R1,T1 + !pre-LOADed X(8),R8 + add T1,T2,T1 + add B,T1,B != + sll B,21,T2 + srl B,32-21,B + or B,T2,B + add B,C,B != + + orn B,D,T1 + sethi %hi(0x6fa87e4f),T2 + xor T1,C,T1 + or T2,%lo(0x6fa87e4f),T2 != + add T1,R8,T1 + LOAD X(15),RX + add T1,T2,T1 + add A,T1,A != + sll A,6,T2 + srl A,32-6,A + or A,T2,A + add A,B,A != + + orn A,C,T1 + sethi %hi(0xfe2ce6e0),T2 + xor T1,B,T1 + or T2,%lo(0xfe2ce6e0),T2 != + add T1,RX,T1 + !pre-LOADed X(6),R6 + add T1,T2,T1 + add D,T1,D + sll D,10,T2 != + srl D,32-10,D + or D,T2,D + add D,A,D + + orn D,B,T1 != + sethi %hi(0xa3014314),T2 + xor T1,A,T1 + or T2,%lo(0xa3014314),T2 + add T1,R6,T1 != + !pre-LOADed X(13),R13 + add T1,T2,T1 + add C,T1,C + sll C,15,T2 + srl C,32-15,C != + or C,T2,C + add C,D,C + + orn C,A,T1 + sethi %hi(0x4e0811a1),T2 != + xor T1,D,T1 + or T2,%lo(0x4e0811a1),T2 + !pre-LOADed X(4),R4 + ld [Aptr],Aval + add T1,R13,T1 != + add T1,T2,T1 + add B,T1,B + sll B,21,T2 + srl B,32-21,B != + or B,T2,B + add B,C,B + + orn B,D,T1 + sethi %hi(0xf7537e82),T2 != + xor T1,C,T1 + or T2,%lo(0xf7537e82),T2 + !pre-LOADed X(11),R11 + ld [Dptr],Dval + add T1,R4,T1 != + add T1,T2,T1 + add A,T1,A + sll A,6,T2 + srl A,32-6,A != + or A,T2,A + add A,B,A + + orn A,C,T1 + sethi %hi(0xbd3af235),T2 != + xor T1,B,T1 + or T2,%lo(0xbd3af235),T2 + !pre-LOADed X(2),R2 + ld [Cptr],Cval + add T1,R11,T1 != + add T1,T2,T1 + add D,T1,D + sll D,10,T2 + srl D,32-10,D != + or D,T2,D + add D,A,D + + orn D,B,T1 + sethi %hi(0x2ad7d2bb),T2 != + xor T1,A,T1 + or T2,%lo(0x2ad7d2bb),T2 + !pre-LOADed X(9),R9 + ld [Bptr],Bval + add T1,R2,T1 != + add Aval,A,Aval + add T1,T2,T1 + st Aval,[Aptr] + add C,T1,C != + sll C,15,T2 + add Dval,D,Dval + srl C,32-15,C + or C,T2,C != + st Dval,[Dptr] + add C,D,C + + orn C,A,T1 + sethi %hi(0xeb86d391),T2 != + xor T1,D,T1 + or T2,%lo(0xeb86d391),T2 + add T1,R9,T1 + !pre-LOADed X(0),R0 + mov Aval,A != + add T1,T2,T1 + mov Dval,D + add B,T1,B + sll B,21,T2 != + add Cval,C,Cval + srl B,32-21,B + st Cval,[Cptr] + or B,T2,B != + add B,C,B + + deccc %i2 + mov Cval,C + add B,Bval,B != + inc 64,%i1 + nop + st B,[Bptr] + nop != + +#ifdef ULTRASPARC + bg,a,pt %icc,.Lmd5_block_loop +#else + bg,a .Lmd5_block_loop +#endif + LOAD X(0),R0 + +#ifdef ASI_PRIMARY_LITTLE + mov %o7,%asi +#endif + ret + restore %g0,0,%o0 + +.type md5_block,#function +.size md5_block,(.-md5_block) diff --git a/crypto/md5/md5.h b/crypto/md5/md5.h index 6e97fe1e4f..148fb963d4 100644 --- a/crypto/md5/md5.h +++ b/crypto/md5/md5.h @@ -67,23 +67,43 @@ extern "C" { #error MD5 is disabled. #endif +/* + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + * ! MD5_LONG has to be at least 32 bits wide. If it's wider, then ! + * ! MD5_LONG_LOG2 has to be defined along. ! + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + */ + +#if defined(WIN16) || defined(__LP32__) +#define MD5_LONG unsigned long +#elif defined(_CRAY) || defined(__ILP64__) +#define MD5_LONG unsigned long +#define MD5_LONG_LOG2 3 +/* + * _CRAY note. I could declare short, but I have no idea what impact + * does it have on performance on none-T3E machines. I could declare + * int, but at least on C90 sizeof(int) can be chosen at compile time. + * So I've chosen long... + * <appro@fy.chalmers.se> + */ +#else +#define MD5_LONG unsigned int +#endif + #define MD5_CBLOCK 64 -#define MD5_LBLOCK 16 -#define MD5_BLOCK 16 -#define MD5_LAST_BLOCK 56 -#define MD5_LENGTH_BLOCK 8 +#define MD5_LBLOCK (MD5_CBLOCK/4) #define MD5_DIGEST_LENGTH 16 typedef struct MD5state_st { - unsigned long A,B,C,D; - unsigned long Nl,Nh; - unsigned long data[MD5_LBLOCK]; + MD5_LONG A,B,C,D; + MD5_LONG Nl,Nh; + MD5_LONG data[MD5_LBLOCK]; int num; } MD5_CTX; void MD5_Init(MD5_CTX *c); -void MD5_Update(MD5_CTX *c, const void *data, unsigned long len); +void MD5_Update(MD5_CTX *c, const unsigned char *data, unsigned long len); void MD5_Final(unsigned char *md, MD5_CTX *c); unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md); void MD5_Transform(MD5_CTX *c, unsigned char *b); diff --git a/crypto/md5/md5_dgst.c b/crypto/md5/md5_dgst.c index 2671b00172..830e04d02a 100644 --- a/crypto/md5/md5_dgst.c +++ b/crypto/md5/md5_dgst.c @@ -70,12 +70,6 @@ char *MD5_version="MD5" OPENSSL_VERSION_PTEXT; #define INIT_DATA_C (unsigned long)0x98badcfeL #define INIT_DATA_D (unsigned long)0x10325476L -# ifdef MD5_ASM - void md5_block_x86(MD5_CTX *c, unsigned long *p,int num); -# define md5_block md5_block_x86 -# else - static void md5_block(MD5_CTX *c, unsigned long *p,int num); -# endif void MD5_Init(MD5_CTX *c) { c->A=INIT_DATA_A; @@ -87,183 +81,31 @@ void MD5_Init(MD5_CTX *c) c->num=0; } -void MD5_Update(MD5_CTX *c, const void *_data, unsigned long len) +#ifndef md5_block_host_order +void md5_block_host_order (MD5_CTX *c, const MD5_LONG *X, int num) { - register const unsigned char *data=_data; - register ULONG *p; - int sw,sc; - ULONG l; - - if (len == 0) return; - - l=(c->Nl+(len<<3))&0xffffffffL; - /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to - * Wei Dai <weidai@eskimo.com> for pointing it out. */ - if (l < c->Nl) /* overflow */ - c->Nh++; - c->Nh+=(len>>29); - c->Nl=l; - - if (c->num != 0) - { - p=c->data; - sw=c->num>>2; - sc=c->num&0x03; - - if ((c->num+len) >= MD5_CBLOCK) - { - l= p[sw]; - p_c2l(data,l,sc); - p[sw++]=l; - for (; sw<MD5_LBLOCK; sw++) - { - c2l(data,l); - p[sw]=l; - } - len-=(MD5_CBLOCK-c->num); - - md5_block(c,p,64); - c->num=0; - /* drop through and do the rest */ - } - else - { - int ew,ec; - - c->num+=(int)len; - if ((sc+len) < 4) /* ugly, add char's to a word */ - { - l= p[sw]; - p_c2l_p(data,l,sc,len); - p[sw]=l; - } - else - { - ew=(c->num>>2); - ec=(c->num&0x03); - l= p[sw]; - p_c2l(data,l,sc); - p[sw++]=l; - for (; sw < ew; sw++) - { c2l(data,l); p[sw]=l; } - if (ec) - { - c2l_p(data,l,ec); - p[sw]=l; - } - } - return; - } - } - /* we now can process the input data in blocks of MD5_CBLOCK - * chars and save the leftovers to c->data. */ -#ifdef L_ENDIAN - if ((((unsigned long)data)%sizeof(ULONG)) == 0) - { - sw=(int)len/MD5_CBLOCK; - if (sw > 0) - { - sw*=MD5_CBLOCK; - md5_block(c,(ULONG *)data,sw); - data+=sw; - len-=sw; - } - } -#endif - p=c->data; - while (len >= MD5_CBLOCK) - { -#if defined(L_ENDIAN) || defined(B_ENDIAN) - if (p != (unsigned long *)data) - memcpy(p,data,MD5_CBLOCK); - data+=MD5_CBLOCK; -#ifdef B_ENDIAN - for (sw=(MD5_LBLOCK/4); sw; sw--) - { - Endian_Reverse32(p[0]); - Endian_Reverse32(p[1]); - Endian_Reverse32(p[2]); - Endian_Reverse32(p[3]); - p+=4; - } -#endif -#else - for (sw=(MD5_LBLOCK/4); sw; sw--) - { - c2l(data,l); *(p++)=l; - c2l(data,l); *(p++)=l; - c2l(data,l); *(p++)=l; - c2l(data,l); *(p++)=l; - } -#endif - p=c->data; - md5_block(c,p,64); - len-=MD5_CBLOCK; - } - sc=(int)len; - c->num=sc; - if (sc) - { - sw=sc>>2; /* words to copy */ -#ifdef L_ENDIAN - p[sw]=0; - memcpy(p,data,sc); -#else - sc&=0x03; - for ( ; sw; sw--) - { c2l(data,l); *(p++)=l; } - c2l_p(data,l,sc); - *p=l; -#endif - } - } - -void MD5_Transform(MD5_CTX *c, unsigned char *b) - { - ULONG p[16]; -#if !defined(L_ENDIAN) - ULONG *q; - int i; -#endif - -#if defined(B_ENDIAN) || defined(L_ENDIAN) - memcpy(p,b,64); -#ifdef B_ENDIAN - q=p; - for (i=(MD5_LBLOCK/4); i; i--) - { - Endian_Reverse32(q[0]); - Endian_Reverse32(q[1]); - Endian_Reverse32(q[2]); - Endian_Reverse32(q[3]); - q+=4; - } -#endif -#else - q=p; - for (i=(MD5_LBLOCK/4); i; i--) - { - ULONG l; - c2l(b,l); *(q++)=l; - c2l(b,l); *(q++)=l; - c2l(b,l); *(q++)=l; - c2l(b,l); *(q++)=l; - } -#endif - md5_block(c,p,64); - } |