summaryrefslogtreecommitdiffstats
path: root/crypto/md5
diff options
context:
space:
mode:
authorUlf Möller <ulf@openssl.org>1999-05-13 13:16:42 +0000
committerUlf Möller <ulf@openssl.org>1999-05-13 13:16:42 +0000
commitbd3576d2ddedb0492f5bd3c1e47c15778e4fbe3c (patch)
treee64a4b13276c3663c3ad9f6f4235909ecadc8852 /crypto/md5
parent7d7d2cbcb02206f3393681f2bce198e11e2e185b (diff)
Reorganize and speed up MD5.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
Diffstat (limited to 'crypto/md5')
-rw-r--r--crypto/md5/Makefile.ssl10
-rw-r--r--crypto/md5/asm/md5-586.pl3
-rw-r--r--crypto/md5/asm/md5-sparcv9.S1035
-rw-r--r--crypto/md5/md5.h36
-rw-r--r--crypto/md5/md5_dgst.c365
-rw-r--r--crypto/md5/md5_locl.h153
-rw-r--r--crypto/md5/md5_one.c3
7 files changed, 1269 insertions, 336 deletions
diff --git a/crypto/md5/Makefile.ssl b/crypto/md5/Makefile.ssl
index e27cfca0b0..f8eaf628b3 100644
--- a/crypto/md5/Makefile.ssl
+++ b/crypto/md5/Makefile.ssl
@@ -66,6 +66,14 @@ asm/mx86bsdi.o: asm/mx86unix.cpp
asm/mx86unix.cpp: asm/md5-586.pl
(cd asm; $(PERL) md5-586.pl cpp >mx86unix.cpp)
+# works for both SC and gcc
+asm/md5-sparcv8plus.o: asm/md5-sparcv9.S
+ $(CPP) -DULTRASPARC -DMD5_BLOCK_DATA_ORDER asm/md5-sparcv9.S | as -xarch=v8plus /dev/fd/0 -o asm/md5-sparcv8plus.o
+
+asm/md5-sparcv9.o: asm/md5-sparcv9.S
+ $(CC) -xarch=v9 -DULTRASPARC -DMD5_BLOCK_DATA_ORDER -c asm/md5-sparcv9.S -o asm/md5-sparcv9.o
+
+
files:
$(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
@@ -103,5 +111,5 @@ clean:
# DO NOT DELETE THIS LINE -- make depend depends on it.
md5_dgst.o: ../../include/openssl/md5.h ../../include/openssl/opensslv.h
-md5_dgst.o: md5_locl.h
+md5_dgst.o: ../md32_common.h md5_locl.h
md5_one.o: ../../include/openssl/md5.h md5_locl.h
diff --git a/crypto/md5/asm/md5-586.pl b/crypto/md5/asm/md5-586.pl
index 0249e100e1..5fc6a205ce 100644
--- a/crypto/md5/asm/md5-586.pl
+++ b/crypto/md5/asm/md5-586.pl
@@ -29,7 +29,7 @@ $X="esi";
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
);
-&md5_block("md5_block_x86");
+&md5_block("md5_block_asm_host_order");
&asm_finish();
sub Np
@@ -183,6 +183,7 @@ sub md5_block
&mov($X, &wparam(1)); # esi
&mov($C, &wparam(2));
&push("ebp");
+ &shl($C, 6);
&push("ebx");
&add($C, $X); # offset we end at
&sub($C, 64);
diff --git a/crypto/md5/asm/md5-sparcv9.S b/crypto/md5/asm/md5-sparcv9.S
new file mode 100644
index 0000000000..955b3680b5
--- /dev/null
+++ b/crypto/md5/asm/md5-sparcv9.S
@@ -0,0 +1,1035 @@
+.ident "md5-sparcv9.S, Version 1.0"
+.ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.file "md5-sparcv9.S"
+
+/*
+ * ====================================================================
+ * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>.
+ *
+ * Rights for redistribution and usage in source and binary forms are
+ * granted as long as above copyright notices are retained. Warranty
+ * of any kind is (of course:-) disclaimed.
+ * ====================================================================
+ */
+
+/*
+ * This is my modest contribution to OpenSSL project (see
+ * http://www.openssl.org/ for more information about it) and is an
+ * assembler implementation of MD5 block hash function. I've hand-coded
+ * this for the sole reason to reach UltraSPARC-specific "load in
+ * little-endian byte order" instruction. This gives up to 15%
+ * performance improvement for cases when input message is aligned at
+ * 32 bits boundary. The module was tested under both 32 *and* 64 bit
+ * kernels. For updates see http://fy.chalmers.se/~appro/hpe/.
+ *
+ * To compile with SC4.x/SC5.x:
+ *
+ * cc -xarch=v[9|8plus] -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
+ * -c md5-sparcv9.S
+ *
+ * and with gcc:
+ *
+ * gcc -mcpu=ultrasparc -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
+ * -c md5-sparcv9.S
+ *
+ * or if above fails (it does if you have gas):
+ *
+ * gcc -E -DULTRASPARC -DMD5_BLOCK_DATA_ORDER md5_block.sparc.S | \
+ * as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o
+ */
+
+#define A %o0
+#define B %o1
+#define C %o2
+#define D %o3
+#define T1 %o4
+#define T2 %o5
+
+#define R0 %l0
+#define R1 %l1
+#define R2 %l2
+#define R3 %l3
+#define R4 %l4
+#define R5 %l5
+#define R6 %l6
+#define R7 %l7
+#define R8 %i3
+#define R9 %i4
+#define R10 %i5
+#define R11 %g1
+#define R12 %g2
+#define R13 %g3
+#define RX %g4
+
+#define Aptr %i0+0
+#define Bptr %i0+4
+#define Cptr %i0+8
+#define Dptr %i0+12
+
+#define Aval R5 /* those not used at the end of the last round */
+#define Bval R6
+#define Cval R7
+#define Dval R8
+
+#if defined(MD5_BLOCK_DATA_ORDER)
+# if defined(ULTRASPARC)
+# define LOAD lda
+# define X(i) [%i1+i*4]%asi
+# define md5_block md5_block_asm_data_order_aligned
+# define ASI_PRIMARY_LITTLE 0x88
+# else
+# error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!"
+# endif
+#else
+# define LOAD ld
+# define X(i) [%i1+i*4]
+# define md5_block md5_block_asm_host_order
+#endif
+
+.section ".text",#alloc,#execinstr
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+ /* They've said -xarch=v9 at command line */
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME -192
+#else
+# define FRAME -96
+#endif
+
+.align 32
+
+.global md5_block
+md5_block:
+ save %sp,FRAME,%sp
+
+ ld [Dptr],D
+#ifdef ASI_PRIMARY_LITTLE
+ mov %asi,%o7 ! How dare I? Well, I just do:-)
+#else
+ nop
+#endif
+ ld [Cptr],C
+#ifdef ASI_PRIMARY_LITTLE
+ mov ASI_PRIMARY_LITTLE,%asi
+#else
+ nop
+#endif
+ ld [Bptr],B
+ nop
+ ld [Aptr],A
+ nop
+ LOAD X(0),R0
+ nop
+ ba .Lmd5_block_loop
+ nop
+
+.align 32
+.Lmd5_block_loop:
+
+!!!!!!!!Round 0
+
+ xor C,D,T1
+ sethi %hi(0xd76aa478),T2
+ and T1,B,T1
+ or T2,%lo(0xd76aa478),T2 !=
+ xor T1,D,T1
+ add T1,R0,T1
+ LOAD X(1),R1
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0xe8c7b756),T2
+ and T1,A,T1 !=
+ or T2,%lo(0xe8c7b756),T2
+ xor T1,C,T1
+ LOAD X(2),R2
+ add T1,R1,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0x242070db),T2 !=
+ and T1,D,T1
+ or T2,%lo(0x242070db),T2
+ xor T1,B,T1
+ add T1,R2,T1 !=
+ LOAD X(3),R3
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0xc1bdceee),T2
+ and T1,C,T1
+ or T2,%lo(0xc1bdceee),T2
+ xor T1,A,T1 !=
+ add T1,R3,T1
+ LOAD X(4),R4
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0xf57c0faf),T2
+ and T1,B,T1
+ or T2,%lo(0xf57c0faf),T2 !=
+ xor T1,D,T1
+ add T1,R4,T1
+ LOAD X(5),R5
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0x4787c62a),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x4787c62a),T2
+ xor T1,C,T1
+ LOAD X(6),R6
+ add T1,R5,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xa8304613),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xa8304613),T2
+ xor T1,B,T1
+ add T1,R6,T1 !=
+ LOAD X(7),R7
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0xfd469501),T2
+ and T1,C,T1
+ or T2,%lo(0xfd469501),T2
+ xor T1,A,T1 !=
+ add T1,R7,T1
+ LOAD X(8),R8
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0x698098d8),T2
+ and T1,B,T1
+ or T2,%lo(0x698098d8),T2 !=
+ xor T1,D,T1
+ add T1,R8,T1
+ LOAD X(9),R9
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0x8b44f7af),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x8b44f7af),T2
+ xor T1,C,T1
+ LOAD X(10),R10
+ add T1,R9,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xffff5bb1),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xffff5bb1),T2
+ xor T1,B,T1
+ add T1,R10,T1 !=
+ LOAD X(11),R11
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0x895cd7be),T2
+ and T1,C,T1
+ or T2,%lo(0x895cd7be),T2
+ xor T1,A,T1 !=
+ add T1,R11,T1
+ LOAD X(12),R12
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0x6b901122),T2
+ and T1,B,T1
+ or T2,%lo(0x6b901122),T2 !=
+ xor T1,D,T1
+ add T1,R12,T1
+ LOAD X(13),R13
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0xfd987193),T2
+ and T1,A,T1 !=
+ or T2,%lo(0xfd987193),T2
+ xor T1,C,T1
+ LOAD X(14),RX
+ add T1,R13,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xa679438e),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xa679438e),T2
+ xor T1,B,T1
+ add T1,RX,T1 !=
+ LOAD X(15),RX
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0x49b40821),T2
+ and T1,C,T1
+ or T2,%lo(0x49b40821),T2
+ xor T1,A,T1 !=
+ add T1,RX,T1
+ !pre-LOADed X(1),R1
+ add T1,T2,T1
+ add B,T1,B
+ sll B,22,T2 !=
+ srl B,32-22,B
+ or B,T2,B
+ add B,C,B
+
+!!!!!!!!Round 1
+
+ xor B,C,T1 !=
+ sethi %hi(0xf61e2562),T2
+ and T1,D,T1
+ or T2,%lo(0xf61e2562),T2
+ xor T1,C,T1 !=
+ add T1,R1,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add A,T1,A
+ sll A,5,T2 !=
+ srl A,32-5,A
+ or A,T2,A
+ add A,B,A
+
+ xor A,B,T1 !=
+ sethi %hi(0xc040b340),T2
+ and T1,C,T1
+ or T2,%lo(0xc040b340),T2
+ xor T1,B,T1 !=
+ add T1,R6,T1
+ !pre-LOADed X(11),R11
+ add T1,T2,T1
+ add D,T1,D
+ sll D,9,T2 !=
+ srl D,32-9,D
+ or D,T2,D
+ add D,A,D
+
+ xor D,A,T1 !=
+ sethi %hi(0x265e5a51),T2
+ and T1,B,T1
+ or T2,%lo(0x265e5a51),T2
+ xor T1,A,T1 !=
+ add T1,R11,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1
+ add C,T1,C
+ sll C,14,T2 !=
+ srl C,32-14,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xe9b6c7aa),T2
+ and T1,A,T1
+ or T2,%lo(0xe9b6c7aa),T2
+ xor T1,D,T1 !=
+ add T1,R0,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1
+ add B,T1,B
+ sll B,20,T2 !=
+ srl B,32-20,B
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1 !=
+ sethi %hi(0xd62f105d),T2
+ and T1,D,T1
+ or T2,%lo(0xd62f105d),T2
+ xor T1,C,T1 !=
+ add T1,R5,T1
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add A,T1,A
+ sll A,5,T2 !=
+ srl A,32-5,A
+ or A,T2,A
+ add A,B,A
+
+ xor A,B,T1 !=
+ sethi %hi(0x02441453),T2
+ and T1,C,T1
+ or T2,%lo(0x02441453),T2
+ xor T1,B,T1 !=
+ add T1,R10,T1
+ LOAD X(15),RX
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xd8a1e681),T2
+ and T1,B,T1
+ or T2,%lo(0xd8a1e681),T2 !=
+ xor T1,A,T1
+ add T1,RX,T1
+ !pre-LOADed X(4),R4
+ add T1,T2,T1
+ add C,T1,C !=
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C
+ add C,D,C !=
+
+ xor C,D,T1
+ sethi %hi(0xe7d3fbc8),T2
+ and T1,A,T1
+ or T2,%lo(0xe7d3fbc8),T2 !=
+ xor T1,D,T1
+ add T1,R4,T1
+ !pre-LOADed X(9),R9
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B
+ add B,C,B !=
+
+ xor B,C,T1
+ sethi %hi(0x21e1cde6),T2
+ and T1,D,T1
+ or T2,%lo(0x21e1cde6),T2 !=
+ xor T1,C,T1
+ add T1,R9,T1
+ LOAD X(14),RX
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,5,T2
+ srl A,32-5,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xc33707d6),T2
+ and T1,C,T1 !=
+ or T2,%lo(0xc33707d6),T2
+ xor T1,B,T1
+ add T1,RX,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0xf4d50d87),T2
+ and T1,B,T1 !=
+ or T2,%lo(0xf4d50d87),T2
+ xor T1,A,T1
+ add T1,R3,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C !=
+ add C,D,C
+
+ xor C,D,T1
+ sethi %hi(0x455a14ed),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x455a14ed),T2
+ xor T1,D,T1
+ add T1,R8,T1
+ !pre-LOADed X(13),R13
+ add T1,T2,T1 !=
+ add B,T1,B
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B !=
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xa9e3e905),T2
+ and T1,D,T1 !=
+ or T2,%lo(0xa9e3e905),T2
+ xor T1,C,T1
+ add T1,R13,T1
+ !pre-LOADed X(2),R2
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,5,T2
+ srl A,32-5,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xfcefa3f8),T2
+ and T1,C,T1 !=
+ or T2,%lo(0xfcefa3f8),T2
+ xor T1,B,T1
+ add T1,R2,T1
+ !pre-LOADed X(7),R7
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0x676f02d9),T2
+ and T1,B,T1 !=
+ or T2,%lo(0x676f02d9),T2
+ xor T1,A,T1
+ add T1,R7,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C !=
+ add C,D,C
+
+ xor C,D,T1
+ sethi %hi(0x8d2a4c8a),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x8d2a4c8a),T2
+ xor T1,D,T1
+ add T1,R12,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1 !=
+ add B,T1,B
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B !=
+ add B,C,B
+
+!!!!!!!!Round 2
+
+ xor B,C,T1
+ sethi %hi(0xfffa3942),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0xfffa3942),T2
+ add T1,R5,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1
+ add A,T1,A !=
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A
+ add A,B,A !=
+
+ xor A,B,T1
+ sethi %hi(0x8771f681),T2
+ xor T1,C,T1
+ or T2,%lo(0x8771f681),T2 !=
+ add T1,R8,T1
+ !pre-LOADed X(11),R11
+ add T1,T2,T1
+ add D,T1,D
+ sll D,11,T2 !=
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D
+
+ xor D,A,T1 !=
+ sethi %hi(0x6d9d6122),T2
+ xor T1,B,T1
+ or T2,%lo(0x6d9d6122),T2
+ add T1,R11,T1 !=
+ LOAD X(14),RX
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xfde5380c),T2
+ xor T1,A,T1
+ or T2,%lo(0xfde5380c),T2
+ add T1,RX,T1 !=
+ !pre-LOADed X(1),R1
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xa4beea44),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xa4beea44),T2
+ add T1,R1,T1
+ !pre-LOADed X(4),R4
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0x4bdecfa9),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0x4bdecfa9),T2
+ add T1,R4,T1
+ !pre-LOADed X(7),R7
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xf6bb4b60),T2
+ xor T1,B,T1
+ or T2,%lo(0xf6bb4b60),T2 !=
+ add T1,R7,T1
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xbebfbc70),T2
+ xor T1,A,T1
+ or T2,%lo(0xbebfbc70),T2
+ add T1,R10,T1 !=
+ !pre-LOADed X(13),R13
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0x289b7ec6),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0x289b7ec6),T2
+ add T1,R13,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xeaa127fa),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0xeaa127fa),T2
+ add T1,R0,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xd4ef3085),T2
+ xor T1,B,T1
+ or T2,%lo(0xd4ef3085),T2 !=
+ add T1,R3,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0x04881d05),T2
+ xor T1,A,T1
+ or T2,%lo(0x04881d05),T2
+ add T1,R6,T1 !=
+ !pre-LOADed X(9),R9
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xd9d4d039),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xd9d4d039),T2
+ add T1,R9,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xe6db99e5),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0xe6db99e5),T2
+ add T1,R12,T1
+ LOAD X(15),RX
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0x1fa27cf8),T2
+ xor T1,B,T1 !=
+ or T2,%lo(0x1fa27cf8),T2
+ add T1,RX,T1
+ !pre-LOADed X(2),R2
+ add T1,T2,T1
+ add C,T1,C !=
+ sll C,16,T2
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C !=
+
+ xor C,D,T1
+ sethi %hi(0xc4ac5665),T2
+ xor T1,A,T1
+ or T2,%lo(0xc4ac5665),T2 !=
+ add T1,R2,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2 !=
+ srl B,32-23,B
+ or B,T2,B
+ add B,C,B
+
+!!!!!!!!Round 3
+
+ orn B,D,T1 !=
+ sethi %hi(0xf4292244),T2
+ xor T1,C,T1
+ or T2,%lo(0xf4292244),T2
+ add T1,R0,T1 !=
+ !pre-LOADed X(7),R7
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2
+ srl A,32-6,A !=
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1
+ sethi %hi(0x432aff97),T2 !=
+ xor T1,B,T1
+ or T2,%lo(0x432aff97),T2
+ LOAD X(14),RX
+ add T1,R7,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0xab9423a7),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0xab9423a7),T2
+ add T1,RX,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C
+ or C,T2,C !=
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0xfc93a039),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0xfc93a039),T2
+ add T1,R5,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,21,T2
+ srl B,32-21,B
+ or B,T2,B
+ add B,C,B !=
+
+ orn B,D,T1
+ sethi %hi(0x655b59c3),T2
+ xor T1,C,T1
+ or T2,%lo(0x655b59c3),T2 !=
+ add T1,R12,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2 !=
+ srl A,32-6,A
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1 !=
+ sethi %hi(0x8f0ccc92),T2
+ xor T1,B,T1
+ or T2,%lo(0x8f0ccc92),T2
+ add T1,R3,T1 !=
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0xffeff47d),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0xffeff47d),T2
+ add T1,R10,T1
+ !pre-LOADed X(1),R1
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C
+ or C,T2,C !=
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0x85845dd1),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0x85845dd1),T2
+ add T1,R1,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,21,T2
+ srl B,32-21,B
+ or B,T2,B
+ add B,C,B !=
+
+ orn B,D,T1
+ sethi %hi(0x6fa87e4f),T2
+ xor T1,C,T1
+ or T2,%lo(0x6fa87e4f),T2 !=
+ add T1,R8,T1
+ LOAD X(15),RX
+ add T1,T2,T1
+ add A,T1,A !=
+ sll A,6,T2
+ srl A,32-6,A
+ or A,T2,A
+ add A,B,A !=
+
+ orn A,C,T1
+ sethi %hi(0xfe2ce6e0),T2
+ xor T1,B,T1
+ or T2,%lo(0xfe2ce6e0),T2 !=
+ add T1,RX,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2 !=
+ srl D,32-10,D
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1 !=
+ sethi %hi(0xa3014314),T2
+ xor T1,A,T1
+ or T2,%lo(0xa3014314),T2
+ add T1,R6,T1 !=
+ !pre-LOADed X(13),R13
+ add T1,T2,T1
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C !=
+ or C,T2,C
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0x4e0811a1),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0x4e0811a1),T2
+ !pre-LOADed X(4),R4
+ ld [Aptr],Aval
+ add T1,R13,T1 !=
+ add T1,T2,T1
+ add B,T1,B
+ sll B,21,T2
+ srl B,32-21,B !=
+ or B,T2,B
+ add B,C,B
+
+ orn B,D,T1
+ sethi %hi(0xf7537e82),T2 !=
+ xor T1,C,T1
+ or T2,%lo(0xf7537e82),T2
+ !pre-LOADed X(11),R11
+ ld [Dptr],Dval
+ add T1,R4,T1 !=
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2
+ srl A,32-6,A !=
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1
+ sethi %hi(0xbd3af235),T2 !=
+ xor T1,B,T1
+ or T2,%lo(0xbd3af235),T2
+ !pre-LOADed X(2),R2
+ ld [Cptr],Cval
+ add T1,R11,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0x2ad7d2bb),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0x2ad7d2bb),T2
+ !pre-LOADed X(9),R9
+ ld [Bptr],Bval
+ add T1,R2,T1 !=
+ add Aval,A,Aval
+ add T1,T2,T1
+ st Aval,[Aptr]
+ add C,T1,C !=
+ sll C,15,T2
+ add Dval,D,Dval
+ srl C,32-15,C
+ or C,T2,C !=
+ st Dval,[Dptr]
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0xeb86d391),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xeb86d391),T2
+ add T1,R9,T1
+ !pre-LOADed X(0),R0
+ mov Aval,A !=
+ add T1,T2,T1
+ mov Dval,D
+ add B,T1,B
+ sll B,21,T2 !=
+ add Cval,C,Cval
+ srl B,32-21,B
+ st Cval,[Cptr]
+ or B,T2,B !=
+ add B,C,B
+
+ deccc %i2
+ mov Cval,C
+ add B,Bval,B !=
+ inc 64,%i1
+ nop
+ st B,[Bptr]
+ nop !=
+
+#ifdef ULTRASPARC
+ bg,a,pt %icc,.Lmd5_block_loop
+#else
+ bg,a .Lmd5_block_loop
+#endif
+ LOAD X(0),R0
+
+#ifdef ASI_PRIMARY_LITTLE
+ mov %o7,%asi
+#endif
+ ret
+ restore %g0,0,%o0
+
+.type md5_block,#function
+.size md5_block,(.-md5_block)
diff --git a/crypto/md5/md5.h b/crypto/md5/md5.h
index 6e97fe1e4f..148fb963d4 100644
--- a/crypto/md5/md5.h
+++ b/crypto/md5/md5.h
@@ -67,23 +67,43 @@ extern "C" {
#error MD5 is disabled.
#endif
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! MD5_LONG has to be at least 32 bits wide. If it's wider, then !
+ * ! MD5_LONG_LOG2 has to be defined along. !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+
+#if defined(WIN16) || defined(__LP32__)
+#define MD5_LONG unsigned long
+#elif defined(_CRAY) || defined(__ILP64__)
+#define MD5_LONG unsigned long
+#define MD5_LONG_LOG2 3
+/*
+ * _CRAY note. I could declare short, but I have no idea what impact
+ * does it have on performance on none-T3E machines. I could declare
+ * int, but at least on C90 sizeof(int) can be chosen at compile time.
+ * So I've chosen long...
+ * <appro@fy.chalmers.se>
+ */
+#else
+#define MD5_LONG unsigned int
+#endif
+
#define MD5_CBLOCK 64
-#define MD5_LBLOCK 16
-#define MD5_BLOCK 16
-#define MD5_LAST_BLOCK 56
-#define MD5_LENGTH_BLOCK 8
+#define MD5_LBLOCK (MD5_CBLOCK/4)
#define MD5_DIGEST_LENGTH 16
typedef struct MD5state_st
{
- unsigned long A,B,C,D;
- unsigned long Nl,Nh;
- unsigned long data[MD5_LBLOCK];
+ MD5_LONG A,B,C,D;
+ MD5_LONG Nl,Nh;
+ MD5_LONG data[MD5_LBLOCK];
int num;
} MD5_CTX;
void MD5_Init(MD5_CTX *c);
-void MD5_Update(MD5_CTX *c, const void *data, unsigned long len);
+void MD5_Update(MD5_CTX *c, const unsigned char *data, unsigned long len);
void MD5_Final(unsigned char *md, MD5_CTX *c);
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md);
void MD5_Transform(MD5_CTX *c, unsigned char *b);
diff --git a/crypto/md5/md5_dgst.c b/crypto/md5/md5_dgst.c
index 2671b00172..830e04d02a 100644
--- a/crypto/md5/md5_dgst.c
+++ b/crypto/md5/md5_dgst.c
@@ -70,12 +70,6 @@ char *MD5_version="MD5" OPENSSL_VERSION_PTEXT;
#define INIT_DATA_C (unsigned long)0x98badcfeL
#define INIT_DATA_D (unsigned long)0x10325476L
-# ifdef MD5_ASM
- void md5_block_x86(MD5_CTX *c, unsigned long *p,int num);
-# define md5_block md5_block_x86
-# else
- static void md5_block(MD5_CTX *c, unsigned long *p,int num);
-# endif
void MD5_Init(MD5_CTX *c)
{
c->A=INIT_DATA_A;
@@ -87,183 +81,31 @@ void MD5_Init(MD5_CTX *c)
c->num=0;
}
-void MD5_Update(MD5_CTX *c, const void *_data, unsigned long len)
+#ifndef md5_block_host_order
+void md5_block_host_order (MD5_CTX *c, const MD5_LONG *X, int num)
{
- register const unsigned char *data=_data;
- register ULONG *p;
- int sw,sc;
- ULONG l;
-
- if (len == 0) return;
-
- l=(c->Nl+(len<<3))&0xffffffffL;
- /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
- * Wei Dai <weidai@eskimo.com> for pointing it out. */
- if (l < c->Nl) /* overflow */
- c->Nh++;
- c->Nh+=(len>>29);
- c->Nl=l;
-
- if (c->num != 0)
- {
- p=c->data;
- sw=c->num>>2;
- sc=c->num&0x03;
-
- if ((c->num+len) >= MD5_CBLOCK)
- {
- l= p[sw];
- p_c2l(data,l,sc);
- p[sw++]=l;
- for (; sw<MD5_LBLOCK; sw++)
- {
- c2l(data,l);
- p[sw]=l;
- }
- len-=(MD5_CBLOCK-c->num);
-
- md5_block(c,p,64);
- c->num=0;
- /* drop through and do the rest */
- }
- else
- {
- int ew,ec;
-
- c->num+=(int)len;
- if ((sc+len) < 4) /* ugly, add char's to a word */
- {
- l= p[sw];
- p_c2l_p(data,l,sc,len);
- p[sw]=l;
- }
- else
- {
- ew=(c->num>>2);
- ec=(c->num&0x03);
- l= p[sw];
- p_c2l(data,l,sc);
- p[sw++]=l;
- for (; sw < ew; sw++)
- { c2l(data,l); p[sw]=l; }
- if (ec)
- {
- c2l_p(data,l,ec);
- p[sw]=l;
- }
- }
- return;
- }
- }
- /* we now can process the input data in blocks of MD5_CBLOCK
- * chars and save the leftovers to c->data. */
-#ifdef L_ENDIAN
- if ((((unsigned long)data)%sizeof(ULONG)) == 0)
- {
- sw=(int)len/MD5_CBLOCK;
- if (sw > 0)
- {
- sw*=MD5_CBLOCK;
- md5_block(c,(ULONG *)data,sw);
- data+=sw;
- len-=sw;
- }
- }
-#endif
- p=c->data;
- while (len >= MD5_CBLOCK)
- {
-#if defined(L_ENDIAN) || defined(B_ENDIAN)
- if (p != (unsigned long *)data)
- memcpy(p,data,MD5_CBLOCK);
- data+=MD5_CBLOCK;
-#ifdef B_ENDIAN
- for (sw=(MD5_LBLOCK/4); sw; sw--)
- {
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
- }
-#endif
-#else
- for (sw=(MD5_LBLOCK/4); sw; sw--)
- {
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- }
-#endif
- p=c->data;
- md5_block(c,p,64);
- len-=MD5_CBLOCK;
- }
- sc=(int)len;
- c->num=sc;
- if (sc)
- {
- sw=sc>>2; /* words to copy */
-#ifdef L_ENDIAN
- p[sw]=0;
- memcpy(p,data,sc);
-#else
- sc&=0x03;
- for ( ; sw; sw--)
- { c2l(data,l); *(p++)=l; }
- c2l_p(data,l,sc);
- *p=l;
-#endif
- }
- }
-
-void MD5_Transform(MD5_CTX *c, unsigned char *b)
- {
- ULONG p[16];
-#if !defined(L_ENDIAN)
- ULONG *q;
- int i;
-#endif
-
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
- memcpy(p,b,64);
-#ifdef B_ENDIAN
- q=p;
- for (i=(MD5_LBLOCK/4); i; i--)
- {
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
- }
-#endif
-#else
- q=p;
- for (i=(MD5_LBLOCK/4); i; i--)
- {
- ULONG l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- }
-#endif
- md5_block(c,p,64);
- }