author    | Andy Polyakov <appro@openssl.org> | 2007-07-23 14:15:36 +0000
committer | Andy Polyakov <appro@openssl.org> | 2007-07-23 14:15:36 +0000
commit    | 28feb1f8da3839543137906c74870168c76a2757 (patch)
tree      | 6a26a773dab53bf92dc952829340953ad63fd049
parent    | 9596d1e63b4be1655915344f943059b570cbb726 (diff)
md32_common.h update from HEAD.
-rw-r--r-- | crypto/md32_common.h          |  373
-rw-r--r-- | crypto/md4/md4_dgst.c         |   89
-rw-r--r-- | crypto/md4/md4_locl.h         |   44
-rw-r--r-- | crypto/md5/asm/md5-586.pl     |    2
-rw-r--r-- | crypto/md5/asm/md5-sparcv9.S  | 1031
-rwxr-xr-x | crypto/md5/asm/md5-x86_64.pl  |    8
-rw-r--r-- | crypto/md5/md5_dgst.c         |  106
-rw-r--r-- | crypto/md5/md5_locl.h         |   54
-rw-r--r-- | crypto/ripemd/asm/rmd-586.pl  |    4
-rw-r--r-- | crypto/ripemd/rmd_dgst.c      |  201
-rw-r--r-- | crypto/ripemd/rmd_locl.h      |   16
-rw-r--r-- | crypto/sha/asm/sha1-586.pl    |  433
-rw-r--r-- | crypto/sha/asm/sha1-ia64.pl   |  346
-rwxr-xr-x | crypto/sha/asm/sha512-ia64.pl |  422
-rw-r--r-- | crypto/sha/sha256.c           |   67
-rw-r--r-- | crypto/sha/sha512.c           |  100
-rw-r--r-- | crypto/sha/sha_locl.h         |  278
17 files changed, 723 insertions, 2851 deletions
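
The hunks below are dominated by one structural change: the per-digest "host order" block functions are removed, and the context's block buffer is handled as plain bytes. For orientation, here is a sketch of the context layout the updated template expects; the field names follow the HASH_CTX outline in the md32_common.h comment below (a paraphrase for illustration, not a verbatim copy of md5.h):

    /* Sketch of the HASH_CTX shape described in the header comment below.
     * data[] may be declared either as HASH_LONG words or unsigned chars;
     * the template now expects it zeroed before the first HASH_UPDATE call
     * (and HASH_FINAL re-zeroes it on the way out). */
    typedef struct {
        MD5_LONG A, B, C, D;        /* digest-specific chaining state */
        MD5_LONG Nl, Nh;            /* total message length in bits, split */
        MD5_LONG data[MD5_LBLOCK];  /* buffered input block */
        unsigned int num;           /* bytes currently held in data[] */
    } MD5_CTX;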
diff --git a/crypto/md32_common.h b/crypto/md32_common.h
index 0e625a8e55..089c450290 100644
--- a/crypto/md32_common.h
+++ b/crypto/md32_common.h
@@ -1,6 +1,6 @@
 /* crypto/md32_common.h */
 /* ====================================================================
- * Copyright (c) 1999-2002 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2007 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -47,10 +47,6 @@
  * OF THE POSSIBILITY OF SUCH DAMAGE.
  * ====================================================================
  *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com).  This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com).
- *
  */

 /*
@@ -76,40 +72,27 @@
  * typedef struct {
  *      ...
  *      HASH_LONG       Nl,Nh;
+ *      either {
  *      HASH_LONG       data[HASH_LBLOCK];
+ *      unsigned char   data[HASH_CBLOCK];
+ *      };
  *      unsigned int    num;
  *      ...
  *      } HASH_CTX;
+ * data[] vector is expected to be zeroed upon first call to
+ * HASH_UPDATE.
  * HASH_UPDATE
  *      name of "Update" function, implemented here.
  * HASH_TRANSFORM
  *      name of "Transform" function, implemented here.
  * HASH_FINAL
  *      name of "Final" function, implemented here.
- * HASH_BLOCK_HOST_ORDER
- *      name of "block" function treating *aligned* input message
- *      in host byte order, implemented externally.
  * HASH_BLOCK_DATA_ORDER
- *      name of "block" function treating *unaligned* input message
- *      in original (data) byte order, implemented externally (it
- *      actually is optional if data and host are of the same
- *      "endianess").
+ *      name of "block" function capable of treating *unaligned* input
+ *      message in original (data) byte order, implemented externally.
  * HASH_MAKE_STRING
  *      macro convering context variables to an ASCII hash string.
  *
- * Optional macros:
- *
- * B_ENDIAN or L_ENDIAN
- *      defines host byte-order.
- * HASH_LONG_LOG2
- *      defaults to 2 if not states otherwise.
- * HASH_LBLOCK
- *      assumed to be HASH_CBLOCK/4 if not stated otherwise.
- * HASH_BLOCK_DATA_ORDER_ALIGNED
- *      alternative "block" function capable of treating
- *      aligned input message in original (data) order,
- *      implemented externally.
- *
  * MD5 example:
  *
  *      #define DATA_ORDER_IS_LITTLE_ENDIAN
@@ -118,11 +101,9 @@
  *      #define HASH_LONG_LOG2          MD5_LONG_LOG2
  *      #define HASH_CTX                MD5_CTX
  *      #define HASH_CBLOCK             MD5_CBLOCK
- *      #define HASH_LBLOCK             MD5_LBLOCK
  *      #define HASH_UPDATE             MD5_Update
  *      #define HASH_TRANSFORM          MD5_Transform
  *      #define HASH_FINAL              MD5_Final
- *      #define HASH_BLOCK_HOST_ORDER   md5_block_host_order
  *      #define HASH_BLOCK_DATA_ORDER   md5_block_data_order
  *
  * <appro@fy.chalmers.se>
@@ -152,27 +133,9 @@
 #error "HASH_FINAL must be defined!"
 #endif

-#ifndef HASH_BLOCK_HOST_ORDER
-#error "HASH_BLOCK_HOST_ORDER must be defined!"
-#endif
-
-#if 0
-/*
- * Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
- * isn't defined.
- */
 #ifndef HASH_BLOCK_DATA_ORDER
 #error "HASH_BLOCK_DATA_ORDER must be defined!"
 #endif
-#endif
-
-#ifndef HASH_LBLOCK
-#define HASH_LBLOCK     (HASH_CBLOCK/4)
-#endif
-
-#ifndef HASH_LONG_LOG2
-#define HASH_LONG_LOG2  2
-#endif

 /*
  * Engage compiler specific rotate intrinsic function if available.
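
As the MD5 example above shows, a digest instantiates this template with a handful of defines. A minimal sketch of the post-update recipe, assembled from that example and the md4_locl.h hunk further down (illustrative, not a verbatim copy of any one OpenSSL header):

    /* Instantiation sketch: define the template parameters, then pull
     * in the shared Update/Transform/Final machinery. */
    #define DATA_ORDER_IS_LITTLE_ENDIAN

    #define HASH_LONG               MD5_LONG
    #define HASH_CTX                MD5_CTX
    #define HASH_CBLOCK             MD5_CBLOCK
    #define HASH_UPDATE             MD5_Update
    #define HASH_TRANSFORM          MD5_Transform
    #define HASH_FINAL              MD5_Final
    #define HASH_BLOCK_DATA_ORDER   md5_block_data_order
    #define HASH_MAKE_STRING(c,s)   do {    \
            unsigned long ll;               \
            ll=(c)->A; HOST_l2c(ll,(s));    \
            ll=(c)->B; HOST_l2c(ll,(s));    \
            ll=(c)->C; HOST_l2c(ll,(s));    \
            ll=(c)->D; HOST_l2c(ll,(s));    \
            } while (0)

    #include "md32_common.h"

Note that HASH_LONG_LOG2, HASH_LBLOCK and HASH_BLOCK_HOST_ORDER are gone from the recipe: the only externally supplied block function is now the unaligned, data-byte-order one.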
@@ -206,7 +169,8 @@
                 : "cc");        \
            ret;                 \
         })
-# elif defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
+# elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
+       defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
 #  define ROTATE(a,n)  ({ register unsigned int ret;   \
                           asm (                         \
                           "rlwinm %0,%1,%2,0,31"        \
@@ -214,80 +178,28 @@
                           : "r"(a), "I"(n));            \
                           ret;                          \
                         })
+# elif defined(__s390x__)
+#  define ROTATE(a,n) ({ register unsigned int ret;    \
+                         asm ("rll %0,%1,%2"            \
+                         : "=r"(ret)                    \
+                         : "r"(a), "I"(n));             \
+                         ret;                           \
+                       })
 # endif
 # endif
 #endif /* PEDANTIC */

-#if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#ifdef ROTATE
-/* 5 instructions with rotate instruction, else 9 */
-#define REVERSE_FETCH32(a,l)   (                               \
-               l=*(const HASH_LONG *)(a),                      \
-               ((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24)))  \
-                               )
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define REVERSE_FETCH32(a,l)   (                               \
-               l=*(const HASH_LONG *)(a),                      \
-               l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)),    \
-               ROTATE(l,16)                                    \
-                               )
-/*
- * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
- * It's rewritten as above for two reasons:
- *     - RISCs aren't good at long constants and have to explicitely
- *       compose 'em with several (well, usually 2) instructions in a
- *       register before performing the actual operation and (as you
- *       already realized:-) having same constant should inspire the
- *       compiler to permanently allocate the only register for it;
- *     - most modern CPUs have two ALUs, but usually only one has
- *       circuitry for shifts:-( this minor tweak inspires compiler
- *       to schedule shift instructions in a better way...
- *
- *                             <appro@fy.chalmers.se>
- */
-#endif
-#endif
-
 #ifndef ROTATE
 #define ROTATE(a,n)    (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
 #endif

-/*
- * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
- * and HASH_BLOCK_HOST_ORDER ought to be the same if input data
- * and host are of the same "endianess". It's possible to mask
- * this with blank #define HASH_BLOCK_DATA_ORDER though...
- *
- *                             <appro@fy.chalmers.se>
- */
-#if defined(B_ENDIAN)
-#  if defined(DATA_ORDER_IS_BIG_ENDIAN)
-#    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-#      define HASH_BLOCK_DATA_ORDER_ALIGNED    HASH_BLOCK_HOST_ORDER
-#    endif
-#  endif
-#elif defined(L_ENDIAN)
-#  if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-#    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-#      define HASH_BLOCK_DATA_ORDER_ALIGNED    HASH_BLOCK_HOST_ORDER
-#    endif
-#  endif
-#endif
-
-#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-#ifndef HASH_BLOCK_DATA_ORDER
-#error "HASH_BLOCK_DATA_ORDER must be defined!"
-#endif
-#endif
-
 #if defined(DATA_ORDER_IS_BIG_ENDIAN)

 #ifndef PEDANTIC
 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
 #  if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
       (defined(__x86_64) || defined(__x86_64__))
+#   if !defined(B_ENDIAN)
    /*
     * This gives ~30-40% performance improvement in SHA-256 compiled
     * with gcc [on P4]. Well, first macro to be frank. We can pull
@@ -300,9 +212,14 @@
 #   define HOST_l2c(l,c)       ({ unsigned int r=(l);                  \
                                   asm ("bswapl %0":"=r"(r):"0"(r));    \
                                   *((unsigned int *)(c))=r; (c)+=4; r; })
+#   endif
 #  endif
 # endif
 #endif
+#if defined(__s390__) || defined(__s390x__)
+# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
+# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
+#endif

 #ifndef HOST_c2l
 #define HOST_c2l(c,l)  (l =(((unsigned long)(*((c)++)))<<24),          \
@@ -311,29 +228,6 @@
                         l|=(((unsigned long)(*((c)++)))    ),          \
                         l)
 #endif
-#define HOST_p_c2l(c,l,n)      {                                       \
-                       switch (n) {                                    \
-                       case 0: l =((unsigned long)(*((c)++)))<<24;     \
-                       case 1: l|=((unsigned long)(*((c)++)))<<16;     \
-                       case 2: l|=((unsigned long)(*((c)++)))<< 8;     \
-                       case 3: l|=((unsigned long)(*((c)++)));         \
-                               } }
-#define HOST_p_c2l_p(c,l,sc,len) {                                     \
-                       switch (sc) {                                   \
-                       case 0: l =((unsigned long)(*((c)++)))<<24;     \
-                               if (--len == 0) break;                  \
-                       case 1: l|=((unsigned long)(*((c)++)))<<16;     \
-                               if (--len == 0) break;                  \
-                       case 2: l|=((unsigned long)(*((c)++)))<< 8;     \
-                               } }
-/* NOTE the pointer is not incremented at the end of this */
-#define HOST_c2l_p(c,l,n)      {                                       \
-                       l=0; (c)+=n;                                    \
-                       switch (n) {                                    \
-                       case 3: l =((unsigned long)(*(--(c))))<< 8;     \
-                       case 2: l|=((unsigned long)(*(--(c))))<<16;     \
-                       case 1: l|=((unsigned long)(*(--(c))))<<24;     \
-                               } }
 #ifndef HOST_l2c
 #define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)>>24)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>>16)&0xff),      \
@@ -344,6 +238,18 @@

 #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)

+#ifndef PEDANTIC
+# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+#  if defined(__s390x__)
+#   define HOST_c2l(c,l)       ({ asm ("lrv %0,0(%1)"                  \
+                                  :"=r"(l) : "r"(c));                   \
+                                  (c)+=4; (l);                         })
+#   define HOST_l2c(l,c)       ({ asm ("strv %0,0(%1)"                 \
+                                  : : "r"(l),"r"(c) : "memory");        \
+                                  (c)+=4; (l);                         })
+#  endif
+# endif
+#endif
 #if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
 # ifndef B_ENDIAN
    /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
@@ -359,29 +265,6 @@
                         l|=(((unsigned long)(*((c)++)))<<24),          \
                         l)
 #endif
-#define HOST_p_c2l(c,l,n)      {                                       \
-                       switch (n) {                                    \
-                       case 0: l =((unsigned long)(*((c)++)));         \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8;     \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16;     \
-                       case 3: l|=((unsigned long)(*((c)++)))<<24;     \
-                               } }
-#define HOST_p_c2l_p(c,l,sc,len) {                                     \
-                       switch (sc) {                                   \
-                       case 0: l =((unsigned long)(*((c)++)));         \
-                               if (--len == 0) break;                  \
-                       case 1: l|=((unsigned long)(*((c)++)))<< 8;     \
-                               if (--len == 0) break;                  \
-                       case 2: l|=((unsigned long)(*((c)++)))<<16;     \
-                               } }
-/* NOTE the pointer is not incremented at the end of this */
-#define HOST_c2l_p(c,l,n)      {                                       \
-                       l=0; (c)+=n;                                    \
-                       switch (n) {                                    \
-                       case 3: l =((unsigned long)(*(--(c))))<<16;     \
-                       case 2: l|=((unsigned long)(*(--(c))))<< 8;     \
-                       case 1: l|=((unsigned long)(*(--(c))));         \
-                               } }
 #ifndef HOST_l2c
 #define HOST_l2c(l,c)  (*((c)++)=(unsigned char)(((l)     )&0xff),     \
                         *((c)++)=(unsigned char)(((l)>> 8)&0xff),      \
@@ -399,9 +282,9 @@
 int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
        {
        const unsigned char *data=data_;
-       register HASH_LONG * p;
-       register HASH_LONG l;
-       size_t sw,sc,ew,ec;
+       unsigned char *p;
+       HASH_LONG l;
+       size_t n;

        if (len==0) return 1;

@@ -413,101 +296,43 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
        c->Nh+=(len>>29);       /* might cause compiler warning on 16-bit */
        c->Nl=l;

-       if (c->num != 0)
+       n = c->num;
+       if (n != 0)
                {
-               p=c->data;
-               sw=c->num>>2;
-               sc=c->num&0x03;
+               p=(unsigned char *)c->data;

-               if ((c->num+len) >= HASH_CBLOCK)
+               if ((n+len) >= HASH_CBLOCK)
                        {
-                       l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
-                       for (; sw<HASH_LBLOCK; sw++)
-                               {
-                               HOST_c2l(data,l); p[sw]=l;
-                               }
-                       HASH_BLOCK_HOST_ORDER (c,p,1);
-                       len-=(HASH_CBLOCK-c->num);
-                       c->num=0;
-                       /* drop through and do the rest */
+                       memcpy (p+n,data,HASH_CBLOCK-n);
+                       HASH_BLOCK_DATA_ORDER (c,p,1);
+                       n      = HASH_CBLOCK-n;
+                       data  += n;
+                       len   -= n;
+                       c->num = 0;
+                       memset (p,0,HASH_CBLOCK);       /* keep it zeroed */
                        }
                else
                        {
-                       c->num+=(unsigned int)len;
-                       if ((sc+len) < 4) /* ugly, add char's to a word */
-                               {
-                               l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
-                               }
-                       else
-                               {
-                               ew=(c->num>>2);
-                               ec=(c->num&0x03);
-                               if (sc)
-                                       l=p[sw];
-                               HOST_p_c2l(data,l,sc);
-                               p[sw++]=l;
-                               for (; sw < ew; sw++)
-                                       {
-                                       HOST_c2l(data,l); p[sw]=l;
-                                       }
-                               if (ec)
-                                       {
-                                       HOST_c2l_p(data,l,ec); p[sw]=l;
-                                       }
-                               }
+                       memcpy (p+n,data,len);
+                       c->num += (unsigned int)len;
                        return 1;
                        }
                }

-       sw=len/HASH_CBLOCK;
-       if (sw > 0)
+       n = len/HASH_CBLOCK;
+       if (n > 0)
                {
-#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-               /*
-                * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
-                * only if sizeof(HASH_LONG)==4.
-                */
-               if ((((size_t)data)%4) == 0)
-                       {
-                       /* data is properly aligned so that we can cast it: */
-                       HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw);
-                       sw*=HASH_CBLOCK;
-                       data+=sw;
-                       len-=sw;
-                       }
-               else
-#if !defined(HASH_BLOCK_DATA_ORDER)
-                       while (sw--)
-                               {
-                               memcpy (p=c->data,data,HASH_CBLOCK);
-                               HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
-                               data+=HASH_CBLOCK;
-                               len-=HASH_CBLOCK;
-                               }
-#endif
-#endif
-#if defined(HASH_BLOCK_DATA_ORDER)
-                       {
-                       HASH_BLOCK_DATA_ORDER(c,data,sw);
-                       sw*=HASH_CBLOCK;
-                       data+=sw;
-                       len-=sw;
-                       }
-#endif
+               HASH_BLOCK_DATA_ORDER (c,data,n);
+               n    *= HASH_CBLOCK;
+               data += n;
+               len  -= n;
                }

-       if (len!=0)
+       if (len != 0)
                {
-               p = c->data;
+               p = (unsigned char *)c->data;
                c->num = len;
-               ew=len>>2;      /* words to copy */
-               ec=len&0x03;
-               for (; ew; ew--,p++)
-                       {
-                       HOST_c2l(data,l); *p=l;
-                       }
-               HOST_c2l_p(data,l,ec);
-               *p=l;
+               memcpy (p,data,len);
                }
        return 1;
        }

@@ -515,73 +340,38 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)

 void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
        {
-#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-       if ((((size_t)data)%4) == 0)
-               /* data is properly aligned so that we can cast it: */
-               HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1);
-       else
-#if !defined(HASH_BLOCK_DATA_ORDER)
-               {
-               memcpy (c->data,data,HASH_CBLOCK);
-               HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
-               }
-#endif
-#endif
-#if defined(HASH_BLOCK_DATA_ORDER)
        HASH_BLOCK_DATA_ORDER (c,data,1);
-#endif
        }

 int HASH_FINAL (unsigned char *md, HASH_CTX *c)
        {
-       register HASH_LONG *p;
-       register unsigned long l;
-       register int i,j;
-       static const unsigned char end[4]={0x80,0x00,0x00,0x00};
-       const unsigned char *cp=end;
-
-       /* c->num should definitly have room for at least one more byte. */
-       p=c->data;
-       i=c->num>>2;
-       j=c->num&0x03;
-
-#if 0
-       /* purify often complains about the following line as an
-        * Uninitialized Memory Read. While this can be true, the
-        * following p_c2l macro will reset l when that case is true.
-        * This is because j&0x03 contains the number of 'valid' bytes
-        * already in p[i]. If and only if j&0x03 == 0, the UMR will
-        * occur but this is also the only time p_c2l will do
-        * l= *(cp++) instead of l|= *(cp++)
-        * Many thanks to Alex Tang <altitude@cic.net> for pickup this
-        * 'potential bug' */
-#ifdef PURIFY
-       if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */
-#endif
-       l=p[i];
-#else
-       l = (j==0) ? 0 : p[i];
-#endif
-       HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
+       unsigned char *p = (unsigned char *)c->data;
+       size_t n = c->num;

-       if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */
+       p[n] = 0x80; /* there is always room for one */
+       n++;
+
+       if (n > (HASH_CBLOCK-8))
                {
-               if (i<HASH_LBLOCK) p[i]=0;
-               HASH_BLOCK_HOST_ORDER (c,p,1);
-               i=0;
+               memset (p+n,0,HASH_CBLOCK-n);
+               n=0;
+               HASH_BLOCK_DATA_ORDER (c,p,1);
                }
-       for (; i<(HASH_LBLOCK-2); i++)
-               p[i]=0;
+       memset (p+n,0,HASH_CBLOCK-8-n);
+       p += HASH_CBLOCK-8;

 #if   defined(DATA_ORDER_IS_BIG_ENDIAN)
-       p[HASH_LBLOCK-2]=c->Nh;
-       p[HASH_LBLOCK-1]=c->Nl;
+       (void)HOST_l2c(c->Nh,p);
+       (void)HOST_l2c(c->Nl,p);
 #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-       p[HASH_LBLOCK-2]=c->Nl;
-       p[HASH_LBLOCK-1]=c->Nh;
+       (void)HOST_l2c(c->Nl,p);
+       (void)HOST_l2c(c->Nh,p);
 #endif
-       HASH_BLOCK_HOST_ORDER (c,p,1);
+       p -= HASH_CBLOCK;
+       HASH_BLOCK_DATA_ORDER (c,p,1);
+       c->num=0;
+       memset (p,0,HASH_CBLOCK);

 #ifndef HASH_MAKE_STRING
 #error "HASH_MAKE_STRING must be defined!"
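
Stripped of the macro indirection, the rewritten HASH_UPDATE above is a plain three-phase byte-buffer pattern: top up a partial block, compress whole blocks straight from the caller's buffer, then stash the tail. A self-contained sketch of the same flow (hypothetical names, 64-byte block assumed; the Nl/Nh bit-count bookkeeping of the real code is elided):

    #include <string.h>

    #define BLOCK 64                       /* HASH_CBLOCK for this family */

    typedef struct {                       /* hypothetical stand-in for HASH_CTX */
            unsigned char data[BLOCK];     /* partial-block buffer, kept zeroed */
            unsigned int num;              /* bytes currently buffered */
            } ctx_t;

    static void block_data_order(ctx_t *c, const void *p, size_t num)
            {                              /* stub for HASH_BLOCK_DATA_ORDER; a
                                            * real one compresses num blocks */
            (void)c; (void)p; (void)num;
            }

    static int update(ctx_t *c, const void *data_, size_t len)
            {
            const unsigned char *data = data_;
            size_t n = c->num;

            if (n != 0)                    /* phase 1: top up a partial block */
                    {
                    if (n + len >= BLOCK)
                            {
                            memcpy(c->data + n, data, BLOCK - n);
                            block_data_order(c, c->data, 1);
                            n = BLOCK - n;
                            data += n, len -= n;
                            c->num = 0;
                            memset(c->data, 0, BLOCK);  /* keep it zeroed */
                            }
                    else
                            {
                            memcpy(c->data + n, data, len);
                            c->num += (unsigned int)len;
                            return 1;
                            }
                    }
            n = len / BLOCK;               /* phase 2: whole blocks, no copy */
            if (n > 0)
                    {
                    block_data_order(c, data, n);
                    n *= BLOCK;
                    data += n, len -= n;
                    }
            if (len != 0)                  /* phase 3: stash the tail */
                    {
                    memcpy(c->data, data, len);
                    c->num = (unsigned int)len;
                    }
            return 1;
            }

Because the block function now accepts unaligned input in data byte order, whole blocks never have to be copied or byte-swapped through the context; only the head and tail touch the buffer.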
@@ -589,11 +379,6 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c)
        HASH_MAKE_STRING(c,md);
 #endif

-       c->num=0;
-       /* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
-        * but I'm not worried :-)
-       OPENSSL_cleanse((void *)c,sizeof(HASH_CTX));
-        */
        return 1;
        }

diff --git a/crypto/md4/md4_dgst.c b/crypto/md4/md4_dgst.c
index 63ecac392a..62fb7e6677 100644
--- a/crypto/md4/md4_dgst.c
+++ b/crypto/md4/md4_dgst.c
@@ -84,79 +84,6 @@ FIPS_NON_FIPS_MD_Init(MD4)
        return 1;
        }

-#ifndef md4_block_host_order
-void md4_block_host_order (MD4_CTX *c, const void *data, size_t num)
-       {
-       const MD4_LONG *X=data;
-       register unsigned MD32_REG_T A,B,C,D;
-
-       A=c->A;
-       B=c->B;
-       C=c->C;
-       D=c->D;
-
-       for (;num--;X+=HASH_LBLOCK)
-               {
-       /* Round 0 */
-       R0(A,B,C,D,X[ 0], 3,0);
-       R0(D,A,B,C,X[ 1], 7,0);
-       R0(C,D,A,B,X[ 2],11,0);
-       R0(B,C,D,A,X[ 3],19,0);
-       R0(A,B,C,D,X[ 4], 3,0);
-       R0(D,A,B,C,X[ 5], 7,0);
-       R0(C,D,A,B,X[ 6],11,0);
-       R0(B,C,D,A,X[ 7],19,0);
-       R0(A,B,C,D,X[ 8], 3,0);
-       R0(D,A,B,C,X[ 9], 7,0);
-       R0(C,D,A,B,X[10],11,0);
-       R0(B,C,D,A,X[11],19,0);
-       R0(A,B,C,D,X[12], 3,0);
-       R0(D,A,B,C,X[13], 7,0);
-       R0(C,D,A,B,X[14],11,0);
-       R0(B,C,D,A,X[15],19,0);
-       /* Round 1 */
-       R1(A,B,C,D,X[ 0], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 4], 5,0x5A827999L);
-       R1(C,D,A,B,X[ 8], 9,0x5A827999L);
-       R1(B,C,D,A,X[12],13,0x5A827999L);
-       R1(A,B,C,D,X[ 1], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 5], 5,0x5A827999L);
-       R1(C,D,A,B,X[ 9], 9,0x5A827999L);
-       R1(B,C,D,A,X[13],13,0x5A827999L);
-       R1(A,B,C,D,X[ 2], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 6], 5,0x5A827999L);
-       R1(C,D,A,B,X[10], 9,0x5A827999L);
-       R1(B,C,D,A,X[14],13,0x5A827999L);
-       R1(A,B,C,D,X[ 3], 3,0x5A827999L);
-       R1(D,A,B,C,X[ 7], 5,0x5A827999L);
-       R1(C,D,A,B,X[11], 9,0x5A827999L);
-       R1(B,C,D,A,X[15],13,0x5A827999L);
-       /* Round 2 */
-       R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 4],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[12],15,0x6ED9EBA1);
-       R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[10], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 6],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[14],15,0x6ED9EBA1);
-       R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 5],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[13],15,0x6ED9EBA1);
-       R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1);
-       R2(D,A,B,C,X[11], 9,0x6ED9EBA1);
-       R2(C,D,A,B,X[ 7],11,0x6ED9EBA1);
-       R2(B,C,D,A,X[15],15,0x6ED9EBA1);
-
-       A = c->A += A;
-       B = c->B += B;
-       C = c->C += C;
-       D = c->D += D;
-               }
-       }
-#endif
-
 #ifndef md4_block_data_order
 #ifdef X
 #undef X
@@ -242,19 +169,3 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num)
        }
        }
 #endif
-
-#ifdef undef
-int printit(unsigned long *l)
-       {
-       int i,ii;
-
-       for (i=0; i<2; i++)
-               {
-               for (ii=0; ii<8; ii++)
-                       {
-                       fprintf(stderr,"%08lx ",l[i*8+ii]);
-                       }
-               fprintf(stderr,"\n");
-               }
-       }
-#endif

diff --git a/crypto/md4/md4_locl.h b/crypto/md4/md4_locl.h
index abc7b9bb84..c8085b0ead 100644
--- a/crypto/md4/md4_locl.h
+++ b/crypto/md4/md4_locl.h
@@ -65,43 +65,13 @@
 #define MD4_LONG_LOG2 2 /* default to 32 bits */
 #endif

-void md4_block_host_order (MD4_CTX *c, const void *p,size_t num);
 void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);

-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
-# if !defined(B_ENDIAN)
-/*
- * *_block_host_order is expected to handle aligned data while
- * *_block_data_order - unaligned. As algorithm and host (x86)
- * are in this case of the same "endianness" these two are
- * otherwise indistinguishable. But normally you don't want to
- * call the same function because unaligned access in places
- * where alignment is expected is usually a "Bad Thing". Indeed,
- * on RISCs you get punished with BUS ERROR signal or *severe*
- * performance degradation. Intel CPUs are in turn perfectly
- * capable of loading unaligned data without such drastic side
- * effect. Yes, they say it's slower than aligned load, but no
- * exception is generated and therefore performance degradation
- * is *incomparable* with RISCs. What we should weight here is
- * costs of unaligned access against costs of aligning data.
- * According to my measurements allowing unaligned access results
- * in ~9% performance improvement on Pentium II operating at
- * 266MHz. I won't be surprised if the difference will be higher
- * on faster systems:-)
- *
- *                             <appro@fy.chalmers.se>
- */
-# define md4_block_data_order md4_block_host_order
-# endif
-#endif
-
 #define DATA_ORDER_IS_LITTLE_ENDIAN

 #define HASH_LONG              MD4_LONG
-#define HASH_LONG_LOG2         MD4_LONG_LOG2
 #define HASH_CTX               MD4_CTX
 #define HASH_CBLOCK            MD4_CBLOCK
-#define HASH_LBLOCK            MD4_LBLOCK
 #define HASH_UPDATE            MD4_Update
 #define HASH_TRANSFORM         MD4_Transform
 #define HASH_FINAL             MD4_Final
@@ -112,21 +82,7 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
        ll=(c)->C; HOST_l2c(ll,(s));    \
        ll=(c)->D; HOST_l2c(ll,(s));    \
        } while (0)
-#define HASH_BLOCK_HOST_ORDER  md4_block_host_order
-#if !defined(L_ENDIAN) || defined(md4_block_data_order)
 #define HASH_BLOCK_DATA_ORDER  md4_block_data_order
-/*
- * Little-endians (Intel and Alpha) feel better without this.
- * It looks like memcpy does better job than generic
- * md4_block_data_order on copying-n-aligning input data.
- * But frankly speaking I didn't expect such result on Alpha.
- * On the other hand I've got this with egcs-1.0.2 and if
- * program is compiled with another (better?) compiler it
- * might turn out other way around.
- *
- *                             <appro@fy.chalmers.se>
- */
-#endif

 #include "md32_common.h"

diff --git a/crypto/md5/asm/md5-586.pl b/crypto/md5/asm/md5-586.pl
index fa3fa3bed5..76ac235f7d 100644
--- a/crypto/md5/asm/md5-586.pl
+++ b/crypto/md5/asm/md5-586.pl
@@ -29,7 +29,7 @@ $X="esi";
         0, 7,14, 5,12, 3,10, 1, 8,15, 6,13, 4,11, 2, 9,        # R3
        );

-&md5_block("md5_block_asm_host_order");
+&md5_block("md5_block_asm_data_order");
 &asm_finish();

 sub Np

diff --git a/crypto/md5/asm/md5-sparcv9.S b/crypto/md5/asm/md5-sparcv9.S
deleted file mode 100644
index db45aa4c97..0000000000
--- a/crypto/md5/asm/md5-sparcv9.S
+++ /dev/null
@@ -1,1031 +0,0 @@
-.ident "md5-sparcv9.S, Version 1.0"
-.ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
-.file  "md5-sparcv9.S"
-
-/*
- * ====================================================================
- * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>.
- *
- * Rights for redistribution and usage in source and binary forms are
- * granted as long as above copyright notices are retained. Warranty
- * of any kind is (of course:-) disclaimed.
- * ====================================================================
- */
-
-/*
- * This is my modest contribution to OpenSSL project (see
- * http://www.openssl.org/ for more information about it) and is an
- * assembler implementation of MD5 block hash function. I've hand-coded
- * this for the sole reason to reach UltraSPARC-specific "load in
- * little-endian byte order" instruction. This gives up to 15%
- * performance improvement for cases when input message is aligned at
- * 32 bits boundary. The module was tested under both 32 *and* 64 bit
- * kernels. For updates see http://fy.chalmers.se/~appro/hpe/.
- *
- * To compile with SC4.x/SC5.x:
- *
- *     cc -xarch=v[9|8plus] -DOPENSSL_SYSNAME_ULTRASPARC -DMD5_BLOCK_DATA_ORDER \
- *        -c md5-sparcv9.S
- *
- * and with gcc:
- *
- *     gcc -mcpu=ultrasparc -DOPENSSL_SYSNAME_ULTRASPARC -DMD5_BLOCK_DATA_ORDER \
- *        -c md5-sparcv9.S
- *
- * or if above fails (it does if you have gas):
- *
- *     gcc -E -DOPENSSL_SYSNAMEULTRASPARC -DMD5_BLOCK_DATA_ORDER md5_block.sparc.S | \
- *        as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o
- */
-
-#include <openssl/e_os2.h>
-
-#define A      %o0
-#define B      %o1
-#define C      %o2
-#define D      %o3
-#define T1     %o4
-#define T2     %o5
-
-#define R0     %l0
-#define R1     %l1
-#define R2     %l2
-#define R3     %l3
-#define R4     %l4
-#define R5     %l5
-#define R6     %l6
-#define R7     %l7
-#define R8     %i3
-#define R9     %i4
-#define R10    %i5
-#define R11    %g1
-#define R12    %g2
-#define R13    %g3
-#define RX     %g4
-
-#define Aptr   %i0+0
-#define Bptr   %i0+4
-#define Cptr   %i0+8
-#define Dptr   %i0+12
-
-#define Aval   R5      /* those not used at the end of the last round */
-#define Bval   R6
-#define Cval   R7
-#define Dval   R8
-
-#if defined(MD5_BLOCK_DATA_ORDER)
-# if defined(OPENSSL_SYSNAME_ULTRASPARC)
-#  define     LOAD            lda
-#  define     X(i)            [%i1+i*4]%asi
-#  define     md5_block       md5_block_asm_data_order_aligned
-#  define     ASI_PRIMARY_LITTLE      0x88
-# else
-#  error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!"
-# endif
-#else
-#  define     LOAD            ld
-#  define     X(i)            [%i1+i*4]
-#  define     md5_block       md5_block_asm_host_order
-#endif
-
-.section      ".text",#alloc,#execinstr
-
-#if defined(__SUNPRO_C) && defined(__sparcv9)
-  /* They've said -xarch=v9 at command line */
-  .register   %g2,#scratch
-  .register   %g3,#scratch
-# define      FRAME   -192
-#elif defined(__GNUC__) && defined(__arch64__)
-  /* They've said -m64 at command line */
-  .register   %g2,#scratch
-  .register   %g3,#scratch
-# define      FRAME   -192
-#else
-# define      FRAME   -96
-#endif
-
-.align 32
-
-.global        md5_block
-md5_block:
-       save    %sp,FRAME,%sp
-
-       ld      [Dptr],D
-       ld      [Cptr],C
-       ld      [Bptr],B
-       ld      [Aptr],A
-#ifdef ASI_PRIMARY_LITTLE
-       rd      %asi,%o7        ! How dare I? Well, I just do:-)
-       wr      %g0,ASI_PRIMARY_LITTLE,%asi
-#endif
-       LOAD    X(0),R0
-
-.Lmd5_block_loop:
-
-!!!!!!!!Round 0
-
-       xor     C,D,T1
-       sethi   %hi(0xd76aa478),T2
-       and     T1,B,T1
-       or      T2,%lo(0xd76aa478),T2   !=
-       xor     T1,D,T1
-       add     T1,R0,T1
-       LOAD    X(1),R1
-       add     T1,T2,T1        !=
-       add     A,T1,A
-       sll     A,7,T2
-       srl     A,32-7,A
-       or      A,T2,A          !=
-       xor     B,C,T1
-       add     A,B,A
-
-       sethi   %hi(0xe8c7b756),T2
-       and     T1,A,T1         !=
-       or      T2,%lo(0xe8c7b756),T2
-       xor     T1,C,T1
-       LOAD    X(2),R2
-       add     T1,R1,T1        !=
-       add     T1,T2,T1
-       add     D,T1,D
-       sll     D,12,T2
-       srl     D,32-12,D       !=
-       or      D,T2,D
-       xor     A,B,T1
-       add     D,A,D
-
-       sethi   %hi(0x242070db),T2      !=
-       and     T1,D,T1
-       or      T2,%lo(0x242070db),T2
-       xor     T1,B,T1
-       add     T1,R2,T1        !=
-       LOAD    X(3),R3
-       add     T1,T2,T1
-       add     C,T1,C
-       sll     C,17,T2         !=
-       srl     C,32-17,C
-       or      C,T2,C
-       xor     D,A,T1
-       add     C,D,C           !=
-
-       sethi   %hi(0xc1bdceee),T2
-       and     T1,C,T1
-       or      T2,%lo(0xc1bdceee),T2
-       xor     T1,A,T1         !=
-       add     T1,R3,T1
-       LOAD    X(4),R4
-       add     T1,T2,T1
-       add     B,T1,B          !=
-       sll     B,22,T2
-       srl     B,32-22,B
-       or      B,T2,B
-       xor     C,D,T1          !=
-       add     B,C,B
-
-       sethi   %hi(0xf57c0faf),T2
-       and     T1,B,T1
-       or      T2,%lo(0xf57c0faf),T2   !=
-       xor     T1,D,T1
-       add     T1,R4,T1
-       LOAD    X(5),R5
-       add     T1,T2,T1        !=
-       add     A,T1,A
-       sll     A,7,T2
-       srl     A,32-7,A
-       or      A,T2,A          !=
-       xor     B,C,T1
-       add     A,B,A
-
-       sethi   %hi(0x4787c62a),T2
-       and     T1,A,T1         !=
-       or      T2,%lo(0x4787c62a),T2
-       xor     T1,C,T1
-       LOAD    X(6),R6
-       add     T1,R5,T1        !=
-       add     T1,T2,T1
-       add     D,T1,D
-       sll     D,12,T2
-       srl     D,32-12,D       !=
-       or      D,T2,D
-       xor     A,B,T1
-       add     D,A,D
-
-       sethi   %hi(0xa8304613),T2      !=
-       and     T1,D,T1
-       or      T2,%lo(0xa8304613),T2
-       xor     T1,B,T1
-       add     T1,R6,T1        !=
-       LOAD    X(7),R7
-       add     T1,T2,T1
-       add     C,T1,C
-       sll     C,17,T2         !=
-       srl     C,32-17,C
-       or      C,T2,C
-       xor     D,A,T1
-       add     C,D,C           !=
-
-       sethi   %hi(0xfd469501),T2
-       and     T1,C,T1
-       or      T2,%lo(0xfd469501),T2
-       xor     T1,A,T1         !=
-       add     T1,R7,T1
-       LOAD    X(8),R8
-       add     T1,T2,T1
-       add     B,T1,B          !=
-       sll     B,22,T2
-       srl     B,32-22,B
-       or      B,T2,B
-       xor     C,D,T1          !=
-       add     B,C,B
-
-       sethi   %hi(0x698098d8),T2
-       and     T1,B,T1
-       or      T2,%lo(0x698098d8),T2   !=
-       xor     T1,D,T1
-       add     T1,R8,T1
-       LOAD    X(9),R9
-       add     T1,T2,T1        !=
-       add     A,T1,A
-       sll     A,7,T2
-       srl     A,32-7,A
-       or      A,T2,A          !=
-       xor     B,C,T1
-       add     A,B,A
-
-       sethi   %hi(0x8b44f7af),T2
-       and     T1,A,T1         !=
-       or      T2,%lo(0x8b44f7af),T2
-       xor     T1,C,T1
-       LOAD    X(10),R10
-       add     T1,R9,T1        !=
-       add     T1,T2,T1
-       add     D,T1,D
-       sll     D,12,T2
-       srl     D,32-12,D       !=
-       or      D,T2,D