author    Ulf Möller <ulf@openssl.org>  1999-05-05 00:23:53 +0000
committer Ulf Möller <ulf@openssl.org>  1999-05-05 00:23:53 +0000
commit    8e7f966bf3579629ce553f9512ee6952588c02a4 (patch)
tree      56a280158be8a9da490d215564792ca4a4967553 /crypto/sha
parent    744029c15470a6b16e9645a16c76bbff15be7596 (diff)
SHA-1 cleanups and performance enhancements.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
Diffstat (limited to 'crypto/sha')
-rw-r--r--  crypto/sha/sha.h       |  26
-rw-r--r--  crypto/sha/sha1dgst.c  | 153
-rw-r--r--  crypto/sha/sha_dgst.c  | 163
-rw-r--r--  crypto/sha/sha_locl.h  |  68
4 files changed, 283 insertions, 127 deletions
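The change that recurs across all three .c files below is the calling convention of the block routine: sha1_block()/sha_block() now take a count of SHA_CBLOCK-sized blocks rather than a byte count (sha1_block(c,p,1) replaces sha1_block(c,p,64)), and the x86 assembler entry keeps its byte interface behind a wrapper macro. A minimal sketch of that convention, using hypothetical names (block_fn, buf) that are not in the patch and with the real compression elided:

    #include <stdio.h>

    #define SHA_LBLOCK 16
    #define SHA_CBLOCK (SHA_LBLOCK*4)            /* 64 bytes per block */

    /* Stand-in for sha1_block(): 'num' counts 64-byte blocks, not bytes. */
    static void block_fn(unsigned long h[5], const unsigned char *data, int num)
        {
        while (num-- > 0)
            {
            h[0] ^= data[0];                     /* real compression elided */
            data += SHA_CBLOCK;
            }
        }

    int main(void)
        {
        unsigned long h[5] = {0};
        unsigned char buf[3*SHA_CBLOCK] = {1};
        int sw = (int)(sizeof(buf)/SHA_CBLOCK);  /* bulk path: count whole blocks... */
        block_fn(h, buf, sw);                    /* ...then one call handles them all */
        printf("%d blocks, h0=%lu\n", sw, h[0]);
        return 0;
        }

An assembler back end that still wants a byte count can be hidden behind a macro in the style of the patch, e.g. #define block_fn(c,p,n) asm_fn((c),(p),(n)*SHA_CBLOCK).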
diff --git a/crypto/sha/sha.h b/crypto/sha/sha.h
index ba40aafc13..cd6960ee1a 100644
--- a/crypto/sha/sha.h
+++ b/crypto/sha/sha.h
@@ -67,18 +67,28 @@ extern "C" {
#error SHA is disabled.
#endif
-#define SHA_CBLOCK 64
-#define SHA_LBLOCK 16
-#define SHA_BLOCK 16
-#define SHA_LAST_BLOCK 56
-#define SHA_LENGTH_BLOCK 8
-#define SHA_DIGEST_LENGTH 20
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! SHA_LONG has to be at least 32 bits wide. If it's wider, then !
+ * ! SHA_LONG_LOG2 has to be defined along. !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
-#ifdef WIN16
+#if defined(WIN16) || defined(__LP32__)
+#define SHA_LONG unsigned long
+#elif defined(_CRAY) || defined(__ILP64__)
#define SHA_LONG unsigned long
+#define SHA_LONG_LOG2 3
#else
#define SHA_LONG unsigned int
-#endif
+#endif
+
+#define SHA_LBLOCK 16
+#define SHA_CBLOCK (SHA_LBLOCK*4) /* SHA treats input data as a
+ * contiguous array of 32 bit
+ * wide big-endian values. */
+#define SHA_LAST_BLOCK (SHA_CBLOCK-8)
+#define SHA_DIGEST_LENGTH 20
typedef struct SHAstate_st
{
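The rewritten header ties the remaining constants to SHA_LBLOCK and documents the SHA_LONG contract: at least 32 bits wide, with SHA_LONG_LOG2 defined whenever it is wider. A minimal compile-time sketch of that contract (not part of the patch; the negative-array-size typedef is just one C89-friendly way to assert it):

    #include <limits.h>

    /* Same selection idea as the patched sha.h, shown for a generic build. */
    #if defined(_CRAY) || defined(__ILP64__)
    # define SHA_LONG      unsigned long
    # define SHA_LONG_LOG2 3                    /* log2(sizeof(SHA_LONG)) */
    #else
    # define SHA_LONG      unsigned int
    # define SHA_LONG_LOG2 2
    #endif

    #define SHA_LBLOCK     16
    #define SHA_CBLOCK     (SHA_LBLOCK*4)       /* 64: 16 big-endian 32-bit words */
    #define SHA_LAST_BLOCK (SHA_CBLOCK-8)       /* 56: last 8 bytes hold the bit count */

    /* Compile-time checks: the array size becomes -1 (an error) if a condition fails. */
    typedef char sha_long_at_least_32 [(sizeof(SHA_LONG)*CHAR_BIT >= 32) ? 1 : -1];
    typedef char sha_long_log2_matches[(sizeof(SHA_LONG) == (1u<<SHA_LONG_LOG2)) ? 1 : -1];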
diff --git a/crypto/sha/sha1dgst.c b/crypto/sha/sha1dgst.c
index f4a47f3768..e867f6972b 100644
--- a/crypto/sha/sha1dgst.c
+++ b/crypto/sha/sha1dgst.c
@@ -81,14 +81,14 @@ char *SHA1_version="SHA1" OPENSSL_VERSION_PTEXT;
#define K_40_59 0x8f1bbcdcUL
#define K_60_79 0xca62c1d6UL
-# ifdef SHA1_ASM
- void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
-# define sha1_block sha1_block_x86
-# else
- void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
-# endif
+#ifdef SHA1_ASM
+ void sha1_block_x86(SHA_CTX *c, register SHA_LONG *p, int num);
+# define sha1_block(c,p,n) sha1_block_x86((c),(p),(n)*SHA_CBLOCK)
+#else
+ static void sha1_block(SHA_CTX *c, register SHA_LONG *p, int num);
+#endif
-#if defined(L_ENDIAN) && defined(SHA1_ASM)
+#if !defined(B_ENDIAN) && defined(SHA1_ASM)
# define M_c2nl c2l
# define M_p_c2nl p_c2l
# define M_c2nl_p c2l_p
@@ -147,7 +147,7 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
}
len-=(SHA_CBLOCK-c->num);
- sha1_block(c,p,64);
+ sha1_block(c,p,1);
c->num=0;
/* drop through and do the rest */
}
@@ -184,15 +184,15 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
* copies it to a local array. I should be able to do this for
* the C version as well....
*/
-#if 1
+#if SHA_LONG_LOG2==2
#if defined(B_ENDIAN) || defined(SHA1_ASM)
if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{
sw=len/SHA_CBLOCK;
if (sw)
{
- sw*=SHA_CBLOCK;
sha1_block(c,(SHA_LONG *)data,sw);
+ sw*=SHA_CBLOCK;
data+=sw;
len-=sw;
}
@@ -204,35 +204,61 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
p=c->data;
while (len >= SHA_CBLOCK)
{
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA1_ASM)
+#define SHA_NO_TAIL_CODE
+ /*
+ * Basically we get here only when data happens
+ * to be unaligned.
+ */
if (p != (SHA_LONG *)data)
memcpy(p,data,SHA_CBLOCK);
data+=SHA_CBLOCK;
-# ifdef L_ENDIAN
-# ifndef SHA1_ASM /* Will not happen */
- for (sw=(SHA_LBLOCK/4); sw; sw--)
+ sha1_block(c,p=c->data,1);
+ len-=SHA_CBLOCK;
+#else /* little-endian */
+#define BE_COPY(dst,src,i) { \
+ l = ((SHA_LONG *)src)[i]; \
+ Endian_Reverse32(l); \
+ dst[i] = l; \
+ }
+ if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
+ for (sw=(SHA_LBLOCK/4); sw; sw--)
+ {
+ BE_COPY(p,data,0);
+ BE_COPY(p,data,1);
+ BE_COPY(p,data,2);
+ BE_COPY(p,data,3);
+ p+=4;
+ data += 4*sizeof(SHA_LONG);
+ }
+ sha1_block(c,p=c->data,1);
+ len-=SHA_CBLOCK;
+ continue;
}
+#endif
+#endif
+#ifndef SHA_NO_TAIL_CODE
+ /*
+ * In addition to "sizeof(SHA_LONG)!= 4" case the
+ * following code covers unaligned access cases on
+ * little-endian machines.
+ * <appro@fy.chalmers.se>
+ */
p=c->data;
-# endif
-# endif
-#else
- for (sw=(SHA_BLOCK/4); sw; sw--)
+ for (sw=(SHA_LBLOCK/4); sw; sw--)
{
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
+ M_c2nl(data,l); p[0]=l;
+ M_c2nl(data,l); p[1]=l;
+ M_c2nl(data,l); p[2]=l;
+ M_c2nl(data,l); p[3]=l;
+ p+=4;
}
p=c->data;
-#endif
- sha1_block(c,p,64);
+ sha1_block(c,p,1);
len-=SHA_CBLOCK;
+#endif
}
ec=(int)len;
c->num=ec;
@@ -247,26 +273,35 @@ void SHA1_Update(SHA_CTX *c, const register unsigned char *data,
void SHA1_Transform(SHA_CTX *c, unsigned char *b)
{
- SHA_LONG p[16];
-#ifndef B_ENDIAN
+ SHA_LONG p[SHA_LBLOCK];
SHA_LONG *q;
int i;
-#endif
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
- memcpy(p,b,64);
-#ifdef L_ENDIAN
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA1_ASM)
+ memcpy(p,b,SHA_CBLOCK);
+ sha1_block(c,p,1);
+ return;
+#else
+ if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
{
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
+ q=p;
+ for (i=(SHA_LBLOCK/4); i; i--)
+ {
+ unsigned long l;
+ BE_COPY(q,b,0); /* BE_COPY was defined above */
+ BE_COPY(q,b,1);
+ BE_COPY(q,b,2);
+ BE_COPY(q,b,3);
+ q+=4;
+ b+=4*sizeof(SHA_LONG);
+ }
+ sha1_block(c,p,1);
+ return;
}
#endif
-#else
+#endif
+#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
q=p;
for (i=(SHA_LBLOCK/4); i; i--)
{
@@ -276,16 +311,15 @@ void SHA1_Transform(SHA_CTX *c, unsigned char *b)
c2nl(b,l); *(q++)=l;
c2nl(b,l); *(q++)=l;
}
+ sha1_block(c,p,1);
#endif
- sha1_block(c,p,64);
}
#ifndef SHA1_ASM
-
-void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
+static void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
{
register SHA_LONG A,B,C,D,E,T;
- SHA_LONG X[16];
+ SHA_LONG X[SHA_LBLOCK];
A=c->h0;
B=c->h1;
@@ -385,8 +419,7 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
c->h3=(c->h3+B)&0xffffffffL;
c->h4=(c->h4+C)&0xffffffffL;
- num-=64;
- if (num <= 0) break;
+ if (--num <= 0) break;
A=c->h0;
B=c->h1;
@@ -394,7 +427,12 @@ void sha1_block(SHA_CTX *c, register SHA_LONG *W, int num)
D=c->h3;
E=c->h4;
- W+=16;
+ W+=SHA_LBLOCK; /* Note! This can happen only when sizeof(SHA_LONG)
+ * is 4. Whenever it's not the actual case this
+ * function is never called with num larger than 1
+ * and we never advance down here.
+ * <appro@fy.chalmers.se>
+ */
}
}
#endif
@@ -423,18 +461,20 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c)
{
for (; i<SHA_LBLOCK; i++)
p[i]=0;
- sha1_block(c,p,64);
+ sha1_block(c,p,1);
i=0;
}
for (; i<(SHA_LBLOCK-2); i++)
p[i]=0;
p[SHA_LBLOCK-2]=c->Nh;
p[SHA_LBLOCK-1]=c->Nl;
-#if defined(L_ENDIAN) && defined(SHA1_ASM)
+#if SHA_LONG_LOG2==2
+#if !defined(B_ENDIAN) && defined(SHA1_ASM)
Endian_Reverse32(p[SHA_LBLOCK-2]);
Endian_Reverse32(p[SHA_LBLOCK-1]);
#endif
- sha1_block(c,p,64);
+#endif
+ sha1_block(c,p,1);
cp=md;
l=c->h0; nl2c(l,cp);
l=c->h1; nl2c(l,cp);
@@ -442,10 +482,11 @@ void SHA1_Final(unsigned char *md, SHA_CTX *c)
l=c->h3; nl2c(l,cp);
l=c->h4; nl2c(l,cp);
- /* clear stuff, sha1_block may be leaving some stuff on the stack
- * but I'm not worried :-) */
c->num=0;
-/* memset((char *)&c,0,sizeof(c));*/
+ /* sha_block may be leaving some stuff on the stack
+ * but I'm not worried :-)
+ memset((void *)c,0,sizeof(SHA_CTX));
+ */
}
#endif
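A pattern the updated SHA1_Update() and SHA1_Transform() now share: if the caller's buffer happens to be SHA_LONG-aligned, hash directly out of it; otherwise stage the block in c->data first. A stripped-down sketch of that dispatch, with a hypothetical hash_block() standing in for sha1_block() and the little-endian byte-swapping path left out:

    #include <string.h>

    #define SHA_CBLOCK 64
    typedef unsigned int SHA_LONG;               /* 32 bits on the platforms in question */

    static void hash_block(const SHA_LONG *w) { (void)w; /* compression elided */ }

    void consume(const unsigned char *data, size_t len)
        {
        SHA_LONG tmp[SHA_CBLOCK/sizeof(SHA_LONG)];

        while (len >= SHA_CBLOCK)
            {
            if (((unsigned long)data % sizeof(SHA_LONG)) == 0)
                hash_block((const SHA_LONG *)data); /* aligned: no copy needed */
            else
                {
                memcpy(tmp, data, SHA_CBLOCK);      /* unaligned: stage in a local buffer */
                hash_block(tmp);
                }
            data += SHA_CBLOCK;
            len  -= SHA_CBLOCK;
            }
        }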
diff --git a/crypto/sha/sha_dgst.c b/crypto/sha/sha_dgst.c
index 5827c73cea..d90f497763 100644
--- a/crypto/sha/sha_dgst.c
+++ b/crypto/sha/sha_dgst.c
@@ -81,12 +81,21 @@ char *SHA_version="SHA" OPENSSL_VERSION_PTEXT;
#define K_40_59 0x8f1bbcdcUL
#define K_60_79 0xca62c1d6UL
- void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
-#define M_c2nl c2nl
-#define M_p_c2nl p_c2nl
-#define M_c2nl_p c2nl_p
-#define M_p_c2nl_p p_c2nl_p
-#define M_nl2c nl2c
+static void sha_block(SHA_CTX *c, register SHA_LONG *p, int num);
+
+#if !defined(B_ENDIAN) && defined(SHA_ASM)
+# define M_c2nl c2l
+# define M_p_c2nl p_c2l
+# define M_c2nl_p c2l_p
+# define M_p_c2nl_p p_c2l_p
+# define M_nl2c l2c
+#else
+# define M_c2nl c2nl
+# define M_p_c2nl p_c2nl
+# define M_c2nl_p c2nl_p
+# define M_p_c2nl_p p_c2nl_p
+# define M_nl2c nl2c
+#endif
void SHA_Init(SHA_CTX *c)
{
@@ -133,7 +142,7 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
}
len-=(SHA_CBLOCK-c->num);
- sha_block(c,p,64);
+ sha_block(c,p,1);
c->num=0;
/* drop through and do the rest */
}
@@ -170,15 +179,15 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
* copies it to a local array. I should be able to do this for
* the C version as well....
*/
-#if 1
+#if SHA_LONG_LOG2==2
#if defined(B_ENDIAN) || defined(SHA_ASM)
if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{
sw=len/SHA_CBLOCK;
if (sw)
{
- sw*=SHA_CBLOCK;
sha_block(c,(SHA_LONG *)data,sw);
+ sw*=SHA_CBLOCK;
data+=sw;
len-=sw;
}
@@ -190,35 +199,61 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
p=c->data;
while (len >= SHA_CBLOCK)
{
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA_ASM)
+#define SHA_NO_TAIL_CODE
+ /*
+ * Basically we get here only when data happens
+ * to be unaligned.
+ */
if (p != (SHA_LONG *)data)
memcpy(p,data,SHA_CBLOCK);
data+=SHA_CBLOCK;
-# ifdef L_ENDIAN
-# ifndef SHA_ASM /* Will not happen */
- for (sw=(SHA_LBLOCK/4); sw; sw--)
+ sha_block(c,p=c->data,1);
+ len-=SHA_CBLOCK;
+#else /* little-endian */
+#define BE_COPY(dst,src,i) { \
+ l = ((SHA_LONG *)src)[i]; \
+ Endian_Reverse32(l); \
+ dst[i] = l; \
+ }
+ if ((((unsigned long)data)%sizeof(SHA_LONG)) == 0)
{
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
+ for (sw=(SHA_LBLOCK/4); sw; sw--)
+ {
+ BE_COPY(p,data,0);
+ BE_COPY(p,data,1);
+ BE_COPY(p,data,2);
+ BE_COPY(p,data,3);
+ p+=4;
+ data += 4*sizeof(SHA_LONG);
+ }
+ sha_block(c,p=c->data,1);
+ len-=SHA_CBLOCK;
+ continue;
}
+#endif
+#endif
+#ifndef SHA_NO_TAIL_CODE
+ /*
+ * In addition to "sizeof(SHA_LONG)!= 4" case the
+ * following code covers unaligned access cases on
+ * little-endian machines.
+ * <appro@fy.chalmers.se>
+ */
p=c->data;
-# endif
-# endif
-#else
- for (sw=(SHA_BLOCK/4); sw; sw--)
+ for (sw=(SHA_LBLOCK/4); sw; sw--)
{
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
+ M_c2nl(data,l); p[0]=l;
+ M_c2nl(data,l); p[1]=l;
+ M_c2nl(data,l); p[2]=l;
+ M_c2nl(data,l); p[3]=l;
+ p+=4;
}
p=c->data;
-#endif
- sha_block(c,p,64);
+ sha_block(c,p,1);
len-=SHA_CBLOCK;
+#endif
}
ec=(int)len;
c->num=ec;
@@ -233,26 +268,35 @@ void SHA_Update(SHA_CTX *c, const register unsigned char *data,
void SHA_Transform(SHA_CTX *c, unsigned char *b)
{
- SHA_LONG p[16];
-#if !defined(B_ENDIAN)
+ SHA_LONG p[SHA_LBLOCK];
SHA_LONG *q;
int i;
-#endif
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
- memcpy(p,b,64);
-#ifdef L_ENDIAN
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
+#if SHA_LONG_LOG2==2
+#if defined(B_ENDIAN) || defined(SHA_ASM)
+ memcpy(p,b,SHA_CBLOCK);
+ sha_block(c,p,1);
+ return;
+#else
+ if (((unsigned long)b%sizeof(SHA_LONG)) == 0)
{
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
+ q=p;
+ for (i=(SHA_LBLOCK/4); i; i--)
+ {
+ unsigned long l;
+ BE_COPY(q,b,0); /* BE_COPY was defined above */
+ BE_COPY(q,b,1);
+ BE_COPY(q,b,2);
+ BE_COPY(q,b,3);
+ q+=4;
+ b+=4*sizeof(SHA_LONG);
+ }
+ sha_block(c,p,1);
+ return;
}
#endif
-#else
+#endif
+#ifndef SHA_NO_TAIL_CODE /* defined above, see comment */
q=p;
for (i=(SHA_LBLOCK/4); i; i--)
{
@@ -262,14 +306,15 @@ void SHA_Transform(SHA_CTX *c, unsigned char *b)
c2nl(b,l); *(q++)=l;
c2nl(b,l); *(q++)=l;
}
+ sha_block(c,p,1);
#endif
- sha_block(c,p,64);
}
-void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
+#ifndef SHA_ASM
+static void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
{
register SHA_LONG A,B,C,D,E,T;
- SHA_LONG X[16];
+ SHA_LONG X[SHA_LBLOCK];
A=c->h0;
B=c->h1;
@@ -369,8 +414,7 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
c->h3=(c->h3+B)&0xffffffffL;
c->h4=(c->h4+C)&0xffffffffL;
- num-=64;
- if (num <= 0) break;
+ if (--num <= 0) break;
A=c->h0;
B=c->h1;
@@ -378,9 +422,15 @@ void sha_block(SHA_CTX *c, register SHA_LONG *W, int num)
D=c->h3;
E=c->h4;
- W+=16;
+ W+=SHA_LBLOCK; /* Note! This can happen only when sizeof(SHA_LONG)
+ * is 4. Whenever it's not the actual case this
+ * function is never called with num larger than 1
+ * and we never advance down here.
+ * <appro@fy.chalmers.se>
+ */
}
}
+#endif
void SHA_Final(unsigned char *md, SHA_CTX *c)
{
@@ -406,14 +456,20 @@ void SHA_Final(unsigned char *md, SHA_CTX *c)
{
for (; i<SHA_LBLOCK; i++)
p[i]=0;
- sha_block(c,p,64);
+ sha_block(c,p,1);
i=0;
}
for (; i<(SHA_LBLOCK-2); i++)
p[i]=0;
p[SHA_LBLOCK-2]=c->Nh;
p[SHA_LBLOCK-1]=c->Nl;
- sha_block(c,p,64);
+#if SHA_LONG_LOG2==2
+#if !defined(B_ENDIAN) && defined(SHA_ASM)
+ Endian_Reverse32(p[SHA_LBLOCK-2]);
+ Endian_Reverse32(p[SHA_LBLOCK-1]);
+#endif
+#endif
+ sha_block(c,p,1);
cp=md;
l=c->h0; nl2c(l,cp);
l=c->h1; nl2c(l,cp);
@@ -421,9 +477,10 @@ void SHA_Final(unsigned char *md, SHA_CTX *c)
l=c->h3; nl2c(l,cp);
l=c->h4; nl2c(l,cp);
- /* clear stuff, sha_block may be leaving some stuff on the stack
- * but I'm not worried :-) */
c->num=0;
-/* memset((char *)&c,0,sizeof(c));*/
+ /* sha_block may be leaving some stuff on the stack
+ * but I'm not worried :-)
+ memset((void *)c,0,sizeof(SHA_CTX));
+ */
}
#endif
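On little-endian machines both Update routines now fill the work buffer with a local BE_COPY macro: read an aligned 32-bit word, byte-swap it, store it. A self-contained sketch of the same idea, using a portable shift-based swap in place of the bswap/rotate versions in sha_locl.h and assuming, as the patch does, that the caller has already verified word alignment:

    /* Portable 32-bit byte swap, standing in for Endian_Reverse32. */
    #define SWAP32(l) ((((l)&0xff000000UL)>>24) | (((l)&0x00ff0000UL)>>8) | \
                       (((l)&0x0000ff00UL)<<8)  | (((l)&0x000000ffUL)<<24))

    #define BE_COPY(dst,src,i) { \
            unsigned long l = ((const unsigned int *)(src))[i]; \
            (dst)[i] = SWAP32(l); \
        }

    /* Hypothetical helper, not in the patch: convert one 64-byte block of
     * native little-endian words into the big-endian word order SHA expects,
     * four words per iteration as in the patched loops. */
    void be_load_block(unsigned int p[16], const unsigned char *data)
        {
        int sw;
        for (sw = 16/4; sw; sw--)
            {
            BE_COPY(p, data, 0);
            BE_COPY(p, data, 1);
            BE_COPY(p, data, 2);
            BE_COPY(p, data, 3);
            p    += 4;
            data += 4*sizeof(unsigned int);
            }
        }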
diff --git a/crypto/sha/sha_locl.h b/crypto/sha/sha_locl.h
index 9f1251e787..32bbe30afd 100644
--- a/crypto/sha/sha_locl.h
+++ b/crypto/sha/sha_locl.h
@@ -158,30 +158,79 @@
*((c)++)=(unsigned char)(((l)>>16)&0xff), \
*((c)++)=(unsigned char)(((l)>>24)&0xff))
+#ifndef SHA_LONG_LOG2
+#define SHA_LONG_LOG2 2 /* default to 32 bits */
+#endif
+
#undef ROTATE
+#undef Endian_Reverse32
#if defined(WIN32)
#define ROTATE(a,n) _lrotl(a,n)
-#else
-#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
+#elif defined(__GNUC__)
+/* some inline assembler templates by <appro@fy.chalmers.se> */
+#if defined(__i386)
+#define ROTATE(a,n) ({ register unsigned int ret; \
+ asm ("roll %1,%0" \
+ : "=r"(ret) \
+ : "I"(n), "0"(a) \
+ : "cc"); \
+ ret; \
+ })
+#ifndef I386_ONLY
+#define Endian_Reverse32(a) \
+ { register unsigned int l=(a); \
+ asm ("bswapl %0" \
+ : "=r"(l) : "0"(l)); \
+ (a)=l; \
+ }
+#endif
+#elif defined(__powerpc)
+#define ROTATE(a,n) ({ register unsigned int ret; \
+ asm ("rlwinm %0,%1,%2,0,31" \
+ : "=r"(ret) \
+ : "r"(a), "I"(n)); \
+ ret; \
+ })
+/* Endian_Reverse32 is not needed for PowerPC */
+#endif
#endif
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#if defined(WIN32)
+#ifdef ROTATE
+#ifndef Endian_Reverse32
/* 5 instructions with rotate instruction, else 9 */
#define Endian_Reverse32(a) \
{ \
- unsigned long l=(a); \
- (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
+ unsigned long t=(a); \
+ (a)=((ROTATE(t,8)&0x00FF00FF)|(ROTATE((t&0x00FF00FF),24))); \
}
+#endif
#else
+#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
+#ifndef Endian_Reverse32
/* 6 instructions with rotate instruction, else 8 */
#define Endian_Reverse32(a) \
{ \
- unsigned long l=(a); \
- l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
- (a)=ROTATE(l,16L); \
+ unsigned long t=(a); \
+ t=(((t>>8)&0x00FF00FF)|((t&0x00FF00FF)<<8)); \
+ (a)=ROTATE(t,16); \
}
#endif
+/*
+ * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
+ * It's rewritten as above for two reasons:
+ * - RISCs aren't good at long constants and have to explicitely
+ * compose 'em with several (well, usually 2) instructions in a
+ * register before performing the actual operation and (as you
+ * already realized:-) having same constant should inspire the
+ * compiler to permanently allocate the only register for it;
+ * - most modern CPUs have two ALUs, but usually only one has
+ * circuitry for shifts:-( this minor tweak inspires compiler
+ * to schedule shift instructions in a better way...
+ *
+ * <appro@fy.chalmers.se>
+ */
+#endif
/* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
* simplified to the code in F_00_19. Wei attributes these optimisations
@@ -195,13 +244,12 @@
#define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d)))
#define F_60_79(b,c,d) F_20_39(b,c,d)
-#ifdef SHA_0
#undef Xupdate
+#ifdef SHA_0
#define Xupdate(a,i,ia,ib,ic,id) X[(i)&0x0f]=(a)=\
(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);
#endif
#ifdef SHA_1
-#undef Xupdate
#define Xupdate(a,i,ia,ib,ic,id) (a)=\
(ia[(i)&0x0f]^ib[((i)+2)&0x0f]^ic[((i)+8)&0x0f]^id[((i)+13)&0x0f]);\
X[(i)&0x0f]=(a)=ROTATE((a),1);
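The sha_locl.h hunk also rewrites both byte-swap variants so that 0x00FF00FF is the only long constant that has to be materialized, for the reasons given in the comment above. A small stand-alone check (not part of the patch) that the old and new rotate-based expressions compute the same byte reversal:

    #include <stdio.h>

    #define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffffUL)>>(32-(n))))

    /* Old form: needs both 0x00FF00FF and 0xFF00FF00. */
    static unsigned long swap_old(unsigned long l)
        {
        return (ROTATE(l,8)&0x00FF00FFUL)|(ROTATE(l,24)&0xFF00FF00UL);
        }

    /* New form from the patch: only 0x00FF00FF appears, which is cheaper
     * to compose into a register on RISC targets. */
    static unsigned long swap_new(unsigned long t)
        {
        return (ROTATE(t,8)&0x00FF00FFUL)|(ROTATE((t&0x00FF00FFUL),24)&0xffffffffUL);
        }

    int main(void)
        {
        unsigned long v = 0x01020304UL;
        printf("old=%08lx new=%08lx\n", swap_old(v)&0xffffffffUL,
                                        swap_new(v)&0xffffffffUL);
        return 0;                                /* both print 04030201 */
        }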