summary | refs | log | tree | commit | diff | stats
path: root/crypto/bn
diff options
context:
space:
mode:
author: Ulf Möller <ulf@openssl.org> 1999-05-20 01:43:07 +0000
committer: Ulf Möller <ulf@openssl.org> 1999-05-20 01:43:07 +0000
commit: e14d4443a27816b05b044350ad39cd15668c55b8 (patch)
tree: 4f3baea5f0c59bcbe5a1f359113d7f1b4ef32fd7 /crypto/bn
parent: 257e206da6b42181b0dc8976792164c4d9cff89b (diff)
Bignum library bug fix. IRIX 6 passes "make test" now!
This also avoids the problems with SC4.2 and unpatched SC5.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
Diffstat (limited to 'crypto/bn')
-rw-r--r-- crypto/bn/bn.h 6
-rw-r--r-- crypto/bn/bn_lib.c 137
-rw-r--r-- crypto/bn/bn_prime.c 6
3 files changed, 85 insertions, 64 deletions
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
index 65481153ce..230a591e42 100644
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -119,11 +119,11 @@ extern "C" {
/* This is where the long long data type is 64 bits, but long is 32.
* For machines where there are 64bit registers, this is the mode to use.
* IRIX, on R4000 and above should use this mode, along with the relevent
- * assember code :-). Do NOT define BN_ULLONG.
+ * assember code :-). Do NOT define BN_LLONG.
*/
#ifdef SIXTY_FOUR_BIT
-#define BN_LLONG
-/* #define BN_ULLONG unsigned long long */
+#undef BN_LLONG
+#undef BN_ULLONG
#define BN_ULONG unsigned long long
#define BN_LONG long long
#define BN_BITS 128
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index bd53124f1e..64c9fd9dc1 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -150,7 +150,7 @@ char *BN_options(void)
int BN_num_bits_word(BN_ULONG l)
{
- static char bits[256]={
+ static const char bits[256]={
0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
@@ -343,8 +343,9 @@ void BN_CTX_free(BN_CTX *c)
BIGNUM *bn_expand2(BIGNUM *b, int words)
{
- BN_ULONG *A,*B,*a;
- int i,j;
+ BN_ULONG *A,*a;
+ const BN_ULONG *B;
+ int i;
bn_check_top(b);
@@ -362,15 +363,38 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE);
return(NULL);
}
-memset(A,0x5c,sizeof(BN_ULONG)*(words+1));
#if 1
B=b->d;
/* Check if the previous number needs to be copied */
if (B != NULL)
{
+#if 0
/* This lot is an unrolled loop to copy b->top
* BN_ULONGs from B to A
*/
+/*
+ * I have nothing against unrolling but it's usually done for
+ * several reasons, namely:
+ * - minimize percentage of decision making code, i.e. branches;
+ * - avoid cache trashing;
+ * - make it possible to schedule loads earlier;
+ * Now let's examine the code below. The cornerstone of C is
+ * "programmer is always right" and that's what we love it for:-)
+ * For this very reason C compilers have to be paranoid when it
+ * comes to data aliasing and assume the worst. Yeah, but what
+ * does it mean in real life? This means that loop body below will
+ * be compiled to sequence of loads immediately followed by stores
+ * as compiler assumes the worst, something in A==B+1 style. As a
+ * result CPU pipeline is going to starve for incoming data. Secondly
+ * if A and B happen to share same cache line such code is going to
+ * cause severe cache trashing. Both factors have severe impact on
+ * performance of modern CPUs and this is the reason why this
+ * particulare piece of code is #ifdefed away and replaced by more
+ * "friendly" version found in #else section below. This comment
+ * also applies to BN_copy function.
+ *
+ * <appro@fy.chalmers.se>
+ */
for (i=b->top&(~7); i>0; i-=8)
{
A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3];
@@ -407,6 +431,30 @@ memset(A,0x5c,sizeof(BN_ULONG)*(words+1));
*/
;
}
+#else
+ for (i=b->top>>2; i>0; i--,A+=4,B+=4)
+ {
+ /*
+ * The fact that the loop is unrolled
+ * 4-wise is a tribute to Intel. It's
+ * the one that doesn't have enough
+ * registers to accomodate more data.
+ * I'd unroll it 8-wise otherwise:-)
+ *
+ * <appro@fy.chalmers.se>
+ */
+ BN_ULONG a0,a1,a2,a3;
+ a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
+ A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
+ }
+ switch (b->top&3)
+ {
+ case 3: A[2]=B[2];
+ case 2: A[1]=B[1];
+ case 1: A[0]=B[0];
+ case 0: ; /* ultrix cc workaround, see above */
+ }
+#endif
Free(b->d);
}
@@ -415,22 +463,19 @@ memset(A,0x5c,sizeof(BN_ULONG)*(words+1));
/* Now need to zero any data between b->top and b->max */
- B= &(b->d[b->top]);
- j=(b->max - b->top) & ~7;
- for (i=0; i<j; i+=8)
+ A= &(b->d[b->top]);
+ for (i=(b->max - b->top)>>3; i>0; i--,A+=8)
{
- B[0]=0; B[1]=0; B[2]=0; B[3]=0;
- B[4]=0; B[5]=0; B[6]=0; B[7]=0;
- B+=8;
- }
- j=(b->max - b->top) & 7;
- for (i=0; i<j; i++)
- {
- B[0]=0;
- B++;
+ A[0]=0; A[1]=0; A[2]=0; A[3]=0;
+ A[4]=0; A[5]=0; A[6]=0; A[7]=0;
}
+ for (i=(b->max - b->top)&7; i>0; i--,A++)
+ A[0]=0;
#else
- memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
+ memset(A,0,sizeof(BN_ULONG)*(words+1));
+ memcpy(A,b->d,sizeof(b->d[0])*b->top);
+ b->d=a;
+ b->max=words;
#endif
/* memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); */
@@ -454,7 +499,8 @@ BIGNUM *BN_dup(BIGNUM *a)
BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b)
{
int i;
- BN_ULONG *A,*B;
+ BN_ULONG *A;
+ const BN_ULONG *B;
bn_check_top(b);
@@ -464,47 +510,18 @@ BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b)
#if 1
A=a->d;
B=b->d;
- for (i=b->top&(~7); i>0; i-=8)
+ for (i=b->top>>2; i>0; i--,A+=4,B+=4)
{
- A[0]=B[0];
- A[1]=B[1];
- A[2]=B[2];
- A[3]=B[3];
- A[4]=B[4];
- A[5]=B[5];
- A[6]=B[6];
- A[7]=B[7];
- A+=8;
- B+=8;
+ BN_ULONG a0,a1,a2,a3;
+ a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
+ A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
}
- switch (b->top&7)
+ switch (b->top&3)
{
- case 7:
- A[6]=B[6];
- case 6:
- A[5]=B[5];
- case 5:
- A[4]=B[4];
- case 4:
- A[3]=B[3];
- case 3:
- A[2]=B[2];
- case 2:
- A[1]=B[1];
- case 1:
- A[0]=B[0];
- case 0:
- /* I need the 'case 0' entry for utrix cc.
- * If the optimiser is turned on, it does the
- * switch table by doing
- * a=top&7
- * a--;
- * goto jump_table[a];
- * If top is 0, this makes us jump to 0xffffffc which is
- * rather bad :-(.
- * eric 23-Apr-1998
- */
- ;
+ case 3: A[2]=B[2];
+ case 2: A[1]=B[1];
+ case 1: A[0]=B[0];
+ case 0: ; /* ultrix cc workaround, see comments in bn_expand2 */
}
#else
memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
@@ -539,6 +556,8 @@ BN_ULONG BN_get_word(BIGNUM *a)
#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
ret<<=BN_BITS4; /* stops the compiler complaining */
ret<<=BN_BITS4;
+#else
+ ret=0;
#endif
ret|=a->d[i];
}
@@ -563,6 +582,8 @@ int BN_set_word(BIGNUM *a, BN_ULONG w)
#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
w>>=BN_BITS4;
w>>=BN_BITS4;
+#else
+ w=0;
#endif
a->d[i]=(BN_ULONG)w&BN_MASK2;
if (a->d[i] != 0) a->top=i+1;
@@ -699,7 +720,7 @@ int BN_set_bit(BIGNUM *a, int n)
a->top=i+1;
}
- a->d[i]|=(1L<<j);
+ a->d[i]|=(((BN_ULONG)1)<<j);
return(1);
}
@@ -711,7 +732,7 @@ int BN_clear_bit(BIGNUM *a, int n)
j=n%BN_BITS2;
if (a->top <= i) return(0);
- a->d[i]&=(~(1L<<j));
+ a->d[i]&=(~(((BN_ULONG)1)<<j));
bn_fix_top(a);
return(1);
}
diff --git a/crypto/bn/bn_prime.c b/crypto/bn/bn_prime.c
index 28610766d0..118eb35159 100644
--- a/crypto/bn/bn_prime.c
+++ b/crypto/bn/bn_prime.c
@@ -319,7 +319,7 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, BIGNUM *add, BIGNUM *rem,
loop: for (i=1; i<NUMPRIMES; i++)
{
/* check that rnd is a prime */
- if (BN_mod_word(rnd,(BN_LONG)primes[i]) <= 1)
+ if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
{
if (!BN_add(rnd,rnd,add)) goto err;
goto loop;
@@ -366,8 +366,8 @@ static int probable_prime_dh_strong(BIGNUM *p, int bits, BIGNUM *padd,
/* check that p and q are prime */
/* check that for p and q
* gcd(p-1,primes) == 1 (except for 2) */
- if ( (BN_mod_word(p,(BN_LONG)primes[i]) == 0) ||
- (BN_mod_word(q,(BN_LONG)primes[i]) == 0))
+ if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
+ (BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
{
if (!BN_add(p,p,padd)) goto err;
if (!BN_add(q,q,qadd)) goto err;