summaryrefslogtreecommitdiffstats
path: root/crypto/bn/bn_exp.c
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2013-12-09 21:02:24 +0100
committerAndy Polyakov <appro@openssl.org>2013-12-09 21:02:24 +0100
commitec9cc70f72454b8d4a84247c86159613cee83b81 (patch)
tree504ec4eeaf5d13670389711f7bc01915f6a4a595 /crypto/bn/bn_exp.c
parentd1671f4f1a39d938499c67efe5d4a14c34c09b31 (diff)
bn/asm/x86_64-mont5.pl: add MULX/AD*X code path.
This also eliminates code duplication between x86_64-mont and x86_64-mont and optimizes even original non-MULX code.
Diffstat (limited to 'crypto/bn/bn_exp.c')
-rw-r--r--crypto/bn/bn_exp.c48
1 files changed, 40 insertions, 8 deletions
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 6e3a7264d5..baeda372cf 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -726,7 +726,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
else
#endif
#if defined(OPENSSL_BN_ASM_MONT5)
- if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */
+ if (window>=5)
+ {
+ window=5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
+ if ((top&7)==0) powerbufLen += 2*top*sizeof(m->d[0]);
+ }
#endif
(void)0;
@@ -734,7 +738,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
* powers of am, am itself and tmp.
*/
numPowers = 1 << window;
- powerbufLen = sizeof(m->d[0])*(top*numPowers +
+ powerbufLen += sizeof(m->d[0])*(top*numPowers +
((2*top)>numPowers?(2*top):numPowers));
#ifdef alloca
if (powerbufLen < 3072)
@@ -912,14 +916,26 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
void *table,size_t power);
void bn_gather5(BN_ULONG *out,size_t num,
void *table,size_t power);
+ void bn_power5(BN_ULONG *rp,const BN_ULONG *ap,
+ const void *table,const BN_ULONG *np,
+ const BN_ULONG *n0,int num,int power);
+ int bn_get_bits5(const BN_ULONG *ap,int off);
+ int bn_from_montgomery(BN_ULONG *rp,const BN_ULONG *ap,
+ const BN_ULONG *not_used,const BN_ULONG *np,
+ const BN_ULONG *n0,int num);
- BN_ULONG *np=mont->N.d, *n0=mont->n0;
+ BN_ULONG *np=mont->N.d, *n0=mont->n0, *np2;
/* BN_to_montgomery can contaminate words above .top
* [in BN_DEBUG[_DEBUG] build]... */
for (i=am.top; i<top; i++) am.d[i]=0;
for (i=tmp.top; i<top; i++) tmp.d[i]=0;
+ if (top&7)
+ np2 = np;
+ else
+ for (np2=am.d+top,i=0; i<top; i++) np2[2*i]=np[i];
+
bn_scatter5(tmp.d,top,powerbuf,0);
bn_scatter5(am.d,am.top,powerbuf,1);
bn_mul_mont(tmp.d,am.d,am.d,np,n0,top);
@@ -929,7 +945,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
for (i=3; i<32; i++)
{
/* Calculate a^i = a^(i-1) * a */
- bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
+ bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np2,n0,top,i-1);
bn_scatter5(tmp.d,top,powerbuf,i);
}
#else
@@ -942,7 +958,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
for (i=3; i<8; i+=2)
{
int j;
- bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
+ bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np2,n0,top,i-1);
bn_scatter5(tmp.d,top,powerbuf,i);
for (j=2*i; j<32; j*=2)
{
@@ -952,14 +968,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
for (; i<16; i+=2)
{
- bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
+ bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np2,n0,top,i-1);
bn_scatter5(tmp.d,top,powerbuf,i);
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
bn_scatter5(tmp.d,top,powerbuf,2*i);
}
for (; i<32; i+=2)
{
- bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
+ bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np2,n0,top,i-1);
bn_scatter5(tmp.d,top,powerbuf,i);
}
#endif
@@ -971,7 +987,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
/* Scan the exponent one window at a time starting from the most
* significant bits.
*/
- while (bits >= 0)
+ if (top&7)
+ while (bits >= 0)
{
for (wvalue=0, i=0; i<5; i++,bits--)
wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
@@ -983,9 +1000,24 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
}
+ else
+ {
+ while (bits >= 0)
+ {
+ wvalue = bn_get_bits5(p->d,bits-4);
+ bits-=5;
+ bn_power5(tmp.d,tmp.d,powerbuf,np2,n0,top,wvalue);
+ }
+ }
+ ret=bn_from_montgomery(tmp.d,tmp.d,NULL,np2,n0,top);
tmp.top=top;
bn_correct_top(&tmp);
+ if (ret)
+ {
+ if (!BN_copy(rr,&tmp)) ret=0;
+ goto err; /* non-zero ret means it's not error */
+ }
}
else
#endif