diff options
author | Andy Polyakov <appro@openssl.org> | 2005-10-04 06:19:29 +0000 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2005-10-04 06:19:29 +0000 |
commit | e738280547e0f7e3cc5756a92ce3c926eb7736ce (patch) | |
tree | 7f7eca3a84bb0f98d1970523ac9b32f9b1616295 /crypto/bn/bn_asm.c | |
parent | 8265328def2eec75a8afda71b38195a342aeecb2 (diff) |
Add reference implementation for bn_[mul|sqr]_mont, new candidates for
assembler implementation.
Diffstat (limited to 'crypto/bn/bn_asm.c')
-rw-r--r-- | crypto/bn/bn_asm.c | 126 |
1 files changed, 124 insertions, 2 deletions
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index 99bc2de491..52af96d36b 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -820,18 +820,95 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) r[6]=c1; r[7]=c2; } + +#ifdef OPENSSL_BN_ASM_MONT +/* + * This is essentially a reference implementation, which may or may not + * result in a performance improvement. E.g. on IA-32 this does give 40% + * faster rsa1024 private key operations and 10% faster rsa4096 ones, + * while on AMD64 it improves rsa1024 sign only by 10%, but *worsens* + * rsa4096 sign by 15%. Once again, it's a reference implementation, + * one to be used as a starting point for platform-specific assembler. + */ +void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) + { /* NOTE(review): presumably the Montgomery product of ap and bp modulo np (num words each, n0 a precomputed per-word constant) - confirm against callers. The result is left in rp[0..num-1]. */ + BN_ULONG c0,c1,ml,*tp; +#ifdef mul64 + BN_ULONG mh; +#endif + volatile BN_ULONG *vp; /* volatile alias of tp, used below to zero the scratch words before returning */ + int i=0,j; + + vp = tp = alloca((num+2)*sizeof(BN_ULONG)); /* scratch buffer of num+2 words */ + + tp[num] = bn_mul_words(tp,ap,num,bp[0]); /* first pass: tp is filled from ap and bp[0], the returned word goes to tp[num] */ + tp[num+1] = 0; + goto enter; /* jump into the loop body so that pass i=0 skips the bn_mul_add_words step */ + + for(i=0;i<num;i++) + { + c0 = bn_mul_add_words(tp,ap,num,bp[i]); /* presumably adds ap*bp[i] into tp and returns the carry word - confirm */ + c1 = (tp[num] + c0)&BN_MASK2; + tp[num] = c1; + tp[num+1] = (c1<c0?1:0); /* c1<c0 means the masked sum wrapped, so a carry is recorded in tp[num+1] */ + enter: + c1 = tp[0]; + ml = (c1*n0)&BN_MASK2; /* per-pass multiplier for np, derived from tp[0] and n0 */ + c0 = 0; +#ifdef mul64 + mh = HBITS(ml); + ml = LBITS(ml); + mul_add(c1,np[0],ml,mh,c0); +#else + mul_add(c1,ml,np[0],c0); +#endif + for(j=1;j<num;j++) + { + c1 = tp[j]; +#ifdef mul64 + mul_add(c1,np[j],ml,mh,c0); +#else + mul_add(c1,ml,np[j],c0); +#endif + tp[j-1] = c1&BN_MASK2; /* each updated word is stored one slot lower */ + } + c1 = (tp[num] + c0)&BN_MASK2; + tp[num-1] = c1; + tp[num] = tp[num+1] + (c1<c0?1:0); /* fold the saved carry and the new one into the top word */ + } + + if (tp[num]!=0 || tp[num-1]>=np[num-1]) /* a subtraction of np is attempted only if the extra word is set or the top word is not below that of np */ + { + c0 = bn_sub_words(rp,tp,np,num); /* presumably rp = tp - np with c0 the borrow - confirm */ + if (tp[num]!=0 || c0==0) /* keep what bn_sub_words wrote to rp, wipe the scratch words and leave */ + { + for(i=0;i<num+2;i++) vp[i] = 0; + return; + } + } + for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; /* otherwise copy tp to rp (overwriting any bn_sub_words output), wiping each word as it is copied */ + vp[num] = 0; + vp[num+1] = 0; + } + +void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num) + { + bn_mul_mont(rp,ap,ap,np,n0,num); /* squaring is done by passing ap as both operands of bn_mul_mont */ + } +#endif 
/* OPENSSL_BN_ASM_MONT */ + #else /* !BN_MUL_COMBA */ /* hmm... is it faster just to do a multiply? */ #undef bn_sqr_comba4 -void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t[8]; bn_sqr_normal(r,a,4,t); } #undef bn_sqr_comba8 -void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t[16]; bn_sqr_normal(r,a,8,t); @@ -857,4 +934,49 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); } +#ifdef OPENSSL_BN_ASM_MONT +void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) + { /* Variant compiled in the !BN_MUL_COMBA branch; same signature as the BN_MUL_COMBA one, built on bn_mul_add_words only. The result is left in rp[0..num-1]. */ + BN_ULONG c0,c1,*tp; + volatile BN_ULONG *vp; /* volatile alias of tp, used below to zero the scratch words before returning */ + int i=0,j; + + vp = tp = alloca((num+2)*sizeof(BN_ULONG)); /* scratch buffer of num+2 words */ + + for(i=0;i<=num;i++) tp[i]=0; /* clears tp[0..num]; tp[num+1] is first written inside the loop below */ + + for(i=0;i<num;i++) + { + c0 = bn_mul_add_words(tp,ap,num,bp[i]); /* presumably adds ap*bp[i] into tp and returns the carry word - confirm */ + c1 = tp[num] + c0; /* NOTE(review): the BN_MUL_COMBA variant masks this sum with BN_MASK2, here it is unmasked - confirm BN_ULONG never holds bits above BN_MASK2 in this configuration */ + tp[num] = c1; + tp[num+1] = (c1<c0?1:0); /* c1<c0 means the sum wrapped, so a carry is recorded in tp[num+1] */ + + c0 = bn_mul_add_words(tp,np,num,tp[0]*n0); /* second pass over np with multiplier tp[0]*n0; NOTE(review): this product is also not masked with BN_MASK2 */ + c1 = tp[num] + c0; + tp[num] = c1; + tp[num+1] += (c1<c0?1:0); + for(j=0;j<=num;j++) tp[j]=tp[j+1]; /* shift tp[1..num+1] down by one word */ + } + + if (tp[num]!=0 || tp[num-1]>=np[num-1]) /* a subtraction of np is attempted only if the extra word is set or the top word is not below that of np */ + { + c0 = bn_sub_words(rp,tp,np,num); /* presumably rp = tp - np with c0 the borrow - confirm */ + if (tp[num]!=0 || c0==0) /* keep what bn_sub_words wrote to rp, wipe the scratch words and leave */ + { + for(i=0;i<num+2;i++) vp[i] = 0; + return; + } + } + for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; /* otherwise copy tp to rp (overwriting any bn_sub_words output), wiping each word as it is copied */ + vp[num] = 0; + vp[num+1] = 0; + } + +void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num) + { + bn_mul_mont(rp,ap,ap,np,n0,num); /* squaring is done by passing ap as both operands of bn_mul_mont */ + } +#endif /* OPENSSL_BN_ASM_MONT */ + #endif /* !BN_MUL_COMBA */ |