diff options
author | Andy Polyakov <appro@openssl.org> | 2005-08-04 17:35:42 +0000 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2005-08-04 17:35:42 +0000 |
commit | 11de71b04c750e6be6f5e8ac875655a2a67efaf7 (patch) | |
tree | 96d7829f07e74d95d7a7af4c933ad21b3cc1d2a0 /crypto/bn/bn_asm.c | |
parent | 19bd66fe747e9768a684e26f24bae4445353ee6c (diff) |
3-4 times better RSA/DSA performance on WIN64A target. Well, on AMD64 CPU,
EM64T will hardly exhibit better performance...
Diffstat (limited to 'crypto/bn/bn_asm.c')
-rw-r--r-- | crypto/bn/bn_asm.c | 28 |
1 file changed, 28 insertions, 0 deletions
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index 19978085b2..99bc2de491 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -459,6 +459,34 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 #define sqr_add_c2(a,i,j,c0,c1,c2) \
 	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
 
+#elif defined(BN_UMULT_LOHI)
+
+#define mul_add_c(a,b,c0,c1,c2) { \
+	BN_ULONG ta=(a),tb=(b); \
+	BN_UMULT_LOHI(t1,t2,ta,tb); \
+	c0 += t1; t2 += (c0<t1)?1:0; \
+	c1 += t2; c2 += (c1<t2)?1:0; \
+	}
+
+#define mul_add_c2(a,b,c0,c1,c2) { \
+	BN_ULONG ta=(a),tb=(b),t0; \
+	BN_UMULT_LOHI(t0,t1,ta,tb); \
+	t2 = t1+t1; c2 += (t2<t1)?1:0; \
+	t1 = t0+t0; t2 += (t1<t0)?1:0; \
+	c0 += t1; t2 += (c0<t1)?1:0; \
+	c1 += t2; c2 += (c1<t2)?1:0; \
+	}
+
+#define sqr_add_c(a,i,c0,c1,c2) { \
+	BN_ULONG ta=(a)[i]; \
+	BN_UMULT_LOHI(t1,t2,ta,ta); \
+	c0 += t1; t2 += (c0<t1)?1:0; \
+	c1 += t2; c2 += (c1<t2)?1:0; \
+	}
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
 #elif defined(BN_UMULT_HIGH)
 
 #define mul_add_c(a,b,c0,c1,c2) { \