summary | refs | log | tree | commit | diff | stats
path: root/crypto/bn
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2017-11-15 12:25:02 +0100
committer: Andy Polyakov <appro@openssl.org> 2017-11-16 13:57:55 +0100
commit: a78324d95bd4568ce2c3b34bfa1d6f14cddf92ef (patch)
tree: 86648764e99579b852aaaf2bab96da8261e33acc /crypto/bn
parent: 8e4ec5b2e7c9a5d12c348b2a3430c4b51ab041c5 (diff)
bn/bn_add.c: address performance regression.
A performance regression was reported for EC key generation between 1.0.2 and 1.1.x [in GH#2891]. It naturally depends on the platform; values between 6 and 9% were observed.
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/4743)
Diffstat (limited to 'crypto/bn')
-rw-r--r-- crypto/bn/asm/x86_64-gcc.c 14
-rw-r--r-- crypto/bn/bn_add.c 6
2 files changed, 13 insertions, 7 deletions
diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
index 7a3b1a12b2..d38f337164 100644
--- a/crypto/bn/asm/x86_64-gcc.c
+++ b/crypto/bn/asm/x86_64-gcc.c
@@ -225,9 +225,10 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
" adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
- " loop 1b \n"
- " sbbq %0,%0 \n":"=&r" (ret), "+c"(n),
- "+r"(i)
+ " dec %1 \n"
+ " jnz 1b \n"
+ " sbbq %0,%0 \n"
+ :"=&r" (ret), "+c"(n), "+r"(i)
:"r"(rp), "r"(ap), "r"(bp)
:"cc", "memory");
@@ -251,9 +252,10 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
" sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
- " loop 1b \n"
- " sbbq %0,%0 \n":"=&r" (ret), "+c"(n),
- "+r"(i)
+ " dec %1 \n"
+ " jnz 1b \n"
+ " sbbq %0,%0 \n"
+ :"=&r" (ret), "+c"(n), "+r"(i)
:"r"(rp), "r"(ap), "r"(bp)
:"cc", "memory");
diff --git a/crypto/bn/bn_add.c b/crypto/bn/bn_add.c
index 6479650afb..7cdefa77a1 100644
--- a/crypto/bn/bn_add.c
+++ b/crypto/bn/bn_add.c
@@ -141,9 +141,13 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
borrow &= (t1 == 0);
}
+ while (max && *--rp == 0)
+ max--;
+
r->top = max;
r->neg = 0;
- bn_correct_top(r);
+ bn_pollute(r);
+
return 1;
}