summaryrefslogtreecommitdiffstats
path: root/crypto/rsa
diff options
context:
space:
mode:
author: Andrey Matyukov <andrey.matyukov@intel.com> 2020-12-08 22:53:39 +0300
committer: Matt Caswell <matt@openssl.org> 2021-03-22 09:48:00 +0000
commit: c781eb1c63c243cb64dbe3066a43dc172aaab3b8 (patch)
tree: 36adf4600064afddfb87e16bee0736c6427ca523 /crypto/rsa
parent: db89d8f04bb131bbf0e2b87eb9a1515076c893d3 (diff)
Dual 1024-bit exponentiation optimization for Intel IceLake CPU
with AVX512_IFMA + AVX512_VL instructions, primarily for RSA CRT private key operations. It uses 256-bit registers to avoid CPU frequency scaling issues. The performance speedup for RSA2k signature on ICL is ~2x. Reviewed-by: Paul Dale <pauli@openssl.org> Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/13750)
Diffstat (limited to 'crypto/rsa')
-rw-r--r-- crypto/rsa/rsa_ossl.c | 17
1 files changed, 11 insertions, 6 deletions
diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c
index 9f98c037c8..1817392e76 100644
--- a/crypto/rsa/rsa_ossl.c
+++ b/crypto/rsa/rsa_ossl.c
@@ -688,15 +688,20 @@ static int rsa_ossl_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
if (/* m1 = I mod q */
!bn_from_mont_fixed_top(m1, I, rsa->_method_mod_q, ctx)
|| !bn_to_mont_fixed_top(m1, m1, rsa->_method_mod_q, ctx)
- /* m1 = m1^dmq1 mod q */
- || !BN_mod_exp_mont_consttime(m1, m1, rsa->dmq1, rsa->q, ctx,
- rsa->_method_mod_q)
/* r1 = I mod p */
|| !bn_from_mont_fixed_top(r1, I, rsa->_method_mod_p, ctx)
|| !bn_to_mont_fixed_top(r1, r1, rsa->_method_mod_p, ctx)
- /* r1 = r1^dmp1 mod p */
- || !BN_mod_exp_mont_consttime(r1, r1, rsa->dmp1, rsa->p, ctx,
- rsa->_method_mod_p)
+ /*
+ * Use parallel exponentiations optimization if possible,
+ * otherwise fall back to two sequential exponentiations:
+ * m1 = m1^dmq1 mod q
+ * r1 = r1^dmp1 mod p
+ */
+ || !BN_mod_exp_mont_consttime_x2(m1, m1, rsa->dmq1, rsa->q,
+ rsa->_method_mod_q,
+ r1, r1, rsa->dmp1, rsa->p,
+ rsa->_method_mod_p,
+ ctx)
/* r1 = (r1 - m1) mod p */
/*
* bn_mod_sub_fixed_top is not regular modular subtraction,