diff options
author | Andy Polyakov <appro@openssl.org> | 2016-01-25 23:41:01 +0100 |
---|---|---|
committer | Matt Caswell <matt@openssl.org> | 2016-03-01 13:34:22 +0000 |
commit | 8fc8f486f7fa098c9fbb6a6ae399e3c6856e0d87 (patch) | |
tree | b6f93d1b9d5e4b3f4bb27907783449a34ea967d8 /crypto/bn/bn_exp.c | |
parent | d6d422e1ec48fac1c6194ab672e320281a214a32 (diff) |
crypto/bn/x86_64-mont5.pl: constant-time gather procedure.
At the same time remove miniscule bias in final subtraction.
Performance penalty varies from platform to platform, and even with
key length. For rsa2048 sign it was observed to be 4% for Sandy
Bridge and 7% on Broadwell.
CVE-2016-0702
Reviewed-by: Richard Levitte <levitte@openssl.org>
Reviewed-by: Rich Salz <rsalz@openssl.org>
Diffstat (limited to 'crypto/bn/bn_exp.c')
-rw-r--r-- | crypto/bn/bn_exp.c | 28 |
1 files changed, 14 insertions, 14 deletions
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c index 07f9288bb5..c5e579c77c 100644 --- a/crypto/bn/bn_exp.c +++ b/crypto/bn/bn_exp.c @@ -787,8 +787,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (window >= 5) { window = 5; /* ~5% improvement for RSA2048 sign, and even * for RSA4096 */ - if ((top & 7) == 0) - powerbufLen += 2 * top * sizeof(m->d[0]); + /* reserve space for mont->N.d[] copy */ + powerbufLen += top * sizeof(mont->N.d[0]); } #endif (void)0; @@ -1008,7 +1008,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BN_ULONG *not_used, const BN_ULONG *np, const BN_ULONG *n0, int num); - BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2; + BN_ULONG *n0 = mont->n0, *np; /* * BN_to_montgomery can contaminate words above .top [in @@ -1019,11 +1019,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, for (i = tmp.top; i < top; i++) tmp.d[i] = 0; - if (top & 7) - np2 = np; - else - for (np2 = am.d + top, i = 0; i < top; i++) - np2[2 * i] = np[i]; + /* + * copy mont->N.d[] to improve cache locality + */ + for (np = am.d + top, i = 0; i < top; i++) + np[i] = mont->N.d[i]; bn_scatter5(tmp.d, top, powerbuf, 0); bn_scatter5(am.d, am.top, powerbuf, 1); @@ -1033,7 +1033,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, # if 0 for (i = 3; i < 32; i++) { /* Calculate a^i = a^(i-1) * a */ - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); } # else @@ -1044,7 +1044,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } for (i = 3; i < 8; i += 2) { int j; - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); for (j = 2 * i; j < 32; j *= 2) { bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); @@ -1052,13 +1052,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } } for (; i < 16; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_scatter5(tmp.d, top, powerbuf, 2 * i); } for (; i < 32; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); } # endif @@ -1087,11 +1087,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, while (bits >= 0) { wvalue = bn_get_bits5(p->d, bits - 4); bits -= 5; - bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue); + bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); } } - ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top); + ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top); tmp.top = top; bn_correct_top(&tmp); if (ret) { |