summaryrefslogtreecommitdiffstats
path: root/crypto/bn/bn_exp.c
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/bn_exp.c')
-rw-r--r--crypto/bn/bn_exp.c44
1 files changed, 26 insertions, 18 deletions
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 89ce77fbb7..c7b62232f3 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -897,14 +897,21 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
#if defined(OPENSSL_BN_ASM_MONT5)
if (window == 5 && top > 1) {
/*
- * This optimization uses ideas from http://eprint.iacr.org/2011/239,
- * specifically optimization of cache-timing attack countermeasures
- * and pre-computation optimization.
- */
-
- /*
- * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
- * 512-bit RSA is hardly relevant, we omit it to spare size...
+ * This optimization uses ideas from https://eprint.iacr.org/2011/239,
+ * specifically optimization of cache-timing attack countermeasures,
+ * pre-computation optimization, and Almost Montgomery Multiplication.
+ *
+ * The paper discusses a 4-bit window to optimize 512-bit modular
+ * exponentiation, used in RSA-1024 with CRT, but RSA-1024 is no longer
+ * important.
+ *
+ * |bn_mul_mont_gather5| and |bn_power5| implement the "almost"
+ * reduction variant, so the values here may not be fully reduced.
+ * They are bounded by R (i.e. they fit in |top| words), not |m|.
+ * Additionally, we pass these "almost" reduced inputs into
+ * |bn_mul_mont|, which implements the normal reduction variant.
+ * Given those inputs, |bn_mul_mont| may not give reduced
+ * output, but it will still produce "almost" reduced output.
*/
void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
const void *table, const BN_ULONG *np,
@@ -916,9 +923,6 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const void *table, const BN_ULONG *np,
const BN_ULONG *n0, int num, int power);
int bn_get_bits5(const BN_ULONG *ap, int off);
- int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
- const BN_ULONG *not_used, const BN_ULONG *np,
- const BN_ULONG *n0, int num);
BN_ULONG *n0 = mont->n0, *np;
@@ -1007,14 +1011,18 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
}
- ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
tmp.top = top;
- bn_correct_top(&tmp);
- if (ret) {
- if (!BN_copy(rr, &tmp))
- ret = 0;
- goto err; /* non-zero ret means it's not error */
- }
+ /*
+ * The result is now in |tmp| in Montgomery form, but it may not be
+ * fully reduced. This is within bounds for |BN_from_montgomery|
+ * (tmp < R <= m*R) so it will, when converting from Montgomery form,
+ * produce a fully reduced result.
+ *
+ * This differs from Figure 2 of the paper, which uses AMM(h, 1) to
+ * convert from Montgomery form with unreduced output, followed by an
+ * extra reduction step. In the paper's terminology, we replace
+ * steps 9 and 10 with MM(h, 1).
+ */
} else
#endif
{