summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xcrypto/bn/asm/x86_64-mont5.pl196
-rw-r--r--crypto/bn/bn_exp.c44
-rw-r--r--test/recipes/10-test_bn_data/bnmod.txt67
3 files changed, 93 insertions, 214 deletions
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index c03473f13a..1faea0bcf8 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -2103,193 +2103,6 @@ __bn_post4x_internal:
.size __bn_post4x_internal,.-__bn_post4x_internal
___
}
-{
-$code.=<<___;
-.globl bn_from_montgomery
-.type bn_from_montgomery,\@abi-omnipotent
-.align 32
-bn_from_montgomery:
-.cfi_startproc
- testl \$7,`($win64?"48(%rsp)":"%r9d")`
- jz bn_from_mont8x
- xor %eax,%eax
- ret
-.cfi_endproc
-.size bn_from_montgomery,.-bn_from_montgomery
-
-.type bn_from_mont8x,\@function,6
-.align 32
-bn_from_mont8x:
-.cfi_startproc
- .byte 0x67
- mov %rsp,%rax
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
-.Lfrom_prologue:
-
- shl \$3,${num}d # convert $num to bytes
- lea ($num,$num,2),%r10 # 3*$num in bytes
- neg $num
- mov ($n0),$n0 # *n0
-
- ##############################################################
- # Ensure that stack frame doesn't alias with $rptr+3*$num
- # modulo 4096, which covers ret[num], am[num] and n[num]
- # (see bn_exp.c). The stack is allocated to aligned with
- # bn_power5's frame, and as bn_from_montgomery happens to be
- # last operation, we use the opportunity to cleanse it.
- #
- lea -320(%rsp,$num,2),%r11
- mov %rsp,%rbp
- sub $rptr,%r11
- and \$4095,%r11
- cmp %r11,%r10
- jb .Lfrom_sp_alt
- sub %r11,%rbp # align with $aptr
- lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256)
- jmp .Lfrom_sp_done
-
-.align 32
-.Lfrom_sp_alt:
- lea 4096-320(,$num,2),%r10
- lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256)
- sub %r10,%r11
- mov \$0,%r10
- cmovc %r10,%r11
- sub %r11,%rbp
-.Lfrom_sp_done:
- and \$-64,%rbp
- mov %rsp,%r11
- sub %rbp,%r11
- and \$-4096,%r11
- lea (%rbp,%r11),%rsp
- mov (%rsp),%r10
- cmp %rbp,%rsp
- ja .Lfrom_page_walk
- jmp .Lfrom_page_walk_done
-
-.Lfrom_page_walk:
- lea -4096(%rsp),%rsp
- mov (%rsp),%r10
- cmp %rbp,%rsp
- ja .Lfrom_page_walk
-.Lfrom_page_walk_done:
-
- mov $num,%r10
- neg $num
-
- ##############################################################
- # Stack layout
- #
- # +0 saved $num, used in reduction section
- # +8 &t[2*$num], used in reduction section
- # +32 saved *n0
- # +40 saved %rsp
- # +48 t[2*$num]
- #
- mov $n0, 32(%rsp)
- mov %rax, 40(%rsp) # save original %rsp
-.cfi_cfa_expression %rsp+40,deref,+8
-.Lfrom_body:
- mov $num,%r11
- lea 48(%rsp),%rax
- pxor %xmm0,%xmm0
- jmp .Lmul_by_1
-
-.align 32
-.Lmul_by_1:
- movdqu ($aptr),%xmm1
- movdqu 16($aptr),%xmm2
- movdqu 32($aptr),%xmm3
- movdqa %xmm0,(%rax,$num)
- movdqu 48($aptr),%xmm4
- movdqa %xmm0,16(%rax,$num)
- .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 # lea 64($aptr),$aptr
- movdqa %xmm1,(%rax)
- movdqa %xmm0,32(%rax,$num)
- movdqa %xmm2,16(%rax)
- movdqa %xmm0,48(%rax,$num)
- movdqa %xmm3,32(%rax)
- movdqa %xmm4,48(%rax)
- lea 64(%rax),%rax
- sub \$64,%r11
- jnz .Lmul_by_1
-
- movq $rptr,%xmm1
- movq $nptr,%xmm2
- .byte 0x67
- mov $nptr,%rbp
- movq %r10, %xmm3 # -num
-___
-$code.=<<___ if ($addx);
- mov OPENSSL_ia32cap_P+8(%rip),%r11d
- and \$0x80108,%r11d
- cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
- jne .Lfrom_mont_nox
-
- lea (%rax,$num),$rptr
- call __bn_sqrx8x_reduction
- call __bn_postx4x_internal
-
- pxor %xmm0,%xmm0
- lea 48(%rsp),%rax
- jmp .Lfrom_mont_zero
-
-.align 32
-.Lfrom_mont_nox:
-___
-$code.=<<___;
- call __bn_sqr8x_reduction
- call __bn_post4x_internal
-
- pxor %xmm0,%xmm0
- lea 48(%rsp),%rax
- jmp .Lfrom_mont_zero
-
-.align 32
-.Lfrom_mont_zero:
- mov 40(%rsp),%rsi # restore %rsp
-.cfi_def_cfa %rsi,8
- movdqa %xmm0,16*0(%rax)
- movdqa %xmm0,16*1(%rax)
- movdqa %xmm0,16*2(%rax)
- movdqa %xmm0,16*3(%rax)
- lea 16*4(%rax),%rax
- sub \$32,$num
- jnz .Lfrom_mont_zero
-
- mov \$1,%rax
- mov -48(%rsi),%r15
-.cfi_restore %r15
- mov -40(%rsi),%r14
-.cfi_restore %r14
- mov -32(%rsi),%r13
-.cfi_restore %r13
- mov -24(%rsi),%r12
-.cfi_restore %r12
- mov -16(%rsi),%rbp
-.cfi_restore %rbp
- mov -8(%rsi),%rbx
-.cfi_restore %rbx
- lea (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lfrom_epilogue:
- ret
-.cfi_endproc
-.size bn_from_mont8x,.-bn_from_mont8x
-___
-}
}}}
if ($addx) {{{
@@ -3896,10 +3709,6 @@ mul_handler:
.rva .LSEH_begin_bn_power5
.rva .LSEH_end_bn_power5
.rva .LSEH_info_bn_power5
-
- .rva .LSEH_begin_bn_from_mont8x
- .rva .LSEH_end_bn_from_mont8x
- .rva .LSEH_info_bn_from_mont8x
___
$code.=<<___ if ($addx);
.rva .LSEH_begin_bn_mulx4x_mont_gather5
@@ -3931,11 +3740,6 @@ $code.=<<___;
.byte 9,0,0,0
.rva mul_handler
.rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # HandlerData[]
-.align 8
-.LSEH_info_bn_from_mont8x:
- .byte 9,0,0,0
- .rva mul_handler
- .rva .Lfrom_prologue,.Lfrom_body,.Lfrom_epilogue # HandlerData[]
___
$code.=<<___ if ($addx);
.align 8
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index b97b5f4b56..91d6b6b4bb 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -897,14 +897,21 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
#if defined(OPENSSL_BN_ASM_MONT5)
if (window == 5 && top > 1) {
/*
- * This optimization uses ideas from http://eprint.iacr.org/2011/239,
- * specifically optimization of cache-timing attack countermeasures
- * and pre-computation optimization.
- */
-
- /*
- * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
- * 512-bit RSA is hardly relevant, we omit it to spare size...
+ * This optimization uses ideas from https://eprint.iacr.org/2011/239,
+ * specifically optimization of cache-timing attack countermeasures,
+ * pre-computation optimization, and Almost Montgomery Multiplication.
+ *
+ * The paper discusses a 4-bit window to optimize 512-bit modular
+ * exponentiation, used in RSA-1024 with CRT, but RSA-1024 is no longer
+ * important.
+ *
+ * |bn_mul_mont_gather5| and |bn_power5| implement the "almost"
+ * reduction variant, so the values here may not be fully reduced.
+ * They are bounded by R (i.e. they fit in |top| words), not |m|.
+ * Additionally, we pass these "almost" reduced inputs into
+ * |bn_mul_mont|, which implements the normal reduction variant.
+ * Given those inputs, |bn_mul_mont| may not give reduced
+ * output, but it will still produce "almost" reduced output.
*/
void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
const void *table, const BN_ULONG *np,
@@ -916,9 +923,6 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const void *table, const BN_ULONG *np,
const BN_ULONG *n0, int num, int power);
int bn_get_bits5(const BN_ULONG *ap, int off);
- int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
- const BN_ULONG *not_used, const BN_ULONG *np,
- const BN_ULONG *n0, int num);
BN_ULONG *n0 = mont->n0, *np;
@@ -1007,14 +1011,18 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
}
- ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
tmp.top = top;
- bn_correct_top(&tmp);
- if (ret) {
- if (!BN_copy(rr, &tmp))
- ret = 0;
- goto err; /* non-zero ret means it's not error */
- }
+ /*
+ * The result is now in |tmp| in Montgomery form, but it may not be
+ * fully reduced. This is within bounds for |BN_from_montgomery|
+ * (tmp < R <= m*R) so it will, when converting from Montgomery form,
+ * produce a fully reduced result.
+ *
+ * This differs from Figure 2 of the paper, which uses AMM(h, 1) to
+ * convert from Montgomery form with unreduced output, followed by an
+ * extra reduction step. In the paper's terminology, we replace
+ * steps 9 and 10 with MM(h, 1).
+ */
} else
#endif
{
diff --git a/test/recipes/10-test_bn_data/bnmod.txt b/test/recipes/10-test_bn_data/bnmod.txt
index 6b040d395c..cc941047f5 100644
--- a/test/recipes/10-test_bn_data/bnmod.txt
+++ b/test/recipes/10-test_bn_data/bnmod.txt
@@ -2474,6 +2474,73 @@ A = 9025e6183706105e948b1b0edf922f9011b9e11887d70adb00b26f272b9e76a38f3099084d9c
E = d7e6df5d755284929b986cd9b61c9c2c8843f24c711fbdbae1a468edcae159400943725570726cdc92b3ea94f9f206729516fdda83e31d815b0c7720e7598a91d992273e3bd8ac413b441d8f1dfe5aa7c3bf3ef573adc38292676217467731e6cf440a59611b8110af88d3e62f60209b513b01fbb69a097458ad02096b5e38f0
M = e4e784aa1fa88625a43ba0185a153a929663920be7fe674a4d33c943d3b898cff051482e7050a070cede53be5e89f31515772c7aea637576f99f82708f89d9e244f6ad3a24a02cbe5c0ff7bcf2dad5491f53db7c3f2698a7c41b44f086652f17bb05fe4c5c0a92433c34086b49d7e1825b28bab6c5a9bd0bc95b53d659afa0d7
+# The following inputs trigger an edge case between Montgomery reduction and the
+# "almost" reduction variant from https://eprint.iacr.org/2011/239
+ModExp = 00
+A = 19c7bc9b97c6083cd7b8d1cd001452c9b67983247169c6532047eb7fc8933014dbf69fee7a358769f1429802c8ea89d4f9ca6ba6f368fbdb1fa5717b4a00
+E = bbc7e09147408571050e8d0c634682c5863b7e8a573626648902cff12e590c74f5a23ecce39732266bc15b8afbd6c48a48c83fbdc33947515cc0b6e4fb98ae2cd730e58f951fec8be7e2e3c74f4506c7fd7e29bdb28675fe8a59789ab1148e931a2ebd2d36f78bc241682a3d8083d8ff538858cd240c5a693936e5a391dc9d77118062a3f868c058440a4192267faaaba91112f45eee5842060febbf9353a6d3e7f7996573209136a5506062ea23d74067f08c613f3ff74bade25f8c3368e6dba84eae672eac11be1137fc514924fcab8c82e46d092bd047dcbadaa48c67a096ec1a04f392a8511e6acbad9954949b703e71ff837337b594055ae6f3c0fc154447a687c9ac8a2cdfd64a2e680c6ff21254735af7f5eb6b43f0bce86bda55a04143a991711081435ed4f4a89b23fc3a588022b7a8543db4bf5c8ac93603367c750ff2191f59a716340fab49bb7544759c8d846465eec1438e76395f73e7b5e945f31f1b87fefa854a0d208846eaab5fa27144fd039911608bab0eaee80f1d3553dfa2d9ba95268479b97a059613660df5ad79796e0b272244aca90ccc13449ec15c206eeed7b60405a4c5cfdf5da5d136c27fa9385d810ad198dfe794ffce9955e10520efea1e2eb794e379401b9affd863b9566ce941c4726755574a1b1946acf0090bfb93f37dd55f524485bbba7fa84b53addfde01ae1de9c57fe50d4b708dd0fa45d02af398b3d05c6d17f84c11e9aacdbe0b146cad6ddbd877731e26a17f3ebed459560d12ed7a6abc2ea6fe922e69d2622ef11b6b245b9ba8f0940faaa671a4beb727be5393a94dafaeff7221b29183e7418f4c5bb95a6a586c93dbc8ce0236d9dbe26c40513611b4141fed66599adbfb20fc30e09a4815e4159f65a6708f34584a7a77b3843941cd61a6917dcc3d07a3dfb5a2cb108bacea7e782f2111b4d22ecaaeff469ecd0da371df1ac5e9bf6df6ccba2d3a9f393d597499eaca2c206bfb81c3426c5fe45bcf16e38aecd246a319a1f37041c638b75a4839517e43a6d01bee7d85eaeedbce13cd15699d3ee42c7414cfed576590e4fb6ddb6edd3e1957efaf039bfe8b9dc75869b1f93abff15cae8b234161070fa3542303c2ed35ca66083d0ac299b81182317a2a3985269602b1fa1e822fcbda48e686d80b273f06b0a702ca7f42cbbbd2fc2b3601422c8bff6302eda3c61b293049636002649b16f3c1f0be2b6599d66493a4497cd795b10a2ab8220fafad24fa90e1bfcf39ecce337e705695c7a224bf9f445a287d6aab221341659ca4be7861f6ac4c9d33dac811e6
+M = 519b6e57781d40d897ec0c1b648d195526726b295438c9a70928ac25979563d72db91c8c42298a33b572edecdf40904c68a23337aa5341b56e92b0da5041
+
+# To fully exercise BN_mod_exp_mont_consttime codepaths, we generate inputs at
+# different bitwidths. rsaz-avx2.pl only runs at 1024-bit moduli, and
+# x86_64-mont5.pl unrolls 8 64-bit words at a time, so we want to capture both
+# multiples of 512- and non-multiples. Also include moduli that are not quite a
+# full word.
+# 512-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 8f42c9e9e351ba9b32ab0cf69da43f4acf7028d19cff6e5059ea0e3fcc97c97f36a31470044737d4c0c933ac441ecb29e32c81401523afdac7de9c3fd8493c97
+
+# 1024-bit
+# TODO(davidben): This test breaks the RSAZ implementation. Fix it and enable
+# this test.
+# ModExp = 00
+# A = 800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002f
+# E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+# M = 9da8dc26fdf4d2e49833b240ee552beb7a6e251caa91bfb5d6cafaf8ed9461877fda8f6ac299036d35806bc1ae7872e54eaac1ec6bee6d02c6621a9cf8883b3abc33c49b3e601203e0e86ef8f0562412cc689ee2670704583909ca6d7774c9f9f9f4d77d37fedef9cb51d207cb629ec02fa03b526fd6594bfa8f2da71238a0b7
+
+# 1025-bit
+ModExp = 00
+A = 010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 010223abfdda02e84e11cec8ee7fc784fa135733935f7b9054bb70f1f06d234d76dcf3beed55c7f39e955dc1fef2b65009240fd02f7a1b27a78fc2867144bf666efb929856db9f671c356c4c67a068a70fe83c52eebda03668872fd270d0794f0771d217fb6b93b12529a944f7f0496a9158757c55b8ee14f803f1d2d887e2f561
+
+# 1088-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003d
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = e91f6d748773cb212a23aa348125615123b1800c9ea222c9374c757702ae4140fa333790ed8f6bf60a1d7dda65c2767cc5f33e32e333d19fbfb5a2b85795757c9ca070268763a618e9d33873d28a89bf88acd209efbb15b80cd33b92a6b3a682e1c91782fc24fb86ddff4f809219c977b54b99359094bbcc51dfe17b992ab24b74a17950ad754281
+
+# 1472-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001d
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = a8770362f4bfe4fc1ab0e52705c11a9b6ba235d5a5f22197c2d68e27ed18426ede3316af706aa79bcf943dbd51459eb15ae1f9386216b3f3a847f94440a65b97659bc5ba2adb67173714ecaa886c0b926d7a64ea45576f9d2171784ce7e801724d5b0abfd93357d538ea7ad3ad89a74f4660bdb66dfb5f684dcf00402e3cdf0ab58afd867c943c8f47b80268a789456aa7c50a619dd2f9f5e3f74b5d810f0f8dadbf4ad5b917cdcb156c4c132611c8b3b035118a9e03551f
+
+# 1536-bit
+ModExp = 00
+A = 800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 878cd000778f927b2f1a4b8bac86efd282079a7ac0d25e09ffd2f72fbc282e65e233929d2457c7b1d63c56fb706cdfa04fb87e654c578c98d7cf59c2293dc5641086b68db4867105981daaf147a0ee91f6932ef064deae4142c19e58d50c0686f0eaf778be72450f89a98b4680bbc5ffab942195e44dd20616150fd1deca058068ca31ab2f861e99082588f17a2025bf5e536150142fca3187a259c791fc721430f24d7e338f8dc02e693a7e694d42775e80f7f7c03600b6ae86b4aba2b0e991
+
+# 2048-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 9f40a7535c561208ecb38e17c9336d9bc8484d335901b2cd42759cf03689227f6992f10cb6b586d767fbcdf30e9d82a0eda60d2694ccd0194fa96b50b56e0cdeec1951ea9e58b07e334a7f108841a0ab28256917fecea561388807ed124a17386a7a7b501f9cbf3404247a76948d0561e48137d3f9669e36f175731796aeaf78851f7d866917f661422186a4814aa35c066b5a90b9cfc918af769a9f0bb30c12581027df64ac328a0f07dbd20adb704479f6d0f233a131828c71bab19c3c34795ea4fb68aa632c6f688e5b3b84413c9031d8dc251003a590dec0dd09bfa6109ed4570701439b6f265b84ac2170c317357b5fbe5535e2bbdd93c1aacfdaa28c85
+
+# 3072-bit
+ModExp = 00
+A = 80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001d
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = c23dfd244a58a668d514498a705c8f8f548311b24f0f98b023d2d33632534c2ae948d6641d41fd7a29fbbd594bfc7fdd6e8162cbb3056af3075347b6fc8876458d33a9d0ffdbcdf482de0c73d1310fd8fa8f9f92dd0dbb0e2034e98a30f6c11b482f7476c5b593f673a322b1130daa4314e9074270dce1076436f0d56cf196afcbb235a9a7b3ac85b9062e85fc0e63a12c468c787019f6805f9faab64fc6a0babc80785d88740243f11366bffb40ccbe8b2bb7a99a2c8238a6f656bb0117d7b2602aa400f4d77de5f93c673f13264ca70de949454e3e3f261993c1aa427e8ef4f507af744f71f3b4aaf3c981d44cc1bfb1eb1151168762b242b740573df698e500d99612e17dc760f7b3bf7c235e39e81ad7edbe6c07dbb8b139745bb394d61cb799bcafec5de074932b0b2d74797e779ac8d81f63a2b2e9baa229dfaa7f90f34ffade1d2ad022a3407d35eb2d7477c6ae8ad100f6e95c05b4f947c1fabfb11a17add384e6b4cd3a02fd9b43f46805c6c74e366b74aa3b766be7a5fbbd67fa81
+
+# 4096-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 8030411ecbddcb0fe4e76fd6b5bf542e8b015d1610cf96130ded12ba2cda0641bd9692080f218ea8b0d751845b519d95b843542ec8d2a07f1f93afe3189b69a4f35c983011c7f7928c3df458cc3eae85c36e6934a4b1bc0a67c8a521de336642c49e10a7ffa8d0af911aacc19e3900449161940f139220e099a150dcaf0ff96ffff6e726c1ac139969103cf6a828ac3adf0301506aa02787b4f570d5dde53a34acab8fec6fa94760abf16ee99954371ad65a6e899daab87b95811d069404991de9abe064ebbddf886e970f10d260c899dda940191a82d4c8bd36651363aff5493f4f59e700007dcadf37ebea7fcfd7600d16617ffea0d9ae659446d851d93c564e50e558f734c894d735fa273770703dab62844d9f01badf632f3d14a00f739c022c9be95f54e9cea46ec6da7cb11f4602e06962951c48204726b7f120ddbd0eb3566dc8d1e6f195a9196e96db33322d088b43aecffe9b4df182dd016aca0bd14f1c56cd1a18b89165c027029862b09ffd78e92ab614349c4fd67f49cb12cd33d0728930d0538bda57acef1365a73cc8fbac7d463b9e3c3bae0bb6224b080cdb8b5cd47d546d53111fdc22b7ff679bcfe27192920ee163b2be337d8cccc93b4de7d2d31934b9c0e97af291dcc1135b4a473bd37114eec3ba75c411887b57799d3188e7353f33a4d31735ebfc9fcfc044985148dd96da3876a5ab7ea7a404b411
# These test vectors satisfy (ModSqrt * ModSqrt) mod P = A mod P with P a prime.
# ModSqrt is in [0, (P-1)/2].