From c781eb1c63c243cb64dbe3066a43dc172aaab3b8 Mon Sep 17 00:00:00 2001
From: Andrey Matyukov
Date: Tue, 8 Dec 2020 22:53:39 +0300
Subject: Dual 1024-bit exponentiation optimization for Intel IceLake CPU with AVX512_IFMA + AVX512_VL instructions, primarily for RSA CRT private key operations. It uses 256-bit registers to avoid CPU frequency scaling issues. The performance speedup for RSA2k signature on ICL is ~2x.

Reviewed-by: Paul Dale
Reviewed-by: Matt Caswell
(Merged from https://github.com/openssl/openssl/pull/13750)
---
 test/exptest.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 117 insertions(+), 1 deletion(-)

(limited to 'test/exptest.c')

diff --git a/test/exptest.c b/test/exptest.c
index 2b2d3fd549..a1ac44e909 100644
--- a/test/exptest.c
+++ b/test/exptest.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the Apache License 2.0 (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -198,9 +198,125 @@ static int test_mod_exp(int round)
     return ret;
 }
 
+/*
+ * Cross-check BN_mod_exp_mont_consttime_x2() against BN_mod_exp_simple().
+ *
+ * Two random (base, exponent, odd modulus) triples are generated.  Each
+ * exponentiation is computed separately with BN_mod_exp_simple() and then
+ * both are computed by one BN_mod_exp_mont_consttime_x2() call; the test
+ * passes only when both pairs of results are equal.
+ *
+ * idx is the iteration index handed in by the test framework.
+ * Returns 1 on success and 0 on failure.
+ */
+static int test_mod_exp_x2(int idx)
+{
+    BN_CTX *ctx = NULL;
+    int ret = 0;
+    BIGNUM *r_mont_const_x2_1 = NULL;
+    BIGNUM *r_mont_const_x2_2 = NULL;
+    BIGNUM *r_simple1 = NULL;
+    BIGNUM *r_simple2 = NULL;
+    BIGNUM *a1 = NULL;
+    BIGNUM *b1 = NULL;
+    BIGNUM *m1 = NULL;
+    BIGNUM *a2 = NULL;
+    BIGNUM *b2 = NULL;
+    BIGNUM *m2 = NULL;
+    int factor_size = 0;
+
+    /*
+     * Currently only 1024-bit factor size is supported.
+     */
+    if (idx <= 100)
+        factor_size = 1024;
+
+    if (!TEST_ptr(ctx = BN_CTX_new()))
+        goto err;
+
+    if (!TEST_ptr(r_mont_const_x2_1 = BN_new())
+        || !TEST_ptr(r_mont_const_x2_2 = BN_new())
+        || !TEST_ptr(r_simple1 = BN_new())
+        || !TEST_ptr(r_simple2 = BN_new())
+        || !TEST_ptr(a1 = BN_new())
+        || !TEST_ptr(b1 = BN_new())
+        || !TEST_ptr(m1 = BN_new())
+        || !TEST_ptr(a2 = BN_new())
+        || !TEST_ptr(b2 = BN_new())
+        || !TEST_ptr(m2 = BN_new()))
+        goto err;
+
+    /*
+     * A failed BN_rand() would leave an operand unpopulated and make the
+     * comparison below meaningless, so every call is checked.
+     */
+    if (!TEST_true(BN_rand(a1, factor_size,
+                           BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+        || !TEST_true(BN_rand(b1, factor_size,
+                              BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+        || !TEST_true(BN_rand(m1, factor_size,
+                              BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
+        || !TEST_true(BN_rand(a2, factor_size,
+                              BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+        || !TEST_true(BN_rand(b2, factor_size,
+                              BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+        || !TEST_true(BN_rand(m2, factor_size,
+                              BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD)))
+        goto err;
+
+    /* Reduce each base and exponent modulo its modulus, then exponentiate. */
+    if (!TEST_true(BN_mod(a1, a1, m1, ctx))
+        || !TEST_true(BN_mod(b1, b1, m1, ctx))
+        || !TEST_true(BN_mod(a2, a2, m2, ctx))
+        || !TEST_true(BN_mod(b2, b2, m2, ctx))
+        || !TEST_true(BN_mod_exp_simple(r_simple1, a1, b1, m1, ctx))
+        || !TEST_true(BN_mod_exp_simple(r_simple2, a2, b2, m2, ctx))
+        || !TEST_true(BN_mod_exp_mont_consttime_x2(r_mont_const_x2_1, a1, b1, m1, NULL,
+                                                   r_mont_const_x2_2, a2, b2, m2, NULL,
+                                                   ctx)))
+        goto err;
+
+    if (!TEST_BN_eq(r_simple1, r_mont_const_x2_1)
+        || !TEST_BN_eq(r_simple2, r_mont_const_x2_2)) {
+        if (BN_cmp(r_simple1, r_mont_const_x2_1) != 0)
+            TEST_info("simple and mont const time x2 (#1) results differ");
+        if (BN_cmp(r_simple2, r_mont_const_x2_2) != 0)
+            TEST_info("simple and mont const time x2 (#2) results differ");
+
+        BN_print_var(a1);
+        BN_print_var(b1);
+        BN_print_var(m1);
+        BN_print_var(a2);
+        BN_print_var(b2);
+        BN_print_var(m2);
+        BN_print_var(r_simple1);
+        BN_print_var(r_simple2);
+        BN_print_var(r_mont_const_x2_1);
+        BN_print_var(r_mont_const_x2_2);
+        goto err;
+    }
+
+    ret = 1;
+ err:
+    BN_free(r_mont_const_x2_1);
+    BN_free(r_mont_const_x2_2);
+    BN_free(r_simple1);
+    BN_free(r_simple2);
+    BN_free(a1);
+    BN_free(b1);
+    BN_free(m1);
+    BN_free(a2);
+    BN_free(b2);
+    BN_free(m2);
+    BN_CTX_free(ctx);
+
+    return ret;
+}
+
 int setup_tests(void)
 {
     ADD_TEST(test_mod_exp_zero);
     ADD_ALL_TESTS(test_mod_exp, 200);
+    ADD_ALL_TESTS(test_mod_exp_x2, 100);
     return 1;
 }
--
cgit v1.2.3