From 4209ce68d8fe8b1506494efa03d378d05baf9ff8 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Mon, 13 Feb 2023 18:05:18 +0100 Subject: Revert "Fix Timing Oracle in RSA decryption" This reverts commit b1892d21f8f0435deb0250f24a97915dc641c807. Except for the moving derive_kdk to a separate function. Reviewed-by: Paul Dale Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/20281) --- crypto/bn/bn_blind.c | 14 ++ crypto/bn/bn_local.h | 14 -- crypto/bn/build.info | 2 +- crypto/bn/rsa_sup_mul.c | 635 ------------------------------------------------ crypto/rsa/rsa_ossl.c | 21 +- include/crypto/bn.h | 6 - 6 files changed, 22 insertions(+), 670 deletions(-) delete mode 100644 crypto/bn/rsa_sup_mul.c diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c index 82821a0442..6ea54f00a9 100644 --- a/crypto/bn/bn_blind.c +++ b/crypto/bn/bn_blind.c @@ -13,6 +13,20 @@ #define BN_BLINDING_COUNTER 32 +struct bn_blinding_st { + BIGNUM *A; + BIGNUM *Ai; + BIGNUM *e; + BIGNUM *mod; /* just a reference */ + CRYPTO_THREAD_ID tid; + int counter; + unsigned long flags; + BN_MONT_CTX *m_ctx; + int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); + CRYPTO_RWLOCK *lock; +}; + BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) { BN_BLINDING *ret = NULL; diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h index 95c59517d0..322b0af89b 100644 --- a/crypto/bn/bn_local.h +++ b/crypto/bn/bn_local.h @@ -293,20 +293,6 @@ struct bn_gencb_st { } cb; }; -struct bn_blinding_st { - BIGNUM *A; - BIGNUM *Ai; - BIGNUM *e; - BIGNUM *mod; /* just a reference */ - CRYPTO_THREAD_ID tid; - int counter; - unsigned long flags; - BN_MONT_CTX *m_ctx; - int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); - CRYPTO_RWLOCK *lock; -}; - /*- * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions * diff --git a/crypto/bn/build.info b/crypto/bn/build.info index 77ec6d4721..2f30681b3b 100644 --- a/crypto/bn/build.info +++ b/crypto/bn/build.info @@ -105,7 +105,7 @@ $COMMON=bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \ bn_mod.c bn_conv.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_sqr.c \ bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ - bn_intern.c bn_dh.c bn_rsa_fips186_4.c bn_const.c rsa_sup_mul.c + bn_intern.c bn_dh.c bn_rsa_fips186_4.c bn_const.c SOURCE[../../libcrypto]=$COMMON $BNASM bn_print.c bn_err.c bn_srp.c DEFINE[../../libcrypto]=$BNDEF IF[{- !$disabled{'deprecated-0.9.8'} -}] diff --git a/crypto/bn/rsa_sup_mul.c b/crypto/bn/rsa_sup_mul.c deleted file mode 100644 index 2657b6b36e..0000000000 --- a/crypto/bn/rsa_sup_mul.c +++ /dev/null @@ -1,635 +0,0 @@ -/* - * Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved. - * - * Licensed under the Apache License 2.0 (the "License"). You may not use - * this file except in compliance with the License. You can obtain a copy - * in the file LICENSE in the source distribution or at - * https://www.openssl.org/source/license.html - */ - -#include -#include -#include -#include -#include -#include -#include -#include "internal/endian.h" -#include "internal/numbers.h" -#include "internal/constant_time.h" -#include "bn_local.h" - -# if BN_BYTES == 8 -typedef uint64_t limb_t; -# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16 -typedef uint128_t limb2_t; -# define HAVE_LIMB2_T -# endif -# define LIMB_BIT_SIZE 64 -# define LIMB_BYTE_SIZE 8 -# elif BN_BYTES == 4 -typedef uint32_t limb_t; -typedef uint64_t limb2_t; -# define LIMB_BIT_SIZE 32 -# define LIMB_BYTE_SIZE 4 -# define HAVE_LIMB2_T -# else -# error "Not supported" -# endif - -/* - * For multiplication we're using schoolbook multiplication, - * so if we have two numbers, each with 6 "digits" (words) - * the multiplication is calculated as follows: - * A B C D E F - * x I J K L M N - * -------------- - * N*F - * N*E - * N*D - * N*C - * N*B - * N*A - * M*F - * M*E - * M*D - * M*C - * M*B - * M*A - * L*F - * L*E - * L*D - * L*C - * L*B - * L*A - * K*F - * K*E - * K*D - * K*C - * K*B - * K*A - * J*F - * J*E - * J*D - * J*C - * J*B - * J*A - * I*F - * I*E - * I*D - * I*C - * I*B - * + I*A - * ========================== - * N*B N*D N*F - * + N*A N*C N*E - * + M*B M*D M*F - * + M*A M*C M*E - * + L*B L*D L*F - * + L*A L*C L*E - * + K*B K*D K*F - * + K*A K*C K*E - * + J*B J*D J*F - * + J*A J*C J*E - * + I*B I*D I*F - * + I*A I*C I*E - * - * 1+1 1+3 1+5 - * 1+0 1+2 1+4 - * 0+1 0+3 0+5 - * 0+0 0+2 0+4 - * - * 0 1 2 3 4 5 6 - * which requires n^2 multiplications and 2n full length additions - * as we can keep every other result of limb multiplication in two separate - * limbs - */ - -#if defined HAVE_LIMB2_T -static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b) -{ - limb2_t t; - /* - * this is idiomatic code to tell compiler to use the native mul - * those three lines will actually compile to single instruction - */ - - t = (limb2_t)a * b; - *hi = t >> LIMB_BIT_SIZE; - *lo = (limb_t)t; -} -#elif (BN_BYTES == 8) && (defined _MSC_VER) -# if defined(_M_X64) -/* - * on x86_64 (x64) we can use the _umul128 intrinsic to get one `mul` - * instruction to get both high and low 64 bits of the multiplication. - * https://learn.microsoft.com/en-us/cpp/intrinsics/umul128?view=msvc-140 - */ -#include -#pragma intrinsic(_umul128) -static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b) -{ - *lo = _umul128(a, b, hi); -} -# elif defined(_M_ARM64) || defined (_M_IA64) -/* - * We can't use the __umulh() on x86_64 as then msvc generates two `mul` - * instructions; so use this more portable intrinsic on platforms that - * don't support _umul128 (like aarch64 (ARM64) or ia64) - * https://learn.microsoft.com/en-us/cpp/intrinsics/umulh?view=msvc-140 - */ -#include -static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b) -{ - *lo = a * b; - *hi = __umulh(a, b); -} -# else -# error Only x64, ARM64 and IA64 supported. -# endif /* defined(_M_X64) */ -#else -/* - * if the compiler doesn't have either a 128bit data type nor a "return - * high 64 bits of multiplication" - */ -static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b) -{ - limb_t a_low = (limb_t)(uint32_t)a; - limb_t a_hi = a >> 32; - limb_t b_low = (limb_t)(uint32_t)b; - limb_t b_hi = b >> 32; - - limb_t p0 = a_low * b_low; - limb_t p1 = a_low * b_hi; - limb_t p2 = a_hi * b_low; - limb_t p3 = a_hi * b_hi; - - uint32_t cy = (uint32_t)(((p0 >> 32) + (uint32_t)p1 + (uint32_t)p2) >> 32); - - *lo = p0 + (p1 << 32) + (p2 << 32); - *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy; -} -#endif - -/* add two limbs with carry in, return carry out */ -static ossl_inline limb_t _add_limb(limb_t *ret, limb_t a, limb_t b, limb_t carry) -{ - limb_t carry1, carry2, t; - /* - * `c = a + b; if (c < a)` is idiomatic code that makes compilers - * use add with carry on assembly level - */ - - *ret = a + carry; - if (*ret < a) - carry1 = 1; - else - carry1 = 0; - - t = *ret; - *ret = t + b; - if (*ret < t) - carry2 = 1; - else - carry2 = 0; - - return carry1 + carry2; -} - -/* - * add two numbers of the same size, return overflow - * - * add a to b, place result in ret; all arrays need to be n limbs long - * return overflow from addition (0 or 1) - */ -static ossl_inline limb_t add(limb_t *ret, limb_t *a, limb_t *b, size_t n) -{ - limb_t c = 0; - ossl_ssize_t i; - - for(i = n - 1; i > -1; i--) - c = _add_limb(&ret[i], a[i], b[i], c); - - return c; -} - -/* - * return number of limbs necessary for temporary values - * when multiplying numbers n limbs large - */ -static ossl_inline size_t mul_limb_numb(size_t n) -{ - return 2 * n * 2; -} - -/* - * multiply two numbers of the same size - * - * multiply a by b, place result in ret; a and b need to be n limbs long - * ret needs to be 2*n limbs long, tmp needs to be mul_limb_numb(n) limbs - * long - */ -static void limb_mul(limb_t *ret, limb_t *a, limb_t *b, size_t n, limb_t *tmp) -{ - limb_t *r_odd, *r_even; - size_t i, j, k; - - r_odd = tmp; - r_even = &tmp[2 * n]; - - memset(ret, 0, 2 * n * sizeof(limb_t)); - - for (i = 0; i < n; i++) { - for (k = 0; k < i + n + 1; k++) { - r_even[k] = 0; - r_odd[k] = 0; - } - for (j = 0; j < n; j++) { - /* - * place results from even and odd limbs in separate arrays so that - * we don't have to calculate overflow every time we get individual - * limb multiplication result - */ - if (j % 2 == 0) - _mul_limb(&r_even[i + j], &r_even[i + j + 1], a[i], b[j]); - else - _mul_limb(&r_odd[i + j], &r_odd[i + j + 1], a[i], b[j]); - } - /* - * skip the least significant limbs when adding multiples of - * more significant limbs (they're zero anyway) - */ - add(ret, ret, r_even, n + i + 1); - add(ret, ret, r_odd, n + i + 1); - } -} - -/* modifies the value in place by performing a right shift by one bit */ -static ossl_inline void rshift1(limb_t *val, size_t n) -{ - limb_t shift_in = 0, shift_out = 0; - size_t i; - - for (i = 0; i < n; i++) { - shift_out = val[i] & 1; - val[i] = shift_in << (LIMB_BIT_SIZE - 1) | (val[i] >> 1); - shift_in = shift_out; - } -} - -/* extend the LSB of flag to all bits of limb */ -static ossl_inline limb_t mk_mask(limb_t flag) -{ - flag |= flag << 1; - flag |= flag << 2; - flag |= flag << 4; - flag |= flag << 8; - flag |= flag << 16; -#if (LIMB_BYTE_SIZE == 8) - flag |= flag << 32; -#endif - return flag; -} - -/* - * copy from either a or b to ret based on flag - * when flag == 0, then copies from b - * when flag == 1, then copies from a - */ -static ossl_inline void cselect(limb_t flag, limb_t *ret, limb_t *a, limb_t *b, size_t n) -{ - /* - * would be more efficient with non volatile mask, but then gcc - * generates code with jumps - */ - volatile limb_t mask; - size_t i; - - mask = mk_mask(flag); - for (i = 0; i < n; i++) { -#if (LIMB_BYTE_SIZE == 8) - ret[i] = constant_time_select_64(mask, a[i], b[i]); -#else - ret[i] = constant_time_select_32(mask, a[i], b[i]); -#endif - } -} - -static limb_t _sub_limb(limb_t *ret, limb_t a, limb_t b, limb_t borrow) -{ - limb_t borrow1, borrow2, t; - /* - * while it doesn't look constant-time, this is idiomatic code - * to tell compilers to use the carry bit from subtraction - */ - - *ret = a - borrow; - if (*ret > a) - borrow1 = 1; - else - borrow1 = 0; - - t = *ret; - *ret = t - b; - if (*ret > t) - borrow2 = 1; - else - borrow2 = 0; - - return borrow1 + borrow2; -} - -/* - * place the result of a - b into ret, return the borrow bit. - * All arrays need to be n limbs long - */ -static limb_t sub(limb_t *ret, limb_t *a, limb_t *b, size_t n) -{ - limb_t borrow = 0; - ossl_ssize_t i; - - for (i = n - 1; i > -1; i--) - borrow = _sub_limb(&ret[i], a[i], b[i], borrow); - - return borrow; -} - -/* return the number of limbs necessary to allocate for the mod() tmp operand */ -static ossl_inline size_t mod_limb_numb(size_t anum, size_t modnum) -{ - return (anum + modnum) * 3; -} - -/* - * calculate a % mod, place the result in ret - * size of a is defined by anum, size of ret and mod is modnum, - * size of tmp is returned by mod_limb_numb() - */ -static void mod(limb_t *ret, limb_t *a, size_t anum, limb_t *mod, - size_t modnum, limb_t *tmp) -{ - limb_t *atmp, *modtmp, *rettmp; - limb_t res; - size_t i; - - memset(tmp, 0, mod_limb_numb(anum, modnum) * LIMB_BYTE_SIZE); - - atmp = tmp; - modtmp = &tmp[anum + modnum]; - rettmp = &tmp[(anum + modnum) * 2]; - - for (i = modnum; i 0; i--, rp--) { - v = _mul_add_limb(rp, mod, modnum, rp[modnum-1] * ni0, tmp2); - v = v + carry + rp[-1]; - carry |= (v != rp[-1]); - carry &= (v <= rp[-1]); - rp[-1] = v; - } - - /* perform the final reduction by mod... */ - carry -= sub(ret, rp, mod, modnum); - - /* ...conditionally */ - cselect(carry, ret, rp, ret, modnum); -} - -/* allocated buffer should be freed afterwards */ -static void BN_to_limb(const BIGNUM *bn, limb_t *buf, size_t limbs) -{ - int i; - int real_limbs = (BN_num_bytes(bn) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE; - limb_t *ptr = buf + (limbs - real_limbs); - - for (i = 0; i < real_limbs; i++) - ptr[i] = bn->d[real_limbs - i - 1]; -} - -#if LIMB_BYTE_SIZE == 8 -static ossl_inline uint64_t be64(uint64_t host) -{ - uint64_t big = 0; - DECLARE_IS_ENDIAN; - - if (!IS_LITTLE_ENDIAN) - return host; - - big |= (host & 0xff00000000000000) >> 56; - big |= (host & 0x00ff000000000000) >> 40; - big |= (host & 0x0000ff0000000000) >> 24; - big |= (host & 0x000000ff00000000) >> 8; - big |= (host & 0x00000000ff000000) << 8; - big |= (host & 0x0000000000ff0000) << 24; - big |= (host & 0x000000000000ff00) << 40; - big |= (host & 0x00000000000000ff) << 56; - return big; -} - -#else -/* Not all platforms have htobe32(). */ -static ossl_inline uint32_t be32(uint32_t host) -{ - uint32_t big = 0; - DECLARE_IS_ENDIAN; - - if (!IS_LITTLE_ENDIAN) - return host; - - big |= (host & 0xff000000) >> 24; - big |= (host & 0x00ff0000) >> 8; - big |= (host & 0x0000ff00) << 8; - big |= (host & 0x000000ff) << 24; - return big; -} -#endif - -/* - * We assume that intermediate, possible_arg2, blinding, and ctx are used - * similar to BN_BLINDING_invert_ex() arguments. - * to_mod is RSA modulus. - * buf and num is the serialization buffer and its length. - * - * Here we use classic/Montgomery multiplication and modulo. After the calculation finished - * we serialize the new structure instead of BIGNUMs taking endianness into account. - */ -int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate, - const BN_BLINDING *blinding, - const BIGNUM *possible_arg2, - const BIGNUM *to_mod, BN_CTX *ctx, - unsigned char *buf, int num) -{ - limb_t *l_im = NULL, *l_mul = NULL, *l_mod = NULL; - limb_t *l_ret = NULL, *l_tmp = NULL, l_buf; - size_t l_im_count = 0, l_mul_count = 0, l_size = 0, l_mod_count = 0; - size_t l_tmp_count = 0; - int ret = 0; - size_t i; - unsigned char *tmp; - const BIGNUM *arg1 = intermediate; - const BIGNUM *arg2 = (possible_arg2 == NULL) ? blinding->Ai : possible_arg2; - - l_im_count = (BN_num_bytes(arg1) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE; - l_mul_count = (BN_num_bytes(arg2) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE; - l_mod_count = (BN_num_bytes(to_mod) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE; - - l_size = l_im_count > l_mul_count ? l_im_count : l_mul_count; - l_im = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE); - l_mul = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE); - l_mod = OPENSSL_zalloc(l_mod_count * LIMB_BYTE_SIZE); - - if ((l_im == NULL) || (l_mul == NULL) || (l_mod == NULL)) - goto err; - - BN_to_limb(arg1, l_im, l_size); - BN_to_limb(arg2, l_mul, l_size); - BN_to_limb(to_mod, l_mod, l_mod_count); - - l_ret = OPENSSL_malloc(2 * l_size * LIMB_BYTE_SIZE); - - if (blinding->m_ctx != NULL) { - l_tmp_count = mul_limb_numb(l_size) > mod_montgomery_limb_numb(l_mod_count) ? - mul_limb_numb(l_size) : mod_montgomery_limb_numb(l_mod_count); - l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE); - } else { - l_tmp_count = mul_limb_numb(l_size) > mod_limb_numb(2 * l_size, l_mod_count) ? - mul_limb_numb(l_size) : mod_limb_numb(2 * l_size, l_mod_count); - l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE); - } - - if ((l_ret == NULL) || (l_tmp == NULL)) - goto err; - - if (blinding->m_ctx != NULL) { - limb_mul(l_ret, l_im, l_mul, l_size, l_tmp); - mod_montgomery(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count, - blinding->m_ctx->n0[0], l_tmp); - } else { - limb_mul(l_ret, l_im, l_mul, l_size, l_tmp); - mod(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count, l_tmp); - } - - /* modulus size in bytes can be equal to num but after limbs conversion it becomes bigger */ - if (num < BN_num_bytes(to_mod)) { - ERR_raise(ERR_LIB_BN, ERR_R_PASSED_INVALID_ARGUMENT); - goto err; - } - - memset(buf, 0, num); - tmp = buf + num - BN_num_bytes(to_mod); - for (i = 0; i < l_mod_count; i++) { -#if LIMB_BYTE_SIZE == 8 - l_buf = be64(l_ret[i]); -#else - l_buf = be32(l_ret[i]); -#endif - if (i == 0) { - int delta = LIMB_BYTE_SIZE - ((l_mod_count * LIMB_BYTE_SIZE) - num); - - memcpy(tmp, ((char *)&l_buf) + LIMB_BYTE_SIZE - delta, delta); - tmp += delta; - } else { - memcpy(tmp, &l_buf, LIMB_BYTE_SIZE); - tmp += LIMB_BYTE_SIZE; - } - } - ret = num; - - err: - OPENSSL_free(l_im); - OPENSSL_free(l_mul); - OPENSSL_free(l_mod); - OPENSSL_free(l_tmp); - OPENSSL_free(l_ret); - - return ret; -} diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c index ec69076e63..780b7b6715 100644 --- a/crypto/rsa/rsa_ossl.c +++ b/crypto/rsa/rsa_ossl.c @@ -589,6 +589,10 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from, BN_free(d); } + if (blinding) + if (!rsa_blinding_invert(blinding, ret, unblind, ctx)) + goto err; + /* * derive the Key Derivation Key from private exponent and public * ciphertext @@ -598,20 +602,9 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from, goto err; } - if (blinding) { - /* - * ossl_bn_rsa_do_unblind() combines blinding inversion and - * 0-padded BN BE serialization - */ - j = ossl_bn_rsa_do_unblind(ret, blinding, unblind, rsa->n, ctx, - buf, num); - if (j == 0) - goto err; - } else { - j = BN_bn2binpad(ret, buf, num); - if (j < 0) - goto err; - } + j = BN_bn2binpad(ret, buf, num); + if (j < 0) + goto err; switch (padding) { case RSA_PKCS1_NO_IMPLICIT_REJECT_PADDING: diff --git a/include/crypto/bn.h b/include/crypto/bn.h index b354578ee0..58271179fa 100644 --- a/include/crypto/bn.h +++ b/include/crypto/bn.h @@ -116,12 +116,6 @@ OSSL_LIB_CTX *ossl_bn_get_libctx(BN_CTX *ctx); extern const BIGNUM ossl_bn_inv_sqrt_2; -int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate, - const BN_BLINDING *blinding, - const BIGNUM *possible_arg2, - const BIGNUM *to_mod, BN_CTX *ctx, - unsigned char *buf, int num); - #if defined(OPENSSL_SYS_LINUX) && !defined(FIPS_MODULE) && defined (__s390x__) # define S390X_MOD_EXP #endif -- cgit v1.2.3