diff options
author | Xi Ruoyao <xry111@xry111.site> | 2023-11-25 16:14:35 +0800 |
---|---|---|
committer | Tomas Mraz <tomas@openssl.org> | 2023-11-30 18:43:18 +0100 |
commit | fe3f4395a8bcf6d7292bee7e046874d4217fa684 (patch) | |
tree | c3983be8fc519ece6121e4166de1b406ddac5897 /crypto/bn | |
parent | 115d21eea46b627baf80a6bc7f20547ac6f84c48 (diff) |
bn_nist: Fix strict-aliasing violations in little-endian optimizations
The little-endian optimization is doing some type-punning in a way
violating the C standard aliasing rule by loading or storing through a
lvalue with type "unsigned int" but the memory location has effective
type "unsigned long" or "unsigned long long" (BN_ULONG). Convert these
accesses to use memcpy instead, as memcpy is defined as-is "accessing
through the lvalues with type char" and char is aliasing with all types.
GCC does a good job to optimize away the temporary copies introduced
with the change. Ideally copying to a temporary unsigned int array,
doing the calculation, and then copying back to `r_d` will make the code
look better, but unfortunately GCC would fail to optimize away this
temporary array then.
I've not touched the LE optimization in BN_nist_mod_224 because it's
guarded by BN_BITS2!=64, then BN_BITS2 must be 32 and BN_ULONG must be
unsigned int, thus there is no aliasing issue in BN_nist_mod_224.
Fixes #12247.
Reviewed-by: Tom Cosgrove <tom.cosgrove@arm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/22816)
(cherry picked from commit 990d9ff508070757912c000f0c4132dbb5a0bb0a)
Diffstat (limited to 'crypto/bn')
-rw-r--r-- | crypto/bn/bn_nist.c | 126 |
1 files changed, 74 insertions, 52 deletions
diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c index c1dbed0598..bc864346fb 100644 --- a/crypto/bn/bn_nist.c +++ b/crypto/bn/bn_nist.c @@ -319,6 +319,28 @@ static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top) # endif #endif /* BN_BITS2 != 64 */ +#ifdef NIST_INT64 +/* Helpers to load/store a 32-bit word (uint32_t) from/into a memory + * location and avoid potential aliasing issue. */ +static ossl_inline uint32_t load_u32(const void *ptr) +{ + uint32_t tmp; + + memcpy(&tmp, ptr, sizeof(tmp)); + return tmp; +} + +static ossl_inline void store_lo32(void *ptr, NIST_INT64 val) +{ + /* A cast is needed for big-endian system: on a 32-bit BE system + * NIST_INT64 may be defined as well if the compiler supports 64-bit + * long long. */ + uint32_t tmp = (uint32_t)val; + + memcpy(ptr, &tmp, sizeof(tmp)); +} +#endif /* NIST_INT64 */ + #define nist_set_192(to, from, a1, a2, a3) \ { \ bn_cp_64(to, 0, from, (a3) - 3) \ @@ -374,42 +396,42 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, unsigned int *rp = (unsigned int *)r_d; const unsigned int *bp = (const unsigned int *)buf.ui; - acc = rp[0]; + acc = load_u32(&rp[0]); acc += bp[3 * 2 - 6]; acc += bp[5 * 2 - 6]; - rp[0] = (unsigned int)acc; + store_lo32(&rp[0], acc); acc >>= 32; - acc += rp[1]; + acc += load_u32(&rp[1]); acc += bp[3 * 2 - 5]; acc += bp[5 * 2 - 5]; - rp[1] = (unsigned int)acc; + store_lo32(&rp[1], acc); acc >>= 32; - acc += rp[2]; + acc += load_u32(&rp[2]); acc += bp[3 * 2 - 6]; acc += bp[4 * 2 - 6]; acc += bp[5 * 2 - 6]; - rp[2] = (unsigned int)acc; + store_lo32(&rp[2], acc); acc >>= 32; - acc += rp[3]; + acc += load_u32(&rp[3]); acc += bp[3 * 2 - 5]; acc += bp[4 * 2 - 5]; acc += bp[5 * 2 - 5]; - rp[3] = (unsigned int)acc; + store_lo32(&rp[3], acc); acc >>= 32; - acc += rp[4]; + acc += load_u32(&rp[4]); acc += bp[4 * 2 - 6]; acc += bp[5 * 2 - 6]; - rp[4] = (unsigned int)acc; + store_lo32(&rp[4], acc); acc >>= 32; - acc += rp[5]; + acc += load_u32(&rp[5]); acc += bp[4 * 2 - 5]; acc += bp[5 * 2 - 5]; - rp[5] = (unsigned int)acc; + store_lo32(&rp[5], acc); carry = (int)(acc >> 32); } @@ -683,36 +705,36 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, unsigned int *rp = (unsigned int *)r_d; const unsigned int *bp = (const unsigned int *)buf.ui; - acc = rp[0]; + acc = load_u32(&rp[0]); acc += bp[8 - 8]; acc += bp[9 - 8]; acc -= bp[11 - 8]; acc -= bp[12 - 8]; acc -= bp[13 - 8]; acc -= bp[14 - 8]; - rp[0] = (unsigned int)acc; + store_lo32(&rp[0], acc); acc >>= 32; - acc += rp[1]; + acc += load_u32(&rp[1]); acc += bp[9 - 8]; acc += bp[10 - 8]; acc -= bp[12 - 8]; acc -= bp[13 - 8]; acc -= bp[14 - 8]; acc -= bp[15 - 8]; - rp[1] = (unsigned int)acc; + store_lo32(&rp[1], acc); acc >>= 32; - acc += rp[2]; + acc += load_u32(&rp[2]); acc += bp[10 - 8]; acc += bp[11 - 8]; acc -= bp[13 - 8]; acc -= bp[14 - 8]; acc -= bp[15 - 8]; - rp[2] = (unsigned int)acc; + store_lo32(&rp[2], acc); acc >>= 32; - acc += rp[3]; + acc += load_u32(&rp[3]); acc += bp[11 - 8]; acc += bp[11 - 8]; acc += bp[12 - 8]; @@ -721,10 +743,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[15 - 8]; acc -= bp[8 - 8]; acc -= bp[9 - 8]; - rp[3] = (unsigned int)acc; + store_lo32(&rp[3], acc); acc >>= 32; - acc += rp[4]; + acc += load_u32(&rp[4]); acc += bp[12 - 8]; acc += bp[12 - 8]; acc += bp[13 - 8]; @@ -732,10 +754,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[14 - 8]; acc -= bp[9 - 8]; acc -= bp[10 - 8]; - rp[4] = (unsigned int)acc; + store_lo32(&rp[4], acc); acc >>= 32; - acc += rp[5]; + acc += load_u32(&rp[5]); acc += bp[13 - 8]; acc += bp[13 - 8]; acc += bp[14 - 8]; @@ -743,10 +765,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[15 - 8]; acc -= bp[10 - 8]; acc -= bp[11 - 8]; - rp[5] = (unsigned int)acc; + store_lo32(&rp[5], acc); acc >>= 32; - acc += rp[6]; + acc += load_u32(&rp[6]); acc += bp[14 - 8]; acc += bp[14 - 8]; acc += bp[15 - 8]; @@ -755,10 +777,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[13 - 8]; acc -= bp[8 - 8]; acc -= bp[9 - 8]; - rp[6] = (unsigned int)acc; + store_lo32(&rp[6], acc); acc >>= 32; - acc += rp[7]; + acc += load_u32(&rp[7]); acc += bp[15 - 8]; acc += bp[15 - 8]; acc += bp[15 - 8]; @@ -767,7 +789,7 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[11 - 8]; acc -= bp[12 - 8]; acc -= bp[13 - 8]; - rp[7] = (unsigned int)acc; + store_lo32(&rp[7], acc); carry = (int)(acc >> 32); } @@ -920,32 +942,32 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, unsigned int *rp = (unsigned int *)r_d; const unsigned int *bp = (const unsigned int *)buf.ui; - acc = rp[0]; + acc = load_u32(&rp[0]); acc += bp[12 - 12]; acc += bp[21 - 12]; acc += bp[20 - 12]; acc -= bp[23 - 12]; - rp[0] = (unsigned int)acc; + store_lo32(&rp[0], acc); acc >>= 32; - acc += rp[1]; + acc += load_u32(&rp[1]); acc += bp[13 - 12]; acc += bp[22 - 12]; acc += bp[23 - 12]; acc -= bp[12 - 12]; acc -= bp[20 - 12]; - rp[1] = (unsigned int)acc; + store_lo32(&rp[1], acc); acc >>= 32; - acc += rp[2]; + acc += load_u32(&rp[2]); acc += bp[14 - 12]; acc += bp[23 - 12]; acc -= bp[13 - 12]; acc -= bp[21 - 12]; - rp[2] = (unsigned int)acc; + store_lo32(&rp[2], acc); acc >>= 32; - acc += rp[3]; + acc += load_u32(&rp[3]); acc += bp[15 - 12]; acc += bp[12 - 12]; acc += bp[20 - 12]; @@ -953,10 +975,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[14 - 12]; acc -= bp[22 - 12]; acc -= bp[23 - 12]; - rp[3] = (unsigned int)acc; + store_lo32(&rp[3], acc); acc >>= 32; - acc += rp[4]; + acc += load_u32(&rp[4]); acc += bp[21 - 12]; acc += bp[21 - 12]; acc += bp[16 - 12]; @@ -967,10 +989,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc -= bp[15 - 12]; acc -= bp[23 - 12]; acc -= bp[23 - 12]; - rp[4] = (unsigned int)acc; + store_lo32(&rp[4], acc); acc >>= 32; - acc += rp[5]; + acc += load_u32(&rp[5]); acc += bp[22 - 12]; acc += bp[22 - 12]; acc += bp[17 - 12]; @@ -979,10 +1001,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[21 - 12]; acc += bp[23 - 12]; acc -= bp[16 - 12]; - rp[5] = (unsigned int)acc; + store_lo32(&rp[5], acc); acc >>= 32; - acc += rp[6]; + acc += load_u32(&rp[6]); acc += bp[23 - 12]; acc += bp[23 - 12]; acc += bp[18 - 12]; @@ -990,48 +1012,48 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, acc += bp[14 - 12]; acc += bp[22 - 12]; acc -= bp[17 - 12]; - rp[6] = (unsigned int)acc; + store_lo32(&rp[6], acc); acc >>= 32; - acc += rp[7]; + acc += load_u32(&rp[7]); acc += bp[19 - 12]; acc += bp[16 - 12]; acc += bp[15 - 12]; acc += bp[23 - 12]; acc -= bp[18 - 12]; - rp[7] = (unsigned int)acc; + store_lo32(&rp[7], acc); acc >>= 32; - acc += rp[8]; + acc += load_u32(&rp[8]); acc += bp[20 - 12]; acc += bp[17 - 12]; acc += bp[16 - 12]; acc -= bp[19 - 12]; - rp[8] = (unsigned int)acc; + store_lo32(&rp[8], acc); acc >>= 32; - acc += rp[9]; + acc += load_u32(&rp[9]); acc += bp[21 - 12]; acc += bp[18 - 12]; acc += bp[17 - 12]; acc -= bp[20 - 12]; - rp[9] = (unsigned int)acc; + store_lo32(&rp[9], acc); acc >>= 32; - acc += rp[10]; + acc += load_u32(&rp[10]); acc += bp[22 - 12]; acc += bp[19 - 12]; acc += bp[18 - 12]; acc -= bp[21 - 12]; - rp[10] = (unsigned int)acc; + store_lo32(&rp[10], acc); acc >>= 32; - acc += rp[11]; + acc += load_u32(&rp[11]); acc += bp[23 - 12]; acc += bp[20 - 12]; acc += bp[19 - 12]; acc -= bp[22 - 12]; - rp[11] = (unsigned int)acc; + store_lo32(&rp[11], acc); carry = (int)(acc >> 32); } |