summaryrefslogtreecommitdiffstats
path: root/crypto/bn
diff options
context:
space:
mode:
authorXi Ruoyao <xry111@xry111.site>2023-11-25 16:14:35 +0800
committerTomas Mraz <tomas@openssl.org>2023-11-30 18:43:18 +0100
commitfe3f4395a8bcf6d7292bee7e046874d4217fa684 (patch)
treec3983be8fc519ece6121e4166de1b406ddac5897 /crypto/bn
parent115d21eea46b627baf80a6bc7f20547ac6f84c48 (diff)
bn_nist: Fix strict-aliasing violations in little-endian optimizations
The little-endian optimization is doing some type-punning in a way violating the C standard aliasing rule by loading or storing through a lvalue with type "unsigned int" but the memory location has effective type "unsigned long" or "unsigned long long" (BN_ULONG). Convert these accesses to use memcpy instead, as memcpy is defined as-is "accessing through the lvalues with type char" and char is aliasing with all types. GCC does a good job to optimize away the temporary copies introduced with the change. Ideally copying to a temporary unsigned int array, doing the calculation, and then copying back to `r_d` will make the code look better, but unfortunately GCC would fail to optimize away this temporary array then. I've not touched the LE optimization in BN_nist_mod_224 because it's guarded by BN_BITS2!=64, then BN_BITS2 must be 32 and BN_ULONG must be unsigned int, thus there is no aliasing issue in BN_nist_mod_224. Fixes #12247. Reviewed-by: Tom Cosgrove <tom.cosgrove@arm.com> Reviewed-by: Tomas Mraz <tomas@openssl.org> (Merged from https://github.com/openssl/openssl/pull/22816) (cherry picked from commit 990d9ff508070757912c000f0c4132dbb5a0bb0a)
Diffstat (limited to 'crypto/bn')
-rw-r--r--crypto/bn/bn_nist.c126
1 files changed, 74 insertions, 52 deletions
diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c
index c1dbed0598..bc864346fb 100644
--- a/crypto/bn/bn_nist.c
+++ b/crypto/bn/bn_nist.c
@@ -319,6 +319,28 @@ static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
# endif
#endif /* BN_BITS2 != 64 */
+#ifdef NIST_INT64
+/* Helpers to load/store a 32-bit word (uint32_t) from/into a memory
+ * location and avoid potential aliasing issue. */
+static ossl_inline uint32_t load_u32(const void *ptr)
+{
+ uint32_t tmp;
+
+ memcpy(&tmp, ptr, sizeof(tmp));
+ return tmp;
+}
+
+static ossl_inline void store_lo32(void *ptr, NIST_INT64 val)
+{
+ /* A cast is needed for big-endian system: on a 32-bit BE system
+ * NIST_INT64 may be defined as well if the compiler supports 64-bit
+ * long long. */
+ uint32_t tmp = (uint32_t)val;
+
+ memcpy(ptr, &tmp, sizeof(tmp));
+}
+#endif /* NIST_INT64 */
+
#define nist_set_192(to, from, a1, a2, a3) \
{ \
bn_cp_64(to, 0, from, (a3) - 3) \
@@ -374,42 +396,42 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
unsigned int *rp = (unsigned int *)r_d;
const unsigned int *bp = (const unsigned int *)buf.ui;
- acc = rp[0];
+ acc = load_u32(&rp[0]);
acc += bp[3 * 2 - 6];
acc += bp[5 * 2 - 6];
- rp[0] = (unsigned int)acc;
+ store_lo32(&rp[0], acc);
acc >>= 32;
- acc += rp[1];
+ acc += load_u32(&rp[1]);
acc += bp[3 * 2 - 5];
acc += bp[5 * 2 - 5];
- rp[1] = (unsigned int)acc;
+ store_lo32(&rp[1], acc);
acc >>= 32;
- acc += rp[2];
+ acc += load_u32(&rp[2]);
acc += bp[3 * 2 - 6];
acc += bp[4 * 2 - 6];
acc += bp[5 * 2 - 6];
- rp[2] = (unsigned int)acc;
+ store_lo32(&rp[2], acc);
acc >>= 32;
- acc += rp[3];
+ acc += load_u32(&rp[3]);
acc += bp[3 * 2 - 5];
acc += bp[4 * 2 - 5];
acc += bp[5 * 2 - 5];
- rp[3] = (unsigned int)acc;
+ store_lo32(&rp[3], acc);
acc >>= 32;
- acc += rp[4];
+ acc += load_u32(&rp[4]);
acc += bp[4 * 2 - 6];
acc += bp[5 * 2 - 6];
- rp[4] = (unsigned int)acc;
+ store_lo32(&rp[4], acc);
acc >>= 32;
- acc += rp[5];
+ acc += load_u32(&rp[5]);
acc += bp[4 * 2 - 5];
acc += bp[5 * 2 - 5];
- rp[5] = (unsigned int)acc;
+ store_lo32(&rp[5], acc);
carry = (int)(acc >> 32);
}
@@ -683,36 +705,36 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
unsigned int *rp = (unsigned int *)r_d;
const unsigned int *bp = (const unsigned int *)buf.ui;
- acc = rp[0];
+ acc = load_u32(&rp[0]);
acc += bp[8 - 8];
acc += bp[9 - 8];
acc -= bp[11 - 8];
acc -= bp[12 - 8];
acc -= bp[13 - 8];
acc -= bp[14 - 8];
- rp[0] = (unsigned int)acc;
+ store_lo32(&rp[0], acc);
acc >>= 32;
- acc += rp[1];
+ acc += load_u32(&rp[1]);
acc += bp[9 - 8];
acc += bp[10 - 8];
acc -= bp[12 - 8];
acc -= bp[13 - 8];
acc -= bp[14 - 8];
acc -= bp[15 - 8];
- rp[1] = (unsigned int)acc;
+ store_lo32(&rp[1], acc);
acc >>= 32;
- acc += rp[2];
+ acc += load_u32(&rp[2]);
acc += bp[10 - 8];
acc += bp[11 - 8];
acc -= bp[13 - 8];
acc -= bp[14 - 8];
acc -= bp[15 - 8];
- rp[2] = (unsigned int)acc;
+ store_lo32(&rp[2], acc);
acc >>= 32;
- acc += rp[3];
+ acc += load_u32(&rp[3]);
acc += bp[11 - 8];
acc += bp[11 - 8];
acc += bp[12 - 8];
@@ -721,10 +743,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc -= bp[15 - 8];
acc -= bp[8 - 8];
acc -= bp[9 - 8];
- rp[3] = (unsigned int)acc;
+ store_lo32(&rp[3], acc);
acc >>= 32;
- acc += rp[4];
+ acc += load_u32(&rp[4]);
acc += bp[12 - 8];
acc += bp[12 - 8];
acc += bp[13 - 8];
@@ -732,10 +754,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc += bp[14 - 8];
acc -= bp[9 - 8];
acc -= bp[10 - 8];
- rp[4] = (unsigned int)acc;
+ store_lo32(&rp[4], acc);
acc >>= 32;
- acc += rp[5];
+ acc += load_u32(&rp[5]);
acc += bp[13 - 8];
acc += bp[13 - 8];
acc += bp[14 - 8];
@@ -743,10 +765,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc += bp[15 - 8];
acc -= bp[10 - 8];
acc -= bp[11 - 8];
- rp[5] = (unsigned int)acc;
+ store_lo32(&rp[5], acc);
acc >>= 32;
- acc += rp[6];
+ acc += load_u32(&rp[6]);
acc += bp[14 - 8];
acc += bp[14 - 8];
acc += bp[15 - 8];
@@ -755,10 +777,10 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc += bp[13 - 8];
acc -= bp[8 - 8];
acc -= bp[9 - 8];
- rp[6] = (unsigned int)acc;
+ store_lo32(&rp[6], acc);
acc >>= 32;
- acc += rp[7];
+ acc += load_u32(&rp[7]);
acc += bp[15 - 8];
acc += bp[15 - 8];
acc += bp[15 - 8];
@@ -767,7 +789,7 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc -= bp[11 - 8];
acc -= bp[12 - 8];
acc -= bp[13 - 8];
- rp[7] = (unsigned int)acc;
+ store_lo32(&rp[7], acc);
carry = (int)(acc >> 32);
}
@@ -920,32 +942,32 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
unsigned int *rp = (unsigned int *)r_d;
const unsigned int *bp = (const unsigned int *)buf.ui;
- acc = rp[0];
+ acc = load_u32(&rp[0]);
acc += bp[12 - 12];
acc += bp[21 - 12];
acc += bp[20 - 12];
acc -= bp[23 - 12];
- rp[0] = (unsigned int)acc;
+ store_lo32(&rp[0], acc);
acc >>= 32;
- acc += rp[1];
+ acc += load_u32(&rp[1]);
acc += bp[13 - 12];
acc += bp[22 - 12];
acc += bp[23 - 12];
acc -= bp[12 - 12];
acc -= bp[20 - 12];
- rp[1] = (unsigned int)acc;
+ store_lo32(&rp[1], acc);
acc >>= 32;
- acc += rp[2];
+ acc += load_u32(&rp[2]);
acc += bp[14 - 12];
acc += bp[23 - 12];
acc -= bp[13 - 12];
acc -= bp[21 - 12];
- rp[2] = (unsigned int)acc;
+ store_lo32(&rp[2], acc);
acc >>= 32;
- acc += rp[3];
+ acc += load_u32(&rp[3]);
acc += bp[15 - 12];
acc += bp[12 - 12];
acc += bp[20 - 12];
@@ -953,10 +975,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc -= bp[14 - 12];
acc -= bp[22 - 12];
acc -= bp[23 - 12];
- rp[3] = (unsigned int)acc;
+ store_lo32(&rp[3], acc);
acc >>= 32;
- acc += rp[4];
+ acc += load_u32(&rp[4]);
acc += bp[21 - 12];
acc += bp[21 - 12];
acc += bp[16 - 12];
@@ -967,10 +989,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc -= bp[15 - 12];
acc -= bp[23 - 12];
acc -= bp[23 - 12];
- rp[4] = (unsigned int)acc;
+ store_lo32(&rp[4], acc);
acc >>= 32;
- acc += rp[5];
+ acc += load_u32(&rp[5]);
acc += bp[22 - 12];
acc += bp[22 - 12];
acc += bp[17 - 12];
@@ -979,10 +1001,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc += bp[21 - 12];
acc += bp[23 - 12];
acc -= bp[16 - 12];
- rp[5] = (unsigned int)acc;
+ store_lo32(&rp[5], acc);
acc >>= 32;
- acc += rp[6];
+ acc += load_u32(&rp[6]);
acc += bp[23 - 12];
acc += bp[23 - 12];
acc += bp[18 - 12];
@@ -990,48 +1012,48 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
acc += bp[14 - 12];
acc += bp[22 - 12];
acc -= bp[17 - 12];
- rp[6] = (unsigned int)acc;
+ store_lo32(&rp[6], acc);
acc >>= 32;
- acc += rp[7];
+ acc += load_u32(&rp[7]);
acc += bp[19 - 12];
acc += bp[16 - 12];
acc += bp[15 - 12];
acc += bp[23 - 12];
acc -= bp[18 - 12];
- rp[7] = (unsigned int)acc;
+ store_lo32(&rp[7], acc);
acc >>= 32;
- acc += rp[8];
+ acc += load_u32(&rp[8]);
acc += bp[20 - 12];
acc += bp[17 - 12];
acc += bp[16 - 12];
acc -= bp[19 - 12];
- rp[8] = (unsigned int)acc;
+ store_lo32(&rp[8], acc);
acc >>= 32;
- acc += rp[9];
+ acc += load_u32(&rp[9]);
acc += bp[21 - 12];
acc += bp[18 - 12];
acc += bp[17 - 12];
acc -= bp[20 - 12];
- rp[9] = (unsigned int)acc;
+ store_lo32(&rp[9], acc);
acc >>= 32;
- acc += rp[10];
+ acc += load_u32(&rp[10]);
acc += bp[22 - 12];
acc += bp[19 - 12];
acc += bp[18 - 12];
acc -= bp[21 - 12];
- rp[10] = (unsigned int)acc;
+ store_lo32(&rp[10], acc);
acc >>= 32;
- acc += rp[11];
+ acc += load_u32(&rp[11]);
acc += bp[23 - 12];
acc += bp[20 - 12];
acc += bp[19 - 12];
acc -= bp[22 - 12];
- rp[11] = (unsigned int)acc;
+ store_lo32(&rp[11], acc);
carry = (int)(acc >> 32);
}