Run util/openssl-format-source -v -c .

Reviewed-by: Tim Hudson <tjh@openssl.org>
author: Matt Caswell <matt@openssl.org> 2015-01-22 03:40:55 +0000
committer: Matt Caswell <matt@openssl.org> 2015-01-22 09:20:09 +0000
commit: 0f113f3ee4d629ef9a4a30911b22b224772085e5 (patch)
tree: e014603da5aed1d0751f587a66d6e270b6bda3de /crypto/ec/ecp_nistp256.c
parent: 22b52164aaed31d6e93dbd2d397ace041360e6aa (diff)
1 files changed, 1964 insertions, 1799 deletions
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c
index 0d20adc759..5a21a3c13d 100644
--- a/crypto/ec/ecp_nistp256.c
+++ b/crypto/ec/ecp_nistp256.c
@@ -29,54 +29,58 @@
 #include <openssl/opensslconf.h>
 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
 
-#include <stdint.h>
-#include <string.h>
-#include <openssl/err.h>
-#include "ec_lcl.h"
+# include <stdint.h>
+# include <string.h>
+# include <openssl/err.h>
+# include "ec_lcl.h"
 
-#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
   /* even with gcc, the typedef won't work for 32-bit platforms */
-  typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
-  typedef __int128_t int128_t;
-#else
-  #error "Need GCC 3.1 or later to define type uint128_t"
-#endif
+typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
+                                 * platforms */
+typedef __int128_t int128_t;
+# else
+#  error "Need GCC 3.1 or later to define type uint128_t"
+# endif
 
 typedef uint8_t u8;
 typedef uint32_t u32;
 typedef uint64_t u64;
 typedef int64_t s64;
 
-/* The underlying field.
- *
- * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
- * of this field into 32 bytes. We call this an felem_bytearray. */
+/*
+ * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
+ * can serialise an element of this field into 32 bytes. We call this an
+ * felem_bytearray.
+ */
 
 typedef u8 felem_bytearray[32];
 
-/* These are the parameters of P256, taken from FIPS 186-3, page 86. These
- * values are big-endian. */
+/*
+ * These are the parameters of P256, taken from FIPS 186-3, page 86. These
+ * values are big-endian.
+ */
 static const felem_bytearray nistp256_curve_params[5] = {
-	{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,       /* p */
-	 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-	 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
-	{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01,       /* a = -3 */
-	 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-	 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},      /* b */
-	{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
-	 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
-	 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
-	 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
-	{0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47,       /* x */
-	 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
-	 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
-	 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
-	{0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b,       /* y */
-	 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
-	 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
-	 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
+    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
+    {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
+     0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
+     0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
+     0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
+    {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
+     0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
+     0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
+     0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
+    {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
+     0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
+     0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
+     0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
 };
 
 /*-
@@ -100,7 +104,7 @@ static const felem_bytearray nistp256_curve_params[5] = {
  * values are used as intermediate values before multiplication.
  */
 
-#define NLIMBS 4
+# define NLIMBS 4
 
 typedef uint128_t limb;
 typedef limb felem[NLIMBS];
@@ -108,72 +112,74 @@ typedef limb longfelem[NLIMBS * 2];
 typedef u64 smallfelem[NLIMBS];
 
 /* This is the value of the prime as four 64-bit words, little-endian. */
-static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
+static const u64 kPrime[4] =
+    { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
 static const u64 bottom63bits = 0x7ffffffffffffffful;
 
-/* bin32_to_felem takes a little-endian byte array and converts it into felem
- * form. This assumes that the CPU is little-endian. */
+/*
+ * bin32_to_felem takes a little-endian byte array and converts it into felem
+ * form. This assumes that the CPU is little-endian.
+ */
 static void bin32_to_felem(felem out, const u8 in[32])
-	{
-	out[0] = *((u64*) &in[0]);
-	out[1] = *((u64*) &in[8]);
-	out[2] = *((u64*) &in[16]);
-	out[3] = *((u64*) &in[24]);
-	}
-
-/* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
- * 32 byte array. This assumes that the CPU is little-endian. */
+{
+    out[0] = *((u64 *)&in[0]);
+    out[1] = *((u64 *)&in[8]);
+    out[2] = *((u64 *)&in[16]);
+    out[3] = *((u64 *)&in[24]);
+}
+
+/*
+ * smallfelem_to_bin32 takes a smallfelem and serialises into a little
+ * endian, 32 byte array. This assumes that the CPU is little-endian.
+ */
 static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
-	{
-	*((u64*) &out[0]) = in[0];
-	*((u64*) &out[8]) = in[1];
-	*((u64*) &out[16]) = in[2];
-	*((u64*) &out[24]) = in[3];
-	}
+{
+    *((u64 *)&out[0]) = in[0];
+    *((u64 *)&out[8]) = in[1];
+    *((u64 *)&out[16]) = in[2];
+    *((u64 *)&out[24]) = in[3];
+}
 
 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
 static void flip_endian(u8 *out, const u8 *in, unsigned len)
-	{
-	unsigned i;
-	for (i = 0; i < len; ++i)
-		out[i] = in[len-1-i];
-	}
+{
+    unsigned i;
+    for (i = 0; i < len; ++i)
+        out[i] = in[len - 1 - i];
+}
 
 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
 static int BN_to_felem(felem out, const BIGNUM *bn)
-	{
-	felem_bytearray b_in;
-	felem_bytearray b_out;
-	unsigned num_bytes;
-
-	/* BN_bn2bin eats leading zeroes */
-	memset(b_out, 0, sizeof b_out);
-	num_bytes = BN_num_bytes(bn);
-	if (num_bytes > sizeof b_out)
-		{
-		ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
-		return 0;
-		}
-	if (BN_is_negative(bn))
-		{
-		ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
-		return 0;
-		}
-	num_bytes = BN_bn2bin(bn, b_in);
-	flip_endian(b_out, b_in, num_bytes);
-	bin32_to_felem(out, b_out);
-	return 1;
-	}
+{
+    felem_bytearray b_in;
+    felem_bytearray b_out;
+    unsigned num_bytes;
+
+    /* BN_bn2bin eats leading zeroes */
+    memset(b_out, 0, sizeof b_out);
+    num_bytes = BN_num_bytes(bn);
+    if (num_bytes > sizeof b_out) {
+        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
+        return 0;
+    }
+    if (BN_is_negative(bn)) {
+        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
+        return 0;
+    }
+    num_bytes = BN_bn2bin(bn, b_in);
+    flip_endian(b_out, b_in, num_bytes);
+    bin32_to_felem(out, b_out);
+    return 1;
+}
 
 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
 static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
-	{
-	felem_bytearray b_in, b_out;
-	smallfelem_to_bin32(b_in, in);
-	flip_endian(b_out, b_in, sizeof b_out);
-	return BN_bin2bn(b_out, sizeof b_out, out);
-	}
-
+{
+    felem_bytearray b_in, b_out;
+    smallfelem_to_bin32(b_in, in);
+    flip_endian(b_out, b_in, sizeof b_out);
+    return BN_bin2bn(b_out, sizeof b_out, out);
+}
 
 /*-
  * Field operations
@@ -181,75 +187,76 @@ static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
  */
 
 static void smallfelem_one(smallfelem out)
-	{
-	out[0] = 1;
-	out[1] = 0;
-	out[2] = 0;
-	out[3] = 0;
-	}
+{
+    out[0] = 1;
+    out[1] = 0;
+    out[2] = 0;
+    out[3] = 0;
+}
 
 static void smallfelem_assign(smallfelem out, const smallfelem in)
-	{
-	out[0] = in[0];
-	out[1] = in[1];
-	out[2] = in[2];
-	out[3] = in[3];
-	}
+{
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+}
 
 static void felem_assign(felem out, const felem in)
-	{
-	out[0] = in[0];
-	out[1] = in[1];
-	out[2] = in[2];
-	out[3] = in[3];
-	}
+{
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+}
 
 /* felem_sum sets out = out + in. */
 static void felem_sum(felem out, const felem in)
-	{
-	out[0] += in[0];
-	out[1] += in[1];
-	out[2] += in[2];
-	out[3] += in[3];
-	}
+{
+    out[0] += in[0];
+    out[1] += in[1];
+    out[2] += in[2];
+    out[3] += in[3];
+}
 
 /* felem_small_sum sets out = out + in. */
 static void felem_small_sum(felem out, const smallfelem in)
-	{
-	out[0] += in[0];
-	out[1] += in[1];
-	out[2] += in[2];
-	out[3] += in[3];
-	}
+{
+    out[0] += in[0];
+    out[1] += in[1];
+    out[2] += in[2];
+    out[3] += in[3];
+}
 
 /* felem_scalar sets out = out * scalar */
 static void felem_scalar(felem out, const u64 scalar)
-	{
-	out[0] *= scalar;
-	out[1] *= scalar;
-	out[2] *= scalar;
-	out[3] *= scalar;
-	}
+{
+    out[0] *= scalar;
+    out[1] *= scalar;
+    out[2] *= scalar;
+    out[3] *= scalar;
+}
 
 /* longfelem_scalar sets out = out * scalar */
 static void longfelem_scalar(longfelem out, const u64 scalar)
-	{
-	out[0] *= scalar;
-	out[1] *= scalar;
-	out[2] *= scalar;
-	out[3] *= scalar;
-	out[4] *= scalar;
-	out[5] *= scalar;
-	out[6] *= scalar;
-	out[7] *= scalar;
-	}
-
-#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
-#define two105 (((limb)1) << 105)
-#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
+{
+    out[0] *= scalar;
+    out[1] *= scalar;
+    out[2] *= scalar;
+    out[3] *= scalar;
+    out[4] *= scalar;
+    out[5] *= scalar;
+    out[6] *= scalar;
+    out[7] *= scalar;
+}
+
+# define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
+# define two105 (((limb)1) << 105)
+# define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
 
 /* zero105 is 0 mod p */
-static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
+static const felem zero105 =
+    { two105m41m9, two105, two105m41p9, two105m41p9 };
 
 /*-
  * smallfelem_neg sets |out| to |-small|
@@ -257,13 +264,13 @@ static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
  *   out[i] < out[i] + 2^105
  */
 static void smallfelem_neg(felem out, const smallfelem small)
-	{
-	/* In order to prevent underflow, we subtract from 0 mod p. */
-	out[0] = zero105[0] - small[0];
-	out[1] = zero105[1] - small[1];
-	out[2] = zero105[2] - small[2];
-	out[3] = zero105[3] - small[3];
-	}
+{
+    /* In order to prevent underflow, we subtract from 0 mod p. */
+    out[0] = zero105[0] - small[0];
+    out[1] = zero105[1] - small[1];
+    out[2] = zero105[2] - small[2];
+    out[3] = zero105[3] - small[3];
+}
 
 /*-
  * felem_diff subtracts |in| from |out|
@@ -273,25 +280,28 @@ static void smallfelem_neg(felem out, const smallfelem small)
  *   out[i] < out[i] + 2^105
  */
 static void felem_diff(felem out, const felem in)
-	{
-	/* In order to prevent underflow, we add 0 mod p before subtracting. */
-	out[0] += zero105[0];
-	out[1] += zero105[1];
-	out[2] += zero105[2];
-	out[3] += zero105[3];
-
-	out[0] -= in[0];
-	out[1] -= in[1];
-	out[2] -= in[2];
-	out[3] -= in[3];
-	}
-
-#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
-#define two107 (((limb)1) << 107)
-#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
+{
+    /*
+     * In order to prevent underflow, we add 0 mod p before subtracting.
+     */
+    out[0] += zero105[0];
+    out[1] += zero105[1];
+    out[2] += zero105[2];
+    out[3] += zero105[3];
+
+    out[0] -= in[0];
+    out[1] -= in[1];
+    out[2] -= in[2];
+    out[3] -= in[3];
+}
+
+# define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
+# define two107 (((limb)1) << 107)
+# define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
 
 /* zero107 is 0 mod p */
-static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
+static const felem zero107 =
+    { two107m43m11, two107, two107m43p11, two107m43p11 };
 
 /*-
  * An alternative felem_diff for larger inputs |in|
@@ -302,18 +312,20 @@ static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11
  *   out[i] < out[i] + 2^107
  */
 static void felem_diff_zero107(felem out, const felem in)
-	{
-	/* In order to prevent underflow, we add 0 mod p before subtracting. */
-	out[0] += zero107[0];
-	out[1] += zero107[1];
-	out[2] += zero107[2];
-	out[3] += zero107[3];
-
-	out[0] -= in[0];
-	out[1] -= in[1];
-	out[2] -= in[2];
-	out[3] -= in[3];
-	}
+{
+    /*
+     * In order to prevent underflow, we add 0 mod p before subtracting.
+     */
+    out[0] += zero107[0];
+    out[1] += zero107[1];
+    out[2] += zero107[2];
+    out[3] += zero107[3];
+
+    out[0] -= in[0];
+    out[1] -= in[1];
+    out[2] -= in[2];
+    out[3] -= in[3];
+}
 
 /*-
  * longfelem_diff subtracts |in| from |out|
@@ -323,38 +335,41 @@ static void felem_diff_zero107(felem out, const felem in)
  *   out[i] < out[i] + 2^70 + 2^40
  */
 static void longfelem_diff(longfelem out, const longfelem in)
-	{
-	static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
-	static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
-	static const limb two70 = (((limb)1) << 70);
-	static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
-	static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
-
-	/* add 0 mod p to avoid underflow */
-	out[0] += two70m8p6;
-	out[1] += two70p40;
-	out[2] += two70;
-	out[3] += two70m40m38p6;
-	out[4] += two70m6;
-	out[5] += two70m6;
-	out[6] += two70m6;
-	out[7] += two70m6;
-
-	/* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
-	out[0] -= in[0];
-	out[1] -= in[1];
-	out[2] -= in[2];
-	out[3] -= in[3];
-	out[4] -= in[4];
-	out[5] -= in[5];
-	out[6] -= in[6];
-	out[7] -= in[7];
-	}
-
-#define two64m0 (((limb)1) << 64) - 1
-#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
-#define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
-#define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
+{
+    static const limb two70m8p6 =
+        (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
+    static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
+    static const limb two70 = (((limb) 1) << 70);
+    static const limb two70m40m38p6 =
+        (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
+        (((limb) 1) << 6);
+    static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
+
+    /* add 0 mod p to avoid underflow */
+    out[0] += two70m8p6;
+    out[1] += two70p40;
+    out[2] += two70;
+    out[3] += two70m40m38p6;
+    out[4] += two70m6;
+    out[5] += two70m6;
+    out[6] += two70m6;
+    out[7] += two70m6;
+
+    /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
+    out[0] -= in[0];
+    out[1] -= in[1];
+    out[2] -= in[2];
+    out[3] -= in[3];
+    out[4] -= in[4];
+    out[5] -= in[5];
+    out[6] -= in[6];
+    out[7] -= in[7];
+}
+
+# define two64m0 (((limb)1) << 64) - 1
+# define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
+# define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
+# define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
 
 /* zero110 is 0 mod p */
 static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
@@ -369,96 +384,104 @@ static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
  *   out[i] < 2^64
  */
 static void felem_shrink(smallfelem out, const felem in)
-	{
-	felem tmp;
-	u64 a, b, mask;
-	s64 high, low;
-	static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
-
-	/* Carry 2->3 */
-	tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
-	/* tmp[3] < 2^110 */
-
-	tmp[2] = zero110[2] + (u64) in[2];
-	tmp[0] = zero110[0] + in[0];
-	tmp[1] = zero110[1] + in[1];
-	/* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
-
-	/* We perform two partial reductions where we eliminate the
-	 * high-word of tmp[3]. We don't update the other words till the end.
-	 */
-	a = tmp[3] >> 64; /* a < 2^46 */
-	tmp[3] = (u64) tmp[3];
-	tmp[3] -= a;
-	tmp[3] += ((limb)a) << 32;
-	/* tmp[3] < 2^79 */
-
-	b = a;
-	a = tmp[3] >> 64; /* a < 2^15 */
-	b += a; /* b < 2^46 + 2^15 < 2^47 */
-	tmp[3] = (u64) tmp[3];
-	tmp[3] -= a;
-	tmp[3] += ((limb)a) << 32;
-	/* tmp[3] < 2^64 + 2^47 */
-
-	/* This adjusts the other two words to complete the two partial
-	 * reductions. */
-	tmp[0] += b;
-	tmp[1] -= (((limb)b) << 32);
-
-	/* In order to make space in tmp[3] for the carry from 2 -> 3, we
-	 * conditionally subtract kPrime if tmp[3] is large enough. */
-	high = tmp[3] >> 64;
-	/* As tmp[3] < 2^65, high is either 1 or 0 */
-	high <<= 63;
-	high >>= 63;
-	/*-
-	 * high is:
-	 *   all ones   if the high word of tmp[3] is 1
-	 *   all zeros  if the high word of tmp[3] if 0 */
-	low = tmp[3];
-	mask = low >> 63;
-	/*-
-	 * mask is:
-	 *   all ones   if the MSB of low is 1
-	 *   all zeros  if the MSB of low if 0 */
-	low &= bottom63bits;
-	low -= kPrime3Test;
-	/* if low was greater than kPrime3Test then the MSB is zero */
-	low = ~low;
-	low >>= 63;
-	/*-
-	 * low is:
-	 *   all ones   if low was > kPrime3Test
-	 *   all zeros  if low was <= kPrime3Test */
-	mask = (mask & low) | high;
-	tmp[0] -= mask & kPrime[0];
-	tmp[1] -= mask & kPrime[1];
-	/* kPrime[2] is zero, so omitted */
-	tmp[3] -= mask & kPrime[3];
-	/* tmp[3] < 2**64 - 2**32 + 1 */
-
-	tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
-	tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
-	tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
-	/* tmp[i] < 2^64 */
-
-	out[0] = tmp[0];
-	out[1] = tmp[1];
-	out[2] = tmp[2];
-	out[3] = tmp[3];
-	}
+{
+    felem tmp;
+    u64 a, b, mask;
+    s64 high, low;
+    static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
+
+    /* Carry 2->3 */
+    tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
+    /* tmp[3] < 2^110 */
+
+    tmp[2] = zero110[2] + (u64)in[2];
+    tmp[0] = zero110[0] + in[0];
+    tmp[1] = zero110[1] + in[1];
+    /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
+
+    /*
+     * We perform two partial reductions where we eliminate the high-word of
+     * tmp[3]. We don't update the other words till the end.
+     */
+    a = tmp[3] >> 64;           /* a < 2^46 */
+    tmp[3] = (u64)tmp[3];
+    tmp[3] -= a;
+    tmp[3] += ((limb) a) << 32;
+    /* tmp[3] < 2^79 */
+
+    b = a;
+    a = tmp[3] >> 64;           /* a < 2^15 */
+    b += a;                     /* b < 2^46 + 2^15 < 2^47 */
+    tmp[3] = (u64)tmp[3];
+    tmp[3] -= a;
+    tmp[3] += ((limb) a) << 32;
+    /* tmp[3] < 2^64 + 2^47 */
+
+    /*
+     * This adjusts the other two words to complete the two partial
+     * reductions.
+     */
+    tmp[0] += b;
+    tmp[1] -= (((limb) b) << 32);
+
+    /*
+     * In order to make space in tmp[3] for the carry from 2 -> 3, we
+     * conditionally subtract kPrime if tmp[3] is large enough.
+     */
+    high = tmp[3] >> 64;
+    /* As tmp[3] < 2^65, high is either 1 or 0 */
+    high <<= 63;
+    high >>= 63;
+        /*-
+         * high is:
+         *   all ones   if the high word of tmp[3] is 1
+         *   all zeros  if the high word of tmp[3] is 0 */
+    low = tmp[3];
+    mask = low >> 63;
+        /*-
+         * mask is:
+         *   all ones   if the MSB of low is 1
+         *   all zeros  if the MSB of low is 0 */
+    low &= bottom63bits;
+    low -= kPrime3Test;
+    /* if low was greater than kPrime3Test then the MSB is zero */
+    low = ~low;
+    low >>= 63;
+        /*-
+         * low is:
+         *   all ones   if low was > kPrime3Test
+         *   all zeros  if low was <= kPrime3Test */
+    mask = (mask & low) | high;
+    tmp[0] -= mask & kPrime[0];
+    tmp[1] -= mask & kPrime[1];
+    /* kPrime[2] is zero, so omitted */
+    tmp[3] -= mask & kPrime[3];
+    /* tmp[3] < 2**64 - 2**32 + 1 */
+
+    tmp[1] += ((u64)(tmp[0] >> 64));
+    tmp[0] = (u64)tmp[0];
+    tmp[2] += ((u64)(tmp[1] >> 64));
+    tmp[1] = (u64)tmp[1];
+    tmp[3] += ((u64)(tmp[2] >> 64));
+    tmp[2] = (u64)tmp[2];
+    /* tmp[i] < 2^64 */
+
+    out[0] = tmp[0];
+    out[1] = tmp[1];
+    out[2] = tmp[2];
+    out[3] = tmp[3];
+}
 
 /* smallfelem_expand converts a smallfelem to an felem */
 static void smallfelem_expand(felem out, const smallfelem in)
-	{
-	out[0] = in[0];
-	out[1] = in[1];
-	out[2] = in[2];
-	out[3] = in[3];
-	}
-
-/*- 
+{
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+}
+
+/*-
  * smallfelem_square sets |out| = |small|^2
  * On entry:
  *   small[i] < 2^64
@@ -466,76 +489,76 @@ static void smallfelem_expand(felem out, const smallfelem in)
  *   out[i] < 7 * 2^64 < 2^67
  */
 static void smallfelem_square(longfelem out, const smallfelem small)
-	{
-	limb a;
-	u64 high, low;
-
-	a = ((uint128_t) small[0]) * small[0];
-	low = a;
-	high = a >> 64;
-	out[0] = low;
-	out[1] = high;
-
-	a = ((uint128_t) small[0]) * small[1];
-	low = a;
-	high = a >> 64;
-	out[1] += low;
-	out[1] += low;
-	out[2] = high;
-
-	a = ((uint128_t) small[0]) * small[2];
-	low = a;
-	high = a >> 64;
-	out[2] += low;
-	out[2] *= 2;
-	out[3] = high;
-
-	a = ((uint128_t) small[0]) * small[3];
-	low = a;
-	high = a >> 64;
-	out[3] += low;
-	out[4] = high;
-
-	a = ((uint128_t) small[1]) * small[2];
-	low = a;
-	high = a >> 64;
-	out[3] += low;
-	out[3] *= 2;
-	out[4] += high;
-
-	a = ((uint128_t) small[1]) * small[1];
-	low = a;
-	high = a >> 64;
-	out[2] += low;
-	out[3] += high;
-
-	a = ((uint128_t) small[1]) * small[3];
-	low = a;
-	high = a >> 64;
-	out[4] += low;
-	out[4] *= 2;
-	out[5] = high;
-
-	a = ((uint128_t) small[2]) * small[3];
-	low = a;
-	high = a >> 64;
-	out[5] += low;
-	out[5] *= 2;
-	out[6] = high;
-	out[6] += high;
-
-	a = ((uint128_t) small[2]) * small[2];
-	low = a;
-	high = a >> 64;
-	out[4] += low;
-	out[5] += high;
-
-	a = ((uint128_t) small[3]) * small[3];
-	low = a;
-	high = a >> 64;
-	out[6] += low;
-	out[7] = high;
-	}
+{
+    limb a;
+    u64 high, low;
+
+    a = ((uint128_t) small[0]) * small[0];
+    low = a;
+    high = a >> 64;
+    out[0] = low;
+    out[1] = high;
+
+    a = ((uint128_t) small[0]) * small[1];
+    low = a;
+    high = a >> 64;
+    out[1] += low;
+    out[1] += low;
+    out[2] = high;
+
+    a = ((uint128_t) small[0]) * small[2];
+    low = a;
+    high = a >> 64;
+    out[2] += low;
+    out[2] *= 2;
+    out[3] = high;
+
+    a = ((uint128_t) small[0]) * small[3];
+    low = a;
+    high = a >> 64;
+    out[3] += low;
+    out[4] = high;
+
+    a = ((uint128_t) small[1]) * small[2];
+    low = a;
+    high = a >> 64;
+    out[3] += low;
+    out[3] *= 2;
+    out[4] += high;
+
+    a = ((uint128_t) small[1]) * small[1];
+    low = a;
+    high = a >> 64;
+    out[2] += low;
+    out[3] += high;
+
+    a = ((uint128_t) small[1]) * small[3];
+    low = a;
+    high = a >> 64;
+    out[4] += low;
+    out[4] *= 2;
+    out[5] = high;
+
+    a = ((uint128_t) small[2]) * small[3];
+    low = a;
+    high = a >> 64;
+    out[5] += low;
+    out[5] *= 2;
+    out[6] = high;
+    out[6] += high;
+
+    a = ((uint128_t) small[2]) * small[2];
+    low = a;
+    high = a >> 64;
+    out[4] += low;
+    out[5] += high;
+
+    a = ((uint128_t) small[3]) * small[3];
+    low = a;
+    high = a >> 64;
+    out[6] += low;
+    out[7] = high;
+}
 
 /*-
  * felem_square sets |out| = |in|^2
@@ -545,11 +568,11 @@ static void smallfelem_square(longfelem out, const smallfelem small)
  *   out[i] < 7 * 2^64 < 2^67
  */
 static void felem_square(longfelem out, const felem in)
-	{
-	u64 small[4];
-	felem_shrink(small, in);
-	smallfelem_square(out, small);
-	}
+{
+    u64 small[4];
+    felem_shrink(small, in);
+    smallfelem_square(out, small);
+}
 
 /*-
  * smallfelem_mul sets |out| = |small1| * |small2|
@@ -559,113 +582,108 @@ static void felem_square(longfelem out, const felem in)
  * On exit:
  *   out[i] < 7 * 2^64 < 2^67
  */
-static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
-	{
-	limb a;
-	u64 high, low;
-
-	a = ((uint128_t) small1[0]) * small2[0];
-	low = a;
-	high = a >> 64;
-	out[0] = low;
-	out[1] = high;
-
-
-	a = ((uint128_t) small1[0]) * small2[1];
-	low = a;
-	high = a >> 64;
-	out[1] += low;
-	out[2] = high;
-
-	a = ((uint128_t) small1[1]) * small2[0];
-	low = a;
-	high = a >> 64;
-	out[1] += low;
-	out[2] += high;
-
-
-	a = ((uint128_t) small1[0]) * small2[2];
-	low = a;
-	high = a >> 64;
-	out[2] += low;
-	out[3] = high;
-
-	a = ((uint128_t) small1[1]) * small2[1];
-	low = a;
-	high = a >> 64;
-	out[2] += low;
-	out[3] += high;
-
-	a = ((uint128_t) small1[2]) * small2[0];
-	low = a;
-	high = a >> 64;
-	out[2] += low;
-	out[3] += high;
-
-
-	a = ((uint128_t) small1[0]) * small2[3];
-	low = a;
-	high = a >> 64;
-	out[3] += low;
-	out[4] = high;
-
-	a = ((uint128_t) small1[1]) * small2[2];
-	low = a;
-	high = a >> 64;
-	out[3] += low;
-	out[4] += high;
-
-	a = ((uint128_t) small1[2]) * small2[1];
-	low = a;
-	high = a >> 64;
-	out[3] += low;
-	out[4] += high;
-
-	a = ((uint128_t) small1[3]) * small2[0];
-	low = a;
-	high = a >> 64;
-	out[3] += low;
-	out[4] += high;
-
-
-	a = ((uint128_t) small1[1]) * small2[3];
-	low = a;
-	high = a >> 64;
-	out[4] += low;
-	out[5] = high;
-
-	a = ((uint128_t) small1[2]) * small2[2];
-	low = a;
-	high = a >> 64;
-	out[4] += low;
-	out[5] += high;
-
-	a = ((uint128_t) small1[3]) * small2[1];
-	low = a;
-	high = a >> 64;
-	out[4] += low;
-	out[5] += high;
-
-
-	a = ((uint128_t) small1[2]) * small2[3];
-	low = a;
-	high = a >> 64;
-	out[5] += low;
-	out[6] = high;
-
-	a = ((uint128_t) small1[3]) * small2[2];
-	low = a;
-	high = a >> 64;
-	out[5] += low;
-	out[6] += high;
-
-
-	a = ((uint128_t) small1[3]) * small2[3];
-	low = a;
-	high = a >> 64;
-	out[6] += low;
-	out[7] = high;
-	}
+static void smallfelem_mul(longfelem out, const smallfelem small1,
+                           const smallfelem small2)
+{
+    limb a;
+    u64 high, low;
+
+    a = ((uint128_t) small1[0]) * small2[0];
+    low = a;
+    high = a >> 64;
+    out[0] = low;
+    out[1] = high;
+
+    a = ((uint128_t) small1[0]) * small2[1];
+    low = a;
+    high = a >> 64;
+    out[1] += low;
+    out[2] = high;
+
+    a = ((uint128_t) small1[1]) * small2[0];
+    low = a;
+    high = a >> 64;
+    out[1] += low;
+    out[2] += high;
+
+    a = ((uint128_t) small1[0]) * small2[2];
+    low = a;
+    high = a >> 64;
+    out[2] += low;
+    out[3] = high;
+
+    a = ((uint128_t) small1[1]) * small2[1];
+    low = a;
+    high = a >> 64;
+    out[2] += low;
+    out[3] += high;
+
+    a = ((uint128_t) small1[2]) * small2[0];
+    low = a;
+    high = a >> 64;
+    out[2] += low;
+    out[3] += high;
+
+    a = ((uint128_t) small1[0]) * small2[3];
+    low = a;
+    high = a >> 64;
+    out[3] += low;
+    out[4] = high;
+
+    a = ((uint128_t) small1[1]) * small2[2];
+    low = a;
+    high = a >> 64;
+    out[3] += low;
+    out[4] += high;
+
+    a = ((uint128_t) small1[2]) * small2[1];
+    low = a;
+    high = a >> 64;
+    out[3] += low;
+    out[4] += high;
+
+    a = ((uint128_t) small1[3]) * small2[0];
+    low = a;
+    high = a >> 64;
+    out[3] += low;
+    out[4] += high;
+
+    a = ((uint128_t) small1[1]) * small2[3];
+    low = a;
+    high = a >> 64;
+    out[4] += low;
+    out[5] = high;
+
+    a = ((uint128_t) small1[2]) * small2[2];
+    low = a;
+    high = a >> 64;
+    out[4] += low;
+    out[5] += high;
+
+    a = ((uint128_t) small1[3]) * small2[1];
+    low = a;
+    high = a >> 64;
+    out[4] += low;
+    out[5] += high;
+
+    a = ((uint128_t) small1[2]) * small2[3];
+    low = a;
+    high = a >> 64;
+    out[5] += low;
+    out[6] = high;
+
+    a = ((uint128_t) small1[3]) * small2[2];
+    low = a;
+    high = a >> 64;
+    out[5] += low;
+    out[6] += high;
+
+    a = ((uint128_t) small1[3]) * small2[3];
+    low = a;
+    high = a >> 64;
+    out[6] += low;
+    out[7] = high;
+}
 
 /*-
  * felem_mul sets |out| = |in1| * |in2|
@@ -676,12 +694,12 @@ static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfe
  *   out[i] < 7 * 2^64 < 2^67
  */
 static void felem_mul(longfelem out, const felem in1, const felem in2)
-	{
-	smallfelem small1, small2;
-	felem_shrink(small1, in1);
-	felem_shrink(small2, in2);
-	smallfelem_mul(out, small1, small2);
-	}
+{
+    smallfelem small1, small2;
+    felem_shrink(small1, in1);
+    felem_shrink(small2, in2);
+    smallfelem_mul(out, small1, small2);
+}
 
 /*-
  * felem_small_mul sets |out| = |small1| * |in2|
@@ -691,24 +709,26 @@ static void felem_mul(longfelem out, const felem in1, const felem in2)
  * On exit:
  *   out[i] < 7 * 2^64 < 2^67
  */
-static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
-	{
-	smallfelem small2;
-	felem_shrink(small2, in2);
-	smallfelem_mul(out, small1, small2);
-	}
-
-#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
-#define two100 (((limb)1) << 100)
-#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
+static void felem_small_mul(longfelem out, co
author	Matt Caswell <matt@openssl.org>	2015-01-22 03:40:55 +0000
committer	Matt Caswell <matt@openssl.org>	2015-01-22 09:20:09 +0000
commit	0f113f3ee4d629ef9a4a30911b22b224772085e5 (patch)
tree	e014603da5aed1d0751f587a66d6e270b6bda3de /crypto/ec/ecp_nistp256.c
parent	22b52164aaed31d6e93dbd2d397ace041360e6aa (diff)