summaryrefslogtreecommitdiffstats
path: root/crypto/ec/ecp_nistp256.c
diff options
context:
space:
mode:
authorMatt Caswell <matt@openssl.org>2015-01-22 03:40:55 +0000
committerMatt Caswell <matt@openssl.org>2015-01-22 09:20:09 +0000
commit0f113f3ee4d629ef9a4a30911b22b224772085e5 (patch)
treee014603da5aed1d0751f587a66d6e270b6bda3de /crypto/ec/ecp_nistp256.c
parent22b52164aaed31d6e93dbd2d397ace041360e6aa (diff)
Run util/openssl-format-source -v -c .
Reviewed-by: Tim Hudson <tjh@openssl.org>
Diffstat (limited to 'crypto/ec/ecp_nistp256.c')
-rw-r--r--crypto/ec/ecp_nistp256.c3763
1 files changed, 1964 insertions, 1799 deletions
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c
index 0d20adc759..5a21a3c13d 100644
--- a/crypto/ec/ecp_nistp256.c
+++ b/crypto/ec/ecp_nistp256.c
@@ -29,54 +29,58 @@
#include <openssl/opensslconf.h>
#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
-#include <stdint.h>
-#include <string.h>
-#include <openssl/err.h>
-#include "ec_lcl.h"
+# include <stdint.h>
+# include <string.h>
+# include <openssl/err.h>
+# include "ec_lcl.h"
-#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
/* even with gcc, the typedef won't work for 32-bit platforms */
- typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
- typedef __int128_t int128_t;
-#else
- #error "Need GCC 3.1 or later to define type uint128_t"
-#endif
+typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
+ * platforms */
+typedef __int128_t int128_t;
+# else
+# error "Need GCC 3.1 or later to define type uint128_t"
+# endif
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int64_t s64;
-/* The underlying field.
- *
- * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
- * of this field into 32 bytes. We call this an felem_bytearray. */
+/*
+ * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
+ * can serialise an element of this field into 32 bytes. We call this an
+ * felem_bytearray.
+ */
typedef u8 felem_bytearray[32];
-/* These are the parameters of P256, taken from FIPS 186-3, page 86. These
- * values are big-endian. */
+/*
+ * These are the parameters of P256, taken from FIPS 186-3, page 86. These
+ * values are big-endian.
+ */
static const felem_bytearray nistp256_curve_params[5] = {
- {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
- {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
- 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
- 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
- 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
- {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
- 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
- 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
- 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
- {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
- 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
- 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
- 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
+ {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
+ {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
+ 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
+ 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
+ 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
+ {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
+ 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
+ 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
+ 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
+ {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
+ 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
+ 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
+ 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
};
/*-
@@ -100,7 +104,7 @@ static const felem_bytearray nistp256_curve_params[5] = {
* values are used as intermediate values before multiplication.
*/
-#define NLIMBS 4
+# define NLIMBS 4
typedef uint128_t limb;
typedef limb felem[NLIMBS];
@@ -108,72 +112,74 @@ typedef limb longfelem[NLIMBS * 2];
typedef u64 smallfelem[NLIMBS];
/* This is the value of the prime as four 64-bit words, little-endian. */
-static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
+static const u64 kPrime[4] =
+ { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
static const u64 bottom63bits = 0x7ffffffffffffffful;
-/* bin32_to_felem takes a little-endian byte array and converts it into felem
- * form. This assumes that the CPU is little-endian. */
+/*
+ * bin32_to_felem takes a little-endian byte array and converts it into felem
+ * form. This assumes that the CPU is little-endian.
+ */
static void bin32_to_felem(felem out, const u8 in[32])
- {
- out[0] = *((u64*) &in[0]);
- out[1] = *((u64*) &in[8]);
- out[2] = *((u64*) &in[16]);
- out[3] = *((u64*) &in[24]);
- }
-
-/* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian,
- * 32 byte array. This assumes that the CPU is little-endian. */
+{
+ out[0] = *((u64 *)&in[0]);
+ out[1] = *((u64 *)&in[8]);
+ out[2] = *((u64 *)&in[16]);
+ out[3] = *((u64 *)&in[24]);
+}
+
+/*
+ * smallfelem_to_bin32 takes a smallfelem and serialises into a little
+ * endian, 32 byte array. This assumes that the CPU is little-endian.
+ */
static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
- {
- *((u64*) &out[0]) = in[0];
- *((u64*) &out[8]) = in[1];
- *((u64*) &out[16]) = in[2];
- *((u64*) &out[24]) = in[3];
- }
+{
+ *((u64 *)&out[0]) = in[0];
+ *((u64 *)&out[8]) = in[1];
+ *((u64 *)&out[16]) = in[2];
+ *((u64 *)&out[24]) = in[3];
+}
/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
static void flip_endian(u8 *out, const u8 *in, unsigned len)
- {
- unsigned i;
- for (i = 0; i < len; ++i)
- out[i] = in[len-1-i];
- }
+{
+ unsigned i;
+ for (i = 0; i < len; ++i)
+ out[i] = in[len - 1 - i];
+}
/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
static int BN_to_felem(felem out, const BIGNUM *bn)
- {
- felem_bytearray b_in;
- felem_bytearray b_out;
- unsigned num_bytes;
-
- /* BN_bn2bin eats leading zeroes */
- memset(b_out, 0, sizeof b_out);
- num_bytes = BN_num_bytes(bn);
- if (num_bytes > sizeof b_out)
- {
- ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
- return 0;
- }
- if (BN_is_negative(bn))
- {
- ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
- return 0;
- }
- num_bytes = BN_bn2bin(bn, b_in);
- flip_endian(b_out, b_in, num_bytes);
- bin32_to_felem(out, b_out);
- return 1;
- }
+{
+ felem_bytearray b_in;
+ felem_bytearray b_out;
+ unsigned num_bytes;
+
+ /* BN_bn2bin eats leading zeroes */
+ memset(b_out, 0, sizeof b_out);
+ num_bytes = BN_num_bytes(bn);
+ if (num_bytes > sizeof b_out) {
+ ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
+ return 0;
+ }
+ if (BN_is_negative(bn)) {
+ ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
+ return 0;
+ }
+ num_bytes = BN_bn2bin(bn, b_in);
+ flip_endian(b_out, b_in, num_bytes);
+ bin32_to_felem(out, b_out);
+ return 1;
+}
/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
- {
- felem_bytearray b_in, b_out;
- smallfelem_to_bin32(b_in, in);
- flip_endian(b_out, b_in, sizeof b_out);
- return BN_bin2bn(b_out, sizeof b_out, out);
- }
-
+{
+ felem_bytearray b_in, b_out;
+ smallfelem_to_bin32(b_in, in);
+ flip_endian(b_out, b_in, sizeof b_out);
+ return BN_bin2bn(b_out, sizeof b_out, out);
+}
/*-
* Field operations
@@ -181,75 +187,76 @@ static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
*/
static void smallfelem_one(smallfelem out)
- {
- out[0] = 1;
- out[1] = 0;
- out[2] = 0;
- out[3] = 0;
- }
+{
+ out[0] = 1;
+ out[1] = 0;
+ out[2] = 0;
+ out[3] = 0;
+}
static void smallfelem_assign(smallfelem out, const smallfelem in)
- {
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
- }
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
static void felem_assign(felem out, const felem in)
- {
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
- }
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
/* felem_sum sets out = out + in. */
static void felem_sum(felem out, const felem in)
- {
- out[0] += in[0];
- out[1] += in[1];
- out[2] += in[2];
- out[3] += in[3];
- }
+{
+ out[0] += in[0];
+ out[1] += in[1];
+ out[2] += in[2];
+ out[3] += in[3];
+}
/* felem_small_sum sets out = out + in. */
static void felem_small_sum(felem out, const smallfelem in)
- {
- out[0] += in[0];
- out[1] += in[1];
- out[2] += in[2];
- out[3] += in[3];
- }
+{
+ out[0] += in[0];
+ out[1] += in[1];
+ out[2] += in[2];
+ out[3] += in[3];
+}
/* felem_scalar sets out = out * scalar */
static void felem_scalar(felem out, const u64 scalar)
- {
- out[0] *= scalar;
- out[1] *= scalar;
- out[2] *= scalar;
- out[3] *= scalar;
- }
+{
+ out[0] *= scalar;
+ out[1] *= scalar;
+ out[2] *= scalar;
+ out[3] *= scalar;
+}
/* longfelem_scalar sets out = out * scalar */
static void longfelem_scalar(longfelem out, const u64 scalar)
- {
- out[0] *= scalar;
- out[1] *= scalar;
- out[2] *= scalar;
- out[3] *= scalar;
- out[4] *= scalar;
- out[5] *= scalar;
- out[6] *= scalar;
- out[7] *= scalar;
- }
-
-#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
-#define two105 (((limb)1) << 105)
-#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
+{
+ out[0] *= scalar;
+ out[1] *= scalar;
+ out[2] *= scalar;
+ out[3] *= scalar;
+ out[4] *= scalar;
+ out[5] *= scalar;
+ out[6] *= scalar;
+ out[7] *= scalar;
+}
+
+# define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
+# define two105 (((limb)1) << 105)
+# define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
/* zero105 is 0 mod p */
-static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
+static const felem zero105 =
+ { two105m41m9, two105, two105m41p9, two105m41p9 };
/*-
* smallfelem_neg sets |out| to |-small|
@@ -257,13 +264,13 @@ static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
* out[i] < out[i] + 2^105
*/
static void smallfelem_neg(felem out, const smallfelem small)
- {
- /* In order to prevent underflow, we subtract from 0 mod p. */
- out[0] = zero105[0] - small[0];
- out[1] = zero105[1] - small[1];
- out[2] = zero105[2] - small[2];
- out[3] = zero105[3] - small[3];
- }
+{
+ /* In order to prevent underflow, we subtract from 0 mod p. */
+ out[0] = zero105[0] - small[0];
+ out[1] = zero105[1] - small[1];
+ out[2] = zero105[2] - small[2];
+ out[3] = zero105[3] - small[3];
+}
/*-
* felem_diff subtracts |in| from |out|
@@ -273,25 +280,28 @@ static void smallfelem_neg(felem out, const smallfelem small)
* out[i] < out[i] + 2^105
*/
static void felem_diff(felem out, const felem in)
- {
- /* In order to prevent underflow, we add 0 mod p before subtracting. */
- out[0] += zero105[0];
- out[1] += zero105[1];
- out[2] += zero105[2];
- out[3] += zero105[3];
-
- out[0] -= in[0];
- out[1] -= in[1];
- out[2] -= in[2];
- out[3] -= in[3];
- }
-
-#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
-#define two107 (((limb)1) << 107)
-#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
+{
+ /*
+ * In order to prevent underflow, we add 0 mod p before subtracting.
+ */
+ out[0] += zero105[0];
+ out[1] += zero105[1];
+ out[2] += zero105[2];
+ out[3] += zero105[3];
+
+ out[0] -= in[0];
+ out[1] -= in[1];
+ out[2] -= in[2];
+ out[3] -= in[3];
+}
+
+# define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
+# define two107 (((limb)1) << 107)
+# define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
/* zero107 is 0 mod p */
-static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
+static const felem zero107 =
+ { two107m43m11, two107, two107m43p11, two107m43p11 };
/*-
* An alternative felem_diff for larger inputs |in|
@@ -302,18 +312,20 @@ static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11
* out[i] < out[i] + 2^107
*/
static void felem_diff_zero107(felem out, const felem in)
- {
- /* In order to prevent underflow, we add 0 mod p before subtracting. */
- out[0] += zero107[0];
- out[1] += zero107[1];
- out[2] += zero107[2];
- out[3] += zero107[3];
-
- out[0] -= in[0];
- out[1] -= in[1];
- out[2] -= in[2];
- out[3] -= in[3];
- }
+{
+ /*
+ * In order to prevent underflow, we add 0 mod p before subtracting.
+ */
+ out[0] += zero107[0];
+ out[1] += zero107[1];
+ out[2] += zero107[2];
+ out[3] += zero107[3];
+
+ out[0] -= in[0];
+ out[1] -= in[1];
+ out[2] -= in[2];
+ out[3] -= in[3];
+}
/*-
* longfelem_diff subtracts |in| from |out|
@@ -323,38 +335,41 @@ static void felem_diff_zero107(felem out, const felem in)
* out[i] < out[i] + 2^70 + 2^40
*/
static void longfelem_diff(longfelem out, const longfelem in)
- {
- static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
- static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
- static const limb two70 = (((limb)1) << 70);
- static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
- static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
-
- /* add 0 mod p to avoid underflow */
- out[0] += two70m8p6;
- out[1] += two70p40;
- out[2] += two70;
- out[3] += two70m40m38p6;
- out[4] += two70m6;
- out[5] += two70m6;
- out[6] += two70m6;
- out[7] += two70m6;
-
- /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
- out[0] -= in[0];
- out[1] -= in[1];
- out[2] -= in[2];
- out[3] -= in[3];
- out[4] -= in[4];
- out[5] -= in[5];
- out[6] -= in[6];
- out[7] -= in[7];
- }
-
-#define two64m0 (((limb)1) << 64) - 1
-#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
-#define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
-#define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
+{
+ static const limb two70m8p6 =
+ (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
+ static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
+ static const limb two70 = (((limb) 1) << 70);
+ static const limb two70m40m38p6 =
+ (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
+ (((limb) 1) << 6);
+ static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
+
+ /* add 0 mod p to avoid underflow */
+ out[0] += two70m8p6;
+ out[1] += two70p40;
+ out[2] += two70;
+ out[3] += two70m40m38p6;
+ out[4] += two70m6;
+ out[5] += two70m6;
+ out[6] += two70m6;
+ out[7] += two70m6;
+
+ /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
+ out[0] -= in[0];
+ out[1] -= in[1];
+ out[2] -= in[2];
+ out[3] -= in[3];
+ out[4] -= in[4];
+ out[5] -= in[5];
+ out[6] -= in[6];
+ out[7] -= in[7];
+}
+
+# define two64m0 (((limb)1) << 64) - 1
+# define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
+# define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
+# define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
/* zero110 is 0 mod p */
static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
@@ -369,96 +384,104 @@ static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
* out[i] < 2^64
*/
static void felem_shrink(smallfelem out, const felem in)
- {
- felem tmp;
- u64 a, b, mask;
- s64 high, low;
- static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
-
- /* Carry 2->3 */
- tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
- /* tmp[3] < 2^110 */
-
- tmp[2] = zero110[2] + (u64) in[2];
- tmp[0] = zero110[0] + in[0];
- tmp[1] = zero110[1] + in[1];
- /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
-
- /* We perform two partial reductions where we eliminate the
- * high-word of tmp[3]. We don't update the other words till the end.
- */
- a = tmp[3] >> 64; /* a < 2^46 */
- tmp[3] = (u64) tmp[3];
- tmp[3] -= a;
- tmp[3] += ((limb)a) << 32;
- /* tmp[3] < 2^79 */
-
- b = a;
- a = tmp[3] >> 64; /* a < 2^15 */
- b += a; /* b < 2^46 + 2^15 < 2^47 */
- tmp[3] = (u64) tmp[3];
- tmp[3] -= a;
- tmp[3] += ((limb)a) << 32;
- /* tmp[3] < 2^64 + 2^47 */
-
- /* This adjusts the other two words to complete the two partial
- * reductions. */
- tmp[0] += b;
- tmp[1] -= (((limb)b) << 32);
-
- /* In order to make space in tmp[3] for the carry from 2 -> 3, we
- * conditionally subtract kPrime if tmp[3] is large enough. */
- high = tmp[3] >> 64;
- /* As tmp[3] < 2^65, high is either 1 or 0 */
- high <<= 63;
- high >>= 63;
- /*-
- * high is:
- * all ones if the high word of tmp[3] is 1
- * all zeros if the high word of tmp[3] if 0 */
- low = tmp[3];
- mask = low >> 63;
- /*-
- * mask is:
- * all ones if the MSB of low is 1
- * all zeros if the MSB of low if 0 */
- low &= bottom63bits;
- low -= kPrime3Test;
- /* if low was greater than kPrime3Test then the MSB is zero */
- low = ~low;
- low >>= 63;
- /*-
- * low is:
- * all ones if low was > kPrime3Test
- * all zeros if low was <= kPrime3Test */
- mask = (mask & low) | high;
- tmp[0] -= mask & kPrime[0];
- tmp[1] -= mask & kPrime[1];
- /* kPrime[2] is zero, so omitted */
- tmp[3] -= mask & kPrime[3];
- /* tmp[3] < 2**64 - 2**32 + 1 */
-
- tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
- tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
- tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
- /* tmp[i] < 2^64 */
-
- out[0] = tmp[0];
- out[1] = tmp[1];
- out[2] = tmp[2];
- out[3] = tmp[3];
- }
+{
+ felem tmp;
+ u64 a, b, mask;
+ s64 high, low;
+ static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
+
+ /* Carry 2->3 */
+ tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
+ /* tmp[3] < 2^110 */
+
+ tmp[2] = zero110[2] + (u64)in[2];
+ tmp[0] = zero110[0] + in[0];
+ tmp[1] = zero110[1] + in[1];
+ /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
+
+ /*
+ * We perform two partial reductions where we eliminate the high-word of
+ * tmp[3]. We don't update the other words till the end.
+ */
+ a = tmp[3] >> 64; /* a < 2^46 */
+ tmp[3] = (u64)tmp[3];
+ tmp[3] -= a;
+ tmp[3] += ((limb) a) << 32;
+ /* tmp[3] < 2^79 */
+
+ b = a;
+ a = tmp[3] >> 64; /* a < 2^15 */
+ b += a; /* b < 2^46 + 2^15 < 2^47 */
+ tmp[3] = (u64)tmp[3];
+ tmp[3] -= a;
+ tmp[3] += ((limb) a) << 32;
+ /* tmp[3] < 2^64 + 2^47 */
+
+ /*
+ * This adjusts the other two words to complete the two partial
+ * reductions.
+ */
+ tmp[0] += b;
+ tmp[1] -= (((limb) b) << 32);
+
+ /*
+ * In order to make space in tmp[3] for the carry from 2 -> 3, we
+ * conditionally subtract kPrime if tmp[3] is large enough.
+ */
+ high = tmp[3] >> 64;
+ /* As tmp[3] < 2^65, high is either 1 or 0 */
+ high <<= 63;
+ high >>= 63;
+ /*-
+ * high is:
+ * all ones if the high word of tmp[3] is 1
+ * all zeros if the high word of tmp[3] if 0 */
+ low = tmp[3];
+ mask = low >> 63;
+ /*-
+ * mask is:
+ * all ones if the MSB of low is 1
+ * all zeros if the MSB of low if 0 */
+ low &= bottom63bits;
+ low -= kPrime3Test;
+ /* if low was greater than kPrime3Test then the MSB is zero */
+ low = ~low;
+ low >>= 63;
+ /*-
+ * low is:
+ * all ones if low was > kPrime3Test
+ * all zeros if low was <= kPrime3Test */
+ mask = (mask & low) | high;
+ tmp[0] -= mask & kPrime[0];
+ tmp[1] -= mask & kPrime[1];
+ /* kPrime[2] is zero, so omitted */
+ tmp[3] -= mask & kPrime[3];
+ /* tmp[3] < 2**64 - 2**32 + 1 */
+
+ tmp[1] += ((u64)(tmp[0] >> 64));
+ tmp[0] = (u64)tmp[0];
+ tmp[2] += ((u64)(tmp[1] >> 64));
+ tmp[1] = (u64)tmp[1];
+ tmp[3] += ((u64)(tmp[2] >> 64));
+ tmp[2] = (u64)tmp[2];
+ /* tmp[i] < 2^64 */
+
+ out[0] = tmp[0];
+ out[1] = tmp[1];
+ out[2] = tmp[2];
+ out[3] = tmp[3];
+}
/* smallfelem_expand converts a smallfelem to an felem */
static void smallfelem_expand(felem out, const smallfelem in)
- {
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
- }
-
-/*-
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+/*-
* smallfelem_square sets |out| = |small|^2
* On entry:
* small[i] < 2^64
@@ -466,76 +489,76 @@ static void smallfelem_expand(felem out, const smallfelem in)
* out[i] < 7 * 2^64 < 2^67
*/
static void smallfelem_square(longfelem out, const smallfelem small)
- {
- limb a;
- u64 high, low;
-
- a = ((uint128_t) small[0]) * small[0];
- low = a;
- high = a >> 64;
- out[0] = low;
- out[1] = high;
-
- a = ((uint128_t) small[0]) * small[1];
- low = a;
- high = a >> 64;
- out[1] += low;
- out[1] += low;
- out[2] = high;
-
- a = ((uint128_t) small[0]) * small[2];
- low = a;
- high = a >> 64;
- out[2] += low;
- out[2] *= 2;
- out[3] = high;
-
- a = ((uint128_t) small[0]) * small[3];
- low = a;
- high = a >> 64;
- out[3] += low;
- out[4] = high;
-
- a = ((uint128_t) small[1]) * small[2];
- low = a;
- high = a >> 64;
- out[3] += low;
- out[3] *= 2;
- out[4] += high;
-
- a = ((uint128_t) small[1]) * small[1];
- low = a;
- high = a >> 64;
- out[2] += low;
- out[3] += high;
-
- a = ((uint128_t) small[1]) * small[3];
- low = a;
- high = a >> 64;
- out[4] += low;
- out[4] *= 2;
- out[5] = high;
-
- a = ((uint128_t) small[2]) * small[3];
- low = a;
- high = a >> 64;
- out[5] += low;
- out[5] *= 2;
- out[6] = high;
- out[6] += high;
-
- a = ((uint128_t) small[2]) * small[2];
- low = a;
- high = a >> 64;
- out[4] += low;
- out[5] += high;
-
- a = ((uint128_t) small[3]) * small[3];
- low = a;
- high = a >> 64;
- out[6] += low;
- out[7] = high;
- }
+{
+ limb a;
+ u64 high, low;
+
+ a = ((uint128_t) small[0]) * small[0];
+ low = a;
+ high = a >> 64;
+ out[0] = low;
+ out[1] = high;
+
+ a = ((uint128_t) small[0]) * small[1];
+ low = a;
+ high = a >> 64;
+ out[1] += low;
+ out[1] += low;
+ out[2] = high;
+
+ a = ((uint128_t) small[0]) * small[2];
+ low = a;
+ high = a >> 64;
+ out[2] += low;
+ out[2] *= 2;
+ out[3] = high;
+
+ a = ((uint128_t) small[0]) * small[3];
+ low = a;
+ high = a >> 64;
+ out[3] += low;
+ out[4] = high;
+
+ a = ((uint128_t) small[1]) * small[2];
+ low = a;
+ high = a >> 64;
+ out[3] += low;
+ out[3] *= 2;
+ out[4] += high;
+
+ a = ((uint128_t) small[1]) * small[1];
+ low = a;
+ high = a >> 64;
+ out[2] += low;
+ out[3] += high;
+
+ a = ((uint128_t) small[1]) * small[3];
+ low = a;
+ high = a >> 64;
+ out[4] += low;
+ out[4] *= 2;
+ out[5] = high;
+
+ a = ((uint128_t) small[2]) * small[3];
+ low = a;
+ high = a >> 64;
+ out[5] += low;
+ out[5] *= 2;
+ out[6] = high;
+ out[6] += high;
+
+ a = ((uint128_t) small[2]) * small[2];
+ low = a;
+ high = a >> 64;
+ out[4] += low;
+ out[5] += high;
+
+ a = ((uint128_t) small[3]) * small[3];
+ low = a;
+ high = a >> 64;
+ out[6] += low;
+ out[7] = high;
+}
/*-
* felem_square sets |out| = |in|^2
@@ -545,11 +568,11 @@ static void smallfelem_square(longfelem out, const smallfelem small)
* out[i] < 7 * 2^64 < 2^67
*/
static void felem_square(longfelem out, const felem in)
- {
- u64 small[4];
- felem_shrink(small, in);
- smallfelem_square(out, small);
- }
+{
+ u64 small[4];
+ felem_shrink(small, in);
+ smallfelem_square(out, small);
+}
/*-
* smallfelem_mul sets |out| = |small1| * |small2|
@@ -559,113 +582,108 @@ static void felem_square(longfelem out, const felem in)
* On exit:
* out[i] < 7 * 2^64 < 2^67
*/
-static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
- {
- limb a;
- u64 high, low;
-
- a = ((uint128_t) small1[0]) * small2[0];
- low = a;
- high = a >> 64;
- out[0] = low;
- out[1] = high;
-
-
- a = ((uint128_t) small1[0]) * small2[1];
- low = a;
- high = a >> 64;
- out[1] += low;
- out[2] = high;
-
- a = ((uint128_t) small1[1]) * small2[0];
- low = a;
- high = a >> 64;
- out[1] += low;
- out[2] += high;
-
-
- a = ((uint128_t) small1[0]) * small2[2];
- low = a;
- high = a >> 64;
- out[2] += low;
- out[3] = high;
-
- a = ((uint128_t) small1[1]) * small2[1];
- low = a;
- high = a >> 64;
- out[2] += low;
- out[3] += high;
-
- a = ((uint128_t) small1[2]) * small2[0];
- low = a;
- high = a >> 64;
- out[2] += low;
- out[3] += high;
-
-
- a = ((uint128_t) small1[0]) * small2[3];
- low = a;
- high = a >> 64;
- out[3] += low;
- out[4] = high;
-
- a = ((uint128_t) small1[1]) * small2[2];
- low = a;
- high = a >> 64;
- out[3] += low;
- out[4] += high;
-
- a = ((uint128_t) small1[2]) * small2[1];
- low = a;
- high = a >> 64;
- out[3] += low;
- out[4] += high;
-
- a = ((uint128_t) small1[3]) * small2[0];
- low = a;
- high = a >> 64;
- out[3] += low;
- out[4] += high;
-
-
- a = ((uint128_t) small1[1]) * small2[3];
- low = a;
- high = a >> 64;
- out[4] += low;
- out[5] = high;
-
- a = ((uint128_t) small1[2]) * small2[2];
- low = a;
- high = a >> 64;
- out[4] += low;
- out[5] += high;
-
- a = ((uint128_t) small1[3]) * small2[1];
- low = a;
- high = a >> 64;
- out[4] += low;
- out[5] += high;
-
-
- a = ((uint128_t) small1[2]) * small2[3];
- low = a;
- high = a >> 64;
- out[5] += low;
- out[6] = high;
-
- a = ((uint128_t) small1[3]) * small2[2];
- low = a;
- high = a >> 64;
- out[5] += low;
- out[6] += high;
-
-
- a = ((uint128_t) small1[3]) * small2[3];
- low = a;
- high = a >> 64;
- out[6] += low;
- out[7] = high;
- }
+static void smallfelem_mul(longfelem out, const smallfelem small1,
+ const smallfelem small2)
+{
+ limb a;
+ u64 high, low;
+
+ a = ((uint128_t) small1[0]) * small2[0];
+ low = a;
+ high = a >> 64;
+ out[0] = low;
+ out[1] = high;
+
+ a = ((uint128_t) small1[0]) * small2[1];
+ low = a;
+ high = a >> 64;
+ out[1] += low;
+ out[2] = high;
+
+ a = ((uint128_t) small1[1]) * small2[0];
+ low = a;
+ high = a >> 64;
+ out[1] += low;
+ out[2] += high;
+
+ a = ((uint128_t) small1[0]) * small2[2];
+ low = a;
+ high = a >> 64;
+ out[2] += low;
+ out[3] = high;
+
+ a = ((uint128_t) small1[1]) * small2[1];
+ low = a;
+ high = a >> 64;
+ out[2] += low;
+ out[3] += high;
+
+ a = ((uint128_t) small1[2]) * small2[0];
+ low = a;
+ high = a >> 64;
+ out[2] += low;
+ out[3] += high;
+
+ a = ((uint128_t) small1[0]) * small2[3];
+ low = a;
+ high = a >> 64;
+ out[3] += low;
+ out[4] = high;
+
+ a = ((uint128_t) small1[1]) * small2[2];
+ low = a;
+ high = a >> 64;
+ out[3] += low;
+ out[4] += high;
+
+ a = ((uint128_t) small1[2]) * small2[1];
+ low = a;
+ high = a >> 64;
+ out[3] += low;
+ out[4] += high;
+
+ a = ((uint128_t) small1[3]) * small2[0];
+ low = a;
+ high = a >> 64;
+ out[3] += low;
+ out[4] += high;
+
+ a = ((uint128_t) small1[1]) * small2[3];
+ low = a;
+ high = a >> 64;
+ out[4] += low;
+ out[5] = high;
+
+ a = ((uint128_t) small1[2]) * small2[2];
+ low = a;
+ high = a >> 64;
+ out[4] += low;
+ out[5] += high;
+
+ a = ((uint128_t) small1[3]) * small2[1];
+ low = a;
+ high = a >> 64;
+ out[4] += low;
+ out[5] += high;
+
+ a = ((uint128_t) small1[2]) * small2[3];
+ low = a;
+ high = a >> 64;
+ out[5] += low;
+ out[6] = high;
+
+ a = ((uint128_t) small1[3]) * small2[2];
+ low = a;
+ high = a >> 64;
+ out[5] += low;
+ out[6] += high;
+
+ a = ((uint128_t) small1[3]) * small2[3];
+ low = a;
+ high = a >> 64;
+ out[6] += low;
+ out[7] = high;
+}
/*-
* felem_mul sets |out| = |in1| * |in2|
@@ -676,12 +694,12 @@ static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfe
* out[i] < 7 * 2^64 < 2^67
*/
static void felem_mul(longfelem out, const felem in1, const felem in2)
- {
- smallfelem small1, small2;
- felem_shrink(small1, in1);
- felem_shrink(small2, in2);
- smallfelem_mul(out, small1, small2);
- }
+{
+ smallfelem small1, small2;
+ felem_shrink(small1, in1);
+ felem_shrink(small2, in2);
+ smallfelem_mul(out, small1, small2);
+}
/*-
* felem_small_mul sets |out| = |small1| * |in2|
@@ -691,24 +709,26 @@ static void felem_mul(longfelem out, const felem in1, const felem in2)
* On exit:
* out[i] < 7 * 2^64 < 2^67
*/
-static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
- {
- smallfelem small2;
- felem_shrink(small2, in2);
- smallfelem_mul(out, small1, small2);
- }
-
-#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
-#define two100 (((limb)1) << 100)
-#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
+static void felem_small_mul(longfelem out, co