diff options
author | Matt Caswell <matt@openssl.org> | 2015-01-22 03:40:55 +0000 |
---|---|---|
committer | Matt Caswell <matt@openssl.org> | 2015-01-22 09:20:09 +0000 |
commit | 0f113f3ee4d629ef9a4a30911b22b224772085e5 (patch) | |
tree | e014603da5aed1d0751f587a66d6e270b6bda3de /crypto/ec/ecp_nistp256.c | |
parent | 22b52164aaed31d6e93dbd2d397ace041360e6aa (diff) |
Run util/openssl-format-source -v -c .
Reviewed-by: Tim Hudson <tjh@openssl.org>
Diffstat (limited to 'crypto/ec/ecp_nistp256.c')
-rw-r--r-- | crypto/ec/ecp_nistp256.c | 3763 |
1 files changed, 1964 insertions, 1799 deletions
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c index 0d20adc759..5a21a3c13d 100644 --- a/crypto/ec/ecp_nistp256.c +++ b/crypto/ec/ecp_nistp256.c @@ -29,54 +29,58 @@ #include <openssl/opensslconf.h> #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 -#include <stdint.h> -#include <string.h> -#include <openssl/err.h> -#include "ec_lcl.h" +# include <stdint.h> +# include <string.h> +# include <openssl/err.h> +# include "ec_lcl.h" -#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) /* even with gcc, the typedef won't work for 32-bit platforms */ - typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */ - typedef __int128_t int128_t; -#else - #error "Need GCC 3.1 or later to define type uint128_t" -#endif +typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit + * platforms */ +typedef __int128_t int128_t; +# else +# error "Need GCC 3.1 or later to define type uint128_t" +# endif typedef uint8_t u8; typedef uint32_t u32; typedef uint64_t u64; typedef int64_t s64; -/* The underlying field. - * - * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element - * of this field into 32 bytes. We call this an felem_bytearray. */ +/* + * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We + * can serialise an element of this field into 32 bytes. We call this an + * felem_bytearray. + */ typedef u8 felem_bytearray[32]; -/* These are the parameters of P256, taken from FIPS 186-3, page 86. These - * values are big-endian. */ +/* + * These are the parameters of P256, taken from FIPS 186-3, page 86. These + * values are big-endian. + */ static const felem_bytearray nistp256_curve_params[5] = { - {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */ - {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, - 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, - 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, - 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b}, - {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */ - 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, - 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, - 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96}, - {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */ - 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16, - 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, - 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5} + {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */ + {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, + 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, + 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, + 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b}, + {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */ + 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, + 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, + 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96}, + {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */ + 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16, + 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, + 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5} }; /*- @@ -100,7 +104,7 @@ static const felem_bytearray nistp256_curve_params[5] = { * values are used as intermediate values before multiplication. */ -#define NLIMBS 4 +# define NLIMBS 4 typedef uint128_t limb; typedef limb felem[NLIMBS]; @@ -108,72 +112,74 @@ typedef limb longfelem[NLIMBS * 2]; typedef u64 smallfelem[NLIMBS]; /* This is the value of the prime as four 64-bit words, little-endian. */ -static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul }; +static const u64 kPrime[4] = + { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul }; static const u64 bottom63bits = 0x7ffffffffffffffful; -/* bin32_to_felem takes a little-endian byte array and converts it into felem - * form. This assumes that the CPU is little-endian. */ +/* + * bin32_to_felem takes a little-endian byte array and converts it into felem + * form. This assumes that the CPU is little-endian. + */ static void bin32_to_felem(felem out, const u8 in[32]) - { - out[0] = *((u64*) &in[0]); - out[1] = *((u64*) &in[8]); - out[2] = *((u64*) &in[16]); - out[3] = *((u64*) &in[24]); - } - -/* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian, - * 32 byte array. This assumes that the CPU is little-endian. */ +{ + out[0] = *((u64 *)&in[0]); + out[1] = *((u64 *)&in[8]); + out[2] = *((u64 *)&in[16]); + out[3] = *((u64 *)&in[24]); +} + +/* + * smallfelem_to_bin32 takes a smallfelem and serialises into a little + * endian, 32 byte array. This assumes that the CPU is little-endian. + */ static void smallfelem_to_bin32(u8 out[32], const smallfelem in) - { - *((u64*) &out[0]) = in[0]; - *((u64*) &out[8]) = in[1]; - *((u64*) &out[16]) = in[2]; - *((u64*) &out[24]) = in[3]; - } +{ + *((u64 *)&out[0]) = in[0]; + *((u64 *)&out[8]) = in[1]; + *((u64 *)&out[16]) = in[2]; + *((u64 *)&out[24]) = in[3]; +} /* To preserve endianness when using BN_bn2bin and BN_bin2bn */ static void flip_endian(u8 *out, const u8 *in, unsigned len) - { - unsigned i; - for (i = 0; i < len; ++i) - out[i] = in[len-1-i]; - } +{ + unsigned i; + for (i = 0; i < len; ++i) + out[i] = in[len - 1 - i]; +} /* BN_to_felem converts an OpenSSL BIGNUM into an felem */ static int BN_to_felem(felem out, const BIGNUM *bn) - { - felem_bytearray b_in; - felem_bytearray b_out; - unsigned num_bytes; - - /* BN_bn2bin eats leading zeroes */ - memset(b_out, 0, sizeof b_out); - num_bytes = BN_num_bytes(bn); - if (num_bytes > sizeof b_out) - { - ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); - return 0; - } - if (BN_is_negative(bn)) - { - ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); - return 0; - } - num_bytes = BN_bn2bin(bn, b_in); - flip_endian(b_out, b_in, num_bytes); - bin32_to_felem(out, b_out); - return 1; - } +{ + felem_bytearray b_in; + felem_bytearray b_out; + unsigned num_bytes; + + /* BN_bn2bin eats leading zeroes */ + memset(b_out, 0, sizeof b_out); + num_bytes = BN_num_bytes(bn); + if (num_bytes > sizeof b_out) { + ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); + return 0; + } + if (BN_is_negative(bn)) { + ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); + return 0; + } + num_bytes = BN_bn2bin(bn, b_in); + flip_endian(b_out, b_in, num_bytes); + bin32_to_felem(out, b_out); + return 1; +} /* felem_to_BN converts an felem into an OpenSSL BIGNUM */ static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in) - { - felem_bytearray b_in, b_out; - smallfelem_to_bin32(b_in, in); - flip_endian(b_out, b_in, sizeof b_out); - return BN_bin2bn(b_out, sizeof b_out, out); - } - +{ + felem_bytearray b_in, b_out; + smallfelem_to_bin32(b_in, in); + flip_endian(b_out, b_in, sizeof b_out); + return BN_bin2bn(b_out, sizeof b_out, out); +} /*- * Field operations @@ -181,75 +187,76 @@ static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in) */ static void smallfelem_one(smallfelem out) - { - out[0] = 1; - out[1] = 0; - out[2] = 0; - out[3] = 0; - } +{ + out[0] = 1; + out[1] = 0; + out[2] = 0; + out[3] = 0; +} static void smallfelem_assign(smallfelem out, const smallfelem in) - { - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - } +{ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} static void felem_assign(felem out, const felem in) - { - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - } +{ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} /* felem_sum sets out = out + in. */ static void felem_sum(felem out, const felem in) - { - out[0] += in[0]; - out[1] += in[1]; - out[2] += in[2]; - out[3] += in[3]; - } +{ + out[0] += in[0]; + out[1] += in[1]; + out[2] += in[2]; + out[3] += in[3]; +} /* felem_small_sum sets out = out + in. */ static void felem_small_sum(felem out, const smallfelem in) - { - out[0] += in[0]; - out[1] += in[1]; - out[2] += in[2]; - out[3] += in[3]; - } +{ + out[0] += in[0]; + out[1] += in[1]; + out[2] += in[2]; + out[3] += in[3]; +} /* felem_scalar sets out = out * scalar */ static void felem_scalar(felem out, const u64 scalar) - { - out[0] *= scalar; - out[1] *= scalar; - out[2] *= scalar; - out[3] *= scalar; - } +{ + out[0] *= scalar; + out[1] *= scalar; + out[2] *= scalar; + out[3] *= scalar; +} /* longfelem_scalar sets out = out * scalar */ static void longfelem_scalar(longfelem out, const u64 scalar) - { - out[0] *= scalar; - out[1] *= scalar; - out[2] *= scalar; - out[3] *= scalar; - out[4] *= scalar; - out[5] *= scalar; - out[6] *= scalar; - out[7] *= scalar; - } - -#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9) -#define two105 (((limb)1) << 105) -#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9) +{ + out[0] *= scalar; + out[1] *= scalar; + out[2] *= scalar; + out[3] *= scalar; + out[4] *= scalar; + out[5] *= scalar; + out[6] *= scalar; + out[7] *= scalar; +} + +# define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9) +# define two105 (((limb)1) << 105) +# define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9) /* zero105 is 0 mod p */ -static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 }; +static const felem zero105 = + { two105m41m9, two105, two105m41p9, two105m41p9 }; /*- * smallfelem_neg sets |out| to |-small| @@ -257,13 +264,13 @@ static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 }; * out[i] < out[i] + 2^105 */ static void smallfelem_neg(felem out, const smallfelem small) - { - /* In order to prevent underflow, we subtract from 0 mod p. */ - out[0] = zero105[0] - small[0]; - out[1] = zero105[1] - small[1]; - out[2] = zero105[2] - small[2]; - out[3] = zero105[3] - small[3]; - } +{ + /* In order to prevent underflow, we subtract from 0 mod p. */ + out[0] = zero105[0] - small[0]; + out[1] = zero105[1] - small[1]; + out[2] = zero105[2] - small[2]; + out[3] = zero105[3] - small[3]; +} /*- * felem_diff subtracts |in| from |out| @@ -273,25 +280,28 @@ static void smallfelem_neg(felem out, const smallfelem small) * out[i] < out[i] + 2^105 */ static void felem_diff(felem out, const felem in) - { - /* In order to prevent underflow, we add 0 mod p before subtracting. */ - out[0] += zero105[0]; - out[1] += zero105[1]; - out[2] += zero105[2]; - out[3] += zero105[3]; - - out[0] -= in[0]; - out[1] -= in[1]; - out[2] -= in[2]; - out[3] -= in[3]; - } - -#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11) -#define two107 (((limb)1) << 107) -#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11) +{ + /* + * In order to prevent underflow, we add 0 mod p before subtracting. + */ + out[0] += zero105[0]; + out[1] += zero105[1]; + out[2] += zero105[2]; + out[3] += zero105[3]; + + out[0] -= in[0]; + out[1] -= in[1]; + out[2] -= in[2]; + out[3] -= in[3]; +} + +# define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11) +# define two107 (((limb)1) << 107) +# define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11) /* zero107 is 0 mod p */ -static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 }; +static const felem zero107 = + { two107m43m11, two107, two107m43p11, two107m43p11 }; /*- * An alternative felem_diff for larger inputs |in| @@ -302,18 +312,20 @@ static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 * out[i] < out[i] + 2^107 */ static void felem_diff_zero107(felem out, const felem in) - { - /* In order to prevent underflow, we add 0 mod p before subtracting. */ - out[0] += zero107[0]; - out[1] += zero107[1]; - out[2] += zero107[2]; - out[3] += zero107[3]; - - out[0] -= in[0]; - out[1] -= in[1]; - out[2] -= in[2]; - out[3] -= in[3]; - } +{ + /* + * In order to prevent underflow, we add 0 mod p before subtracting. + */ + out[0] += zero107[0]; + out[1] += zero107[1]; + out[2] += zero107[2]; + out[3] += zero107[3]; + + out[0] -= in[0]; + out[1] -= in[1]; + out[2] -= in[2]; + out[3] -= in[3]; +} /*- * longfelem_diff subtracts |in| from |out| @@ -323,38 +335,41 @@ static void felem_diff_zero107(felem out, const felem in) * out[i] < out[i] + 2^70 + 2^40 */ static void longfelem_diff(longfelem out, const longfelem in) - { - static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6); - static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40); - static const limb two70 = (((limb)1) << 70); - static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6); - static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6); - - /* add 0 mod p to avoid underflow */ - out[0] += two70m8p6; - out[1] += two70p40; - out[2] += two70; - out[3] += two70m40m38p6; - out[4] += two70m6; - out[5] += two70m6; - out[6] += two70m6; - out[7] += two70m6; - - /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */ - out[0] -= in[0]; - out[1] -= in[1]; - out[2] -= in[2]; - out[3] -= in[3]; - out[4] -= in[4]; - out[5] -= in[5]; - out[6] -= in[6]; - out[7] -= in[7]; - } - -#define two64m0 (((limb)1) << 64) - 1 -#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1 -#define two64m46 (((limb)1) << 64) - (((limb)1) << 46) -#define two64m32 (((limb)1) << 64) - (((limb)1) << 32) +{ + static const limb two70m8p6 = + (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6); + static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40); + static const limb two70 = (((limb) 1) << 70); + static const limb two70m40m38p6 = + (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) + + (((limb) 1) << 6); + static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6); + + /* add 0 mod p to avoid underflow */ + out[0] += two70m8p6; + out[1] += two70p40; + out[2] += two70; + out[3] += two70m40m38p6; + out[4] += two70m6; + out[5] += two70m6; + out[6] += two70m6; + out[7] += two70m6; + + /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */ + out[0] -= in[0]; + out[1] -= in[1]; + out[2] -= in[2]; + out[3] -= in[3]; + out[4] -= in[4]; + out[5] -= in[5]; + out[6] -= in[6]; + out[7] -= in[7]; +} + +# define two64m0 (((limb)1) << 64) - 1 +# define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1 +# define two64m46 (((limb)1) << 64) - (((limb)1) << 46) +# define two64m32 (((limb)1) << 64) - (((limb)1) << 32) /* zero110 is 0 mod p */ static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 }; @@ -369,96 +384,104 @@ static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 }; * out[i] < 2^64 */ static void felem_shrink(smallfelem out, const felem in) - { - felem tmp; - u64 a, b, mask; - s64 high, low; - static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */ - - /* Carry 2->3 */ - tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64)); - /* tmp[3] < 2^110 */ - - tmp[2] = zero110[2] + (u64) in[2]; - tmp[0] = zero110[0] + in[0]; - tmp[1] = zero110[1] + in[1]; - /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */ - - /* We perform two partial reductions where we eliminate the - * high-word of tmp[3]. We don't update the other words till the end. - */ - a = tmp[3] >> 64; /* a < 2^46 */ - tmp[3] = (u64) tmp[3]; - tmp[3] -= a; - tmp[3] += ((limb)a) << 32; - /* tmp[3] < 2^79 */ - - b = a; - a = tmp[3] >> 64; /* a < 2^15 */ - b += a; /* b < 2^46 + 2^15 < 2^47 */ - tmp[3] = (u64) tmp[3]; - tmp[3] -= a; - tmp[3] += ((limb)a) << 32; - /* tmp[3] < 2^64 + 2^47 */ - - /* This adjusts the other two words to complete the two partial - * reductions. */ - tmp[0] += b; - tmp[1] -= (((limb)b) << 32); - - /* In order to make space in tmp[3] for the carry from 2 -> 3, we - * conditionally subtract kPrime if tmp[3] is large enough. */ - high = tmp[3] >> 64; - /* As tmp[3] < 2^65, high is either 1 or 0 */ - high <<= 63; - high >>= 63; - /*- - * high is: - * all ones if the high word of tmp[3] is 1 - * all zeros if the high word of tmp[3] if 0 */ - low = tmp[3]; - mask = low >> 63; - /*- - * mask is: - * all ones if the MSB of low is 1 - * all zeros if the MSB of low if 0 */ - low &= bottom63bits; - low -= kPrime3Test; - /* if low was greater than kPrime3Test then the MSB is zero */ - low = ~low; - low >>= 63; - /*- - * low is: - * all ones if low was > kPrime3Test - * all zeros if low was <= kPrime3Test */ - mask = (mask & low) | high; - tmp[0] -= mask & kPrime[0]; - tmp[1] -= mask & kPrime[1]; - /* kPrime[2] is zero, so omitted */ - tmp[3] -= mask & kPrime[3]; - /* tmp[3] < 2**64 - 2**32 + 1 */ - - tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0]; - tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1]; - tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2]; - /* tmp[i] < 2^64 */ - - out[0] = tmp[0]; - out[1] = tmp[1]; - out[2] = tmp[2]; - out[3] = tmp[3]; - } +{ + felem tmp; + u64 a, b, mask; + s64 high, low; + static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */ + + /* Carry 2->3 */ + tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64)); + /* tmp[3] < 2^110 */ + + tmp[2] = zero110[2] + (u64)in[2]; + tmp[0] = zero110[0] + in[0]; + tmp[1] = zero110[1] + in[1]; + /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */ + + /* + * We perform two partial reductions where we eliminate the high-word of + * tmp[3]. We don't update the other words till the end. + */ + a = tmp[3] >> 64; /* a < 2^46 */ + tmp[3] = (u64)tmp[3]; + tmp[3] -= a; + tmp[3] += ((limb) a) << 32; + /* tmp[3] < 2^79 */ + + b = a; + a = tmp[3] >> 64; /* a < 2^15 */ + b += a; /* b < 2^46 + 2^15 < 2^47 */ + tmp[3] = (u64)tmp[3]; + tmp[3] -= a; + tmp[3] += ((limb) a) << 32; + /* tmp[3] < 2^64 + 2^47 */ + + /* + * This adjusts the other two words to complete the two partial + * reductions. + */ + tmp[0] += b; + tmp[1] -= (((limb) b) << 32); + + /* + * In order to make space in tmp[3] for the carry from 2 -> 3, we + * conditionally subtract kPrime if tmp[3] is large enough. + */ + high = tmp[3] >> 64; + /* As tmp[3] < 2^65, high is either 1 or 0 */ + high <<= 63; + high >>= 63; + /*- + * high is: + * all ones if the high word of tmp[3] is 1 + * all zeros if the high word of tmp[3] if 0 */ + low = tmp[3]; + mask = low >> 63; + /*- + * mask is: + * all ones if the MSB of low is 1 + * all zeros if the MSB of low if 0 */ + low &= bottom63bits; + low -= kPrime3Test; + /* if low was greater than kPrime3Test then the MSB is zero */ + low = ~low; + low >>= 63; + /*- + * low is: + * all ones if low was > kPrime3Test + * all zeros if low was <= kPrime3Test */ + mask = (mask & low) | high; + tmp[0] -= mask & kPrime[0]; + tmp[1] -= mask & kPrime[1]; + /* kPrime[2] is zero, so omitted */ + tmp[3] -= mask & kPrime[3]; + /* tmp[3] < 2**64 - 2**32 + 1 */ + + tmp[1] += ((u64)(tmp[0] >> 64)); + tmp[0] = (u64)tmp[0]; + tmp[2] += ((u64)(tmp[1] >> 64)); + tmp[1] = (u64)tmp[1]; + tmp[3] += ((u64)(tmp[2] >> 64)); + tmp[2] = (u64)tmp[2]; + /* tmp[i] < 2^64 */ + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; +} /* smallfelem_expand converts a smallfelem to an felem */ static void smallfelem_expand(felem out, const smallfelem in) - { - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - } - -/*- +{ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/*- * smallfelem_square sets |out| = |small|^2 * On entry: * small[i] < 2^64 @@ -466,76 +489,76 @@ static void smallfelem_expand(felem out, const smallfelem in) * out[i] < 7 * 2^64 < 2^67 */ static void smallfelem_square(longfelem out, const smallfelem small) - { - limb a; - u64 high, low; - - a = ((uint128_t) small[0]) * small[0]; - low = a; - high = a >> 64; - out[0] = low; - out[1] = high; - - a = ((uint128_t) small[0]) * small[1]; - low = a; - high = a >> 64; - out[1] += low; - out[1] += low; - out[2] = high; - - a = ((uint128_t) small[0]) * small[2]; - low = a; - high = a >> 64; - out[2] += low; - out[2] *= 2; - out[3] = high; - - a = ((uint128_t) small[0]) * small[3]; - low = a; - high = a >> 64; - out[3] += low; - out[4] = high; - - a = ((uint128_t) small[1]) * small[2]; - low = a; - high = a >> 64; - out[3] += low; - out[3] *= 2; - out[4] += high; - - a = ((uint128_t) small[1]) * small[1]; - low = a; - high = a >> 64; - out[2] += low; - out[3] += high; - - a = ((uint128_t) small[1]) * small[3]; - low = a; - high = a >> 64; - out[4] += low; - out[4] *= 2; - out[5] = high; - - a = ((uint128_t) small[2]) * small[3]; - low = a; - high = a >> 64; - out[5] += low; - out[5] *= 2; - out[6] = high; - out[6] += high; - - a = ((uint128_t) small[2]) * small[2]; - low = a; - high = a >> 64; - out[4] += low; - out[5] += high; - - a = ((uint128_t) small[3]) * small[3]; - low = a; - high = a >> 64; - out[6] += low; - out[7] = high; - } +{ + limb a; + u64 high, low; + + a = ((uint128_t) small[0]) * small[0]; + low = a; + high = a >> 64; + out[0] = low; + out[1] = high; + + a = ((uint128_t) small[0]) * small[1]; + low = a; + high = a >> 64; + out[1] += low; + out[1] += low; + out[2] = high; + + a = ((uint128_t) small[0]) * small[2]; + low = a; + high = a >> 64; + out[2] += low; + out[2] *= 2; + out[3] = high; + + a = ((uint128_t) small[0]) * small[3]; + low = a; + high = a >> 64; + out[3] += low; + out[4] = high; + + a = ((uint128_t) small[1]) * small[2]; + low = a; + high = a >> 64; + out[3] += low; + out[3] *= 2; + out[4] += high; + + a = ((uint128_t) small[1]) * small[1]; + low = a; + high = a >> 64; + out[2] += low; + out[3] += high; + + a = ((uint128_t) small[1]) * small[3]; + low = a; + high = a >> 64; + out[4] += low; + out[4] *= 2; + out[5] = high; + + a = ((uint128_t) small[2]) * small[3]; + low = a; + high = a >> 64; + out[5] += low; + out[5] *= 2; + out[6] = high; + out[6] += high; + + a = ((uint128_t) small[2]) * small[2]; + low = a; + high = a >> 64; + out[4] += low; + out[5] += high; + + a = ((uint128_t) small[3]) * small[3]; + low = a; + high = a >> 64; + out[6] += low; + out[7] = high; +} /*- * felem_square sets |out| = |in|^2 @@ -545,11 +568,11 @@ static void smallfelem_square(longfelem out, const smallfelem small) * out[i] < 7 * 2^64 < 2^67 */ static void felem_square(longfelem out, const felem in) - { - u64 small[4]; - felem_shrink(small, in); - smallfelem_square(out, small); - } +{ + u64 small[4]; + felem_shrink(small, in); + smallfelem_square(out, small); +} /*- * smallfelem_mul sets |out| = |small1| * |small2| @@ -559,113 +582,108 @@ static void felem_square(longfelem out, const felem in) * On exit: * out[i] < 7 * 2^64 < 2^67 */ -static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2) - { - limb a; - u64 high, low; - - a = ((uint128_t) small1[0]) * small2[0]; - low = a; - high = a >> 64; - out[0] = low; - out[1] = high; - - - a = ((uint128_t) small1[0]) * small2[1]; - low = a; - high = a >> 64; - out[1] += low; - out[2] = high; - - a = ((uint128_t) small1[1]) * small2[0]; - low = a; - high = a >> 64; - out[1] += low; - out[2] += high; - - - a = ((uint128_t) small1[0]) * small2[2]; - low = a; - high = a >> 64; - out[2] += low; - out[3] = high; - - a = ((uint128_t) small1[1]) * small2[1]; - low = a; - high = a >> 64; - out[2] += low; - out[3] += high; - - a = ((uint128_t) small1[2]) * small2[0]; - low = a; - high = a >> 64; - out[2] += low; - out[3] += high; - - - a = ((uint128_t) small1[0]) * small2[3]; - low = a; - high = a >> 64; - out[3] += low; - out[4] = high; - - a = ((uint128_t) small1[1]) * small2[2]; - low = a; - high = a >> 64; - out[3] += low; - out[4] += high; - - a = ((uint128_t) small1[2]) * small2[1]; - low = a; - high = a >> 64; - out[3] += low; - out[4] += high; - - a = ((uint128_t) small1[3]) * small2[0]; - low = a; - high = a >> 64; - out[3] += low; - out[4] += high; - - - a = ((uint128_t) small1[1]) * small2[3]; - low = a; - high = a >> 64; - out[4] += low; - out[5] = high; - - a = ((uint128_t) small1[2]) * small2[2]; - low = a; - high = a >> 64; - out[4] += low; - out[5] += high; - - a = ((uint128_t) small1[3]) * small2[1]; - low = a; - high = a >> 64; - out[4] += low; - out[5] += high; - - - a = ((uint128_t) small1[2]) * small2[3]; - low = a; - high = a >> 64; - out[5] += low; - out[6] = high; - - a = ((uint128_t) small1[3]) * small2[2]; - low = a; - high = a >> 64; - out[5] += low; - out[6] += high; - - - a = ((uint128_t) small1[3]) * small2[3]; - low = a; - high = a >> 64; - out[6] += low; - out[7] = high; - } +static void smallfelem_mul(longfelem out, const smallfelem small1, + const smallfelem small2) +{ + limb a; + u64 high, low; + + a = ((uint128_t) small1[0]) * small2[0]; + low = a; + high = a >> 64; + out[0] = low; + out[1] = high; + + a = ((uint128_t) small1[0]) * small2[1]; + low = a; + high = a >> 64; + out[1] += low; + out[2] = high; + + a = ((uint128_t) small1[1]) * small2[0]; + low = a; + high = a >> 64; + out[1] += low; + out[2] += high; + + a = ((uint128_t) small1[0]) * small2[2]; + low = a; + high = a >> 64; + out[2] += low; + out[3] = high; + + a = ((uint128_t) small1[1]) * small2[1]; + low = a; + high = a >> 64; + out[2] += low; + out[3] += high; + + a = ((uint128_t) small1[2]) * small2[0]; + low = a; + high = a >> 64; + out[2] += low; + out[3] += high; + + a = ((uint128_t) small1[0]) * small2[3]; + low = a; + high = a >> 64; + out[3] += low; + out[4] = high; + + a = ((uint128_t) small1[1]) * small2[2]; + low = a; + high = a >> 64; + out[3] += low; + out[4] += high; + + a = ((uint128_t) small1[2]) * small2[1]; + low = a; + high = a >> 64; + out[3] += low; + out[4] += high; + + a = ((uint128_t) small1[3]) * small2[0]; + low = a; + high = a >> 64; + out[3] += low; + out[4] += high; + + a = ((uint128_t) small1[1]) * small2[3]; + low = a; + high = a >> 64; + out[4] += low; + out[5] = high; + + a = ((uint128_t) small1[2]) * small2[2]; + low = a; + high = a >> 64; + out[4] += low; + out[5] += high; + + a = ((uint128_t) small1[3]) * small2[1]; + low = a; + high = a >> 64; + out[4] += low; + out[5] += high; + + a = ((uint128_t) small1[2]) * small2[3]; + low = a; + high = a >> 64; + out[5] += low; + out[6] = high; + + a = ((uint128_t) small1[3]) * small2[2]; + low = a; + high = a >> 64; + out[5] += low; + out[6] += high; + + a = ((uint128_t) small1[3]) * small2[3]; + low = a; + high = a >> 64; + out[6] += low; + out[7] = high; +} /*- * felem_mul sets |out| = |in1| * |in2| @@ -676,12 +694,12 @@ static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfe * out[i] < 7 * 2^64 < 2^67 */ static void felem_mul(longfelem out, const felem in1, const felem in2) - { - smallfelem small1, small2; - felem_shrink(small1, in1); - felem_shrink(small2, in2); - smallfelem_mul(out, small1, small2); - } +{ + smallfelem small1, small2; + felem_shrink(small1, in1); + felem_shrink(small2, in2); + smallfelem_mul(out, small1, small2); +} /*- * felem_small_mul sets |out| = |small1| * |in2| @@ -691,24 +709,26 @@ static void felem_mul(longfelem out, const felem in1, const felem in2) * On exit: * out[i] < 7 * 2^64 < 2^67 */ -static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2) - { - smallfelem small2; - felem_shrink(small2, in2); - smallfelem_mul(out, small1, small2); - } - -#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4) -#define two100 (((limb)1) << 100) -#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4) +static void felem_small_mul(longfelem out, co |