diff options
author | Matt Caswell <matt@openssl.org> | 2015-01-22 03:40:55 +0000 |
---|---|---|
committer | Matt Caswell <matt@openssl.org> | 2015-01-22 09:20:09 +0000 |
commit | 0f113f3ee4d629ef9a4a30911b22b224772085e5 (patch) | |
tree | e014603da5aed1d0751f587a66d6e270b6bda3de /crypto/ec/ecp_nistp521.c | |
parent | 22b52164aaed31d6e93dbd2d397ace041360e6aa (diff) |
Run util/openssl-format-source -v -c .
Reviewed-by: Tim Hudson <tjh@openssl.org>
Diffstat (limited to 'crypto/ec/ecp_nistp521.c')
-rw-r--r-- | crypto/ec/ecp_nistp521.c | 3579 |
1 files changed, 1831 insertions, 1748 deletions
diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c index f97dab67de..c1ef3fedac 100644 --- a/crypto/ec/ecp_nistp521.c +++ b/crypto/ec/ecp_nistp521.c @@ -29,85 +29,87 @@ #include <openssl/opensslconf.h> #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 -#ifndef OPENSSL_SYS_VMS -#include <stdint.h> -#else -#include <inttypes.h> -#endif +# ifndef OPENSSL_SYS_VMS +# include <stdint.h> +# else +# include <inttypes.h> +# endif -#include <string.h> -#include <openssl/err.h> -#include "ec_lcl.h" +# include <string.h> +# include <openssl/err.h> +# include "ec_lcl.h" -#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) /* even with gcc, the typedef won't work for 32-bit platforms */ - typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */ -#else - #error "Need GCC 3.1 or later to define type uint128_t" -#endif +typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit + * platforms */ +# else +# error "Need GCC 3.1 or later to define type uint128_t" +# endif typedef uint8_t u8; typedef uint64_t u64; typedef int64_t s64; -/* The underlying field. - * - * P521 operates over GF(2^521-1). We can serialise an element of this field - * into 66 bytes where the most significant byte contains only a single bit. We - * call this an felem_bytearray. */ +/* + * The underlying field. P521 operates over GF(2^521-1). We can serialise an + * element of this field into 66 bytes where the most significant byte + * contains only a single bit. We call this an felem_bytearray. + */ typedef u8 felem_bytearray[66]; -/* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5. - * These values are big-endian. */ -static const felem_bytearray nistp521_curve_params[5] = - { - {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff}, - {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xfc}, - {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */ - 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, - 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, - 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, - 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, - 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, - 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, - 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, - 0x3f, 0x00}, - {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */ - 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, - 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f, - 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, - 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, - 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, - 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, - 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, - 0xbd, 0x66}, - {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */ - 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, - 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, - 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, - 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, - 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad, - 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, - 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, - 0x66, 0x50} - }; +/* + * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5. + * These values are big-endian. + */ +static const felem_bytearray nistp521_curve_params[5] = { + {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff}, + {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xfc}, + {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */ + 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, + 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, + 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, + 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, + 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, + 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, + 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, + 0x3f, 0x00}, + {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */ + 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, + 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f, + 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, + 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, + 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, + 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, + 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, + 0xbd, 0x66}, + {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */ + 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, + 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, + 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, + 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, + 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad, + 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, + 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, + 0x66, 0x50} +}; /*- * The representation of field elements. @@ -123,7 +125,7 @@ static const felem_bytearray nistp521_curve_params[5] = * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a * 'largefelem' */ -#define NLIMBS 9 +# define NLIMBS 9 typedef uint64_t limb; typedef limb felem[NLIMBS]; @@ -132,80 +134,81 @@ typedef uint128_t largefelem[NLIMBS]; static const limb bottom57bits = 0x1ffffffffffffff; static const limb bottom58bits = 0x3ffffffffffffff; -/* bin66_to_felem takes a little-endian byte array and converts it into felem - * form. This assumes that the CPU is little-endian. */ +/* + * bin66_to_felem takes a little-endian byte array and converts it into felem + * form. This assumes that the CPU is little-endian. + */ static void bin66_to_felem(felem out, const u8 in[66]) - { - out[0] = (*((limb*) &in[0])) & bottom58bits; - out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits; - out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits; - out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits; - out[4] = (*((limb*) &in[29])) & bottom58bits; - out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits; - out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits; - out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits; - out[8] = (*((limb*) &in[58])) & bottom57bits; - } - -/* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte - * array. This assumes that the CPU is little-endian. */ +{ + out[0] = (*((limb *) & in[0])) & bottom58bits; + out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits; + out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits; + out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits; + out[4] = (*((limb *) & in[29])) & bottom58bits; + out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits; + out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits; + out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits; + out[8] = (*((limb *) & in[58])) & bottom57bits; +} + +/* + * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte + * array. This assumes that the CPU is little-endian. + */ static void felem_to_bin66(u8 out[66], const felem in) - { - memset(out, 0, 66); - (*((limb*) &out[0])) = in[0]; - (*((limb*) &out[7])) |= in[1] << 2; - (*((limb*) &out[14])) |= in[2] << 4; - (*((limb*) &out[21])) |= in[3] << 6; - (*((limb*) &out[29])) = in[4]; - (*((limb*) &out[36])) |= in[5] << 2; - (*((limb*) &out[43])) |= in[6] << 4; - (*((limb*) &out[50])) |= in[7] << 6; - (*((limb*) &out[58])) = in[8]; - } +{ + memset(out, 0, 66); + (*((limb *) & out[0])) = in[0]; + (*((limb *) & out[7])) |= in[1] << 2; + (*((limb *) & out[14])) |= in[2] << 4; + (*((limb *) & out[21])) |= in[3] << 6; + (*((limb *) & out[29])) = in[4]; + (*((limb *) & out[36])) |= in[5] << 2; + (*((limb *) & out[43])) |= in[6] << 4; + (*((limb *) & out[50])) |= in[7] << 6; + (*((limb *) & out[58])) = in[8]; +} /* To preserve endianness when using BN_bn2bin and BN_bin2bn */ static void flip_endian(u8 *out, const u8 *in, unsigned len) - { - unsigned i; - for (i = 0; i < len; ++i) - out[i] = in[len-1-i]; - } +{ + unsigned i; + for (i = 0; i < len; ++i) + out[i] = in[len - 1 - i]; +} /* BN_to_felem converts an OpenSSL BIGNUM into an felem */ static int BN_to_felem(felem out, const BIGNUM *bn) - { - felem_bytearray b_in; - felem_bytearray b_out; - unsigned num_bytes; - - /* BN_bn2bin eats leading zeroes */ - memset(b_out, 0, sizeof b_out); - num_bytes = BN_num_bytes(bn); - if (num_bytes > sizeof b_out) - { - ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); - return 0; - } - if (BN_is_negative(bn)) - { - ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); - return 0; - } - num_bytes = BN_bn2bin(bn, b_in); - flip_endian(b_out, b_in, num_bytes); - bin66_to_felem(out, b_out); - return 1; - } +{ + felem_bytearray b_in; + felem_bytearray b_out; + unsigned num_bytes; + + /* BN_bn2bin eats leading zeroes */ + memset(b_out, 0, sizeof b_out); + num_bytes = BN_num_bytes(bn); + if (num_bytes > sizeof b_out) { + ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); + return 0; + } + if (BN_is_negative(bn)) { + ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); + return 0; + } + num_bytes = BN_bn2bin(bn, b_in); + flip_endian(b_out, b_in, num_bytes); + bin66_to_felem(out, b_out); + return 1; +} /* felem_to_BN converts an felem into an OpenSSL BIGNUM */ static BIGNUM *felem_to_BN(BIGNUM *out, const felem in) - { - felem_bytearray b_in, b_out; - felem_to_bin66(b_in, in); - flip_endian(b_out, b_in, sizeof b_out); - return BN_bin2bn(b_out, sizeof b_out, out); - } - +{ + felem_bytearray b_in, b_out; + felem_to_bin66(b_in, in); + flip_endian(b_out, b_in, sizeof b_out); + return BN_bin2bn(b_out, sizeof b_out, out); +} /*- * Field operations @@ -213,86 +216,86 @@ static BIGNUM *felem_to_BN(BIGNUM *out, const felem in) */ static void felem_one(felem out) - { - out[0] = 1; - out[1] = 0; - out[2] = 0; - out[3] = 0; - out[4] = 0; - out[5] = 0; - out[6] = 0; - out[7] = 0; - out[8] = 0; - } +{ + out[0] = 1; + out[1] = 0; + out[2] = 0; + out[3] = 0; + out[4] = 0; + out[5] = 0; + out[6] = 0; + out[7] = 0; + out[8] = 0; +} static void felem_assign(felem out, const felem in) - { - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - out[4] = in[4]; - out[5] = in[5]; - out[6] = in[6]; - out[7] = in[7]; - out[8] = in[8]; - } +{ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[4] = in[4]; + out[5] = in[5]; + out[6] = in[6]; + out[7] = in[7]; + out[8] = in[8]; +} /* felem_sum64 sets out = out + in. */ static void felem_sum64(felem out, const felem in) - { - out[0] += in[0]; - out[1] += in[1]; - out[2] += in[2]; - out[3] += in[3]; - out[4] += in[4]; - out[5] += in[5]; - out[6] += in[6]; - out[7] += in[7]; - out[8] += in[8]; - } +{ + out[0] += in[0]; + out[1] += in[1]; + out[2] += in[2]; + out[3] += in[3]; + out[4] += in[4]; + out[5] += in[5]; + out[6] += in[6]; + out[7] += in[7]; + out[8] += in[8]; +} /* felem_scalar sets out = in * scalar */ static void felem_scalar(felem out, const felem in, limb scalar) - { - out[0] = in[0] * scalar; - out[1] = in[1] * scalar; - out[2] = in[2] * scalar; - out[3] = in[3] * scalar; - out[4] = in[4] * scalar; - out[5] = in[5] * scalar; - out[6] = in[6] * scalar; - out[7] = in[7] * scalar; - out[8] = in[8] * scalar; - } +{ + out[0] = in[0] * scalar; + out[1] = in[1] * scalar; + out[2] = in[2] * scalar; + out[3] = in[3] * scalar; + out[4] = in[4] * scalar; + out[5] = in[5] * scalar; + out[6] = in[6] * scalar; + out[7] = in[7] * scalar; + out[8] = in[8] * scalar; +} /* felem_scalar64 sets out = out * scalar */ static void felem_scalar64(felem out, limb scalar) - { - out[0] *= scalar; - out[1] *= scalar; - out[2] *= scalar; - out[3] *= scalar; - out[4] *= scalar; - out[5] *= scalar; - out[6] *= scalar; - out[7] *= scalar; - out[8] *= scalar; - } +{ + out[0] *= scalar; + out[1] *= scalar; + out[2] *= scalar; + out[3] *= scalar; + out[4] *= scalar; + out[5] *= scalar; + out[6] *= scalar; + out[7] *= scalar; + out[8] *= scalar; +} /* felem_scalar128 sets out = out * scalar */ static void felem_scalar128(largefelem out, limb scalar) - { - out[0] *= scalar; - out[1] *= scalar; - out[2] *= scalar; - out[3] *= scalar; - out[4] *= scalar; - out[5] *= scalar; - out[6] *= scalar; - out[7] *= scalar; - out[8] *= scalar; - } +{ + out[0] *= scalar; + out[1] *= scalar; + out[2] *= scalar; + out[3] *= scalar; + out[4] *= scalar; + out[5] *= scalar; + out[6] *= scalar; + out[7] *= scalar; + out[8] *= scalar; +} /*- * felem_neg sets |out| to |-in| @@ -302,21 +305,21 @@ static void felem_scalar128(largefelem out, limb scalar) * out[i] < 2^62 */ static void felem_neg(felem out, const felem in) - { - /* In order to prevent underflow, we subtract from 0 mod p. */ - static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5); - static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4); - - out[0] = two62m3 - in[0]; - out[1] = two62m2 - in[1]; - out[2] = two62m2 - in[2]; - out[3] = two62m2 - in[3]; - out[4] = two62m2 - in[4]; - out[5] = two62m2 - in[5]; - out[6] = two62m2 - in[6]; - out[7] = two62m2 - in[7]; - out[8] = two62m2 - in[8]; - } +{ + /* In order to prevent underflow, we subtract from 0 mod p. */ + static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); + static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); + + out[0] = two62m3 - in[0]; + out[1] = two62m2 - in[1]; + out[2] = two62m2 - in[2]; + out[3] = two62m2 - in[3]; + out[4] = two62m2 - in[4]; + out[5] = two62m2 - in[5]; + out[6] = two62m2 - in[6]; + out[7] = two62m2 - in[7]; + out[8] = two62m2 - in[8]; +} /*- * felem_diff64 subtracts |in| from |out| @@ -326,21 +329,23 @@ static void felem_neg(felem out, const felem in) * out[i] < out[i] + 2^62 */ static void felem_diff64(felem out, const felem in) - { - /* In order to prevent underflow, we add 0 mod p before subtracting. */ - static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5); - static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4); - - out[0] += two62m3 - in[0]; - out[1] += two62m2 - in[1]; - out[2] += two62m2 - in[2]; - out[3] += two62m2 - in[3]; - out[4] += two62m2 - in[4]; - out[5] += two62m2 - in[5]; - out[6] += two62m2 - in[6]; - out[7] += two62m2 - in[7]; - out[8] += two62m2 - in[8]; - } +{ + /* + * In order to prevent underflow, we add 0 mod p before subtracting. + */ + static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5); + static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4); + + out[0] += two62m3 - in[0]; + out[1] += two62m2 - in[1]; + out[2] += two62m2 - in[2]; + out[3] += two62m2 - in[3]; + out[4] += two62m2 - in[4]; + out[5] += two62m2 - in[5]; + out[6] += two62m2 - in[6]; + out[7] += two62m2 - in[7]; + out[8] += two62m2 - in[8]; +} /*- * felem_diff_128_64 subtracts |in| from |out| @@ -350,21 +355,23 @@ static void felem_diff64(felem out, const felem in) * out[i] < out[i] + 2^63 */ static void felem_diff_128_64(largefelem out, const felem in) - { - /* In order to prevent underflow, we add 0 mod p before subtracting. */ - static const limb two63m6 = (((limb)1) << 62) - (((limb)1) << 5); - static const limb two63m5 = (((limb)1) << 62) - (((limb)1) << 4); - - out[0] += two63m6 - in[0]; - out[1] += two63m5 - in[1]; - out[2] += two63m5 - in[2]; - out[3] += two63m5 - in[3]; - out[4] += two63m5 - in[4]; - out[5] += two63m5 - in[5]; - out[6] += two63m5 - in[6]; - out[7] += two63m5 - in[7]; - out[8] += two63m5 - in[8]; - } +{ + /* + * In order to prevent underflow, we add 0 mod p before subtracting. + */ + static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5); + static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4); + + out[0] += two63m6 - in[0]; + out[1] += two63m5 - in[1]; + out[2] += two63m5 - in[2]; + out[3] += two63m5 - in[3]; + out[4] += two63m5 - in[4]; + out[5] += two63m5 - in[5]; + out[6] += two63m5 - in[6]; + out[7] += two63m5 - in[7]; + out[8] += two63m5 - in[8]; +} /*- * felem_diff_128_64 subtracts |in| from |out| @@ -374,21 +381,25 @@ static void felem_diff_128_64(largefelem out, const felem in) * out[i] < out[i] + 2^127 - 2^69 */ static void felem_diff128(largefelem out, const largefelem in) - { - /* In order to prevent underflow, we add 0 mod p before subtracting. */ - static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70); - static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69); - - out[0] += (two127m70 - in[0]); - out[1] += (two127m69 - in[1]); - out[2] += (two127m69 - in[2]); - out[3] += (two127m69 - in[3]); - out[4] += (two127m69 - in[4]); - out[5] += (two127m69 - in[5]); - out[6] += (two127m69 - in[6]); - out[7] += (two127m69 - in[7]); - out[8] += (two127m69 - in[8]); - } +{ + /* + * In order to prevent underflow, we add 0 mod p before subtracting. + */ + static const uint128_t two127m70 = + (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70); + static const uint128_t two127m69 = + (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69); + + out[0] += (two127m70 - in[0]); + out[1] += (two127m69 - in[1]); + out[2] += (two127m69 - in[2]); + out[3] += (two127m69 - in[3]); + out[4] += (two127m69 - in[4]); + out[5] += (two127m69 - in[5]); + out[6] += (two127m69 - in[6]); + out[7] += (two127m69 - in[7]); + out[8] += (two127m69 - in[8]); +} /*- * felem_square sets |out| = |in|^2 @@ -398,90 +409,79 @@ static void felem_diff128(largefelem out, const largefelem in) * out[i] < 17 * max(in[i]) * max(in[i]) */ static void felem_square(largefelem out, const felem in) - { - felem inx2, inx4; - felem_scalar(inx2, in, 2); - felem_scalar(inx4, in, 4); - - /*- - * We have many cases were we want to do - * in[x] * in[y] + - * in[y] * in[x] - * This is obviously just - * 2 * in[x] * in[y] - * However, rather than do the doubling on the 128 bit result, we - * double one of the inputs to the multiplication by reading from - * |inx2| */ - - out[0] = ((uint128_t) in[0]) * in[0]; - out[1] = ((uint128_t) in[0]) * inx2[1]; - out[2] = ((uint128_t) in[0]) * inx2[2] + - ((uint128_t) in[1]) * in[1]; - out[3] = ((uint128_t) in[0]) * inx2[3] + - ((uint128_t) in[1]) * inx2[2]; - out[4] = ((uint128_t) in[0]) * inx2[4] + - ((uint128_t) in[1]) * inx2[3] + - ((uint128_t) in[2]) * in[2]; - out[5] = ((uint128_t) in[0]) * inx2[5] + - ((uint128_t) in[1]) * inx2[4] + - ((uint128_t) in[2]) * inx2[3]; - out[6] = ((uint128_t) in[0]) * inx2[6] + - ((uint128_t) in[1]) * inx2[5] + - ((uint128_t) in[2]) * inx2[4] + - ((uint128_t) in[3]) * in[3]; - out[7] = ((uint128_t) in[0]) * inx2[7] + - ((uint128_t) in[1]) * inx2[6] + - ((uint128_t) in[2]) * inx2[5] + - ((uint128_t) in[3]) * inx2[4]; - out[8] = ((uint128_t) in[0]) * inx2[8] + - ((uint128_t) in[1]) * inx2[7] + - ((uint128_t) in[2]) * inx2[6] + - ((uint128_t) in[3]) * inx2[5] + - ((uint128_t) in[4]) * in[4]; - - /* The remaining limbs fall above 2^521, with the first falling at - * 2^522. They correspond to locations one bit up from the limbs - * produced above so we would have to multiply by two to align them. - * Again, rather than operate on the 128-bit result, we double one of - * the inputs to the multiplication. If we want to double for both this - * reason, and the reason above, then we end up multiplying by four. */ - - /* 9 */ - out[0] += ((uint128_t) in[1]) * inx4[8] + - ((uint128_t) in[2]) * inx4[7] + - ((uint128_t) in[3]) * inx4[6] + - ((uint128_t) in[4]) * inx4[5]; - - /* 10 */ - out[1] += ((uint128_t) in[2]) * inx4[8] + - ((uint128_t) in[3]) * inx4[7] + - ((uint128_t) in[4]) * inx4[6] + - ((uint128_t) in[5]) * inx2[5]; - - /* 11 */ - out[2] += ((uint128_t) in[3]) * inx4[8] + - ((uint128_t) in[4]) * inx4[7] + - ((uint128_t) in[5]) * inx4[6]; - - /* 12 */ - out[3] += ((uint128_t) in[4]) * inx4[8] + - ((uint128_t) in[5]) * inx4[7] + - ((uint128_t) in[6]) * inx2[6]; - - /* 13 */ - out[4] += ((uint128_t) in[5]) * inx4[8] + - ((uint128_t) in[6]) * inx4[7]; - - /* 14 */ - out[5] += ((uint128_t) in[6]) * inx4[8] + - ((uint128_t) in[7]) * inx2[7]; - - /* 15 */ - out[6] += ((uint128_t) in[7]) * inx4[8]; - - /* 16 */ - out[7] += ((uint128_t) in[8]) * inx2[8]; - } +{ + felem inx2, inx4; + felem_scalar(inx2, in, 2); + felem_scalar(inx4, in, 4); + + /*- + * We have many cases were we want to do + * in[x] * in[y] + + * in[y] * in[x] + * This is obviously just + * 2 * in[x] * in[y] + * However, rather than do the doubling on the 128 bit result, we + * double one of the inputs to the multiplication by reading from + * |inx2| */ + + out[0] = ((uint128_t) in[0]) * in[0]; + out[1] = ((uint128_t) in[0]) * inx2[1]; + out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1]; + out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2]; + out[4] = ((uint128_t) in[0]) * inx2[4] + + ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2]; + out[5] = ((uint128_t) in[0]) * inx2[5] + + ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3]; + out[6] = ((uint128_t) in[0]) * inx2[6] + + ((uint128_t) in[1]) * inx2[5] + + ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3]; + out[7] = ((uint128_t) in[0]) * inx2[7] + + ((uint128_t) in[1]) * inx2[6] + + ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4]; + out[8] = ((uint128_t) in[0]) * inx2[8] + + ((uint128_t) in[1]) * inx2[7] + + ((uint128_t) in[2]) * inx2[6] + + ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4]; + + /* + * The remaining limbs fall above 2^521, with the first falling at 2^522. + * They correspond to locations one bit up from the limbs produced above + * so we would have to multiply by two to align them. Again, rather than + * operate on the 128-bit result, we double one of the inputs to the + * multiplication. If we want to double for both this reason, and the + * reason above, then we end up multiplying by four. + */ + + /* 9 */ + out[0] += ((uint128_t) in[1]) * inx4[8] + + ((uint128_t) in[2]) * inx4[7] + + ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5]; + + /* 10 */ + out[1] += ((uint128_t) in[2]) * inx4[8] + + ((uint128_t) in[3]) * inx4[7] + + ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5]; + + /* 11 */ + out[2] += ((uint128_t) in[3]) * inx4[8] + + ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6]; + + /* 12 */ + out[3] += ((uint128_t) in[4]) * inx4[8] + + ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6]; + + /* 13 */ + out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7]; + + /* 14 */ + out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7]; + + /* 15 */ + out[6] += ((uint128_t) in[7]) * inx4[8]; + + /* 16 */ + out[7] += ((uint128_t) in[8]) * inx2[8]; +} /*- * felem_mul sets |out| = |in1| * |in2| @@ -492,110 +492,96 @@ static void felem_square(largefelem out, const felem in) * out[i] < 17 * max(in1[i]) * max(in2[i]) */ static void felem_mul(largefelem out, const felem in1, const felem in2) - { - felem in2x2; - felem_scalar(in2x2, in2, 2); - - out[0] = ((uint128_t) in1[0]) * in2[0]; - - out[1] = ((uint128_t) in1[0]) * in2[1] + - ((uint128_t) in1[1]) * in2[0]; - - out[2] = ((uint128_t) in1[0]) * in2[2] + - ((uint128_t) in1[1]) * in2[1] + - ((uint128_t) in1[2]) * in2[0]; - - out[3] = ((uint128_t) in1[0]) * in2[3] + - ((uint128_t) in1[1]) * in2[2] + - ((uint128_t) in1[2]) * in2[1] + - ((uint128_t) in1[3]) * in2[0]; - - out[4] = ((uint128_t) in1[0]) * in2[4] + - ((uint128_t) in1[1]) * in2[3] + - ((uint128_t) in1[2]) * in2[2] + - ((uint128_t) in1[3]) * in2[1] + - ((uint128_t) in1[4]) * in2[0]; - - out[5] = ((uint128_t) in1[0]) * in2[5] + - ((uint128_t) in1[1]) * in2[4] + - ((uint128_t) in1[2]) * in2[3] + - ((uint128_t) in1[3]) * in2[2] + - ((uint128_t) in1[4]) * in2[1] + - ((uint128_t) in1[5]) * in2[0]; - - out[6] = ((uint128_t) in1[0]) * in2[6] + - ((uint128_t) in1[1]) * in2[5] + - ((uint128_t) in1[2]) * in2[4] + - ((uint128_t) in1[3]) * in2[3] + - ((uint128_t) in1[4]) * in2[2] + - ((uint128_t) in1[5]) * in2[1] + - ((uint128_t) in1[6]) * in2[0]; - - out[7] = ((uint128_t) in1[0]) * in2[7] + - ((uint128_t) in1[1]) * in2[6] + - ((uint128_t) in1[2]) * in2[5] + - ((uint128_t) in1[3]) * in2[4] + - ((uint128_t) in1[4]) * in2[3] + - ((uint128_t) in1[5]) * in2[2] + - ((uint128_t) in1[6]) * in2[1] + - ((uint128_t) in1[7]) * in2[0]; - - out[8] = ((uint128_t) in1[0]) * in2[8] + - ((uint128_t) in1[1]) * in2[7] + - ((uint128_t) in1[2]) * in2[6] + - ((uint128_t) in1[3]) * in2[5] + - ((uint128_t) in1[4]) * in2[4] + - ((uint128_t) in1[5]) * in2[3] + - ((uint128_t) in1[6]) * in2[2] + - ((uint128_t) in1[7]) * in2[1] + - ((uint128_t) in1[8]) * in2[0]; - - /* See comment in felem_square about the use of in2x2 here */ - - out[0] += ((uint128_t) in1[1]) * in2x2[8] + - ((uint128_t) in1[2]) * in2x2[7] + - ((uint128_t) in1[3]) * in2x2[6] + - ((uint128_t) in1[4]) * in2x2[5] + - ((uint128_t) in1[5]) * in2x2[4] + - ((uint128_t) in1[6]) * in2x2[3] + - ((uint128_t) in1[7]) * in2x2[2] + - ((uint128_t) in1[8]) * in2x2[1]; - - out[1] += ((uint128_t) in1[2]) * in2x2[8] + - ((uint128_t) in1[3]) * in2x2[7] + - ((uint128_t) in1[4]) * in2x2[6] + - ((uint128_t) in1[5]) * in2x2[5] + - ((uint128_t) in1[6]) * in2x2[4] + - ((uint128_t) in1[7]) * in2x2[3] + - ((uint128_t) in1[8]) * in2x2[2]; - - out[2] += ((uint128_t) in1[3]) * in2x2[8] + - ((uint128_t) in1[4]) * in2x2[7] + - ((uint128_t) in1[5]) * in2x2[6] + - ((uint128_t) in1[6]) * in2x2[5] + - ((uint128_t) in1[7]) * in2x2[4] + - ((uint128_t) in1[8]) * in2x2[3]; - - out[3] += ((uint128_t) in1[4]) * in2x2[8] + - ((uint128_t) in1[5]) * in2x2[7] + - ((uint128_t) in1[6]) * in2x2[6] + - ((uint128_t) in1[7]) * in2x2[5] + - ((uint128_t) in1[8]) * in2x2[4]; - - out[4] += ((uint128_t) in1[5]) * in2x2[8] + - ((uint128_t) in1[6]) * in2x2[7] + - ((uint128_t) in1[7]) * in2x2[6] + - ((uint128_t) in1[8]) * in2x2[5]; - - out[5] += ((uint128_t) in1[6]) * in2x2[8] + - ((uint128_t) in1[7]) * in2x2[7] + - ((uint128_t) in1[8]) * in2x2[6]; - - out[6] += ((uint128_t) in1[7]) * in2x2[8] + - ((uint128_t) in1[8]) * in2x2[7]; - - out[7] += ((uint128_t) in1[8]) * in2x2[8]; - } +{ + felem in2x2; + felem_scalar(in2x2, in2, 2); + + out[0] = ((uint128_t) in1[0]) * in2[0]; + + out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0]; + + out[2] = ((uint128_t) in1[0]) * in2[2] + + ((uint128_t) in1[1]) * in2[1] + ((uint128_t) in1[2]) * in2[0]; + + out[3] = ((uint128_t) in1[0]) * in2[3] + + ((uint128_t) in1[1]) * in2[2] + + ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0]; + + out[4] = ((uint128_t) in1[0]) * in2[4] + + ((uint128_t) in1[1]) * in2[3] + + ((uint128_t) in1[2]) * in2[2] + + ((uint128_t) in1[3]) * in2[1] + ((uint128_t) in1[4]) * in2[0]; + + out[5] = ((uint128_t) in1[0]) * in2[5] + + ((uint128_t) in1[1]) * in2[4] + + ((uint128_t) in1[2]) * in2[3] + + ((uint128_t) in1[3]) * in2[2] + + ((uint128_t) in1[4]) * in2[1] + ((uint128_t) in1[5]) * in2[0]; + + out[6] = ((uint128_t) in1[0]) * in2[6] + + ((uint128_t) in1[1]) * in2[5] + + ((uint128_t) in1[2]) * in2[4] + + ((uint128_t) in1[3]) * in2[3] + + ((uint128_t) in1[4]) * in2[2] + + ((uint128_t) in1[5]) * in2[1] + ((uint128_t) in1[6]) * in2[0]; + + out[7] = ((uint128_t) in1[0]) * in2[7] + + ((uint128_t) in1[1]) * in2[6] + + ((uint128_t) in1[2]) * in2[5] + + ((uint128_t) in1[3]) * in2[4] + + ((uint128_t) in1[4]) * in2[3] + + ((uint128_t) in1[5]) * in2[2] + + ((uint128_t) in1[6]) * in2[1] + ((uint128_t) in1[7]) * in2[0]; + + out[8] = ((uint128_t) in1[0]) * in2[8] + + ((uint128_t) in1[1]) * in2[7] + + ((uint128_t) in1[2]) * in2[6] + + ((uint128_t) in1[3]) * in2[5] + + ((uint128_t) in1[4]) * in2[4] + + ((uint128_t) in1[5]) * in2[3] + + ((uint128_t) in1[6]) * in2[2] + + ((uint128_t) in1[7]) * in2[1] + ((uint128_t) in1[8]) * in2[0]; + + /* See comment in felem_square about the use of in2x2 here */ + + out[0] += ((uint128_t) in1[1]) * in2x2[8] + + ((uint128_t) in1[2]) * in2x2[7] + + ((uint128_t) in1[3]) * in2x2[6] + + ((uint128_t) in1[4]) * in2x2[5] + + ((uint128_t) in1[5]) * in2x2[4] + + ((uint128_t) in1[6]) * in2x2[3] + + ((uint128_t) in1[7]) * in2x2[2] + ((uint128_t) in1[8]) * in2x2[1]; + + out[1] += ((uint128_t) in1[2]) * in2x2[8] + + ((uint128_t) in1[3]) * in2x2[7] + + ((uint128_t) in1[4]) * in2x2[6] + + ((uint128_t) in1[5]) * in2x2[5] + + ((uint128_t) in1[6]) * in2x2[4] + + ((uint128_t) in1[7]) * in2x2[3] + ((uint128_t) in1[8]) * in2x2[2]; + + out[2] += ((uint128_t) in1[3]) * in2x2[8] + + ((uint128_t) in1[4]) * in2x2[7] + + ((uint128_t) in1[5]) * in2x2[6] + + ((uint128_t) in1[6]) * in2x2[5] + + ((uint128_t) in1[7]) * in2x2[4] + ((uint128_t) in1[8]) * in2x2[3]; + + out[3] += ((uint128_t) in1[4]) * in2x2[8] + + ((uint128_t) in1[5]) * in2x2[7] + + ((uint128_t) in1[6]) * in2x2[6] + + ((uint128_t) in1[7]) * in2x2[5] + ((uint128_t) in1[8]) * in2x2[4]; + + out[4] += ((uint128_t) in1[5]) * in2x2[8] + + ((uint128_t) in1[6]) * in2x2[7] + + ((uint128_t) in1[7]) * in2x2[6] + ((uint128_t) in1[8]) * in2x2[5]; + + out[5] += ((uint128_t) in1[6]) * in2x2[8] + + ((uint128_t) in1[7]) * in2x2[7] + ((uint128_t) in1[8]) * in2x2[6]; + + out[6] += ((uint128_t) in1[7]) * in2x2[8] + + ((uint128_t) in1[8]) * in2x2[7]; + + out[7] += ((uint128_t) in1[8]) * in2x2[8]; +} static const limb bottom52bits = 0xfffffffffffff; @@ -607,92 +593,93 @@ static const limb bottom52bits = 0xfffffffffffff; * out[i] < 2^59 + 2^14 */ static void felem_reduce(felem out, const largefelem in) - { - u64 overflow1, overflow2; - - out[0] = ((limb) in[0]) & bottom58bits; - out[1] = ((limb) in[1]) & bottom58bits; - out[2] = ((limb) in[2]) & bottom58bits; - out[3] = ((limb) in[3]) & bottom58bits; - out[4] = ((limb) in[4]) & bottom58bits; - out[5] = ((limb) in[5]) & bottom58bits; - out[6] = ((limb) in[6]) & bottom58bits; - out[7] = ((limb) in[7]) & bottom58bits; - out[8] = ((limb) in[8]) & bottom58bits; - - /* out[i] < 2^58 */ - - out[1] += ((limb) in[0]) >> 58; - out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6; - /*- - * out[1] < 2^58 + 2^6 + 2^58 - * = 2^59 + 2^6 - */ - out[2] += ((limb) (in[0] >> 64)) >> 52; - - out[2] += ((limb) in[1]) >> 58; - out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6; - out[3] += ((limb) (in[1] >> 64)) >> 52; - - out[3] += ((limb) in[2]) >> 58; - out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6; - out[4] += ((limb) (in[2] >> 64)) >> 52; - - out[4] += ((limb) in[3]) >> 58; - out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6; - out[5] += ((limb) (in[3] >> 64)) >> 52; - - out[5] += ((limb) in[4]) >> 58; - out[5] += (((limb) (in[4] >> 64 |