summaryrefslogtreecommitdiffstats
path: root/crypto/blake2
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/blake2')
-rw-r--r--crypto/blake2/blake2_impl.h53
-rw-r--r--crypto/blake2/blake2b.c197
-rw-r--r--crypto/blake2/blake2s.c193
3 files changed, 272 insertions, 171 deletions
diff --git a/crypto/blake2/blake2_impl.h b/crypto/blake2/blake2_impl.h
index c613abd308..335a38302f 100644
--- a/crypto/blake2/blake2_impl.h
+++ b/crypto/blake2/blake2_impl.h
@@ -30,10 +30,10 @@ static ossl_inline uint32_t load32(const uint8_t *src)
memcpy(&w, src, sizeof(w));
return w;
} else {
- uint32_t w = *src++;
- w |= (uint32_t)(*src++) << 8;
- w |= (uint32_t)(*src++) << 16;
- w |= (uint32_t)(*src++) << 24;
+ uint32_t w = ((uint32_t)src[0])
+ | ((uint32_t)src[1] << 8)
+ | ((uint32_t)src[2] << 16)
+ | ((uint32_t)src[3] << 24);
return w;
}
}
@@ -50,14 +50,14 @@ static ossl_inline uint64_t load64(const uint8_t *src)
memcpy(&w, src, sizeof(w));
return w;
} else {
- uint64_t w = *src++;
- w |= (uint64_t)(*src++) << 8;
- w |= (uint64_t)(*src++) << 16;
- w |= (uint64_t)(*src++) << 24;
- w |= (uint64_t)(*src++) << 32;
- w |= (uint64_t)(*src++) << 40;
- w |= (uint64_t)(*src++) << 48;
- w |= (uint64_t)(*src++) << 56;
+ uint64_t w = ((uint64_t)src[0])
+ | ((uint64_t)src[1] << 8)
+ | ((uint64_t)src[2] << 16)
+ | ((uint64_t)src[3] << 24)
+ | ((uint64_t)src[4] << 32)
+ | ((uint64_t)src[5] << 40)
+ | ((uint64_t)src[6] << 48)
+ | ((uint64_t)src[7] << 56);
return w;
}
}
@@ -100,29 +100,24 @@ static ossl_inline void store64(uint8_t *dst, uint64_t w)
static ossl_inline uint64_t load48(const uint8_t *src)
{
- uint64_t w = *src++;
- w |= (uint64_t)(*src++) << 8;
- w |= (uint64_t)(*src++) << 16;
- w |= (uint64_t)(*src++) << 24;
- w |= (uint64_t)(*src++) << 32;
- w |= (uint64_t)(*src++) << 40;
+ uint64_t w = ((uint64_t)src[0])
+ | ((uint64_t)src[1] << 8)
+ | ((uint64_t)src[2] << 16)
+ | ((uint64_t)src[3] << 24)
+ | ((uint64_t)src[4] << 32)
+ | ((uint64_t)src[5] << 40);
return w;
}
static ossl_inline void store48(uint8_t *dst, uint64_t w)
{
uint8_t *p = (uint8_t *)dst;
- *p++ = (uint8_t)w;
- w >>= 8;
- *p++ = (uint8_t)w;
- w >>= 8;
- *p++ = (uint8_t)w;
- w >>= 8;
- *p++ = (uint8_t)w;
- w >>= 8;
- *p++ = (uint8_t)w;
- w >>= 8;
- *p++ = (uint8_t)w;
+ p[0] = (uint8_t)w;
+ p[1] = (uint8_t)(w>>8);
+ p[2] = (uint8_t)(w>>16);
+ p[3] = (uint8_t)(w>>24);
+ p[4] = (uint8_t)(w>>32);
+ p[5] = (uint8_t)(w>>40);
}
static ossl_inline uint32_t rotr32(const uint32_t w, const unsigned int c)
diff --git a/crypto/blake2/blake2b.c b/crypto/blake2/blake2b.c
index 6f04412e47..56b56fb2ef 100644
--- a/crypto/blake2/blake2b.c
+++ b/crypto/blake2/blake2b.c
@@ -15,6 +15,12 @@
* can be found at https://blake2.net.
*/
+#ifndef BLAKE_DEBUG
+# undef NDEBUG /* avoid conflicting definitions */
+# define NDEBUG
+#endif
+
+#include <assert.h>
#include <string.h>
#include <openssl/crypto.h>
#include "e_os.h"
@@ -52,21 +58,13 @@ static ossl_inline void blake2b_set_lastblock(BLAKE2B_CTX *S)
S->f[0] = -1;
}
-/* Increment the data hashed counter. */
-static ossl_inline void blake2b_increment_counter(BLAKE2B_CTX *S,
- const uint64_t inc)
-{
- S->t[0] += inc;
- S->t[1] += (S->t[0] < inc);
-}
-
/* Initialize the hashing state. */
static ossl_inline void blake2b_init0(BLAKE2B_CTX *S)
{
int i;
memset(S, 0, sizeof(BLAKE2B_CTX));
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
S->h[i] = blake2b_IV[i];
}
}
@@ -82,7 +80,7 @@ static void blake2b_init_param(BLAKE2B_CTX *S, const BLAKE2B_PARAM *P)
* every platform. */
OPENSSL_assert(sizeof(BLAKE2B_PARAM) == 64);
/* IV XOR ParamBlock */
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
S->h[i] ^= load64(p + sizeof(S->h[i]) * i);
}
}
@@ -108,69 +106,106 @@ int BLAKE2b_Init(BLAKE2B_CTX *c)
/* Permute the state while xoring in the block of data. */
static void blake2b_compress(BLAKE2B_CTX *S,
- const uint8_t block[BLAKE2B_BLOCKBYTES])
+ const uint8_t *blocks,
+ size_t len)
{
uint64_t m[16];
uint64_t v[16];
int i;
+ size_t increment;
- for(i = 0; i < 16; ++i) {
- m[i] = load64(block + i * sizeof(m[i]));
- }
+ /*
+ * There are two distinct usage vectors for this function:
+ *
+ * a) BLAKE2b_Update uses it to process complete blocks,
+ * possibly more than one at a time;
+ *
+ * b) BLAK2b_Final uses it to process last block, always
+ * single but possibly incomplete, in which case caller
+ * pads input with zeros.
+ */
+ assert(len < BLAKE2B_BLOCKBYTES || len % BLAKE2B_BLOCKBYTES == 0);
+
+ /*
+ * Since last block is always processed with separate call,
+ * |len| not being multiple of complete blocks can be observed
+ * only with |len| being less than BLAKE2B_BLOCKBYTES ("less"
+ * including even zero), which is why following assignment doesn't
+ * have to reside inside the main loop below.
+ */
+ increment = len < BLAKE2B_BLOCKBYTES ? len : BLAKE2B_BLOCKBYTES;
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
v[i] = S->h[i];
}
- v[8] = blake2b_IV[0];
- v[9] = blake2b_IV[1];
- v[10] = blake2b_IV[2];
- v[11] = blake2b_IV[3];
- v[12] = S->t[0] ^ blake2b_IV[4];
- v[13] = S->t[1] ^ blake2b_IV[5];
- v[14] = S->f[0] ^ blake2b_IV[6];
- v[15] = S->f[1] ^ blake2b_IV[7];
+ do {
+ for (i = 0; i < 16; ++i) {
+ m[i] = load64(blocks + i * sizeof(m[i]));
+ }
+
+ /* blake2b_increment_counter */
+ S->t[0] += increment;
+ S->t[1] += (S->t[0] < increment);
+
+ v[8] = blake2b_IV[0];
+ v[9] = blake2b_IV[1];
+ v[10] = blake2b_IV[2];
+ v[11] = blake2b_IV[3];
+ v[12] = S->t[0] ^ blake2b_IV[4];
+ v[13] = S->t[1] ^ blake2b_IV[5];
+ v[14] = S->f[0] ^ blake2b_IV[6];
+ v[15] = S->f[1] ^ blake2b_IV[7];
#define G(r,i,a,b,c,d) \
- do { \
- a = a + b + m[blake2b_sigma[r][2*i+0]]; \
- d = rotr64(d ^ a, 32); \
- c = c + d; \
- b = rotr64(b ^ c, 24); \
- a = a + b + m[blake2b_sigma[r][2*i+1]]; \
- d = rotr64(d ^ a, 16); \
- c = c + d; \
- b = rotr64(b ^ c, 63); \
- } while(0)
+ do { \
+ a = a + b + m[blake2b_sigma[r][2*i+0]]; \
+ d = rotr64(d ^ a, 32); \
+ c = c + d; \
+ b = rotr64(b ^ c, 24); \
+ a = a + b + m[blake2b_sigma[r][2*i+1]]; \
+ d = rotr64(d ^ a, 16); \
+ c = c + d; \
+ b = rotr64(b ^ c, 63); \
+ } while (0)
#define ROUND(r) \
- do { \
- G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
- G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
- G(r,2,v[ 2],v[ 6],v[10],v[14]); \
- G(r,3,v[ 3],v[ 7],v[11],v[15]); \
- G(r,4,v[ 0],v[ 5],v[10],v[15]); \
- G(r,5,v[ 1],v[ 6],v[11],v[12]); \
- G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
- G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
- } while(0)
- ROUND(0);
- ROUND(1);
- ROUND(2);
- ROUND(3);
- ROUND(4);
- ROUND(5);
- ROUND(6);
- ROUND(7);
- ROUND(8);
- ROUND(9);
- ROUND(10);
- ROUND(11);
-
- for(i = 0; i < 8; ++i) {
- S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
- }
+ do { \
+ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+ G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+ G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+ G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+ G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+ } while (0)
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+ /* 3x size reduction on x86_64, almost 7x on ARMv8, 9x on ARMv4 */
+ for (i = 0; i < 12; i++) {
+ ROUND(i);
+ }
+#else
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+ ROUND(10);
+ ROUND(11);
+#endif
+ for (i = 0; i < 8; ++i) {
+ S->h[i] = v[i] ^= v[i + 8] ^ S->h[i];
+ }
#undef G
#undef ROUND
+ blocks += increment;
+ len -= increment;
+ } while (len);
}
/* Absorb the input data into the hash state. Always returns 1. */
@@ -179,23 +214,42 @@ int BLAKE2b_Update(BLAKE2B_CTX *c, const void *data, size_t datalen)
const uint8_t *in = data;
size_t fill;
- while(datalen > 0) {
- fill = sizeof(c->buf) - c->buflen;
- /* Must be >, not >=, so that last block can be hashed differently */
- if(datalen > fill) {
+ /*
+ * Intuitively one would expect intermediate buffer, c->buf, to
+ * store incomplete blocks. But in this case we are interested to
+ * temporarily stash even complete blocks, because last one in the
+ * stream has to be treated in special way, and at this point we
+ * don't know if last block in *this* call is last one "ever". This
+ * is the reason for why |datalen| is compared as >, and not >=.
+ */
+ fill = sizeof(c->buf) - c->buflen;
+ if (datalen > fill) {
+ if (c->buflen) {
memcpy(c->buf + c->buflen, in, fill); /* Fill buffer */
- blake2b_increment_counter(c, BLAKE2B_BLOCKBYTES);
- blake2b_compress(c, c->buf); /* Compress */
+ blake2b_compress(c, c->buf, BLAKE2B_BLOCKBYTES);
c->buflen = 0;
in += fill;
datalen -= fill;
- } else { /* datalen <= fill */
- memcpy(c->buf + c->buflen, in, datalen);
- c->buflen += datalen; /* Be lazy, do not compress */
- return 1;
+ }
+ if (datalen > BLAKE2B_BLOCKBYTES) {
+ size_t stashlen = datalen % BLAKE2B_BLOCKBYTES;
+ /*
+ * If |datalen| is a multiple of the blocksize, stash
+ * last complete block, it can be final one...
+ */
+ stashlen = stashlen ? stashlen : BLAKE2B_BLOCKBYTES;
+ datalen -= stashlen;
+ blake2b_compress(c, in, datalen);
+ in += datalen;
+ datalen = stashlen;
}
}
+ assert(datalen <= BLAKE2B_BLOCKBYTES);
+
+ memcpy(c->buf + c->buflen, in, datalen);
+ c->buflen += datalen; /* Be lazy, do not compress */
+
return 1;
}
@@ -207,14 +261,13 @@ int BLAKE2b_Final(unsigned char *md, BLAKE2B_CTX *c)
{
int i;
- blake2b_increment_counter(c, c->buflen);
blake2b_set_lastblock(c);
/* Padding */
memset(c->buf + c->buflen, 0, sizeof(c->buf) - c->buflen);
- blake2b_compress(c, c->buf);
+ blake2b_compress(c, c->buf, c->buflen);
/* Output full hash to message digest */
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
store64(md + sizeof(c->h[i]) * i, c->h[i]);
}
diff --git a/crypto/blake2/blake2s.c b/crypto/blake2/blake2s.c
index f940aa150b..905a28eaf7 100644
--- a/crypto/blake2/blake2s.c
+++ b/crypto/blake2/blake2s.c
@@ -15,6 +15,12 @@
* can be found at https://blake2.net.
*/
+#ifndef BLAKE_DEBUG
+# undef NDEBUG /* avoid conflicting definitions */
+# define NDEBUG
+#endif
+
+#include <assert.h>
#include <string.h>
#include <openssl/crypto.h>
#include "e_os.h"
@@ -48,21 +54,13 @@ static ossl_inline void blake2s_set_lastblock(BLAKE2S_CTX *S)
S->f[0] = -1;
}
-/* Increment the data hashed counter. */
-static ossl_inline void blake2s_increment_counter(BLAKE2S_CTX *S,
- const uint32_t inc)
-{
- S->t[0] += inc;
- S->t[1] += (S->t[0] < inc);
-}
-
/* Initialize the hashing state. */
static ossl_inline void blake2s_init0(BLAKE2S_CTX *S)
{
int i;
memset(S, 0, sizeof(BLAKE2S_CTX));
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
S->h[i] = blake2s_IV[i];
}
}
@@ -78,7 +76,7 @@ static void blake2s_init_param(BLAKE2S_CTX *S, const BLAKE2S_PARAM *P)
OPENSSL_assert(sizeof(BLAKE2S_PARAM) == 32);
blake2s_init0(S);
/* IV XOR ParamBlock */
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
S->h[i] ^= load32(&p[i*4]);
}
}
@@ -104,67 +102,104 @@ int BLAKE2s_Init(BLAKE2S_CTX *c)
/* Permute the state while xoring in the block of data. */
static void blake2s_compress(BLAKE2S_CTX *S,
- const uint8_t block[BLAKE2S_BLOCKBYTES])
+ const uint8_t *blocks,
+ size_t len)
{
uint32_t m[16];
uint32_t v[16];
size_t i;
+ size_t increment;
- for(i = 0; i < 16; ++i) {
- m[i] = load32(block + i * sizeof(m[i]));
- }
+ /*
+ * There are two distinct usage vectors for this function:
+ *
+ * a) BLAKE2s_Update uses it to process complete blocks,
+ * possibly more than one at a time;
+ *
+ * b) BLAK2s_Final uses it to process last block, always
+ * single but possibly incomplete, in which case caller
+ * pads input with zeros.
+ */
+ assert(len < BLAKE2S_BLOCKBYTES || len % BLAKE2S_BLOCKBYTES == 0);
+
+ /*
+ * Since last block is always processed with separate call,
+ * |len| not being multiple of complete blocks can be observed
+ * only with |len| being less than BLAKE2S_BLOCKBYTES ("less"
+ * including even zero), which is why following assignment doesn't
+ * have to reside inside the main loop below.
+ */
+ increment = len < BLAKE2S_BLOCKBYTES ? len : BLAKE2S_BLOCKBYTES;
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
v[i] = S->h[i];
}
- v[ 8] = blake2s_IV[0];
- v[ 9] = blake2s_IV[1];
- v[10] = blake2s_IV[2];
- v[11] = blake2s_IV[3];
- v[12] = S->t[0] ^ blake2s_IV[4];
- v[13] = S->t[1] ^ blake2s_IV[5];
- v[14] = S->f[0] ^ blake2s_IV[6];
- v[15] = S->f[1] ^ blake2s_IV[7];
+ do {
+ for (i = 0; i < 16; ++i) {
+ m[i] = load32(blocks + i * sizeof(m[i]));
+ }
+
+ /* blake2s_increment_counter */
+ S->t[0] += increment;
+ S->t[1] += (S->t[0] < increment);
+
+ v[ 8] = blake2s_IV[0];
+ v[ 9] = blake2s_IV[1];
+ v[10] = blake2s_IV[2];
+ v[11] = blake2s_IV[3];
+ v[12] = S->t[0] ^ blake2s_IV[4];
+ v[13] = S->t[1] ^ blake2s_IV[5];
+ v[14] = S->f[0] ^ blake2s_IV[6];
+ v[15] = S->f[1] ^ blake2s_IV[7];
#define G(r,i,a,b,c,d) \
- do { \
- a = a + b + m[blake2s_sigma[r][2*i+0]]; \
- d = rotr32(d ^ a, 16); \
- c = c + d; \
- b = rotr32(b ^ c, 12); \
- a = a + b + m[blake2s_sigma[r][2*i+1]]; \
- d = rotr32(d ^ a, 8); \
- c = c + d; \
- b = rotr32(b ^ c, 7); \
- } while(0)
+ do { \
+ a = a + b + m[blake2s_sigma[r][2*i+0]]; \
+ d = rotr32(d ^ a, 16); \
+ c = c + d; \
+ b = rotr32(b ^ c, 12); \
+ a = a + b + m[blake2s_sigma[r][2*i+1]]; \
+ d = rotr32(d ^ a, 8); \
+ c = c + d; \
+ b = rotr32(b ^ c, 7); \
+ } while (0)
#define ROUND(r) \
- do { \
- G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
- G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
- G(r,2,v[ 2],v[ 6],v[10],v[14]); \
- G(r,3,v[ 3],v[ 7],v[11],v[15]); \
- G(r,4,v[ 0],v[ 5],v[10],v[15]); \
- G(r,5,v[ 1],v[ 6],v[11],v[12]); \
- G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
- G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
- } while(0)
- ROUND(0);
- ROUND(1);
- ROUND(2);
- ROUND(3);
- ROUND(4);
- ROUND(5);
- ROUND(6);
- ROUND(7);
- ROUND(8);
- ROUND(9);
-
- for(i = 0; i < 8; ++i) {
- S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
- }
+ do { \
+ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+ G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+ G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+ G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+ G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+ } while (0)
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+ /* almost 3x reduction on x86_64, 4.5x on ARMv8, 4x on ARMv4 */
+ for (i = 0; i < 10; i++) {
+ ROUND(i);
+ }
+#else
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+#endif
+ for (i = 0; i < 8; ++i) {
+ S->h[i] = v[i] ^= v[i + 8] ^ S->h[i];
+ }
#undef G
#undef ROUND
+ blocks += increment;
+ len -= increment;
+ } while (len);
}
/* Absorb the input data into the hash state. Always returns 1. */
@@ -173,23 +208,42 @@ int BLAKE2s_Update(BLAKE2S_CTX *c, const void *data, size_t datalen)
const uint8_t *in = data;
size_t fill;
- while(datalen > 0) {
- fill = sizeof(c->buf) - c->buflen;
- /* Must be >, not >=, so that last block can be hashed differently */
- if(datalen > fill) {
+ /*
+ * Intuitively one would expect intermediate buffer, c->buf, to
+ * store incomplete blocks. But in this case we are interested to
+ * temporarily stash even complete blocks, because last one in the
+ * stream has to be treated in special way, and at this point we
+ * don't know if last block in *this* call is last one "ever". This
+ * is the reason for why |datalen| is compared as >, and not >=.
+ */
+ fill = sizeof(c->buf) - c->buflen;
+ if (datalen > fill) {
+ if (c->buflen) {
memcpy(c->buf + c->buflen, in, fill); /* Fill buffer */
- blake2s_increment_counter(c, BLAKE2S_BLOCKBYTES);
- blake2s_compress(c, c->buf); /* Compress */
+ blake2s_compress(c, c->buf, BLAKE2S_BLOCKBYTES);
c->buflen = 0;
in += fill;
datalen -= fill;
- } else { /* datalen <= fill */
- memcpy(c->buf + c->buflen, in, datalen);
- c->buflen += datalen; /* Be lazy, do not compress */
- return 1;
+ }
+ if (datalen > BLAKE2S_BLOCKBYTES) {
+ size_t stashlen = datalen % BLAKE2S_BLOCKBYTES;
+ /*
+ * If |datalen| is a multiple of the blocksize, stash
+ * last complete block, it can be final one...
+ */
+ stashlen = stashlen ? stashlen : BLAKE2S_BLOCKBYTES;
+ datalen -= stashlen;
+ blake2s_compress(c, in, datalen);
+ in += datalen;
+ datalen = stashlen;
}
}
+ assert(datalen <= BLAKE2S_BLOCKBYTES);
+
+ memcpy(c->buf + c->buflen, in, datalen);
+ c->buflen += datalen; /* Be lazy, do not compress */
+
return 1;
}
@@ -201,14 +255,13 @@ int BLAKE2s_Final(unsigned char *md, BLAKE2S_CTX *c)
{
int i;
- blake2s_increment_counter(c, (uint32_t)c->buflen);
blake2s_set_lastblock(c);
/* Padding */
memset(c->buf + c->buflen, 0, sizeof(c->buf) - c->buflen);
- blake2s_compress(c, c->buf);
+ blake2s_compress(c, c->buf, c->buflen);
/* Output full hash to temp buffer */
- for(i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
store32(md + sizeof(c->h[i]) * i, c->h[i]);
}