From 41cf2d2518f8b7f31287984ea9f13bc9d55205dc Mon Sep 17 00:00:00 2001
From: Andy Polyakov <appro@openssl.org>
Date: Wed, 5 Feb 2014 19:52:38 +0100
Subject: evp/e_aes_cbc_hmac_sha[1|256].c: add multi-block implementations
 [from master].

---
 crypto/evp/e_aes_cbc_hmac_sha1.c   | 389 ++++++++++++++++++++++++++++++++++---
 crypto/evp/e_aes_cbc_hmac_sha256.c | 315 +++++++++++++++++++++++++++---
 crypto/evp/evp.h                   |  13 ++
 3 files changed, 667 insertions(+), 50 deletions(-)
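Usage sketch for reviewers (illustrative only, not part of the patch): the new
interface is driven entirely through EVP_CIPHER_CTX_ctrl(). Here ctx is an
encrypt-side EVP_CIPHER_CTX already keyed for one of these ciphers, and plen
(< 2^16), write_sequence, plaintext and outbuf are assumed caller-side state,
roughly as the SSL record layer would hold them:

    /* Encrypt one large write as several interleaved TLS 1.1 records. */
    EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM mb_param;
    unsigned char aad[13];              /* pseudo-AAD: template record header */
    int packlen;

    memcpy(aad, write_sequence, 8);     /* sequence number of first record */
    aad[8]  = 23;                       /* application data */
    aad[9]  = 3;                        /* version major: TLS1.1 = 0x0302 */
    aad[10] = 2;                        /* version minor */
    aad[11] = (unsigned char)(plen >> 8);   /* total payload to be split */
    aad[12] = (unsigned char)(plen);

    mb_param.inp = aad;
    mb_param.len = plen;
    packlen = EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_TLS1_1_MULTIBLOCK_AAD,
                                  sizeof(mb_param), &mb_param);
    if (packlen <= 0) {
        /* payload < 4KB or version/interleave rejected: use regular path */
    } else {
        mb_param.out = outbuf;          /* caller provides >= packlen bytes */
        mb_param.inp = plaintext;
        mb_param.len = plen;
        packlen = EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT,
                                      sizeof(mb_param), &mb_param);
        /* packlen bytes of fully framed records are now in outbuf */
    }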
diff --git a/crypto/evp/e_aes_cbc_hmac_sha1.c b/crypto/evp/e_aes_cbc_hmac_sha1.c
index fb2c884a78..09f928190d 100644
--- a/crypto/evp/e_aes_cbc_hmac_sha1.c
+++ b/crypto/evp/e_aes_cbc_hmac_sha1.c
@@ -58,7 +58,8 @@
 #include <openssl/objects.h>
 #include <openssl/aes.h>
 #include <openssl/sha.h>
-#include "evp_locl.h"
+#include <openssl/rand.h>
+#include "modes_lcl.h"
 
 #ifndef EVP_CIPH_FLAG_AEAD_CIPHER
 #define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000
@@ -70,6 +71,10 @@
 #define EVP_CIPH_FLAG_DEFAULT_ASN1 0
 #endif
 
+#if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
+#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
+#endif
+
 #define TLS1_1_VERSION 0x0302
 
 typedef struct
     {
@@ -90,11 +95,7 @@ typedef struct
     defined(_M_AMD64) || defined(_M_X64) || \
     defined(__INTEL__) )
 
-#if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC)
-# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; })
-#endif
-
-extern unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[3];
 #define AESNI_CAPABLE (1<<(57-32))
 
 int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
@@ -112,6 +113,10 @@
 void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks,
             const AES_KEY *key, unsigned char iv[16],
             SHA_CTX *ctx,const void *in0);
 
+void aesni256_cbc_sha1_dec (const void *inp, void *out, size_t blocks,
+            const AES_KEY *key, unsigned char iv[16],
+            SHA_CTX *ctx,const void *in0);
+
 #define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data)
 
 static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
@@ -136,6 +141,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
     }
 
 #define STITCHED_CALL
+#undef STITCHED_DECRYPT_CALL
 
 #if !defined(STITCHED_CALL)
 #define aes_off 0
@@ -176,6 +182,198 @@ static void sha1_update(SHA_CTX *c,const void *data,size_t len)
 #endif
 #define SHA1_Update sha1_update
 
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+
+typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8]; } SHA1_MB_CTX;
+typedef struct { const unsigned char *ptr; int blocks; } HASH_DESC;
+
+void sha1_multi_block(SHA1_MB_CTX *,const HASH_DESC *,int);
+
+typedef struct { const unsigned char *inp; unsigned char *out;
+        int blocks; u64 iv[2]; } CIPH_DESC;
+
+void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int);
+
+static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
+    unsigned char *out, const unsigned char *inp, size_t inp_len,
+    int n4x)    /* n4x is 1 or 2 */
+{
+    HASH_DESC hash_d[8], edges[8];
+    CIPH_DESC ciph_d[8];
+    unsigned char storage[sizeof(SHA1_MB_CTX)+32];
+    union { u64 q[16];
+            u32 d[32];
+            u8  c[128]; } blocks[8];
+    SHA1_MB_CTX *ctx;
+    unsigned int frag, last, packlen, i, x4=4*n4x;
+    size_t ret = 0;
+    u8 *IVs;
+#if defined(BSWAP8)
+    u64 seqnum;
+#endif
+
+    ctx = (SHA1_MB_CTX *)(storage+32-((size_t)storage%32));    /* align */
+
+    frag = (unsigned int)inp_len>>(1+n4x);
+    last = (unsigned int)inp_len+frag-(frag<<(1+n4x));
+    if (last>frag && ((last+13+9)%64)<(x4-1)) {
+        frag++;
+        last -= x4-1;
+    }
+
+    hash_d[0].ptr = inp;
+    for (i=1;i<x4;i++)  hash_d[i].ptr = hash_d[i-1].ptr+frag;
+
+#if defined(BSWAP8)
+    memcpy(blocks[0].c,key->md.data,8);
+    seqnum = BSWAP8(blocks[0].q[0]);
+#endif
+    for (i=0;i<x4;i++) {
+        unsigned int len = (i==(x4-1)?last:frag);
+
+        ctx->A[i] = key->md.h0;
+        ctx->B[i] = key->md.h1;
+        ctx->C[i] = key->md.h2;
+        ctx->D[i] = key->md.h3;
+        ctx->E[i] = key->md.h4;
+
+        /* fix seqnum */
+#if defined(BSWAP8)
+        blocks[i].q[0] = BSWAP8(seqnum+i);
+#else
+        blocks[i].c[7] += ((u8*)key->md.data)[7]+i;
+        if (blocks[i].c[7] < i) {
+            int j;
+
+            for (j=6;j>=0;j--) {
+                if (blocks[i].c[j]=((u8*)key->md.data)[j]+1) break;
+            }
+        }
+#endif
+        blocks[i].c[8] = ((u8*)key->md.data)[8];
+        blocks[i].c[9] = ((u8*)key->md.data)[9];
+        blocks[i].c[10] = ((u8*)key->md.data)[10];
+        /* fix length */
+        blocks[i].c[11] = (u8)(len>>8);
+        blocks[i].c[12] = (u8)(len);
+
+        memcpy(blocks[i].c+13,hash_d[i].ptr,64-13);
+        hash_d[i].ptr += 64-13;
+        hash_d[i].blocks = (len-(64-13))/64;
+
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* hash 13-byte headers and first 64-13 bytes of inputs */
+    sha1_multi_block(ctx,edges,n4x);
+    /* hash bulk inputs */
+    sha1_multi_block(ctx,hash_d,n4x);
+
+    memset(blocks,0,sizeof(blocks));
+    for (i=0;i<x4;i++) {
+        blocks[i].d[0] = BSWAP4(ctx->A[i]); ctx->A[i] = key->tail.h0;
+        blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h1;
+        blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h2;
+        blocks[i].d[3] = BSWAP4(ctx->D[i]); ctx->D[i] = key->tail.h3;
+        blocks[i].d[4] = BSWAP4(ctx->E[i]); ctx->E[i] = key->tail.h4;
+        blocks[i].c[20] = 0x80;
+        blocks[i].d[15] = BSWAP4((64+20)*8);
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* finalize MACs */
+    sha1_multi_block(ctx,edges,n4x);
+
+    packlen = 5+16+((frag+20+16)&-16);
+
+    out += (packlen<<(1+n4x))-packlen;
+    inp += (frag<<(1+n4x))-frag;
+
+    RAND_bytes((IVs=blocks[0].c),16*x4);    /* ask for IVs in bulk */
+
+    for (i=x4-1;;i--) {
+        unsigned int len = (i==(x4-1)?last:frag), pad, j;
+        unsigned char *out0 = out;
+
+        out += 5+16;        /* place for header and explicit IV */
+        ciph_d[i].inp = out;
+        ciph_d[i].out = out;
+
+        memmove(out,inp,len);
+        out += len;
+
+        /* write MAC */
+        ((u32 *)out)[0] = BSWAP4(ctx->A[i]);
+        ((u32 *)out)[1] = BSWAP4(ctx->B[i]);
+        ((u32 *)out)[2] = BSWAP4(ctx->C[i]);
+        ((u32 *)out)[3] = BSWAP4(ctx->D[i]);
+        ((u32 *)out)[4] = BSWAP4(ctx->E[i]);
+        out += 20;
+        len += 20;
+
+        /* pad */
+        pad = 15-len%16;
+        for (j=0;j<=pad;j++) *(out++) = pad;
+        len += pad+1;
+
+        ciph_d[i].blocks = len/16;
+        len += 16;  /* account for explicit iv */
+
+        /* arrange header */
+        out0[0] = ((u8*)key->md.data)[8];
+        out0[1] = ((u8*)key->md.data)[9];
+        out0[2] = ((u8*)key->md.data)[10];
+        out0[3] = (u8)(len>>8);
+        out0[4] = (u8)(len);
+
+        /* explicit iv */
+        memcpy(ciph_d[i].iv, IVs, 16);
+        memcpy(&out0[5], IVs, 16);
+
+        ret += len+5;
+
+        if (i==0) break;
+
+        out = out0-packlen;
+        inp -= frag;
+        IVs += 16;
+    }
+
+    aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
+
+    OPENSSL_cleanse(blocks,sizeof(blocks));
+    OPENSSL_cleanse(ctx,sizeof(*ctx));
+
+    return ret;
+}
+#endif
+
 static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
               const unsigned char *in, size_t len)
 {
@@ -249,28 +447,45 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         /* arrange cache line alignment */
         pmac = (void *)(((size_t)mac.c+31)&((size_t)0-32));
 
-        /* decrypt HMAC|padding at once */
-        aesni_cbc_encrypt(in,out,len,
-                &key->ks,ctx->iv,0);
-
-        if (plen) {    /* "TLS" mode of operation */
+        if (plen != NO_PAYLOAD_LENGTH) {    /* "TLS" mode of operation */
             size_t inp_len, mask, j, i;
             unsigned int res, maxpad, pad, bitlen;
             int ret = 1;
             union { unsigned int  u[SHA_LBLOCK];
                     unsigned char c[SHA_CBLOCK]; }
                     *data = (void *)key->md.data;
+#if defined(STITCHED_DECRYPT_CALL)
            unsigned char tail_iv[AES_BLOCK_SIZE];
            int stitch=0;
+#endif
 
             if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3])
-                >= TLS1_1_VERSION)
-                iv = AES_BLOCK_SIZE;
-
-            if (len<(iv+SHA_DIGEST_LENGTH+1))
+                >= TLS1_1_VERSION) {
+                if (len<(AES_BLOCK_SIZE+SHA_DIGEST_LENGTH+1))
+                    return 0;
+
+                /* omit explicit iv */
+                memcpy(ctx->iv,in,AES_BLOCK_SIZE);
+                in  += AES_BLOCK_SIZE;
+                out += AES_BLOCK_SIZE;
+                len -= AES_BLOCK_SIZE;
+            }
+            else if (len<(SHA_DIGEST_LENGTH+1))
                 return 0;
 
-            /* omit explicit iv */
-            out += iv;
-            len -= iv;
+#if defined(STITCHED_DECRYPT_CALL)
+            if (len>=1024 && ctx->key_len==32) {
+                /* decrypt last block */
+                memcpy(tail_iv,in+len-2*AES_BLOCK_SIZE,AES_BLOCK_SIZE);
+                aesni_cbc_encrypt(in+len-AES_BLOCK_SIZE,
+                    out+len-AES_BLOCK_SIZE,AES_BLOCK_SIZE,
+                    &key->ks,tail_iv,0);
+                stitch=1;
+            } else
+#endif
+            /* decrypt HMAC|padding at once */
+            aesni_cbc_encrypt(in,out,len,
+                    &key->ks,ctx->iv,0);
 
             /* figure out payload length */
             pad = out[len-1];
@@ -290,6 +505,30 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
             key->md = key->head;
             SHA1_Update(&key->md,key->aux.tls_aad,plen);
 
+#if defined(STITCHED_DECRYPT_CALL)
+            if (stitch) {
+                blocks = (len-(256+32+SHA_CBLOCK))/SHA_CBLOCK;
+                aes_off = len-AES_BLOCK_SIZE-blocks*SHA_CBLOCK;
+                sha_off = SHA_CBLOCK-plen;
+
+                aesni_cbc_encrypt(in,out,aes_off,
+                        &key->ks,ctx->iv,0);
+
+                SHA1_Update(&key->md,out,sha_off);
+                aesni256_cbc_sha1_dec(in+aes_off,
+                        out+aes_off,blocks,&key->ks,ctx->iv,
+                        &key->md,out+sha_off);
+
+                sha_off += blocks*=SHA_CBLOCK;
+                out += sha_off;
+                len -= sha_off;
+                inp_len -= sha_off;
+
+                key->md.Nl += (blocks<<3);  /* at most 18 bits */
+                memcpy(ctx->iv,tail_iv,AES_BLOCK_SIZE);
+            }
+#endif
+
 #if 1
             len -= SHA_DIGEST_LENGTH;    /* amend mac */
             if (len>=(256+SHA_CBLOCK)) {
@@ -303,8 +542,8 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 
             /* but pretend as if we hashed padded payload */
             bitlen = key->md.Nl+(inp_len<<3);    /* at most 18 bits */
-#ifdef BSWAP
-            bitlen = BSWAP(bitlen);
+#ifdef BSWAP4
+            bitlen = BSWAP4(bitlen);
 #else
             mac.c[0] = 0;
             mac.c[1] = (unsigned char)(bitlen>>16);
@@ -366,12 +605,12 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
             pmac->u[3] |= key->md.h3 & mask;
             pmac->u[4] |= key->md.h4 & mask;
 
-#ifdef BSWAP
-            pmac->u[0] = BSWAP(pmac->u[0]);
-            pmac->u[1] = BSWAP(pmac->u[1]);
-            pmac->u[2] = BSWAP(pmac->u[2]);
-            pmac->u[3] = BSWAP(pmac->u[3]);
-            pmac->u[4] = BSWAP(pmac->u[4]);
+#ifdef BSWAP4
+            pmac->u[0] = BSWAP4(pmac->u[0]);
+            pmac->u[1] = BSWAP4(pmac->u[1]);
+            pmac->u[2] = BSWAP4(pmac->u[2]);
+            pmac->u[3] = BSWAP4(pmac->u[3]);
+            pmac->u[4] = BSWAP4(pmac->u[4]);
 #else
             for (i=0;i<5;i++) {
                 res = pmac->u[i];
@@ -444,6 +683,34 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 #endif
             return ret;
         } else {
+#if defined(STITCHED_DECRYPT_CALL)
+            if (len>=1024 && ctx->key_len==32) {
+                if (sha_off%=SHA_CBLOCK)
+                    blocks = (len-3*SHA_CBLOCK)/SHA_CBLOCK;
+                else
+                    blocks = (len-2*SHA_CBLOCK)/SHA_CBLOCK;
+                aes_off = len-blocks*SHA_CBLOCK;
+
+                aesni_cbc_encrypt(in,out,aes_off,
+                        &key->ks,ctx->iv,0);
+                SHA1_Update(&key->md,out,sha_off);
+                aesni256_cbc_sha1_dec(in+aes_off,
+                        out+aes_off,blocks,&key->ks,ctx->iv,
+                        &key->md,out+sha_off);
+
+                sha_off += blocks*=SHA_CBLOCK;
+                out += sha_off;
+                len -= sha_off;
+
+                key->md.Nh += blocks>>29;
+                key->md.Nl += blocks<<=3;
+                if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
+            } else
+#endif
+            /* decrypt HMAC|padding at once */
+            aesni_cbc_encrypt(in,out,len,
+                    &key->ks,ctx->iv,0);
+
             SHA1_Update(&key->md,out,len);
         }
     }
@@ -514,6 +781,70 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void
             return SHA_DIGEST_LENGTH;
         }
     }
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
+        return (int)(5+16+((arg+20+16)&-16));
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
+        {
+        EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+            (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
+        unsigned int n4x=1, x4;
+        unsigned int frag, last, packlen, inp_len;
+
+        if (arg<sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM))
+            return -1;
+
+        inp_len = param->inp[11]<<8|param->inp[12];
+
+        if (ctx->encrypt)
+            {
+            if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
+                return -1;
+
+            if (inp_len)
+                {
+                if (inp_len<4096) return 0;    /* too short */
+
+                if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
+                    n4x=2;    /* AVX2 */
+                }
+            else if ((n4x=param->interleave/4) && n4x<=2)
+                inp_len = param->len;
+            else
+                return -1;
+
+            key->md = key->head;
+            SHA1_Update(&key->md,param->inp,13);
+
+            x4 = 4*n4x; n4x += 1;
+
+            frag = inp_len>>n4x;
+            last = inp_len+frag-(frag<<n4x);
+            if (last>frag && ((last+13+9)%64<(x4-1))) {
+                frag++;
+                last -= x4-1;
+            }
+
+            packlen = 5+16+((frag+20+16)&-16);
+            packlen = (packlen<<n4x)-packlen+5+16+((last+20+16)&-16);
+
+            param->interleave = x4;
+
+            return (int)packlen;
+            }
+        else
+            return -1;    /* not yet */
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
+        {
+        EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+            (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
+
+        return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp,
+                        param->len,param->interleave/4);
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
+#endif
     default:
         return -1;
     }
@@ -527,7 +858,8 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher =
     NID_undef,
 #endif
     16,16,16,
-    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
+    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
+    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha1_init_key,
     aesni_cbc_hmac_sha1_cipher,
     NULL,
@@ -546,7 +878,8 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher =
     NID_undef,
 #endif
     16,32,16,
-    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
+    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
+    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha1_init_key,
     aesni_cbc_hmac_sha1_cipher,
     NULL,
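Both files share the fragmentation arithmetic in the AAD ctrl above; a worked
example of the sizing (illustrative numbers, SHA-1 case): for inp_len = 8192 on
an AVX2 part, n4x = 2, so x4 = 8 records of frag = 1024 bytes each (last =
1024). Each record is framed as a 5-byte header plus a 16-byte explicit IV plus
the payload, MAC and CBC padding rounded up to the AES block size:

    /* (frag+20+16)&-16 rounds payload + 20-byte MAC up to the next
     * 16-byte boundary, which accounts for 1..16 bytes of CBC padding */
    packlen = 5 + 16 + ((1024 + 20 + 16) & -16);  /* = 5 + 16 + 1056 = 1077 */

The `last>frag && (last+13+9)%64 < x4-1` adjustment appears to be a lane
balancer: 13 bytes of header pseudo-AAD plus 9 bytes of SHA-1 finalization
(0x80 and the 64-bit length) would spill only a few bytes into an extra 64-byte
hash block in the longest lane, so up to x4-1 bytes are shifted onto the other
lanes to save that block.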
diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c
index 4b6ab77893..95bdd42b13 100644
--- a/crypto/evp/e_aes_cbc_hmac_sha256.c
+++ b/crypto/evp/e_aes_cbc_hmac_sha256.c
@@ -58,6 +58,8 @@
 #include <openssl/objects.h>
 #include <openssl/aes.h>
 #include <openssl/sha.h>
+#include <openssl/rand.h>
+#include "modes_lcl.h"
 
 #ifndef EVP_CIPH_FLAG_AEAD_CIPHER
 #define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000
@@ -69,6 +71,10 @@
 #define EVP_CIPH_FLAG_DEFAULT_ASN1 0
 #endif
 
+#if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
+#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
+#endif
+
 #define TLS1_1_VERSION 0x0302
 
 typedef struct
     {
@@ -89,12 +95,8 @@ typedef struct
     defined(_M_AMD64) || defined(_M_X64) || \
     defined(__INTEL__) )
 
-#if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC)
-# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; })
-#endif
-
 extern unsigned int OPENSSL_ia32cap_P[3];
-#define AESNI_AVX_CAPABLE (1<<(57-32)|1<<(60-32))
+#define AESNI_CAPABLE (1<<(57-32))
 
 int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
             AES_KEY *key);
@@ -176,6 +178,207 @@ static void sha256_update(SHA256_CTX *c,const void *data,size_t len)
 #endif
 #define SHA256_Update sha256_update
 
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+
+typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8],F[8],G[8],H[8]; } SHA256_MB_CTX;
+typedef struct { const unsigned char *ptr; int blocks; } HASH_DESC;
+
+void sha256_multi_block(SHA256_MB_CTX *,const HASH_DESC *,int);
+
+typedef struct { const unsigned char *inp; unsigned char *out;
+        int blocks; u64 iv[2]; } CIPH_DESC;
+
+void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int);
+
+static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
+    unsigned char *out, const unsigned char *inp, size_t inp_len,
+    int n4x)    /* n4x is 1 or 2 */
+{
+    HASH_DESC hash_d[8], edges[8];
+    CIPH_DESC ciph_d[8];
+    unsigned char storage[sizeof(SHA256_MB_CTX)+32];
+    union { u64 q[16];
+            u32 d[32];
+            u8  c[128]; } blocks[8];
+    SHA256_MB_CTX *ctx;
+    unsigned int frag, last, packlen, i, x4=4*n4x;
+    size_t ret = 0;
+    u8 *IVs;
+#if defined(BSWAP8)
+    u64 seqnum;
+#endif
+
+    ctx = (SHA256_MB_CTX *)(storage+32-((size_t)storage%32));    /* align */
+
+    frag = (unsigned int)inp_len>>(1+n4x);
+    last = (unsigned int)inp_len+frag-(frag<<(1+n4x));
+    if (last>frag && ((last+13+9)%64)<(x4-1)) {
+        frag++;
+        last -= x4-1;
+    }
+
+    hash_d[0].ptr = inp;
+    for (i=1;i<x4;i++)  hash_d[i].ptr = hash_d[i-1].ptr+frag;
+
+#if defined(BSWAP8)
+    memcpy(blocks[0].c,key->md.data,8);
+    seqnum = BSWAP8(blocks[0].q[0]);
+#endif
+    for (i=0;i<x4;i++) {
+        unsigned int len = (i==(x4-1)?last:frag);
+
+        ctx->A[i] = key->md.h[0];
+        ctx->B[i] = key->md.h[1];
+        ctx->C[i] = key->md.h[2];
+        ctx->D[i] = key->md.h[3];
+        ctx->E[i] = key->md.h[4];
+        ctx->F[i] = key->md.h[5];
+        ctx->G[i] = key->md.h[6];
+        ctx->H[i] = key->md.h[7];
+
+        /* fix seqnum */
+#if defined(BSWAP8)
+        blocks[i].q[0] = BSWAP8(seqnum+i);
+#else
+        blocks[i].c[7] += ((u8*)key->md.data)[7]+i;
+        if (blocks[i].c[7] < i) {
+            int j;
+
+            for (j=6;j>=0;j--) {
+                if (blocks[i].c[j]=((u8*)key->md.data)[j]+1) break;
+            }
+        }
+#endif
+        blocks[i].c[8] = ((u8*)key->md.data)[8];
+        blocks[i].c[9] = ((u8*)key->md.data)[9];
+        blocks[i].c[10] = ((u8*)key->md.data)[10];
+        /* fix length */
+        blocks[i].c[11] = (u8)(len>>8);
+        blocks[i].c[12] = (u8)(len);
+
+        memcpy(blocks[i].c+13,hash_d[i].ptr,64-13);
+        hash_d[i].ptr += 64-13;
+        hash_d[i].blocks = (len-(64-13))/64;
+
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* hash 13-byte headers and first 64-13 bytes of inputs */
+    sha256_multi_block(ctx,edges,n4x);
+    /* hash bulk inputs */
+    sha256_multi_block(ctx,hash_d,n4x);
+
+    memset(blocks,0,sizeof(blocks));
+    for (i=0;i<x4;i++) {
+        blocks[i].d[0] = BSWAP4(ctx->A[i]); ctx->A[i] = key->tail.h[0];
+        blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h[1];
+        blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h[2];
+        blocks[i].d[3] = BSWAP4(ctx->D[i]); ctx->D[i] = key->tail.h[3];
+        blocks[i].d[4] = BSWAP4(ctx->E[i]); ctx->E[i] = key->tail.h[4];
+        blocks[i].d[5] = BSWAP4(ctx->F[i]); ctx->F[i] = key->tail.h[5];
+        blocks[i].d[6] = BSWAP4(ctx->G[i]); ctx->G[i] = key->tail.h[6];
+        blocks[i].d[7] = BSWAP4(ctx->H[i]); ctx->H[i] = key->tail.h[7];
+        blocks[i].c[32] = 0x80;
+        blocks[i].d[15] = BSWAP4((64+32)*8);
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* finalize MACs */
+    sha256_multi_block(ctx,edges,n4x);
+
+    packlen = 5+16+((frag+32+16)&-16);
+
+    out += (packlen<<(1+n4x))-packlen;
+    inp += (frag<<(1+n4x))-frag;
+
+    RAND_bytes((IVs=blocks[0].c),16*x4);    /* ask for IVs in bulk */
+
+    for (i=x4-1;;i--) {
+        unsigned int len = (i==(x4-1)?last:frag), pad, j;
+        unsigned char *out0 = out;
+
+        out += 5+16;        /* place for header and explicit IV */
+        ciph_d[i].inp = out;
+        ciph_d[i].out = out;
+
+        memmove(out,inp,len);
+        out += len;
+
+        /* write MAC */
+        ((u32 *)out)[0] = BSWAP4(ctx->A[i]);
+        ((u32 *)out)[1] = BSWAP4(ctx->B[i]);
+        ((u32 *)out)[2] = BSWAP4(ctx->C[i]);
+        ((u32 *)out)[3] = BSWAP4(ctx->D[i]);
+        ((u32 *)out)[4] = BSWAP4(ctx->E[i]);
+        ((u32 *)out)[5] = BSWAP4(ctx->F[i]);
+        ((u32 *)out)[6] = BSWAP4(ctx->G[i]);
+        ((u32 *)out)[7] = BSWAP4(ctx->H[i]);
+        out += 32;
+        len += 32;
+
+        /* pad */
+        pad = 15-len%16;
+        for (j=0;j<=pad;j++) *(out++) = pad;
+        len += pad+1;
+
+        ciph_d[i].blocks = len/16;
+        len += 16;  /* account for explicit iv */
+
+        /* arrange header */
+        out0[0] = ((u8*)key->md.data)[8];
+        out0[1] = ((u8*)key->md.data)[9];
+        out0[2] = ((u8*)key->md.data)[10];
+        out0[3] = (u8)(len>>8);
+        out0[4] = (u8)(len);
+
+        /* explicit iv */
+        memcpy(ciph_d[i].iv, IVs, 16);
+        memcpy(&out0[5], IVs, 16);
+
+        ret += len+5;
+
+        if (i==0) break;
+
+        out = out0-packlen;
+        inp -= frag;
+        IVs += 16;
+    }
+
+    aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);
+
+    OPENSSL_cleanse(blocks,sizeof(blocks));
+    OPENSSL_cleanse(ctx,sizeof(*ctx));
+
+    return ret;
+}
+#endif
+
 static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
               const unsigned char *in, size_t len)
 {
@@ -204,7 +407,9 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     iv = AES_BLOCK_SIZE;
 
 #if defined(STITCHED_CALL)
-    if (plen>(sha_off+iv) && (blocks=(plen-(sha_off+iv))/SHA256_CBLOCK)) {
+    if (OPENSSL_ia32cap_P[1]&(1<<(60-32)) &&    /* AVX? */
+        plen>(sha_off+iv) &&
+        (blocks=(plen-(sha_off+iv))/SHA256_CBLOCK)) {
         SHA256_Update(&key->md,in+iv,sha_off);
 
         (void)aesni_cbc_sha256_enc(in,out,blocks,&key->ks,
@@ -253,7 +458,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         aesni_cbc_encrypt(in,out,len,
                 &key->ks,ctx->iv,0);
 
-        if (plen) {    /* "TLS" mode of operation */
+        if (plen != NO_PAYLOAD_LENGTH) {    /* "TLS" mode of operation */
             size_t inp_len, mask, j, i;
             unsigned int res, maxpad, pad, bitlen;
             int ret = 1;
@@ -303,8 +508,8 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 
             /* but pretend as if we hashed padded payload */
             bitlen = key->md.Nl+(inp_len<<3);    /* at most 18 bits */
-#ifdef BSWAP
-            bitlen = BSWAP(bitlen);
+#ifdef BSWAP4
+            bitlen = BSWAP4(bitlen);
 #else
             mac.c[0] = 0;
             mac.c[1] = (unsigned char)(bitlen>>16);
@@ -378,15 +583,15 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
             pmac->u[6] |= key->md.h[6] & mask;
             pmac->u[7] |= key->md.h[7] & mask;
 
-#ifdef BSWAP
-            pmac->u[0] = BSWAP(pmac->u[0]);
-            pmac->u[1] = BSWAP(pmac->u[1]);
-            pmac->u[2] = BSWAP(pmac->u[2]);
-            pmac->u[3] = BSWAP(pmac->u[3]);
-            pmac->u[4] = BSWAP(pmac->u[4]);
-            pmac->u[5] = BSWAP(pmac->u[5]);
-            pmac->u[6] = BSWAP(pmac->u[6]);
-            pmac->u[7] = BSWAP(pmac->u[7]);
+#ifdef BSWAP4
+            pmac->u[0] = BSWAP4(pmac->u[0]);
+            pmac->u[1] = BSWAP4(pmac->u[1]);
+            pmac->u[2] = BSWAP4(pmac->u[2]);
+            pmac->u[3] = BSWAP4(pmac->u[3]);
+            pmac->u[4] = BSWAP4(pmac->u[4]);
+            pmac->u[5] = BSWAP4(pmac->u[5]);
+            pmac->u[6] = BSWAP4(pmac->u[6]);
+            pmac->u[7] = BSWAP4(pmac->u[7]);
 #else
             for (i=0;i<8;i++) {
                 res = pmac->u[i];
@@ -529,6 +734,70 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo
             return SHA256_DIGEST_LENGTH;
         }
     }
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
+        return (int)(5+16+((arg+32+16)&-16));
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
+        {
+        EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+            (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
+        unsigned int n4x=1, x4;
+        unsigned int frag, last, packlen, inp_len;
+
+        if (arg<sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM))
+            return -1;
+
+        inp_len = param->inp[11]<<8|param->inp[12];
+
+        if (ctx->encrypt)
+            {
+            if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
+                return -1;
+
+            if (inp_len)
+                {
+                if (inp_len<4096) return 0;    /* too short */
+
+                if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
+                    n4x=2;    /* AVX2 */
+                }
+            else if ((n4x=param->interleave/4) && n4x<=2)
+                inp_len = param->len;
+            else
+                return -1;
+
+            key->md = key->head;
+            SHA256_Update(&key->md,param->inp,13);
+
+            x4 = 4*n4x; n4x += 1;
+
+            frag = inp_len>>n4x;
+            last = inp_len+frag-(frag<<n4x);
+            if (last>frag && ((last+13+9)%64<(x4-1))) {
+                frag++;
+                last -= x4-1;
+            }
+
+            packlen = 5+16+((frag+32+16)&-16);
+            packlen = (packlen<<n4x)-packlen+5+16+((last+32+16)&-16);
+
+            param->interleave = x4;
+
+            return (int)packlen;
+            }
+        else
+            return -1;    /* not yet */
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
+        {
+        EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+            (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
+
+        return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp,
+                        param->len,param->interleave/4);
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
+#endif
     default:
         return -1;
     }
@@ -542,7 +811,8 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher =
     NID_undef,
 #endif
     16,16,16,
-    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
+    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
+    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha256_init_key,
     aesni_cbc_hmac_sha256_cipher,
     NULL,
@@ -561,7 +831,8 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher =
     NID_undef,
 #endif
     16,32,16,
-    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
+    EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
+    EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha256_init_key,
     aesni_cbc_hmac_sha256_cipher,
     NULL,
@@ -574,14 +845,14 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher =
 
 const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
     {
-    return((OPENSSL_ia32cap_P[1]&AESNI_AVX_CAPABLE)==AESNI_AVX_CAPABLE &&
+    return((OPENSSL_ia32cap_P[1]&AESNI_CAPABLE) &&
            aesni_cbc_sha256_enc(NULL,NULL,0,NULL,NULL,NULL,NULL) ?
            &aesni_128_cbc_hmac_sha256_cipher:NULL);
     }
 
 const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
     {
-    return((OPENSSL_ia32cap_P[1]&AESNI_AVX_CAPABLE)==AESNI_AVX_CAPABLE &&
+    return((OPENSSL_ia32cap_P[1]&AESNI_CAPABLE) &&
           aesni_cbc_sha256_enc(NULL,NULL,0,NULL,NULL,NULL,NULL)?
           &aesni_256_cbc_hmac_sha256_cipher:NULL);
     }
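The key->head, key->tail and key->md contexts used throughout both files come
from the ciphers' existing ctrl paths (not shown in this diff). For reference,
the standard HMAC precomputation they rely on looks roughly like this; an
illustration only, the helper name is hypothetical (SHA-1 shown, the SHA-256
file is analogous with SHA256_Init/SHA256_Update):

    #include <string.h>
    #include <openssl/sha.h>
    #include <openssl/crypto.h>     /* OPENSSL_cleanse */

    /* HMAC(K,m) = H((K^opad) || H((K^ipad) || m)). Hashing one block of
     * K^ipad into "head" and one block of K^opad into "tail" up front
     * lets every record MAC start from a plain struct copy of these
     * contexts instead of rehashing the key. */
    static void hmac_init_ipad_opad(SHA_CTX *head, SHA_CTX *tail,
                                    const unsigned char *mac_key, size_t len)
    {
        unsigned char pad[SHA_CBLOCK];  /* 64-byte hash block */
        size_t i;

        memset(pad, 0, sizeof(pad));
        memcpy(pad, mac_key, len);      /* assumes len <= 64 */

        for (i = 0; i < sizeof(pad); i++) pad[i] ^= 0x36;       /* ipad */
        SHA1_Init(head);
        SHA1_Update(head, pad, sizeof(pad));

        for (i = 0; i < sizeof(pad); i++) pad[i] ^= 0x36^0x5c;  /* -> opad */
        SHA1_Init(tail);
        SHA1_Update(tail, pad, sizeof(pad));

        OPENSSL_cleanse(pad, sizeof(pad));
    }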
diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h
index adf5389084..097fa3bece 100644
--- a/crypto/evp/evp.h
+++ b/crypto/evp/evp.h
@@ -364,6 +364,7 @@ struct evp_cipher_st
  */
 #define EVP_CIPH_FLAG_CUSTOM_CIPHER     0x100000
 #define EVP_CIPH_FLAG_AEAD_CIPHER       0x200000
+#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0x400000
 
 /* Cipher context flag to indicate we can handle
  * wrap mode: if allowed in older applications it could
@@ -403,6 +404,18 @@ struct evp_cipher_st
 /* Set the GCM invocation field, decrypt only */
 #define EVP_CTRL_GCM_SET_IV_INV         0x18
 
+#define EVP_CTRL_TLS1_1_MULTIBLOCK_AAD          0x19
+#define EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT      0x1a
+#define EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT      0x1b
+#define EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE  0x1c
+
+typedef struct {
+    unsigned char *out;
+    const unsigned char *inp;
+    size_t len;
+    unsigned int interleave;
+} EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM;
+
 /* GCM TLS constants */
 /* Length of fixed part of IV derived from PRF */
 #define EVP_GCM_TLS_FIXED_IV_LEN        4
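With the evp.h additions in place, a caller can probe for the capability before
attempting the new ctrls (a sketch, assuming an AES-NI-capable build; note the
DECRYPT ctrl above still returns -1):

    /* EVP_aes_128_cbc_hmac_sha1() returns NULL without AES-NI, so check
     * both the pointer and the advertised flag. */
    const EVP_CIPHER *c = EVP_aes_128_cbc_hmac_sha1();

    if (c != NULL
        && (EVP_CIPHER_flags(c) & EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)) {
        /* the EVP_CTRL_TLS1_1_MULTIBLOCK_* ctrls are available */
    }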