diff options
author | slontis <shane.lontis@oracle.com> | 2023-07-21 15:05:38 +1000 |
---|---|---|
committer | Tomas Mraz <tomas@openssl.org> | 2023-11-10 13:27:00 +0100 |
commit | 536649082212e7c643ab8d7bab89f620fbcd37f0 (patch) | |
tree | d5f28d382eb86111b2d2672db4f7ab0a836bc9c5 /crypto/sha | |
parent | 9257a89b6f25dfa5aeee7114baec8ea992fcf5e5 (diff) |
Add EVP_DigestSqueeze() API.
Fixes #7894
This allows SHAKE to squeeze multiple times with different output sizes.
The existing EVP_DigestFinalXOF() API has been left as a one shot
operation. A similar interface is used by another toolkit.
The low level SHA3_squeeze() function needed to change slightly so
that it can handle multiple squeezes. This involves changing the
assembler code so that it accepts a boolean to indicate whether
the Keccak function should be called on entry.
At the provider level, the squeeze is buffered, so that it only requests
a multiple of the blocksize when SHA3_squeeze() is called. On the first
call the boolean passed is 0; on subsequent calls it is 1.
This PR is derived from the excellent work done by @nmathewson in
https://github.com/openssl/openssl/pull/7921
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/21511)
Diffstat (limited to 'crypto/sha')
-rwxr-xr-x | crypto/sha/asm/keccak1600-armv4.pl | 4 | ||||
-rwxr-xr-x | crypto/sha/asm/keccak1600-armv8.pl | 4 | ||||
-rwxr-xr-x | crypto/sha/asm/keccak1600-ppc64.pl | 3 | ||||
-rwxr-xr-x | crypto/sha/asm/keccak1600-x86_64.pl | 18 | ||||
-rw-r--r-- | crypto/sha/keccak1600.c | 19 | ||||
-rw-r--r-- | crypto/sha/sha3.c | 100 |
6 files changed, 128 insertions, 20 deletions
diff --git a/crypto/sha/asm/keccak1600-armv4.pl b/crypto/sha/asm/keccak1600-armv4.pl index eaad86d39d..18948fd7c0 100755 --- a/crypto/sha/asm/keccak1600-armv4.pl +++ b/crypto/sha/asm/keccak1600-armv4.pl @@ -966,6 +966,8 @@ SHA3_squeeze: stmdb sp!,{r6-r9} mov r14,$A_flat + cmp r4, #0 @ r4 = 'next' argument + bne .Lnext_block b .Loop_squeeze .align 4 @@ -1037,7 +1039,7 @@ SHA3_squeeze: subs $bsz,$bsz,#8 @ bsz -= 8 bhi .Loop_squeeze - +.Lnext_block: mov r0,r14 @ original $A_flat bl KeccakF1600 diff --git a/crypto/sha/asm/keccak1600-armv8.pl b/crypto/sha/asm/keccak1600-armv8.pl index ab7aa713ac..72f8c3adb5 100755 --- a/crypto/sha/asm/keccak1600-armv8.pl +++ b/crypto/sha/asm/keccak1600-armv8.pl @@ -483,6 +483,8 @@ SHA3_squeeze: mov $out,x1 mov $len,x2 mov $bsz,x3 + cmp x4, #0 // x4 = 'next' argument + bne .Lnext_block .Loop_squeeze: ldr x4,[x0],#8 @@ -497,7 +499,7 @@ SHA3_squeeze: subs x3,x3,#8 bhi .Loop_squeeze - +.Lnext_block: mov x0,$A_flat bl KeccakF1600 mov x0,$A_flat diff --git a/crypto/sha/asm/keccak1600-ppc64.pl b/crypto/sha/asm/keccak1600-ppc64.pl index bff0d78585..3f8ba817f8 100755 --- a/crypto/sha/asm/keccak1600-ppc64.pl +++ b/crypto/sha/asm/keccak1600-ppc64.pl @@ -668,6 +668,8 @@ SHA3_squeeze: subi $out,r4,1 ; prepare for stbu mr $len,r5 mr $bsz,r6 + ${UCMP}i r7,1 ; r7 = 'next' argument + blt .Lnext_block b .Loop_squeeze .align 4 @@ -698,6 +700,7 @@ SHA3_squeeze: subic. 
r6,r6,8 bgt .Loop_squeeze +.Lnext_block: mr r3,$A_flat bl KeccakF1600 subi r3,$A_flat,8 ; prepare for ldu diff --git a/crypto/sha/asm/keccak1600-x86_64.pl b/crypto/sha/asm/keccak1600-x86_64.pl index 02f0116014..bddcaf8294 100755 --- a/crypto/sha/asm/keccak1600-x86_64.pl +++ b/crypto/sha/asm/keccak1600-x86_64.pl @@ -503,12 +503,12 @@ SHA3_absorb: .size SHA3_absorb,.-SHA3_absorb ___ } -{ my ($A_flat,$out,$len,$bsz) = ("%rdi","%rsi","%rdx","%rcx"); +{ my ($A_flat,$out,$len,$bsz,$next) = ("%rdi","%rsi","%rdx","%rcx","%r8"); ($out,$len,$bsz) = ("%r12","%r13","%r14"); $code.=<<___; .globl SHA3_squeeze -.type SHA3_squeeze,\@function,4 +.type SHA3_squeeze,\@function,5 .align 32 SHA3_squeeze: .cfi_startproc @@ -520,10 +520,12 @@ SHA3_squeeze: .cfi_push %r14 shr \$3,%rcx - mov $A_flat,%r8 + mov $A_flat,%r9 mov %rsi,$out mov %rdx,$len mov %rcx,$bsz + bt \$0,$next + jc .Lnext_block jmp .Loop_squeeze .align 32 @@ -531,8 +533,8 @@ SHA3_squeeze: cmp \$8,$len jb .Ltail_squeeze - mov (%r8),%rax - lea 8(%r8),%r8 + mov (%r9),%rax + lea 8(%r9),%r9 mov %rax,($out) lea 8($out),$out sub \$8,$len # len -= 8 @@ -540,14 +542,14 @@ SHA3_squeeze: sub \$1,%rcx # bsz-- jnz .Loop_squeeze - +.Lnext_block: call KeccakF1600 - mov $A_flat,%r8 + mov $A_flat,%r9 mov $bsz,%rcx jmp .Loop_squeeze .Ltail_squeeze: - mov %r8, %rsi + mov %r9, %rsi mov $out,%rdi mov $len,%rcx .byte 0xf3,0xa4 # rep movsb diff --git a/crypto/sha/keccak1600.c b/crypto/sha/keccak1600.c index c15bc42aaa..6682367be1 100644 --- a/crypto/sha/keccak1600.c +++ b/crypto/sha/keccak1600.c @@ -13,7 +13,7 @@ size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len, size_t r); -void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r); +void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next); #if !defined(KECCAK1600_ASM) || !defined(SELFTEST) @@ -1090,10 +1090,16 @@ size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len, } /* - * sha3_squeeze is called 
once at the end to generate |out| hash value - * of |len| bytes. + * SHA3_squeeze may be called after SHA3_absorb to generate |out| hash value of + * |len| bytes. + * If multiple SHA3_squeeze calls are required the output length |len| must be a + * multiple of the blocksize, with |next| being 0 on the first call and 1 on + * subsequent calls. It is the callers responsibility to buffer the results. + * When only a single call to SHA3_squeeze is required, |len| can be any size + * and |next| must be 0. */ -void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r) +void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, + int next) { uint64_t *A_flat = (uint64_t *)A; size_t i, w = r / 8; @@ -1101,6 +1107,9 @@ void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r) assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0); while (len != 0) { + if (next) + KeccakF1600(A); + next = 1; for (i = 0; i < w && len != 0; i++) { uint64_t Ai = BitDeinterleave(A_flat[i]); @@ -1123,8 +1132,6 @@ void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r) out += 8; len -= 8; } - if (len) - KeccakF1600(A); } } #endif diff --git a/crypto/sha/sha3.c b/crypto/sha/sha3.c index 633bc2e120..2411b3f1f8 100644 --- a/crypto/sha/sha3.c +++ b/crypto/sha/sha3.c @@ -10,12 +10,13 @@ #include <string.h> #include "internal/sha3.h" -void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r); +void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next); void ossl_sha3_reset(KECCAK1600_CTX *ctx) { memset(ctx->A, 0, sizeof(ctx->A)); ctx->bufsz = 0; + ctx->xof_state = XOF_STATE_INIT; } int ossl_sha3_init(KECCAK1600_CTX *ctx, unsigned char pad, size_t bitlen) @@ -51,6 +52,10 @@ int ossl_sha3_update(KECCAK1600_CTX *ctx, const void *_inp, size_t len) if (len == 0) return 1; + if (ctx->xof_state == XOF_STATE_SQUEEZE + || ctx->xof_state == XOF_STATE_FINAL) + return 0; + if ((num = 
ctx->bufsz) != 0) { /* process intermediate buffer? */ rem = bsz - num; @@ -84,13 +89,21 @@ int ossl_sha3_update(KECCAK1600_CTX *ctx, const void *_inp, size_t len) return 1; } -int ossl_sha3_final(unsigned char *md, KECCAK1600_CTX *ctx) +/* + * ossl_sha3_final()is a single shot method + * (Use ossl_sha3_squeeze for multiple calls). + * outlen is the variable size output. + */ +int ossl_sha3_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen) { size_t bsz = ctx->block_size; size_t num = ctx->bufsz; - if (ctx->md_size == 0) + if (outlen == 0) return 1; + if (ctx->xof_state == XOF_STATE_SQUEEZE + || ctx->xof_state == XOF_STATE_FINAL) + return 0; /* * Pad the data with 10*1. Note that |num| can be |bsz - 1| @@ -103,7 +116,86 @@ int ossl_sha3_final(unsigned char *md, KECCAK1600_CTX *ctx) (void)SHA3_absorb(ctx->A, ctx->buf, bsz, bsz); - SHA3_squeeze(ctx->A, md, ctx->md_size, bsz); + ctx->xof_state = XOF_STATE_FINAL; + SHA3_squeeze(ctx->A, out, outlen, bsz, 0); + return 1; +} + +/* + * This method can be called multiple times. + * Rather than heavily modifying assembler for SHA3_squeeze(), + * we instead just use the limitations of the existing function. + * i.e. Only request multiples of the ctx->block_size when calling + * SHA3_squeeze(). For output length requests smaller than the + * ctx->block_size just request a single ctx->block_size bytes and + * buffer the results. The next request will use the buffer first + * to grab output bytes. + */ +int ossl_sha3_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen) +{ + size_t bsz = ctx->block_size; + size_t num = ctx->bufsz; + size_t len; + int next = 1; + + if (outlen == 0) + return 1; + + if (ctx->xof_state == XOF_STATE_FINAL) + return 0; + + /* + * On the first squeeze call, finish the absorb process, + * by adding the trailing padding and then doing + * a final absorb. + */ + if (ctx->xof_state != XOF_STATE_SQUEEZE) { + /* + * Pad the data with 10*1. 
Note that |num| can be |bsz - 1| + * in which case both byte operations below are performed on + * same byte... + */ + memset(ctx->buf + num, 0, bsz - num); + ctx->buf[num] = ctx->pad; + ctx->buf[bsz - 1] |= 0x80; + (void)SHA3_absorb(ctx->A, ctx->buf, bsz, bsz); + ctx->xof_state = XOF_STATE_SQUEEZE; + num = ctx->bufsz = 0; + next = 0; + } + + /* + * Step 1. Consume any bytes left over from a previous squeeze + * (See Step 4 below). + */ + if (num != 0) { + if (outlen > ctx->bufsz) + len = ctx->bufsz; + else + len = outlen; + memcpy(out, ctx->buf + bsz - ctx->bufsz, len); + out += len; + outlen -= len; + ctx->bufsz -= len; + } + if (outlen == 0) + return 1; + + /* Step 2. Copy full sized squeezed blocks to the output buffer directly */ + if (outlen >= bsz) { + len = bsz * (outlen / bsz); + SHA3_squeeze(ctx->A, out, len, bsz, next); + next = 1; + out += len; + outlen -= len; + } + if (outlen > 0) { + /* Step 3. Squeeze one more block into a buffer */ + SHA3_squeeze(ctx->A, ctx->buf, bsz, bsz, next); + memcpy(out, ctx->buf, outlen); + /* Step 4. Remember the leftover part of the squeezed block */ + ctx->bufsz = bsz - outlen; + } return 1; } |