diff options
author | Christoph Müllner <christoph.muellner@vrull.eu> | 2023-01-18 11:57:33 +0100 |
---|---|---|
committer | Pauli <pauli@openssl.org> | 2023-03-16 13:12:19 +1100 |
commit | b24684369b76df8b226fe9aa95fca2bccfc6a175 (patch) | |
tree | 2aed331bc37f57dcf4e0ff961004e82268db4239 /crypto/perlasm | |
parent | 75623ed8d01555e7c719f3b1c598e555c0878da0 (diff) |
riscv: GCM: Simplify GCM calculation
The existing GCM calculation provides some potential
for further optimizations. Let's use the demo code
from the RISC-V cryptography extension group
(https://github.com/riscv/riscv-crypto), which represents
the extension architect's intended use of the clmul instruction.
The GCM calculation depends on bit and byte reversal.
Therefore, we use the corresponding instructions to do that
(if available at run-time).
The resulting computation becomes quite compact and passes
all tests.
Note that a side effect of this change is reduced register
usage in .gmult(), which opens the door for an efficient .ghash()
implementation.
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/20078)
Diffstat (limited to 'crypto/perlasm')
-rw-r--r-- | crypto/perlasm/riscv.pm | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/crypto/perlasm/riscv.pm b/crypto/perlasm/riscv.pm index 80f23ae5b3..b0c786a13c 100644 --- a/crypto/perlasm/riscv.pm +++ b/crypto/perlasm/riscv.pm @@ -49,6 +49,77 @@ sub read_reg { return $1; } +# Helper functions + +sub brev8_rv64i { + # brev8 without `brev8` instruction (only in Zbkb) + # Bit-reverses the first argument and needs two scratch registers + my $val = shift; + my $t0 = shift; + my $t1 = shift; + my $brev8_const = shift; + my $seq = <<___; + la $brev8_const, Lbrev8_const + + ld $t0, 0($brev8_const) # 0xAAAAAAAAAAAAAAAA + slli $t1, $val, 1 + and $t1, $t1, $t0 + and $val, $val, $t0 + srli $val, $val, 1 + or $val, $t1, $val + + ld $t0, 8($brev8_const) # 0xCCCCCCCCCCCCCCCC + slli $t1, $val, 2 + and $t1, $t1, $t0 + and $val, $val, $t0 + srli $val, $val, 2 + or $val, $t1, $val + + ld $t0, 16($brev8_const) # 0xF0F0F0F0F0F0F0F0 + slli $t1, $val, 4 + and $t1, $t1, $t0 + and $val, $val, $t0 + srli $val, $val, 4 + or $val, $t1, $val +___ + return $seq; +} + +sub sd_rev8_rv64i { + # rev8 without `rev8` instruction (only in Zbb or Zbkb) + # Stores the given value byte-reversed and needs one scratch register + my $val = shift; + my $addr = shift; + my $off = shift; + my $tmp = shift; + my $off0 = ($off + 0); + my $off1 = ($off + 1); + my $off2 = ($off + 2); + my $off3 = ($off + 3); + my $off4 = ($off + 4); + my $off5 = ($off + 5); + my $off6 = ($off + 6); + my $off7 = ($off + 7); + my $seq = <<___; + sb $val, $off7($addr) + srli $tmp, $val, 8 + sb $tmp, $off6($addr) + srli $tmp, $val, 16 + sb $tmp, $off5($addr) + srli $tmp, $val, 24 + sb $tmp, $off4($addr) + srli $tmp, $val, 32 + sb $tmp, $off3($addr) + srli $tmp, $val, 40 + sb $tmp, $off2($addr) + srli $tmp, $val, 48 + sb $tmp, $off1($addr) + srli $tmp, $val, 56 + sb $tmp, $off0($addr) +___ + return $seq; +} + # Scalar crypto instructions sub aes64ds { @@ -120,6 +191,14 @@ sub aes64ks2 { return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); } +sub brev8 { + # brev8 rd, rs + my 
$template = 0b011010000111_00000_101_00000_0010011; + my $rd = read_reg shift; + my $rs = read_reg shift; + return ".word ".($template | ($rs << 15) | ($rd << 7)); +} + sub clmul { # Encoding for clmul rd, rs1, rs2 instruction on RV64 # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX |