summaryrefslogtreecommitdiffstats
path: root/crypto/perlasm
diff options
context:
space:
mode:
author: Christoph Müllner <christoph.muellner@vrull.eu> 2023-01-18 11:57:33 +0100
committer: Pauli <pauli@openssl.org> 2023-03-16 13:12:19 +1100
commit: b24684369b76df8b226fe9aa95fca2bccfc6a175 (patch)
tree: 2aed331bc37f57dcf4e0ff961004e82268db4239 /crypto/perlasm
parent: 75623ed8d01555e7c719f3b1c598e555c0878da0 (diff)
riscv: GCM: Simplify GCM calculation
The existing GCM calculation provides some potential for further optimizations. Let's use the demo code from the RISC-V cryptography extension group (https://github.com/riscv/riscv-crypto), which represents the extension architects' intended use of the clmul instruction. The GCM calculation depends on bit and byte reversal. Therefore, we use the corresponding instructions to do that (if available at run-time). The resulting computation becomes quite compact and passes all tests. Note that a side effect of this change is a reduced register usage in .gmult(), which opens the door for an efficient .ghash() implementation. Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu> Reviewed-by: Tomas Mraz <tomas@openssl.org> Reviewed-by: Paul Dale <pauli@openssl.org> (Merged from https://github.com/openssl/openssl/pull/20078)
Diffstat (limited to 'crypto/perlasm')
-rw-r--r-- crypto/perlasm/riscv.pm 79
1 files changed, 79 insertions, 0 deletions
diff --git a/crypto/perlasm/riscv.pm b/crypto/perlasm/riscv.pm
index 80f23ae5b3..b0c786a13c 100644
--- a/crypto/perlasm/riscv.pm
+++ b/crypto/perlasm/riscv.pm
@@ -49,6 +49,77 @@ sub read_reg {
return $1;
}
+# Helper functions
+
+sub brev8_rv64i {
+ # brev8 without `brev8` instruction (only in Zbkb); returns the assembly text as a string
+ # Reverses the bit order within each byte of $val in place (swaps adjacent bits, then bit pairs, then nibbles); overwrites three registers besides $val: $t0, $t1 and $brev8_const
+ my $val = shift; # register holding the input; overwritten with the result
+ my $t0 = shift; # scratch register: receives each mask loaded from the constant table
+ my $t1 = shift; # scratch register: holds the left-shifted, masked half before it is OR-ed back
+ my $brev8_const = shift; # scratch register: loaded with the address of Lbrev8_const (label defined outside this sub; assumed to hold the three 64-bit masks named in the comments below -- TODO confirm at the definition site)
+ my $seq = <<___;
+ la $brev8_const, Lbrev8_const
+
+ ld $t0, 0($brev8_const) # 0xAAAAAAAAAAAAAAAA
+ slli $t1, $val, 1
+ and $t1, $t1, $t0
+ and $val, $val, $t0
+ srli $val, $val, 1
+ or $val, $t1, $val
+
+ ld $t0, 8($brev8_const) # 0xCCCCCCCCCCCCCCCC
+ slli $t1, $val, 2
+ and $t1, $t1, $t0
+ and $val, $val, $t0
+ srli $val, $val, 2
+ or $val, $t1, $val
+
+ ld $t0, 16($brev8_const) # 0xF0F0F0F0F0F0F0F0
+ slli $t1, $val, 4
+ and $t1, $t1, $t0
+ and $val, $val, $t0
+ srli $val, $val, 4
+ or $val, $t1, $val
+___
+ return $seq;
+}
+
+sub sd_rev8_rv64i {
+ # rev8 without `rev8` instruction (only in Zbb or Zbkb); returns the assembly text as a string
+ # Stores the value one byte at a time in reversed byte order: the lowest byte goes to offset+7 and the highest byte to offset+0; only $tmp is overwritten, $val is left unchanged
+ my $val = shift; # register holding the value to store (read only)
+ my $addr = shift; # register holding the base address
+ my $off = shift; # byte offset from $addr; must be a plain number because it is used in Perl arithmetic below
+ my $tmp = shift; # scratch register: receives each right-shifted copy of $val
+ my $off0 = ($off + 0); # offsets of the eight destination bytes, computed in Perl so the heredoc can interpolate them
+ my $off1 = ($off + 1);
+ my $off2 = ($off + 2);
+ my $off3 = ($off + 3);
+ my $off4 = ($off + 4);
+ my $off5 = ($off + 5);
+ my $off6 = ($off + 6);
+ my $off7 = ($off + 7);
+ my $seq = <<___;
+ sb $val, $off7($addr)
+ srli $tmp, $val, 8
+ sb $tmp, $off6($addr)
+ srli $tmp, $val, 16
+ sb $tmp, $off5($addr)
+ srli $tmp, $val, 24
+ sb $tmp, $off4($addr)
+ srli $tmp, $val, 32
+ sb $tmp, $off3($addr)
+ srli $tmp, $val, 40
+ sb $tmp, $off2($addr)
+ srli $tmp, $val, 48
+ sb $tmp, $off1($addr)
+ srli $tmp, $val, 56
+ sb $tmp, $off0($addr)
+___
+ return $seq;
+}
+
# Scalar crypto instructions
sub aes64ds {
@@ -120,6 +191,14 @@ sub aes64ks2 {
return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}
+sub brev8 {
+ # brev8 rd, rs -- takes the destination and source register operands (in that order) and returns a ".word" directive containing the encoded instruction
+ my $template = 0b011010000111_00000_101_00000_0010011; # fixed instruction bits; the two 5-bit zero groups are the rs (bit 15) and rd (bit 7) fields filled in below
+ my $rd = read_reg shift; # first argument: destination register (read_reg presumably maps the register name to its number -- see read_reg)
+ my $rs = read_reg shift; # second argument: source register
+ return ".word ".($template | ($rs << 15) | ($rd << 7)); # the integer is stringified by Perl, so the word is written in decimal
+}
+
sub clmul {
# Encoding for clmul rd, rs1, rs2 instruction on RV64
# XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX