path: root/crypto/aes
author    Phoebe Chen <phoebe.chen@sifive.com>    2023-09-12 06:44:05 -0700
committer Hugo Landau <hlandau@openssl.org>       2023-10-26 15:55:50 +0100
commit    5e16a6276bf4624fb15ec26b49219af5b2ed19d1 (patch)
tree      4c97491219d2de80c50ee1b58c5642b6c01f6386 /crypto/aes
parent    3645eb0be22a4cea4300ab5afbf248d195d0f45b (diff)
riscv: Provide vector crypto implementation of AES-CBC mode.
To accelerate AES-128/192/256-CBC block cipher encryption, we use the vaesz, vaesem and vaesef instructions, each of which performs a single round of AES encryption. Similarly, for AES-128/192/256-CBC block cipher decryption we use the vaesz, vaesdm and vaesdf instructions, each of which performs a single round of AES decryption. Furthermore, we optimize the key and initialization vector (IV) handling by keeping the round keys in vector registers across blocks.

Signed-off-by: Phoebe Chen <phoebe.chen@sifive.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Hugo Landau <hlandau@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/21923)
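For orientation, here is a minimal C-level sketch of the CBC chaining that the routines in this patch implement with vector instructions. The single-block helpers aes_block_encrypt()/aes_block_decrypt() are hypothetical stand-ins for the vaesz/vaesem/vaesef and vaesz/vaesdm/vaesdf sequences in the diff below; this illustrates the mode only, not the vector implementation itself.

#include <stddef.h>
#include <string.h>

/* Hypothetical single-block helpers standing in for the vector AES round
 * instruction sequences (the round keys are held in v1-v15 in the assembly). */
void aes_block_encrypt(unsigned char blk[16], const void *round_keys);
void aes_block_decrypt(unsigned char blk[16], const void *round_keys);

static void xor16(unsigned char *dst, const unsigned char *src)
{
    for (int i = 0; i < 16; i++)
        dst[i] ^= src[i];
}

/* CBC encryption: XOR each plaintext block with the previous ciphertext
 * block (the IV for the first block), then encrypt it. */
static void cbc_encrypt_sketch(const unsigned char *in, unsigned char *out,
                               size_t len, const void *rk,
                               unsigned char iv[16])
{
    unsigned char chain[16];

    memcpy(chain, iv, 16);
    for (; len >= 16; len -= 16, in += 16, out += 16) {
        memcpy(out, in, 16);
        xor16(out, chain);
        aes_block_encrypt(out, rk);
        memcpy(chain, out, 16);       /* ciphertext feeds the next block */
    }
    memcpy(iv, chain, 16);            /* final block becomes the new IV */
}

/* CBC decryption: decrypt each ciphertext block, then XOR it with the
 * previous ciphertext block (the IV for the first block). A copy of the
 * ciphertext is kept before decrypting so that in == out still works,
 * mirroring the v17 copy made in the assembly below. */
static void cbc_decrypt_sketch(const unsigned char *in, unsigned char *out,
                               size_t len, const void *rk,
                               unsigned char iv[16])
{
    unsigned char chain[16], saved[16];

    memcpy(chain, iv, 16);
    for (; len >= 16; len -= 16, in += 16, out += 16) {
        memcpy(saved, in, 16);        /* keep ciphertext for the next block */
        memcpy(out, in, 16);
        aes_block_decrypt(out, rk);
        xor16(out, chain);
        memcpy(chain, saved, 16);
    }
    memcpy(iv, chain, 16);
}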
Diffstat (limited to 'crypto/aes')
-rw-r--r--   crypto/aes/asm/aes-riscv64-zvkned.pl   536
1 file changed, 534 insertions(+), 2 deletions(-)
diff --git a/crypto/aes/asm/aes-riscv64-zvkned.pl b/crypto/aes/asm/aes-riscv64-zvkned.pl
index 1225e39d2b..319808b51c 100644
--- a/crypto/aes/asm/aes-riscv64-zvkned.pl
+++ b/crypto/aes/asm/aes-riscv64-zvkned.pl
@@ -11,6 +11,7 @@
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+# Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -35,8 +36,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# - RV64I
-# - RISC-V vector ('V') with VLEN >= 128
-# - RISC-V vector crypto AES extension ('Zvkned')
+# - RISC-V Vector ('V') with VLEN >= 128
+# - RISC-V Vector AES block cipher extension ('Zvkned')
use strict;
use warnings;
@@ -57,6 +58,533 @@ my $code=<<___;
.text
___
+{
+###############################################################################
+# void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out,
+# size_t length, const AES_KEY *key,
+# unsigned char *ivec, const int enc);
+my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
+my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
+my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
+ $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
+ $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
+ $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
+) = map("v$_",(0..31));
+
+# Load all 11 round keys to v1-v11 registers.
+sub aes_128_load_key {
+ my $code=<<___;
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+ @{[vle32_v $v1, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v2, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v3, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v4, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v5, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v6, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v7, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v8, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v9, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v10, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v11, $KEYP]}
+___
+
+ return $code;
+}
+
+# Load all 13 round keys to v1-v13 registers.
+sub aes_192_load_key {
+ my $code=<<___;
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+ @{[vle32_v $v1, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v2, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v3, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v4, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v5, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v6, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v7, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v8, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v9, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v10, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v11, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v12, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v13, $KEYP]}
+___
+
+ return $code;
+}
+
+# Load all 15 round keys to v1-v15 registers.
+sub aes_256_load_key {
+ my $code=<<___;
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+ @{[vle32_v $v1, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v2, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v3, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v4, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v5, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v6, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v7, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v8, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v9, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v10, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v11, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v12, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v13, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v14, $KEYP]}
+ addi $KEYP, $KEYP, 16
+ @{[vle32_v $v15, $KEYP]}
+___
+
+ return $code;
+}
+
+# aes-128 encryption with round keys v1-v11
+sub aes_128_encrypt {
+ my $code=<<___;
+ @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
+ @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
+ @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
+ @{[vaesem_vs $v24, $v4]} # with round key w[12,15]
+ @{[vaesem_vs $v24, $v5]} # with round key w[16,19]
+ @{[vaesem_vs $v24, $v6]} # with round key w[20,23]
+ @{[vaesem_vs $v24, $v7]} # with round key w[24,27]
+ @{[vaesem_vs $v24, $v8]} # with round key w[28,31]
+ @{[vaesem_vs $v24, $v9]} # with round key w[32,35]
+ @{[vaesem_vs $v24, $v10]} # with round key w[36,39]
+ @{[vaesef_vs $v24, $v11]} # with round key w[40,43]
+___
+
+ return $code;
+}
+
+# aes-128 decryption with round keys v1-v11
+sub aes_128_decrypt {
+ my $code=<<___;
+ @{[vaesz_vs $v24, $v11]} # with round key w[40,43]
+ @{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
+ @{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
+ @{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
+ @{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
+ @{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
+ @{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
+ @{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
+ @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
+ @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
+ @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
+___
+
+ return $code;
+}
+
+# aes-192 encryption with round keys v1-v13
+sub aes_192_encrypt {
+ my $code=<<___;
+ @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
+ @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
+ @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
+ @{[vaesem_vs $v24, $v4]} # with round key w[12,15]
+ @{[vaesem_vs $v24, $v5]} # with round key w[16,19]
+ @{[vaesem_vs $v24, $v6]} # with round key w[20,23]
+ @{[vaesem_vs $v24, $v7]} # with round key w[24,27]
+ @{[vaesem_vs $v24, $v8]} # with round key w[28,31]
+ @{[vaesem_vs $v24, $v9]} # with round key w[32,35]
+ @{[vaesem_vs $v24, $v10]} # with round key w[36,39]
+ @{[vaesem_vs $v24, $v11]} # with round key w[40,43]
+ @{[vaesem_vs $v24, $v12]} # with round key w[44,47]
+ @{[vaesef_vs $v24, $v13]} # with round key w[48,51]
+___
+
+ return $code;
+}
+
+# aes-192 decryption with round keys v1-v13
+sub aes_192_decrypt {
+ my $code=<<___;
+ @{[vaesz_vs $v24, $v13]} # with round key w[48,51]
+ @{[vaesdm_vs $v24, $v12]} # with round key w[44,47]
+ @{[vaesdm_vs $v24, $v11]} # with round key w[40,43]
+ @{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
+ @{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
+ @{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
+ @{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
+ @{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
+ @{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
+ @{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
+ @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
+ @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
+ @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
+___
+
+ return $code;
+}
+
+# aes-256 encryption with round keys v1-v15
+sub aes_256_encrypt {
+ my $code=<<___;
+ @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
+ @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
+ @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
+ @{[vaesem_vs $v24, $v4]} # with round key w[12,15]
+ @{[vaesem_vs $v24, $v5]} # with round key w[16,19]
+ @{[vaesem_vs $v24, $v6]} # with round key w[20,23]
+ @{[vaesem_vs $v24, $v7]} # with round key w[24,27]
+ @{[vaesem_vs $v24, $v8]} # with round key w[28,31]
+ @{[vaesem_vs $v24, $v9]} # with round key w[32,35]
+ @{[vaesem_vs $v24, $v10]} # with round key w[36,39]
+ @{[vaesem_vs $v24, $v11]} # with round key w[40,43]
+ @{[vaesem_vs $v24, $v12]} # with round key w[44,47]
+ @{[vaesem_vs $v24, $v13]} # with round key w[48,51]
+ @{[vaesem_vs $v24, $v14]} # with round key w[52,55]
+ @{[vaesef_vs $v24, $v15]} # with round key w[56,59]
+___
+
+ return $code;
+}
+
+# aes-256 decryption with round keys v1-v15
+sub aes_256_decrypt {
+ my $code=<<___;
+ @{[vaesz_vs $v24, $v15]} # with round key w[56,59]
+ @{[vaesdm_vs $v24, $v14]} # with round key w[52,55]
+ @{[vaesdm_vs $v24, $v13]} # with round key w[48,51]
+ @{[vaesdm_vs $v24, $v12]} # with round key w[44,47]
+ @{[vaesdm_vs $v24, $v11]} # with round key w[40,43]
+ @{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
+ @{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
+ @{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
+ @{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
+ @{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
+ @{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
+ @{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
+ @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
+ @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
+ @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
+___
+
+ return $code;
+}
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvkned_cbc_encrypt
+.type rv64i_zvkned_cbc_encrypt,\@function
+rv64i_zvkned_cbc_encrypt:
+ # check whether the length is a multiple of 16 and >= 16
+ li $T1, 16
+ blt $LEN, $T1, L_end
+ andi $T1, $LEN, 15
+ bnez $T1, L_end
+
+ # Load number of rounds
+ lwu $rounds, 240($KEYP)
+
+ # Get proper routine for key size
+ li $T0, 10
+ beq $rounds, $T0, L_cbc_enc_128
+
+ li $T0, 12
+ beq $rounds, $T0, L_cbc_enc_192
+
+ li $T0, 14
+ beq $rounds, $T0, L_cbc_enc_256
+
+ ret
+.size rv64i_zvkned_cbc_encrypt,.-rv64i_zvkned_cbc_encrypt
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_enc_128:
+ # Load all 11 round keys to v1-v11 registers.
+ @{[aes_128_load_key]}
+
+ # Load IV.
+ @{[vle32_v $v16, ($IVP)]}
+
+ @{[vle32_v $v24, ($INP)]}
+ @{[vxor_vv $v24, $v24, $v16]}
+ j 2f
+
+1:
+ @{[vle32_v $v17, ($INP)]}
+ @{[vxor_vv $v24, $v24, $v17]}
+
+2:
+ # AES body
+ @{[aes_128_encrypt]}
+
+ @{[vse32_v $v24, ($OUTP)]}
+
+ addi $INP, $INP, 16
+ addi $OUTP, $OUTP, 16
+ addi $LEN, $LEN, -16
+
+ bnez $LEN, 1b
+
+ @{[vse32_v $v24, ($IVP)]}
+
+ ret
+.size L_cbc_enc_128,.-L_cbc_enc_128
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_enc_192:
+ # Load all 13 round keys to v1-v13 registers.
+ @{[aes_192_load_key]}
+
+ # Load IV.
+ @{[vle32_v $v16, ($IVP)]}
+
+ @{[vle32_v $v24, ($INP)]}
+ @{[vxor_vv $v24, $v24, $v16]}
+ j 2f
+
+1:
+ @{[vle32_v $v17, ($INP)]}
+ @{[vxor_vv $v24, $v24, $v17]}
+
+2:
+ # AES body
+ @{[aes_192_encrypt]}
+
+ @{[vse32_v $v24, ($OUTP)]}
+
+ addi $INP, $INP, 16
+ addi $OUTP, $OUTP, 16
+ addi $LEN, $LEN, -16
+
+ bnez $LEN, 1b
+
+ @{[vse32_v $v24, ($IVP)]}
+
+ ret
+.size L_cbc_enc_192,.-L_cbc_enc_192
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_enc_256:
+ # Load all 15 round keys to v1-v15 registers.
+ @{[aes_256_load_key]}
+
+ # Load IV.
+ @{[vle32_v $v16, ($IVP)]}
+
+ @{[vle32_v $v24, ($INP)]}
+ @{[vxor_vv $v24, $v24, $v16]}
+ j 2f
+
+1:
+ @{[vle32_v $v17, ($INP)]}
+ @{[vxor_vv $v24, $v24, $v17]}
+
+2:
+ # AES body
+ @{[aes_256_encrypt]}
+
+ @{[vse32_v $v24, ($OUTP)]}
+
+ addi $INP, $INP, 16
+ addi $OUTP, $OUTP, 16
+ addi $LEN, $LEN, -16
+
+ bnez $LEN, 1b
+
+ @{[vse32_v $v24, ($IVP)]}
+
+ ret
+.size L_cbc_enc_256,.-L_cbc_enc_256
+___
+
+###############################################################################
+# void rv64i_zvkned_cbc_decrypt(const unsigned char *in, unsigned char *out,
+# size_t length, const AES_KEY *key,
+# unsigned char *ivec, const int enc);
+my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
+my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
+my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
+ $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
+ $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
+ $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
+) = map("v$_",(0..31));
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvkned_cbc_decrypt
+.type rv64i_zvkned_cbc_decrypt,\@function
+rv64i_zvkned_cbc_decrypt:
+ # check whether the length is a multiple of 16 and >= 16
+ li $T1, 16
+ blt $LEN, $T1, L_end
+ andi $T1, $LEN, 15
+ bnez $T1, L_end
+
+ # Load number of rounds
+ lwu $rounds, 240($KEYP)
+
+ # Get proper routine for key size
+ li $T0, 10
+ beq $rounds, $T0, L_cbc_dec_128
+
+ li $T0, 12
+ beq $rounds, $T0, L_cbc_dec_192
+
+ li $T0, 14
+ beq $rounds, $T0, L_cbc_dec_256
+
+ ret
+.size rv64i_zvkned_cbc_decrypt,.-rv64i_zvkned_cbc_decrypt
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_dec_128:
+ # Load all 11 round keys to v1-v11 registers.
+ @{[aes_128_load_key]}
+
+ # Load IV.
+ @{[vle32_v $v16, ($IVP)]}
+
+ @{[vle32_v $v24, ($INP)]}
+ @{[vmv_v_v $v17, $v24]}
+ j 2f
+
+1:
+ @{[vle32_v $v24, ($INP)]}
+ @{[vmv_v_v $v17, $v24]}
+ addi $OUTP, $OUTP, 16
+
+2:
+ # AES body
+ @{[aes_128_decrypt]}
+
+ @{[vxor_vv $v24, $v24, $v16]}
+ @{[vse32_v $v24, ($OUTP)]}
+ @{[vmv_v_v $v16, $v17]}
+
+ addi $LEN, $LEN, -16
+ addi $INP, $INP, 16
+
+ bnez $LEN, 1b
+
+ @{[vse32_v $v16, ($IVP)]}
+
+ ret
+.size L_cbc_dec_128,.-L_cbc_dec_128
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_dec_192:
+ # Load all 13 round keys to v1-v13 registers.
+ @{[aes_192_load_key]}
+
+ # Load IV.
+ @{[vle32_v $v16, ($IVP)]}
+
+ @{[vle32_v $v24, ($INP)]}
+ @{[vmv_v_v $v17, $v24]}
+ j 2f
+
+1:
+ @{[vle32_v $v24, ($INP)]}
+ @{[vmv_v_v $v17, $v24]}
+ addi $OUTP, $OUTP, 16
+
+2:
+ # AES body
+ @{[aes_192_decrypt]}
+
+ @{[vxor_vv $v24, $v24, $v16]}
+ @{[vse32_v $v24, ($OUTP)]}
+ @{[vmv_v_v $v16, $v17]}
+
+ addi $LEN, $LEN, -16
+ addi $INP, $INP, 16
+
+ bnez $LEN, 1b
+
+ @{[vse32_v $v16, ($IVP)]}
+
+ ret
+.size L_cbc_dec_192,.-L_cbc_dec_192
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_dec_256:
+ # Load all 15 round keys to v1-v15 registers.
+ @{[aes_256_load_key]}
+
+ # Load IV.
+ @{[vle32_v $v16, ($IVP)]}
+
+ @{[vle32_v $v24, ($INP)]}
+ @{[vmv_v_v $v17, $v24]}
+ j 2f
+
+1:
+ @{[vle32_v $v24, ($INP)]}
+ @{[vmv_v_v $v17, $v24]}
+ addi $OUTP, $OUTP, 16
+
+2:
+ # AES body
+ @{[aes_256_decrypt]}
+
+ @{[vxor_vv $v24, $v24, $v16]}
+ @{[vse32_v $v24, ($OUTP)]}
+ @{[vmv_v_v $v16, $v17]}
+
+ addi $LEN, $LEN, -16
+ addi $INP, $INP, 16
+
+ bnez $LEN, 1b
+
+ @{[vse32_v $v16, ($IVP)]}
+
+ ret
+.size L_cbc_dec_256,.-L_cbc_dec_256
+___
+
+}
+
################################################################################
# int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits,
# AES_KEY *key)
@@ -627,6 +1155,10 @@ L_fail_m2:
li a0, -2
ret
.size L_fail_m2,.-L_fail_m2
+
+L_end:
+ ret
+.size L_end,.-L_end
___
print $code;