From 5e16a6276bf4624fb15ec26b49219af5b2ed19d1 Mon Sep 17 00:00:00 2001
From: Phoebe Chen
Date: Tue, 12 Sep 2023 06:44:05 -0700
Subject: riscv: Provide vector crypto implementation of AES-CBC mode.

To accelerate AES-128/192/256-CBC block cipher encryption, we use the
vaesz, vaesem and vaesef instructions, each of which performs a single
round of AES encryption. Similarly, to accelerate AES-128/192/256-CBC
block cipher decryption, we use the vaesz, vaesdm and vaesdf
instructions, each of which performs a single round of AES decryption.

Furthermore, we optimize the key and initialization vector (IV)
handling by keeping the round keys in vector registers for the whole
loop.

Signed-off-by: Phoebe Chen
Reviewed-by: Tomas Mraz
Reviewed-by: Paul Dale
Reviewed-by: Hugo Landau
(Merged from https://github.com/openssl/openssl/pull/21923)
---
 crypto/aes/asm/aes-riscv64-zvkned.pl | 536 ++++++++++++++++++++++++++++++++++-
 1 file changed, 534 insertions(+), 2 deletions(-)

diff --git a/crypto/aes/asm/aes-riscv64-zvkned.pl b/crypto/aes/asm/aes-riscv64-zvkned.pl
index 1225e39d2b..319808b51c 100644
--- a/crypto/aes/asm/aes-riscv64-zvkned.pl
+++ b/crypto/aes/asm/aes-riscv64-zvkned.pl
@@ -11,6 +11,7 @@
 # or
 #
 # Copyright (c) 2023, Christoph Müllner
+# Copyright (c) 2023, Phoebe Chen
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -35,8 +36,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 # - RV64I
-# - RISC-V vector ('V') with VLEN >= 128
-# - RISC-V vector crypto AES extension ('Zvkned')
+# - RISC-V Vector ('V') with VLEN >= 128
+# - RISC-V Vector AES block cipher extension ('Zvkned')
 
 use strict;
 use warnings;
@@ -57,6 +58,533 @@ my $code=<<___;
 .text
 ___
 
+{
+###############################################################################
+# void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out,
+#                               size_t length, const AES_KEY *key,
+#                               unsigned char *ivec, const int enc);
+my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
+my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
+my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
+    $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
+    $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
+    $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
+) = map("v$_",(0..31));
+
+# Load all 11 round keys to v1-v11 registers.
+sub aes_128_load_key {
+    my $code=<<___;
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+    @{[vle32_v $v1, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v2, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v3, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v4, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v5, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v6, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v7, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v8, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v9, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v10, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v11, $KEYP]}
+___
+
+    return $code;
+}
+
+# Load all 13 round keys to v1-v13 registers.
+sub aes_192_load_key {
+    my $code=<<___;
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+    @{[vle32_v $v1, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v2, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v3, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v4, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v5, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v6, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v7, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v8, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v9, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v10, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v11, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v12, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v13, $KEYP]}
+___
+
+    return $code;
+}
+
+# Load all 15 round keys to v1-v15 registers.
+sub aes_256_load_key {
+    my $code=<<___;
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+    @{[vle32_v $v1, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v2, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v3, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v4, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v5, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v6, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v7, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v8, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v9, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v10, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v11, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v12, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v13, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v14, $KEYP]}
+    addi $KEYP, $KEYP, 16
+    @{[vle32_v $v15, $KEYP]}
+___
+
+    return $code;
+}
+
+# aes-128 encryption with round keys v1-v11
+sub aes_128_encrypt {
+    my $code=<<___;
+    @{[vaesz_vs $v24, $v1]}    # with round key w[ 0, 3]
+    @{[vaesem_vs $v24, $v2]}   # with round key w[ 4, 7]
+    @{[vaesem_vs $v24, $v3]}   # with round key w[ 8,11]
+    @{[vaesem_vs $v24, $v4]}   # with round key w[12,15]
+    @{[vaesem_vs $v24, $v5]}   # with round key w[16,19]
+    @{[vaesem_vs $v24, $v6]}   # with round key w[20,23]
+    @{[vaesem_vs $v24, $v7]}   # with round key w[24,27]
+    @{[vaesem_vs $v24, $v8]}   # with round key w[28,31]
+    @{[vaesem_vs $v24, $v9]}   # with round key w[32,35]
+    @{[vaesem_vs $v24, $v10]}  # with round key w[36,39]
+    @{[vaesef_vs $v24, $v11]}  # with round key w[40,43]
+___
+
+    return $code;
+}
+
+# aes-128 decryption with round keys v1-v11
+sub aes_128_decrypt {
+    my $code=<<___;
+    @{[vaesz_vs $v24, $v11]}   # with round key w[40,43]
+    @{[vaesdm_vs $v24, $v10]}  # with round key w[36,39]
+    @{[vaesdm_vs $v24, $v9]}   # with round key w[32,35]
+    @{[vaesdm_vs $v24, $v8]}   # with round key w[28,31]
+    @{[vaesdm_vs $v24, $v7]}   # with round key w[24,27]
+    @{[vaesdm_vs $v24, $v6]}   # with round key w[20,23]
+    @{[vaesdm_vs $v24, $v5]}   # with round key w[16,19]
+    @{[vaesdm_vs $v24, $v4]}   # with round key w[12,15]
+    @{[vaesdm_vs $v24, $v3]}   # with round key w[ 8,11]
+    @{[vaesdm_vs $v24, $v2]}   # with round key w[ 4, 7]
+    @{[vaesdf_vs $v24, $v1]}   # with round key w[ 0, 3]
+___
+
+    return $code;
+}
+
+# aes-192 encryption with round keys v1-v13
+sub aes_192_encrypt {
+    my $code=<<___;
+    @{[vaesz_vs $v24, $v1]}    # with round key w[ 0, 3]
+    @{[vaesem_vs $v24, $v2]}   # with round key w[ 4, 7]
+    @{[vaesem_vs $v24, $v3]}   # with round key w[ 8,11]
+    @{[vaesem_vs $v24, $v4]}   # with round key w[12,15]
+    @{[vaesem_vs $v24, $v5]}   # with round key w[16,19]
+    @{[vaesem_vs $v24, $v6]}   # with round key w[20,23]
+    @{[vaesem_vs $v24, $v7]}   # with round key w[24,27]
+    @{[vaesem_vs $v24, $v8]}   # with round key w[28,31]
+    @{[vaesem_vs $v24, $v9]}   # with round key w[32,35]
+    @{[vaesem_vs $v24, $v10]}  # with round key w[36,39]
+    @{[vaesem_vs $v24, $v11]}  # with round key w[40,43]
+    @{[vaesem_vs $v24, $v12]}  # with round key w[44,47]
+    @{[vaesef_vs $v24, $v13]}  # with round key w[48,51]
+___
+
+    return $code;
+}
+
+# aes-192 decryption with round keys v1-v13
+sub aes_192_decrypt {
+    my $code=<<___;
+    @{[vaesz_vs $v24, $v13]}   # with round key w[48,51]
+    @{[vaesdm_vs $v24, $v12]}  # with round key w[44,47]
+    @{[vaesdm_vs $v24, $v11]}  # with round key w[40,43]
+    @{[vaesdm_vs $v24, $v10]}  # with round key w[36,39]
+    @{[vaesdm_vs $v24, $v9]}   # with round key w[32,35]
+    @{[vaesdm_vs $v24, $v8]}   # with round key w[28,31]
+    @{[vaesdm_vs $v24, $v7]}   # with round key w[24,27]
+    @{[vaesdm_vs $v24, $v6]}   # with round key w[20,23]
+    @{[vaesdm_vs $v24, $v5]}   # with round key w[16,19]
+    @{[vaesdm_vs $v24, $v4]}   # with round key w[12,15]
+    @{[vaesdm_vs $v24, $v3]}   # with round key w[ 8,11]
+    @{[vaesdm_vs $v24, $v2]}   # with round key w[ 4, 7]
+    @{[vaesdf_vs $v24, $v1]}   # with round key w[ 0, 3]
+___
+
+    return $code;
+}
+
+# aes-256 encryption with round keys v1-v15
+sub aes_256_encrypt {
+    my $code=<<___;
+    @{[vaesz_vs $v24, $v1]}    # with round key w[ 0, 3]
+    @{[vaesem_vs $v24, $v2]}   # with round key w[ 4, 7]
+    @{[vaesem_vs $v24, $v3]}   # with round key w[ 8,11]
+    @{[vaesem_vs $v24, $v4]}   # with round key w[12,15]
+    @{[vaesem_vs $v24, $v5]}   # with round key w[16,19]
+    @{[vaesem_vs $v24, $v6]}   # with round key w[20,23]
+    @{[vaesem_vs $v24, $v7]}   # with round key w[24,27]
+    @{[vaesem_vs $v24, $v8]}   # with round key w[28,31]
+    @{[vaesem_vs $v24, $v9]}   # with round key w[32,35]
+    @{[vaesem_vs $v24, $v10]}  # with round key w[36,39]
+    @{[vaesem_vs $v24, $v11]}  # with round key w[40,43]
+    @{[vaesem_vs $v24, $v12]}  # with round key w[44,47]
+    @{[vaesem_vs $v24, $v13]}  # with round key w[48,51]
+    @{[vaesem_vs $v24, $v14]}  # with round key w[52,55]
+    @{[vaesef_vs $v24, $v15]}  # with round key w[56,59]
+___
+
+    return $code;
+}
+
+# aes-256 decryption with round keys v1-v15
+sub aes_256_decrypt {
+    my $code=<<___;
+    @{[vaesz_vs $v24, $v15]}   # with round key w[56,59]
+    @{[vaesdm_vs $v24, $v14]}  # with round key w[52,55]
+    @{[vaesdm_vs $v24, $v13]}  # with round key w[48,51]
+    @{[vaesdm_vs $v24, $v12]}  # with round key w[44,47]
+    @{[vaesdm_vs $v24, $v11]}  # with round key w[40,43]
+    @{[vaesdm_vs $v24, $v10]}  # with round key w[36,39]
+    @{[vaesdm_vs $v24, $v9]}   # with round key w[32,35]
+    @{[vaesdm_vs $v24, $v8]}   # with round key w[28,31]
+    @{[vaesdm_vs $v24, $v7]}   # with round key w[24,27]
+    @{[vaesdm_vs $v24, $v6]}   # with round key w[20,23]
+    @{[vaesdm_vs $v24, $v5]}   # with round key w[16,19]
+    @{[vaesdm_vs $v24, $v4]}   # with round key w[12,15]
+    @{[vaesdm_vs $v24, $v3]}   # with round key w[ 8,11]
+    @{[vaesdm_vs $v24, $v2]}   # with round key w[ 4, 7]
+    @{[vaesdf_vs $v24, $v1]}   # with round key w[ 0, 3]
+___
+
+    return $code;
+}
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvkned_cbc_encrypt
+.type rv64i_zvkned_cbc_encrypt,\@function
+rv64i_zvkned_cbc_encrypt:
+    # check whether the length is a multiple of 16 and >= 16
+    li $T1, 16
+    blt $LEN, $T1, L_end
+    andi $T1, $LEN, 15
+    bnez $T1, L_end
+
+    # Load number of rounds
+    lwu $rounds, 240($KEYP)
+
+    # Get proper routine for key size
+    li $T0, 10
+    beq $rounds, $T0, L_cbc_enc_128
+
+    li $T0, 12
+    beq $rounds, $T0, L_cbc_enc_192
+
+    li $T0, 14
+    beq $rounds, $T0, L_cbc_enc_256
+
+    ret
+.size rv64i_zvkned_cbc_encrypt,.-rv64i_zvkned_cbc_encrypt
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_enc_128:
+    # Load all 11 round keys to v1-v11 registers.
+    @{[aes_128_load_key]}
+
+    # Load IV.
+    @{[vle32_v $v16, ($IVP)]}
+
+    @{[vle32_v $v24, ($INP)]}
+    @{[vxor_vv $v24, $v24, $v16]}
+    j 2f
+
+1:
+    @{[vle32_v $v17, ($INP)]}
+    @{[vxor_vv $v24, $v24, $v17]}
+
+2:
+    # AES body
+    @{[aes_128_encrypt]}
+
+    @{[vse32_v $v24, ($OUTP)]}
+
+    addi $INP, $INP, 16
+    addi $OUTP, $OUTP, 16
+    addi $LEN, $LEN, -16
+
+    bnez $LEN, 1b
+
+    @{[vse32_v $v24, ($IVP)]}
+
+    ret
+.size L_cbc_enc_128,.-L_cbc_enc_128
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_enc_192:
+    # Load all 13 round keys to v1-v13 registers.
+    @{[aes_192_load_key]}
+
+    # Load IV.
+    @{[vle32_v $v16, ($IVP)]}
+
+    @{[vle32_v $v24, ($INP)]}
+    @{[vxor_vv $v24, $v24, $v16]}
+    j 2f
+
+1:
+    @{[vle32_v $v17, ($INP)]}
+    @{[vxor_vv $v24, $v24, $v17]}
+
+2:
+    # AES body
+    @{[aes_192_encrypt]}
+
+    @{[vse32_v $v24, ($OUTP)]}
+
+    addi $INP, $INP, 16
+    addi $OUTP, $OUTP, 16
+    addi $LEN, $LEN, -16
+
+    bnez $LEN, 1b
+
+    @{[vse32_v $v24, ($IVP)]}
+
+    ret
+.size L_cbc_enc_192,.-L_cbc_enc_192
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_enc_256:
+    # Load all 15 round keys to v1-v15 registers.
+    @{[aes_256_load_key]}
+
+    # Load IV.
+    @{[vle32_v $v16, ($IVP)]}
+
+    @{[vle32_v $v24, ($INP)]}
+    @{[vxor_vv $v24, $v24, $v16]}
+    j 2f
+
+1:
+    @{[vle32_v $v17, ($INP)]}
+    @{[vxor_vv $v24, $v24, $v17]}
+
+2:
+    # AES body
+    @{[aes_256_encrypt]}
+
+    @{[vse32_v $v24, ($OUTP)]}
+
+    addi $INP, $INP, 16
+    addi $OUTP, $OUTP, 16
+    addi $LEN, $LEN, -16
+
+    bnez $LEN, 1b
+
+    @{[vse32_v $v24, ($IVP)]}
+
+    ret
+.size L_cbc_enc_256,.-L_cbc_enc_256
+___
+
+###############################################################################
+# void rv64i_zvkned_cbc_decrypt(const unsigned char *in, unsigned char *out,
+#                               size_t length, const AES_KEY *key,
+#                               unsigned char *ivec, const int enc);
+my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
+my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
+my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
+    $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
+    $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
+    $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
+) = map("v$_",(0..31));
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvkned_cbc_decrypt
+.type rv64i_zvkned_cbc_decrypt,\@function
+rv64i_zvkned_cbc_decrypt:
+    # check whether the length is a multiple of 16 and >= 16
+    li $T1, 16
+    blt $LEN, $T1, L_end
+    andi $T1, $LEN, 15
+    bnez $T1, L_end
+
+    # Load number of rounds
+    lwu $rounds, 240($KEYP)
+
+    # Get proper routine for key size
+    li $T0, 10
+    beq $rounds, $T0, L_cbc_dec_128
+
+    li $T0, 12
+    beq $rounds, $T0, L_cbc_dec_192
+
+    li $T0, 14
+    beq $rounds, $T0, L_cbc_dec_256
+
+    ret
+.size rv64i_zvkned_cbc_decrypt,.-rv64i_zvkned_cbc_decrypt
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_dec_128:
+    # Load all 11 round keys to v1-v11 registers.
+    @{[aes_128_load_key]}
+
+    # Load IV.
+    @{[vle32_v $v16, ($IVP)]}
+
+    @{[vle32_v $v24, ($INP)]}
+    @{[vmv_v_v $v17, $v24]}
+    j 2f
+
+1:
+    @{[vle32_v $v24, ($INP)]}
+    @{[vmv_v_v $v17, $v24]}
+    addi $OUTP, $OUTP, 16
+
+2:
+    # AES body
+    @{[aes_128_decrypt]}
+
+    @{[vxor_vv $v24, $v24, $v16]}
+    @{[vse32_v $v24, ($OUTP)]}
+    @{[vmv_v_v $v16, $v17]}
+
+    addi $LEN, $LEN, -16
+    addi $INP, $INP, 16
+
+    bnez $LEN, 1b
+
+    @{[vse32_v $v16, ($IVP)]}
+
+    ret
+.size L_cbc_dec_128,.-L_cbc_dec_128
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_dec_192:
+    # Load all 13 round keys to v1-v13 registers.
+    @{[aes_192_load_key]}
+
+    # Load IV.
+    @{[vle32_v $v16, ($IVP)]}
+
+    @{[vle32_v $v24, ($INP)]}
+    @{[vmv_v_v $v17, $v24]}
+    j 2f
+
+1:
+    @{[vle32_v $v24, ($INP)]}
+    @{[vmv_v_v $v17, $v24]}
+    addi $OUTP, $OUTP, 16
+
+2:
+    # AES body
+    @{[aes_192_decrypt]}
+
+    @{[vxor_vv $v24, $v24, $v16]}
+    @{[vse32_v $v24, ($OUTP)]}
+    @{[vmv_v_v $v16, $v17]}
+
+    addi $LEN, $LEN, -16
+    addi $INP, $INP, 16
+
+    bnez $LEN, 1b
+
+    @{[vse32_v $v16, ($IVP)]}
+
+    ret
+.size L_cbc_dec_192,.-L_cbc_dec_192
+___
+
+$code .= <<___;
+.p2align 3
+L_cbc_dec_256:
+    # Load all 15 round keys to v1-v15 registers.
+    @{[aes_256_load_key]}
+
+    # Load IV.
+    @{[vle32_v $v16, ($IVP)]}
+
+    @{[vle32_v $v24, ($INP)]}
+    @{[vmv_v_v $v17, $v24]}
+    j 2f
+
+1:
+    @{[vle32_v $v24, ($INP)]}
+    @{[vmv_v_v $v17, $v24]}
+    addi $OUTP, $OUTP, 16
+
+2:
+    # AES body
+    @{[aes_256_decrypt]}
+
+    @{[vxor_vv $v24, $v24, $v16]}
+    @{[vse32_v $v24, ($OUTP)]}
+    @{[vmv_v_v $v16, $v17]}
+
+    addi $LEN, $LEN, -16
+    addi $INP, $INP, 16
+
+    bnez $LEN, 1b
+
+    @{[vse32_v $v16, ($IVP)]}
+
+    ret
+.size L_cbc_dec_256,.-L_cbc_dec_256
+___
+
+}
+
 ################################################################################
 # int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits,
 #                                  AES_KEY *key)
@@ -627,6 +1155,10 @@ L_fail_m2:
     li a0, -2
     ret
 .size L_fail_m2,.-L_fail_m2
+
+L_end:
+    ret
+.size L_end,.-L_end
 ___
 
 print $code;
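
The patch is self-describing assembly, but the chaining structure is easier to audit against a scalar reference. The following C sketch is not part of the commit: it is a minimal model of what the L_cbc_enc_* and L_cbc_dec_* loops compute, with hypothetical aes_encrypt_block()/aes_decrypt_block() primitives standing in for the vaesz/vaesem/vaesef and vaesz/vaesdm/vaesdf round sequences (round keys assumed already expanded, as aes_*_load_key keeps them resident in v1-v15).

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical single-block primitives; in the patch these are the
 * vaesz/vaesem/vaesef (encrypt) and vaesz/vaesdm/vaesdf (decrypt)
 * sequences operating on v24 with round keys held in v1..v15. */
void aes_encrypt_block(const uint8_t in[16], uint8_t out[16], const void *key);
void aes_decrypt_block(const uint8_t in[16], uint8_t out[16], const void *key);

static void xor16(uint8_t *dst, const uint8_t *src)
{
    for (int i = 0; i < 16; i++)
        dst[i] ^= src[i];
}

/* Mirrors L_cbc_enc_*: C[i] = E(P[i] ^ C[i-1]), with C[-1] = IV.  The
 * previous ciphertext stays in `block` across iterations, just as the
 * assembly leaves it in v24 and XORs the next plaintext into it. */
void cbc_encrypt_ref(const uint8_t *in, uint8_t *out, size_t len,
                     const void *key, uint8_t ivec[16])
{
    uint8_t block[16];

    if (len < 16 || (len & 15) != 0)    /* same early-out as L_end */
        return;
    memcpy(block, ivec, 16);
    while (len > 0) {
        xor16(block, in);               /* chain with previous ciphertext */
        aes_encrypt_block(block, block, key);
        memcpy(out, block, 16);         /* vse32.v v24, (OUTP) */
        in += 16; out += 16; len -= 16;
    }
    memcpy(ivec, block, 16);            /* last ciphertext becomes next IV */
}

/* Mirrors L_cbc_dec_*: P[i] = D(C[i]) ^ C[i-1].  The ciphertext is saved
 * before decryption (vmv.v.v v17, v24) because the chaining value for the
 * next block is the ciphertext, which also keeps in-place (in == out)
 * operation correct. */
void cbc_decrypt_ref(const uint8_t *in, uint8_t *out, size_t len,
                     const void *key, uint8_t ivec[16])
{
    uint8_t prev[16], saved[16], block[16];

    if (len < 16 || (len & 15) != 0)
        return;
    memcpy(prev, ivec, 16);
    while (len > 0) {
        memcpy(saved, in, 16);          /* keep C[i] for the next block */
        aes_decrypt_block(in, block, key);
        xor16(block, prev);             /* vxor.vv v24, v24, v16 */
        memcpy(out, block, 16);
        memcpy(prev, saved, 16);        /* vmv.v.v v16, v17 */
        in += 16; out += 16; len -= 16;
    }
    memcpy(ivec, prev, 16);             /* write back IV for the caller */
}

Keeping all round keys resident in vector registers pays off here because CBC encryption is inherently serial: each block depends on the previous ciphertext, so the loop cannot be parallelized across blocks, and avoiding per-block key reloads trims the work inside that serial loop.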