From 751a22194e4dc52532d2aedd49fe62dbea0badc8 Mon Sep 17 00:00:00 2001 From: Phoebe Chen Date: Sun, 24 Sep 2023 20:07:48 -0700 Subject: riscv: Provide vector crypto implementation of AES-ECB mode. This patch provides stream and multi-block implementations for AES-128-ECB, AES-192-ECB, and AES-256-ECB to accelerate AES-ECB. Also, refactor the functions in aes-riscv64-zvkned.pl to share the same variable declarations. Signed-off-by: Phoebe Chen Reviewed-by: Tomas Mraz Reviewed-by: Paul Dale Reviewed-by: Hugo Landau (Merged from https://github.com/openssl/openssl/pull/21923) --- crypto/aes/asm/aes-riscv64-zvkned.pl | 1130 ++++++++++++++++++++-------------- 1 file changed, 670 insertions(+), 460 deletions(-) diff --git a/crypto/aes/asm/aes-riscv64-zvkned.pl b/crypto/aes/asm/aes-riscv64-zvkned.pl index 319808b51c..4c0292781d 100644 --- a/crypto/aes/asm/aes-riscv64-zvkned.pl +++ b/crypto/aes/asm/aes-riscv64-zvkned.pl @@ -58,44 +58,39 @@ my $code=<<___; .text ___ -{ -############################################################################### -# void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivec, const int enc); -my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5"); -my ($T0, $T1, $rounds) = ("t0", "t1", "t2"); -my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7, - $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15, - $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23, - $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31, +my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7, + $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15, + $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23, + $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31, ) = map("v$_",(0..31)); # Load all 11 round keys to v1-v11 registers. sub aes_128_load_key { + my $KEYP = shift; + my $code=<<___; @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, $KEYP]} + @{[vle32_v $V1, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v2, $KEYP]} + @{[vle32_v $V2, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v3, $KEYP]} + @{[vle32_v $V3, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v4, $KEYP]} + @{[vle32_v $V4, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v5, $KEYP]} + @{[vle32_v $V5, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v6, $KEYP]} + @{[vle32_v $V6, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v7, $KEYP]} + @{[vle32_v $V7, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v8, $KEYP]} + @{[vle32_v $V8, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v9, $KEYP]} + @{[vle32_v $V9, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v10, $KEYP]} + @{[vle32_v $V10, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v11, $KEYP]} + @{[vle32_v $V11, $KEYP]} ___ return $code; @@ -103,33 +98,35 @@ ___ # Load all 13 round keys to v1-v13 registers.
sub aes_192_load_key { + my $KEYP = shift; + my $code=<<___; @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, $KEYP]} + @{[vle32_v $V1, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v2, $KEYP]} + @{[vle32_v $V2, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v3, $KEYP]} + @{[vle32_v $V3, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v4, $KEYP]} + @{[vle32_v $V4, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v5, $KEYP]} + @{[vle32_v $V5, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v6, $KEYP]} + @{[vle32_v $V6, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v7, $KEYP]} + @{[vle32_v $V7, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v8, $KEYP]} + @{[vle32_v $V8, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v9, $KEYP]} + @{[vle32_v $V9, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v10, $KEYP]} + @{[vle32_v $V10, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v11, $KEYP]} + @{[vle32_v $V11, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v12, $KEYP]} + @{[vle32_v $V12, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v13, $KEYP]} + @{[vle32_v $V13, $KEYP]} ___ return $code; @@ -137,37 +134,39 @@ ___ # Load all 15 round keys to v1-v15 registers. sub aes_256_load_key { + my $KEYP = shift; + my $code=<<___; @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, $KEYP]} + @{[vle32_v $V1, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v2, $KEYP]} + @{[vle32_v $V2, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v3, $KEYP]} + @{[vle32_v $V3, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v4, $KEYP]} + @{[vle32_v $V4, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v5, $KEYP]} + @{[vle32_v $V5, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v6, $KEYP]} + @{[vle32_v $V6, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v7, $KEYP]} + @{[vle32_v $V7, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v8, $KEYP]} + @{[vle32_v $V8, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v9, $KEYP]} + @{[vle32_v $V9, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v10, $KEYP]} + @{[vle32_v $V10, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v11, $KEYP]} + @{[vle32_v $V11, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v12, $KEYP]} + @{[vle32_v $V12, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v13, $KEYP]} + @{[vle32_v $V13, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v14, $KEYP]} + @{[vle32_v $V14, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v15, $KEYP]} + @{[vle32_v $V15, $KEYP]} ___ return $code; @@ -176,17 +175,17 @@ ___ # aes-128 encryption with round keys v1-v11 sub aes_128_encrypt { my $code=<<___; - @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3] - @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7] - @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11] - @{[vaesem_vs $v24, $v4]} # with round key w[12,15] - @{[vaesem_vs $v24, $v5]} # with round key w[16,19] - @{[vaesem_vs $v24, $v6]} # with round key w[20,23] - @{[vaesem_vs $v24, $v7]} # with round key w[24,27] - @{[vaesem_vs $v24, $v8]} # with round key w[28,31] - @{[vaesem_vs $v24, $v9]} # with round key w[32,35] - @{[vaesem_vs $v24, $v10]} # with round key w[36,39] - @{[vaesef_vs $v24, $v11]} # with round key w[40,43] + @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3] + @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7] + @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11] + @{[vaesem_vs $V24, $V4]} # with round key w[12,15] + @{[vaesem_vs $V24, $V5]} # with round key w[16,19] + @{[vaesem_vs $V24, $V6]} # with round key w[20,23] + @{[vaesem_vs $V24, $V7]} # with round key w[24,27] + @{[vaesem_vs $V24, $V8]} # with round key w[28,31] + @{[vaesem_vs $V24, $V9]} # with round key w[32,35] + 
@{[vaesem_vs $V24, $V10]} # with round key w[36,39] + @{[vaesef_vs $V24, $V11]} # with round key w[40,43] ___ return $code; @@ -195,17 +194,17 @@ ___ # aes-128 decryption with round keys v1-v11 sub aes_128_decrypt { my $code=<<___; - @{[vaesz_vs $v24, $v11]} # with round key w[40,43] - @{[vaesdm_vs $v24, $v10]} # with round key w[36,39] - @{[vaesdm_vs $v24, $v9]} # with round key w[32,35] - @{[vaesdm_vs $v24, $v8]} # with round key w[28,31] - @{[vaesdm_vs $v24, $v7]} # with round key w[24,27] - @{[vaesdm_vs $v24, $v6]} # with round key w[20,23] - @{[vaesdm_vs $v24, $v5]} # with round key w[16,19] - @{[vaesdm_vs $v24, $v4]} # with round key w[12,15] - @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11] - @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7] - @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3] + @{[vaesz_vs $V24, $V11]} # with round key w[40,43] + @{[vaesdm_vs $V24, $V10]} # with round key w[36,39] + @{[vaesdm_vs $V24, $V9]} # with round key w[32,35] + @{[vaesdm_vs $V24, $V8]} # with round key w[28,31] + @{[vaesdm_vs $V24, $V7]} # with round key w[24,27] + @{[vaesdm_vs $V24, $V6]} # with round key w[20,23] + @{[vaesdm_vs $V24, $V5]} # with round key w[16,19] + @{[vaesdm_vs $V24, $V4]} # with round key w[12,15] + @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11] + @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7] + @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3] ___ return $code; @@ -214,19 +213,19 @@ ___ # aes-192 encryption with round keys v1-v13 sub aes_192_encrypt { my $code=<<___; - @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3] - @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7] - @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11] - @{[vaesem_vs $v24, $v4]} # with round key w[12,15] - @{[vaesem_vs $v24, $v5]} # with round key w[16,19] - @{[vaesem_vs $v24, $v6]} # with round key w[20,23] - @{[vaesem_vs $v24, $v7]} # with round key w[24,27] - @{[vaesem_vs $v24, $v8]} # with round key w[28,31] - @{[vaesem_vs $v24, $v9]} # with round key w[32,35] - @{[vaesem_vs $v24, $v10]} # with round key w[36,39] - @{[vaesem_vs $v24, $v11]} # with round key w[40,43] - @{[vaesem_vs $v24, $v12]} # with round key w[44,47] - @{[vaesef_vs $v24, $v13]} # with round key w[48,51] + @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3] + @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7] + @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11] + @{[vaesem_vs $V24, $V4]} # with round key w[12,15] + @{[vaesem_vs $V24, $V5]} # with round key w[16,19] + @{[vaesem_vs $V24, $V6]} # with round key w[20,23] + @{[vaesem_vs $V24, $V7]} # with round key w[24,27] + @{[vaesem_vs $V24, $V8]} # with round key w[28,31] + @{[vaesem_vs $V24, $V9]} # with round key w[32,35] + @{[vaesem_vs $V24, $V10]} # with round key w[36,39] + @{[vaesem_vs $V24, $V11]} # with round key w[40,43] + @{[vaesem_vs $V24, $V12]} # with round key w[44,47] + @{[vaesef_vs $V24, $V13]} # with round key w[48,51] ___ return $code; @@ -235,19 +234,19 @@ ___ # aes-192 decryption with round keys v1-v13 sub aes_192_decrypt { my $code=<<___; - @{[vaesz_vs $v24, $v13]} # with round key w[48,51] - @{[vaesdm_vs $v24, $v12]} # with round key w[44,47] - @{[vaesdm_vs $v24, $v11]} # with round key w[40,43] - @{[vaesdm_vs $v24, $v10]} # with round key w[36,39] - @{[vaesdm_vs $v24, $v9]} # with round key w[32,35] - @{[vaesdm_vs $v24, $v8]} # with round key w[28,31] - @{[vaesdm_vs $v24, $v7]} # with round key w[24,27] - @{[vaesdm_vs $v24, $v6]} # with round key w[20,23] - @{[vaesdm_vs $v24, $v5]} # with round key w[16,19] - @{[vaesdm_vs $v24, $v4]} # with 
round key w[12,15] - @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11] - @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7] - @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3] + @{[vaesz_vs $V24, $V13]} # with round key w[48,51] + @{[vaesdm_vs $V24, $V12]} # with round key w[44,47] + @{[vaesdm_vs $V24, $V11]} # with round key w[40,43] + @{[vaesdm_vs $V24, $V10]} # with round key w[36,39] + @{[vaesdm_vs $V24, $V9]} # with round key w[32,35] + @{[vaesdm_vs $V24, $V8]} # with round key w[28,31] + @{[vaesdm_vs $V24, $V7]} # with round key w[24,27] + @{[vaesdm_vs $V24, $V6]} # with round key w[20,23] + @{[vaesdm_vs $V24, $V5]} # with round key w[16,19] + @{[vaesdm_vs $V24, $V4]} # with round key w[12,15] + @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11] + @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7] + @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3] ___ return $code; @@ -256,21 +255,21 @@ ___ # aes-256 encryption with round keys v1-v15 sub aes_256_encrypt { my $code=<<___; - @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3] - @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7] - @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11] - @{[vaesem_vs $v24, $v4]} # with round key w[12,15] - @{[vaesem_vs $v24, $v5]} # with round key w[16,19] - @{[vaesem_vs $v24, $v6]} # with round key w[20,23] - @{[vaesem_vs $v24, $v7]} # with round key w[24,27] - @{[vaesem_vs $v24, $v8]} # with round key w[28,31] - @{[vaesem_vs $v24, $v9]} # with round key w[32,35] - @{[vaesem_vs $v24, $v10]} # with round key w[36,39] - @{[vaesem_vs $v24, $v11]} # with round key w[40,43] - @{[vaesem_vs $v24, $v12]} # with round key w[44,47] - @{[vaesem_vs $v24, $v13]} # with round key w[48,51] - @{[vaesem_vs $v24, $v14]} # with round key w[52,55] - @{[vaesef_vs $v24, $v15]} # with round key w[56,59] + @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3] + @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7] + @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11] + @{[vaesem_vs $V24, $V4]} # with round key w[12,15] + @{[vaesem_vs $V24, $V5]} # with round key w[16,19] + @{[vaesem_vs $V24, $V6]} # with round key w[20,23] + @{[vaesem_vs $V24, $V7]} # with round key w[24,27] + @{[vaesem_vs $V24, $V8]} # with round key w[28,31] + @{[vaesem_vs $V24, $V9]} # with round key w[32,35] + @{[vaesem_vs $V24, $V10]} # with round key w[36,39] + @{[vaesem_vs $V24, $V11]} # with round key w[40,43] + @{[vaesem_vs $V24, $V12]} # with round key w[44,47] + @{[vaesem_vs $V24, $V13]} # with round key w[48,51] + @{[vaesem_vs $V24, $V14]} # with round key w[52,55] + @{[vaesef_vs $V24, $V15]} # with round key w[56,59] ___ return $code; @@ -279,26 +278,34 @@ ___ # aes-256 decryption with round keys v1-v15 sub aes_256_decrypt { my $code=<<___; - @{[vaesz_vs $v24, $v15]} # with round key w[56,59] - @{[vaesdm_vs $v24, $v14]} # with round key w[52,55] - @{[vaesdm_vs $v24, $v13]} # with round key w[48,51] - @{[vaesdm_vs $v24, $v12]} # with round key w[44,47] - @{[vaesdm_vs $v24, $v11]} # with round key w[40,43] - @{[vaesdm_vs $v24, $v10]} # with round key w[36,39] - @{[vaesdm_vs $v24, $v9]} # with round key w[32,35] - @{[vaesdm_vs $v24, $v8]} # with round key w[28,31] - @{[vaesdm_vs $v24, $v7]} # with round key w[24,27] - @{[vaesdm_vs $v24, $v6]} # with round key w[20,23] - @{[vaesdm_vs $v24, $v5]} # with round key w[16,19] - @{[vaesdm_vs $v24, $v4]} # with round key w[12,15] - @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11] - @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7] - @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3] + @{[vaesz_vs $V24, $V15]} # 
with round key w[56,59] + @{[vaesdm_vs $V24, $V14]} # with round key w[52,55] + @{[vaesdm_vs $V24, $V13]} # with round key w[48,51] + @{[vaesdm_vs $V24, $V12]} # with round key w[44,47] + @{[vaesdm_vs $V24, $V11]} # with round key w[40,43] + @{[vaesdm_vs $V24, $V10]} # with round key w[36,39] + @{[vaesdm_vs $V24, $V9]} # with round key w[32,35] + @{[vaesdm_vs $V24, $V8]} # with round key w[28,31] + @{[vaesdm_vs $V24, $V7]} # with round key w[24,27] + @{[vaesdm_vs $V24, $V6]} # with round key w[20,23] + @{[vaesdm_vs $V24, $V5]} # with round key w[16,19] + @{[vaesdm_vs $V24, $V4]} # with round key w[12,15] + @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11] + @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7] + @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3] ___ return $code; } +{ +############################################################################### +# void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out, +# size_t length, const AES_KEY *key, +# unsigned char *ivec, const int enc); +my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5"); +my ($T0, $T1, $ROUNDS) = ("t0", "t1", "t2"); + $code .= <<___; .p2align 3 .globl rv64i_zvkned_cbc_encrypt @@ -311,17 +318,17 @@ rv64i_zvkned_cbc_encrypt: bnez $T1, L_end # Load number of rounds - lwu $rounds, 240($KEYP) + lwu $ROUNDS, 240($KEYP) # Get proper routine for key size li $T0, 10 - beq $rounds, $T0, L_cbc_enc_128 + beq $ROUNDS, $T0, L_cbc_enc_128 li $T0, 12 - beq $rounds, $T0, L_cbc_enc_192 + beq $ROUNDS, $T0, L_cbc_enc_192 li $T0, 14 - beq $rounds, $T0, L_cbc_enc_256 + beq $ROUNDS, $T0, L_cbc_enc_256 ret .size rv64i_zvkned_cbc_encrypt,.-rv64i_zvkned_cbc_encrypt @@ -331,24 +338,24 @@ $code .= <<___; .p2align 3 L_cbc_enc_128: # Load all 11 round keys to v1-v11 registers. - @{[aes_128_load_key]} + @{[aes_128_load_key $KEYP]} # Load IV. - @{[vle32_v $v16, ($IVP)]} + @{[vle32_v $V16, $IVP]} - @{[vle32_v $v24, ($INP)]} - @{[vxor_vv $v24, $v24, $v16]} + @{[vle32_v $V24, $INP]} + @{[vxor_vv $V24, $V24, $V16]} j 2f 1: - @{[vle32_v $v17, ($INP)]} - @{[vxor_vv $v24, $v24, $v17]} + @{[vle32_v $V17, $INP]} + @{[vxor_vv $V24, $V24, $V17]} 2: # AES body @{[aes_128_encrypt]} - @{[vse32_v $v24, ($OUTP)]} + @{[vse32_v $V24, $OUTP]} addi $INP, $INP, 16 addi $OUTP, $OUTP, 16 @@ -356,7 +363,7 @@ L_cbc_enc_128: bnez $LEN, 1b - @{[vse32_v $v24, ($IVP)]} + @{[vse32_v $V24, $IVP]} ret .size L_cbc_enc_128,.-L_cbc_enc_128 @@ -366,24 +373,24 @@ $code .= <<___; .p2align 3 L_cbc_enc_192: # Load all 13 round keys to v1-v13 registers. - @{[aes_192_load_key]} + @{[aes_192_load_key $KEYP]} # Load IV. - @{[vle32_v $v16, ($IVP)]} + @{[vle32_v $V16, $IVP]} - @{[vle32_v $v24, ($INP)]} - @{[vxor_vv $v24, $v24, $v16]} + @{[vle32_v $V24, $INP]} + @{[vxor_vv $V24, $V24, $V16]} j 2f 1: - @{[vle32_v $v17, ($INP)]} - @{[vxor_vv $v24, $v24, $v17]} + @{[vle32_v $V17, $INP]} + @{[vxor_vv $V24, $V24, $V17]} 2: # AES body @{[aes_192_encrypt]} - @{[vse32_v $v24, ($OUTP)]} + @{[vse32_v $V24, $OUTP]} addi $INP, $INP, 16 addi $OUTP, $OUTP, 16 @@ -391,7 +398,7 @@ L_cbc_enc_192: bnez $LEN, 1b - @{[vse32_v $v24, ($IVP)]} + @{[vse32_v $V24, $IVP]} ret .size L_cbc_enc_192,.-L_cbc_enc_192 @@ -401,24 +408,24 @@ $code .= <<___; .p2align 3 L_cbc_enc_256: # Load all 15 round keys to v1-v15 registers. - @{[aes_256_load_key]} + @{[aes_256_load_key $KEYP]} # Load IV. 
- @{[vle32_v $v16, ($IVP)]} + @{[vle32_v $V16, $IVP]} - @{[vle32_v $v24, ($INP)]} - @{[vxor_vv $v24, $v24, $v16]} + @{[vle32_v $V24, $INP]} + @{[vxor_vv $V24, $V24, $V16]} j 2f 1: - @{[vle32_v $v17, ($INP)]} - @{[vxor_vv $v24, $v24, $v17]} + @{[vle32_v $V17, $INP]} + @{[vxor_vv $V24, $V24, $V17]} 2: # AES body @{[aes_256_encrypt]} - @{[vse32_v $v24, ($OUTP)]} + @{[vse32_v $V24, $OUTP]} addi $INP, $INP, 16 addi $OUTP, $OUTP, 16 @@ -426,7 +433,7 @@ L_cbc_enc_256: bnez $LEN, 1b - @{[vse32_v $v24, ($IVP)]} + @{[vse32_v $V24, $IVP]} ret .size L_cbc_enc_256,.-L_cbc_enc_256 @@ -436,13 +443,6 @@ ___ # void rv64i_zvkned_cbc_decrypt(const unsigned char *in, unsigned char *out, # size_t length, const AES_KEY *key, # unsigned char *ivec, const int enc); -my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5"); -my ($T0, $T1, $rounds) = ("t0", "t1", "t2"); -my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7, - $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15, - $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23, - $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31, -) = map("v$_",(0..31)); $code .= <<___; .p2align 3 @@ -456,17 +456,17 @@ rv64i_zvkned_cbc_decrypt: bnez $T1, L_end # Load number of rounds - lwu $rounds, 240($KEYP) + lwu $ROUNDS, 240($KEYP) # Get proper routine for key size li $T0, 10 - beq $rounds, $T0, L_cbc_dec_128 + beq $ROUNDS, $T0, L_cbc_dec_128 li $T0, 12 - beq $rounds, $T0, L_cbc_dec_192 + beq $ROUNDS, $T0, L_cbc_dec_192 li $T0, 14 - beq $rounds, $T0, L_cbc_dec_256 + beq $ROUNDS, $T0, L_cbc_dec_256 ret .size rv64i_zvkned_cbc_decrypt,.-rv64i_zvkned_cbc_decrypt @@ -476,34 +476,34 @@ $code .= <<___; .p2align 3 L_cbc_dec_128: # Load all 11 round keys to v1-v11 registers. - @{[aes_128_load_key]} + @{[aes_128_load_key $KEYP]} # Load IV. - @{[vle32_v $v16, ($IVP)]} + @{[vle32_v $V16, $IVP]} - @{[vle32_v $v24, ($INP)]} - @{[vmv_v_v $v17, $v24]} + @{[vle32_v $V24, $INP]} + @{[vmv_v_v $V17, $V24]} j 2f 1: - @{[vle32_v $v24, ($INP)]} - @{[vmv_v_v $v17, $v24]} + @{[vle32_v $V24, $INP]} + @{[vmv_v_v $V17, $V24]} addi $OUTP, $OUTP, 16 2: # AES body @{[aes_128_decrypt]} - @{[vxor_vv $v24, $v24, $v16]} - @{[vse32_v $v24, ($OUTP)]} - @{[vmv_v_v $v16, $v17]} + @{[vxor_vv $V24, $V24, $V16]} + @{[vse32_v $V24, $OUTP]} + @{[vmv_v_v $V16, $V17]} addi $LEN, $LEN, -16 addi $INP, $INP, 16 bnez $LEN, 1b - @{[vse32_v $v16, ($IVP)]} + @{[vse32_v $V16, $IVP]} ret .size L_cbc_dec_128,.-L_cbc_dec_128 @@ -513,34 +513,34 @@ $code .= <<___; .p2align 3 L_cbc_dec_192: # Load all 13 round keys to v1-v13 registers. - @{[aes_192_load_key]} + @{[aes_192_load_key $KEYP]} # Load IV. - @{[vle32_v $v16, ($IVP)]} + @{[vle32_v $V16, $IVP]} - @{[vle32_v $v24, ($INP)]} - @{[vmv_v_v $v17, $v24]} + @{[vle32_v $V24, $INP]} + @{[vmv_v_v $V17, $V24]} j 2f 1: - @{[vle32_v $v24, ($INP)]} - @{[vmv_v_v $v17, $v24]} + @{[vle32_v $V24, $INP]} + @{[vmv_v_v $V17, $V24]} addi $OUTP, $OUTP, 16 2: # AES body @{[aes_192_decrypt]} - @{[vxor_vv $v24, $v24, $v16]} - @{[vse32_v $v24, ($OUTP)]} - @{[vmv_v_v $v16, $v17]} + @{[vxor_vv $V24, $V24, $V16]} + @{[vse32_v $V24, $OUTP]} + @{[vmv_v_v $V16, $V17]} addi $LEN, $LEN, -16 addi $INP, $INP, 16 bnez $LEN, 1b - @{[vse32_v $v16, ($IVP)]} + @{[vse32_v $V16, $IVP]} ret .size L_cbc_dec_192,.-L_cbc_dec_192 @@ -550,54 +550,278 @@ $code .= <<___; .p2align 3 L_cbc_dec_256: # Load all 15 round keys to v1-v15 registers. - @{[aes_256_load_key]} + @{[aes_256_load_key $KEYP]} # Load IV. 
- @{[vle32_v $v16, ($IVP)]} + @{[vle32_v $V16, $IVP]} - @{[vle32_v $v24, ($INP)]} - @{[vmv_v_v $v17, $v24]} + @{[vle32_v $V24, $INP]} + @{[vmv_v_v $V17, $V24]} j 2f 1: - @{[vle32_v $v24, ($INP)]} - @{[vmv_v_v $v17, $v24]} + @{[vle32_v $V24, $INP]} + @{[vmv_v_v $V17, $V24]} addi $OUTP, $OUTP, 16 2: # AES body @{[aes_256_decrypt]} - @{[vxor_vv $v24, $v24, $v16]} - @{[vse32_v $v24, ($OUTP)]} - @{[vmv_v_v $v16, $v17]} + @{[vxor_vv $V24, $V24, $V16]} + @{[vse32_v $V24, $OUTP]} + @{[vmv_v_v $V16, $V17]} addi $LEN, $LEN, -16 addi $INP, $INP, 16 bnez $LEN, 1b - @{[vse32_v $v16, ($IVP)]} + @{[vse32_v $V16, $IVP]} ret .size L_cbc_dec_256,.-L_cbc_dec_256 ___ +} + +{ +############################################################################### +# void rv64i_zvkned_ecb_encrypt(const unsigned char *in, unsigned char *out, +# size_t length, const AES_KEY *key, +# const int enc); +my ($INP, $OUTP, $LEN, $KEYP, $ENC) = ("a0", "a1", "a2", "a3", "a4"); +my ($REMAIN_LEN) = ("a5"); +my ($VL) = ("a6"); +my ($T0, $T1, $ROUNDS) = ("t0", "t1", "t2"); +my ($LEN32) = ("t3"); + +$code .= <<___; +.p2align 3 +.globl rv64i_zvkned_ecb_encrypt +.type rv64i_zvkned_ecb_encrypt,\@function +rv64i_zvkned_ecb_encrypt: + # Make the LEN become e32 length. + srli $LEN32, $LEN, 2 + + # Load number of rounds + lwu $ROUNDS, 240($KEYP) + + # Get proper routine for key size + li $T0, 10 + beq $ROUNDS, $T0, L_ecb_enc_128 + + li $T0, 12 + beq $ROUNDS, $T0, L_ecb_enc_192 + + li $T0, 14 + beq $ROUNDS, $T0, L_ecb_enc_256 + + ret +.size rv64i_zvkned_ecb_encrypt,.-rv64i_zvkned_ecb_encrypt +___ + +$code .= <<___; +.p2align 3 +L_ecb_enc_128: + # Load all 11 round keys to v1-v11 registers. + @{[aes_128_load_key $KEYP]} + +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} + slli $T0, $VL, 2 + sub $LEN32, $LEN32, $VL + + @{[vle32_v $V24, $INP]} + + # AES body + @{[aes_128_encrypt]} + + @{[vse32_v $V24, $OUTP]} + + add $INP, $INP, $T0 + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size L_ecb_enc_128,.-L_ecb_enc_128 +___ + +$code .= <<___; +.p2align 3 +L_ecb_enc_192: + # Load all 13 round keys to v1-v13 registers. + @{[aes_192_load_key $KEYP]} + +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} + slli $T0, $VL, 2 + sub $LEN32, $LEN32, $VL + + @{[vle32_v $V24, $INP]} + + # AES body + @{[aes_192_encrypt]} + + @{[vse32_v $V24, $OUTP]} + + add $INP, $INP, $T0 + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size L_ecb_enc_192,.-L_ecb_enc_192 +___ + +$code .= <<___; +.p2align 3 +L_ecb_enc_256: + # Load all 15 round keys to v1-v15 registers. + @{[aes_256_load_key $KEYP]} + +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} + slli $T0, $VL, 2 + sub $LEN32, $LEN32, $VL + + @{[vle32_v $V24, $INP]} + + # AES body + @{[aes_256_encrypt]} + + @{[vse32_v $V24, $OUTP]} + + add $INP, $INP, $T0 + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size L_ecb_enc_256,.-L_ecb_enc_256 +___ + +############################################################################### +# void rv64i_zvkned_ecb_decrypt(const unsigned char *in, unsigned char *out, +# size_t length, const AES_KEY *key, +# const int enc); + +$code .= <<___; +.p2align 3 +.globl rv64i_zvkned_ecb_decrypt +.type rv64i_zvkned_ecb_decrypt,\@function +rv64i_zvkned_ecb_decrypt: + # Make the LEN become e32 length. 
+ srli $LEN32, $LEN, 2 + + # Load number of rounds + lwu $ROUNDS, 240($KEYP) + + # Get proper routine for key size + li $T0, 10 + beq $ROUNDS, $T0, L_ecb_dec_128 + + li $T0, 12 + beq $ROUNDS, $T0, L_ecb_dec_192 + + li $T0, 14 + beq $ROUNDS, $T0, L_ecb_dec_256 + + ret +.size rv64i_zvkned_ecb_decrypt,.-rv64i_zvkned_ecb_decrypt +___ + +$code .= <<___; +.p2align 3 +L_ecb_dec_128: + # Load all 11 round keys to v1-v11 registers. + @{[aes_128_load_key $KEYP]} + +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} + slli $T0, $VL, 2 + sub $LEN32, $LEN32, $VL + + @{[vle32_v $V24, $INP]} + + # AES body + @{[aes_128_decrypt]} + + @{[vse32_v $V24, $OUTP]} + + add $INP, $INP, $T0 + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size L_ecb_dec_128,.-L_ecb_dec_128 +___ + +$code .= <<___; +.p2align 3 +L_ecb_dec_192: + # Load all 13 round keys to v1-v13 registers. + @{[aes_192_load_key $KEYP]} + +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} + slli $T0, $VL, 2 + sub $LEN32, $LEN32, $VL + + @{[vle32_v $V24, $INP]} + + # AES body + @{[aes_192_decrypt]} + + @{[vse32_v $V24, $OUTP]} + + add $INP, $INP, $T0 + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size L_ecb_dec_192,.-L_ecb_dec_192 +___ + +$code .= <<___; +.p2align 3 +L_ecb_dec_256: + # Load all 15 round keys to v1-v15 registers. + @{[aes_256_load_key $KEYP]} + +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} + slli $T0, $VL, 2 + sub $LEN32, $LEN32, $VL + + @{[vle32_v $V24, $INP]} + + # AES body + @{[aes_256_decrypt]} + + @{[vse32_v $V24, $OUTP]} + + add $INP, $INP, $T0 + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size L_ecb_dec_256,.-L_ecb_dec_256 +___ } +{ ################################################################################ # int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) # int rv64i_zvkned_set_decrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) -{ my ($UKEY,$BITS,$KEYP) = ("a0", "a1", "a2"); my ($T0,$T1,$T4) = ("t1", "t2", "t4"); -my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, - $v7, $v8, $v9, $v10, $v11, $v12, - $v13, $v14, $v15, $v16, $v17, $v18, - $v19, $v20, $v21, $v22, $v23, $v24, -) = map("v$_",(0..24)); $code .= <<___; .p2align 3 @@ -647,42 +871,42 @@ L_set_key_128: @{[vsetivli__x0_4_e32_m1_tu_mu]} # Load the key - @{[vle32_v $v10, ($UKEY)]} + @{[vle32_v $V10, ($UKEY)]} # Generate keys for round 2-11 into registers v11-v20. 
- @{[vaeskf1_vi $v11, $v10, 1]} # v11 <- rk2 (w[ 4, 7]) - @{[vaeskf1_vi $v12, $v11, 2]} # v12 <- rk3 (w[ 8,11]) - @{[vaeskf1_vi $v13, $v12, 3]} # v13 <- rk4 (w[12,15]) - @{[vaeskf1_vi $v14, $v13, 4]} # v14 <- rk5 (w[16,19]) - @{[vaeskf1_vi $v15, $v14, 5]} # v15 <- rk6 (w[20,23]) - @{[vaeskf1_vi $v16, $v15, 6]} # v16 <- rk7 (w[24,27]) - @{[vaeskf1_vi $v17, $v16, 7]} # v17 <- rk8 (w[28,31]) - @{[vaeskf1_vi $v18, $v17, 8]} # v18 <- rk9 (w[32,35]) - @{[vaeskf1_vi $v19, $v18, 9]} # v19 <- rk10 (w[36,39]) - @{[vaeskf1_vi $v20, $v19, 10]} # v20 <- rk11 (w[40,43]) + @{[vaeskf1_vi $V11, $V10, 1]} # v11 <- rk2 (w[ 4, 7]) + @{[vaeskf1_vi $V12, $V11, 2]} # v12 <- rk3 (w[ 8,11]) + @{[vaeskf1_vi $V13, $V12, 3]} # v13 <- rk4 (w[12,15]) + @{[vaeskf1_vi $V14, $V13, 4]} # v14 <- rk5 (w[16,19]) + @{[vaeskf1_vi $V15, $V14, 5]} # v15 <- rk6 (w[20,23]) + @{[vaeskf1_vi $V16, $V15, 6]} # v16 <- rk7 (w[24,27]) + @{[vaeskf1_vi $V17, $V16, 7]} # v17 <- rk8 (w[28,31]) + @{[vaeskf1_vi $V18, $V17, 8]} # v18 <- rk9 (w[32,35]) + @{[vaeskf1_vi $V19, $V18, 9]} # v19 <- rk10 (w[36,39]) + @{[vaeskf1_vi $V20, $V19, 10]} # v20 <- rk11 (w[40,43]) # Store the round keys - @{[vse32_v $v10, ($KEYP)]} + @{[vse32_v $V10, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v11, ($KEYP)]} + @{[vse32_v $V11, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v12, ($KEYP)]} + @{[vse32_v $V12, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v13, ($KEYP)]} + @{[vse32_v $V13, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v14, ($KEYP)]} + @{[vse32_v $V14, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v15, ($KEYP)]} + @{[vse32_v $V15, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v16, ($KEYP)]} + @{[vse32_v $V16, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v17, ($KEYP)]} + @{[vse32_v $V17, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v18, ($KEYP)]} + @{[vse32_v $V18, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v19, ($KEYP)]} + @{[vse32_v $V19, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v20, ($KEYP)]} + @{[vse32_v $V20, $KEYP]} li a0, 1 ret @@ -699,66 +923,66 @@ L_set_key_256: @{[vsetivli__x0_4_e32_m1_tu_mu]} # Load the key - @{[vle32_v $v10, ($UKEY)]} + @{[vle32_v $V10, ($UKEY)]} addi $UKEY, $UKEY, 16 - @{[vle32_v $v11, ($UKEY)]} + @{[vle32_v $V11, ($UKEY)]} - @{[vmv_v_v $v12, $v10]} - @{[vaeskf2_vi $v12, $v11, 2]} - @{[vmv_v_v $v13, $v11]} - @{[vaeskf2_vi $v13, $v12, 3]} - @{[vmv_v_v $v14, $v12]} - @{[vaeskf2_vi $v14, $v13, 4]} - @{[vmv_v_v $v15, $v13]} - @{[vaeskf2_vi $v15, $v14, 5]} - @{[vmv_v_v $v16, $v14]} - @{[vaeskf2_vi $v16, $v15, 6]} - @{[vmv_v_v $v17, $v15]} - @{[vaeskf2_vi $v17, $v16, 7]} - @{[vmv_v_v $v18, $v16]} - @{[vaeskf2_vi $v18, $v17, 8]} - @{[vmv_v_v $v19, $v17]} - @{[vaeskf2_vi $v19, $v18, 9]} - @{[vmv_v_v $v20, $v18]} - @{[vaeskf2_vi $v20, $v19, 10]} - @{[vmv_v_v $v21, $v19]} - @{[vaeskf2_vi $v21, $v20, 11]} - @{[vmv_v_v $v22, $v20]} - @{[vaeskf2_vi $v22, $v21, 12]} - @{[vmv_v_v $v23, $v21]} - @{[vaeskf2_vi $v23, $v22, 13]} - @{[vmv_v_v $v24, $v22]} - @{[vaeskf2_vi $v24, $v23, 14]} + @{[vmv_v_v $V12, $V10]} + @{[vaeskf2_vi $V12, $V11, 2]} + @{[vmv_v_v $V13, $V11]} + @{[vaeskf2_vi $V13, $V12, 3]} + @{[vmv_v_v $V14, $V12]} + @{[vaeskf2_vi $V14, $V13, 4]} + @{[vmv_v_v $V15, $V13]} + @{[vaeskf2_vi $V15, $V14, 5]} + @{[vmv_v_v $V16, $V14]} + @{[vaeskf2_vi $V16, $V15, 6]} + @{[vmv_v_v $V17, $V15]} + @{[vaeskf2_vi $V17, $V16, 7]} + @{[vmv_v_v $V18, $V16]} + @{[vaeskf2_vi $V18, $V17, 8]} + @{[vmv_v_v $V19, $V17]} + @{[vaeskf2_vi $V19, $V18, 9]} + @{[vmv_v_v $V20, $V18]} + @{[vaeskf2_vi $V20, $V19, 10]} + @{[vmv_v_v $V21, $V19]} + @{[vaeskf2_vi $V21, 
$V20, 11]} + @{[vmv_v_v $V22, $V20]} + @{[vaeskf2_vi $V22, $V21, 12]} + @{[vmv_v_v $V23, $V21]} + @{[vaeskf2_vi $V23, $V22, 13]} + @{[vmv_v_v $V24, $V22]} + @{[vaeskf2_vi $V24, $V23, 14]} - @{[vse32_v $v10, ($KEYP)]} + @{[vse32_v $V10, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v11, ($KEYP)]} + @{[vse32_v $V11, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v12, ($KEYP)]} + @{[vse32_v $V12, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v13, ($KEYP)]} + @{[vse32_v $V13, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v14, ($KEYP)]} + @{[vse32_v $V14, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v15, ($KEYP)]} + @{[vse32_v $V15, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v16, ($KEYP)]} + @{[vse32_v $V16, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v17, ($KEYP)]} + @{[vse32_v $V17, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v18, ($KEYP)]} + @{[vse32_v $V18, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v19, ($KEYP)]} + @{[vse32_v $V19, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v20, ($KEYP)]} + @{[vse32_v $V20, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v21, ($KEYP)]} + @{[vse32_v $V21, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v22, ($KEYP)]} + @{[vse32_v $V22, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v23, ($KEYP)]} + @{[vse32_v $V23, $KEYP]} addi $KEYP, $KEYP, 16 - @{[vse32_v $v24, ($KEYP)]} + @{[vse32_v $V24, $KEYP]} li a0, 1 ret @@ -766,17 +990,12 @@ L_set_key_256: ___ } +{ ################################################################################ # void rv64i_zvkned_encrypt(const unsigned char *in, unsigned char *out, # const AES_KEY *key); -{ my ($INP,$OUTP,$KEYP) = ("a0", "a1", "a2"); -my ($T0,$T1, $rounds, $T6) = ("a3", "a4", "t5", "t6"); -my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, - $v7, $v8, $v9, $v10, $v11, $v12, - $v13, $v14, $v15, $v16, $v17, $v18, - $v19, $v20, $v21, $v22, $v23, $v24, -) = map("v$_",(0..24)); +my ($T0,$T1, $ROUNDS, $T6) = ("a3", "a4", "t5", "t6"); $code .= <<___; .p2align 3 @@ -784,15 +1003,15 @@ $code .= <<___; .type rv64i_zvkned_encrypt,\@function rv64i_zvkned_encrypt: # Load number of rounds - lwu $rounds, 240($KEYP) + lwu $ROUNDS, 240($KEYP) # Get proper routine for key size li $T6, 14 - beq $rounds, $T6, L_enc_256 + beq $ROUNDS, $T6, L_enc_256 li $T6, 10 - beq $rounds, $T6, L_enc_128 + beq $ROUNDS, $T6, L_enc_128 li $T6, 12 - beq $rounds, $T6, L_enc_192 + beq $ROUNDS, $T6, L_enc_192 j L_fail_m2 .size rv64i_zvkned_encrypt,.-rv64i_zvkned_encrypt @@ -803,42 +1022,42 @@ $code .= <<___; L_enc_128: @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, ($INP)]} + @{[vle32_v $V1, $INP]} - @{[vle32_v $v10, ($KEYP)]} - @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3] + @{[vle32_v $V10, $KEYP]} + @{[vaesz_vs $V1, $V10]} # with round key w[ 0, 3] addi $KEYP, $KEYP, 16 - @{[vle32_v $v11, ($KEYP)]} - @{[vaesem_vs $v1, $v11]} # with round key w[ 4, 7] + @{[vle32_v $V11, $KEYP]} + @{[vaesem_vs $V1, $V11]} # with round key w[ 4, 7] addi $KEYP, $KEYP, 16 - @{[vle32_v $v12, ($KEYP)]} - @{[vaesem_vs $v1, $v12]} # with round key w[ 8,11] + @{[vle32_v $V12, $KEYP]} + @{[vaesem_vs $V1, $V12]} # with round key w[ 8,11] addi $KEYP, $KEYP, 16 - @{[vle32_v $v13, ($KEYP)]} - @{[vaesem_vs $v1, $v13]} # with round key w[12,15] + @{[vle32_v $V13, $KEYP]} + @{[vaesem_vs $V1, $V13]} # with round key w[12,15] addi $KEYP, $KEYP, 16 - @{[vle32_v $v14, ($KEYP)]} - @{[vaesem_vs $v1, $v14]} # with round key w[16,19] + @{[vle32_v $V14, $KEYP]} + @{[vaesem_vs $V1, $V14]} # with round key w[16,19] addi $KEYP, $KEYP, 16 - @{[vle32_v $v15, ($KEYP)]} - @{[vaesem_vs $v1, $v15]} # with 
round key w[20,23] + @{[vle32_v $V15, $KEYP]} + @{[vaesem_vs $V1, $V15]} # with round key w[20,23] addi $KEYP, $KEYP, 16 - @{[vle32_v $v16, ($KEYP)]} - @{[vaesem_vs $v1, $v16]} # with round key w[24,27] + @{[vle32_v $V16, $KEYP]} + @{[vaesem_vs $V1, $V16]} # with round key w[24,27] addi $KEYP, $KEYP, 16 - @{[vle32_v $v17, ($KEYP)]} - @{[vaesem_vs $v1, $v17]} # with round key w[28,31] + @{[vle32_v $V17, $KEYP]} + @{[vaesem_vs $V1, $V17]} # with round key w[28,31] addi $KEYP, $KEYP, 16 - @{[vle32_v $v18, ($KEYP)]} - @{[vaesem_vs $v1, $v18]} # with round key w[32,35] + @{[vle32_v $V18, $KEYP]} + @{[vaesem_vs $V1, $V18]} # with round key w[32,35] addi $KEYP, $KEYP, 16 - @{[vle32_v $v19, ($KEYP)]} - @{[vaesem_vs $v1, $v19]} # with round key w[36,39] + @{[vle32_v $V19, $KEYP]} + @{[vaesem_vs $V1, $V19]} # with round key w[36,39] addi $KEYP, $KEYP, 16 - @{[vle32_v $v20, ($KEYP)]} - @{[vaesef_vs $v1, $v20]} # with round key w[40,43] + @{[vle32_v $V20, $KEYP]} + @{[vaesef_vs $V1, $V20]} # with round key w[40,43] - @{[vse32_v $v1, ($OUTP)]} + @{[vse32_v $V1, $OUTP]} ret .size L_enc_128,.-L_enc_128 @@ -849,48 +1068,48 @@ $code .= <<___; L_enc_192: @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, ($INP)]} + @{[vle32_v $V1, $INP]} - @{[vle32_v $v10, ($KEYP)]} - @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3] + @{[vle32_v $V10, $KEYP]} + @{[vaesz_vs $V1, $V10]} # with round key w[ 0, 3] addi $KEYP, $KEYP, 16 - @{[vle32_v $v11, ($KEYP)]} - @{[vaesem_vs $v1, $v11]} + @{[vle32_v $V11, $KEYP]} + @{[vaesem_vs $V1, $V11]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v12, ($KEYP)]} - @{[vaesem_vs $v1, $v12]} + @{[vle32_v $V12, $KEYP]} + @{[vaesem_vs $V1, $V12]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v13, ($KEYP)]} - @{[vaesem_vs $v1, $v13]} + @{[vle32_v $V13, $KEYP]} + @{[vaesem_vs $V1, $V13]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v14, ($KEYP)]} - @{[vaesem_vs $v1, $v14]} + @{[vle32_v $V14, $KEYP]} + @{[vaesem_vs $V1, $V14]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v15, ($KEYP)]} - @{[vaesem_vs $v1, $v15]} + @{[vle32_v $V15, $KEYP]} + @{[vaesem_vs $V1, $V15]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v16, ($KEYP)]} - @{[vaesem_vs $v1, $v16]} + @{[vle32_v $V16, $KEYP]} + @{[vaesem_vs $V1, $V16]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v17, ($KEYP)]} - @{[vaesem_vs $v1, $v17]} + @{[vle32_v $V17, $KEYP]} + @{[vaesem_vs $V1, $V17]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v18, ($KEYP)]} - @{[vaesem_vs $v1, $v18]} + @{[vle32_v $V18, $KEYP]} + @{[vaesem_vs $V1, $V18]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v19, ($KEYP)]} - @{[vaesem_vs $v1, $v19]} + @{[vle32_v $V19, $KEYP]} + @{[vaesem_vs $V1, $V19]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v20, ($KEYP)]} - @{[vaesem_vs $v1, $v20]} + @{[vle32_v $V20, $KEYP]} + @{[vaesem_vs $V1, $V20]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v21, ($KEYP)]} - @{[vaesem_vs $v1, $v21]} + @{[vle32_v $V21, $KEYP]} + @{[vaesem_vs $V1, $V21]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v22, ($KEYP)]} - @{[vaesef_vs $v1, $v22]} + @{[vle32_v $V22, $KEYP]} + @{[vaesef_vs $V1, $V22]} - @{[vse32_v $v1, ($OUTP)]} + @{[vse32_v $V1, $OUTP]} ret .size L_enc_192,.-L_enc_192 ___ @@ -900,70 +1119,61 @@ $code .= <<___; L_enc_256: @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, ($INP)]} + @{[vle32_v $V1, $INP]} - @{[vle32_v $v10, ($KEYP)]} - @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3] + @{[vle32_v $V10, $KEYP]} + @{[vaesz_vs $V1, $V10]} # with round key w[ 0, 3] addi $KEYP, $KEYP, 16 - @{[vle32_v $v11, ($KEYP)]} - @{[vaesem_vs $v1, $v11]} + @{[vle32_v $V11, $KEYP]} + @{[vaesem_vs $V1, $V11]} addi $KEYP, $KEYP, 16 
- @{[vle32_v $v12, ($KEYP)]} - @{[vaesem_vs $v1, $v12]} + @{[vle32_v $V12, $KEYP]} + @{[vaesem_vs $V1, $V12]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v13, ($KEYP)]} - @{[vaesem_vs $v1, $v13]} + @{[vle32_v $V13, $KEYP]} + @{[vaesem_vs $V1, $V13]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v14, ($KEYP)]} - @{[vaesem_vs $v1, $v14]} + @{[vle32_v $V14, $KEYP]} + @{[vaesem_vs $V1, $V14]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v15, ($KEYP)]} - @{[vaesem_vs $v1, $v15]} + @{[vle32_v $V15, $KEYP]} + @{[vaesem_vs $V1, $V15]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v16, ($KEYP)]} - @{[vaesem_vs $v1, $v16]} + @{[vle32_v $V16, $KEYP]} + @{[vaesem_vs $V1, $V16]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v17, ($KEYP)]} - @{[vaesem_vs $v1, $v17]} + @{[vle32_v $V17, $KEYP]} + @{[vaesem_vs $V1, $V17]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v18, ($KEYP)]} - @{[vaesem_vs $v1, $v18]} + @{[vle32_v $V18, $KEYP]} + @{[vaesem_vs $V1, $V18]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v19, ($KEYP)]} - @{[vaesem_vs $v1, $v19]} + @{[vle32_v $V19, $KEYP]} + @{[vaesem_vs $V1, $V19]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v20, ($KEYP)]} - @{[vaesem_vs $v1, $v20]} + @{[vle32_v $V20, $KEYP]} + @{[vaesem_vs $V1, $V20]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v21, ($KEYP)]} - @{[vaesem_vs $v1, $v21]} + @{[vle32_v $V21, $KEYP]} + @{[vaesem_vs $V1, $V21]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v22, ($KEYP)]} - @{[vaesem_vs $v1, $v22]} + @{[vle32_v $V22, $KEYP]} + @{[vaesem_vs $V1, $V22]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v23, ($KEYP)]} - @{[vaesem_vs $v1, $v23]} + @{[vle32_v $V23, $KEYP]} + @{[vaesem_vs $V1, $V23]} addi $KEYP, $KEYP, 16 - @{[vle32_v $v24, ($KEYP)]} - @{[vaesef_vs $v1, $v24]} + @{[vle32_v $V24, $KEYP]} + @{[vaesef_vs $V1, $V24]} - @{[vse32_v $v1, ($OUTP)]} + @{[vse32_v $V1, $OUTP]} ret .size L_enc_256,.-L_enc_256 ___ -} ################################################################################ # void rv64i_zvkned_decrypt(const unsigned char *in, unsigned char *out, # const AES_KEY *key); -{ -my ($INP,$OUTP,$KEYP) = ("a0", "a1", "a2"); -my ($T0,$T1, $rounds, $T6) = ("a3", "a4", "t5", "t6"); -my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, - $v7, $v8, $v9, $v10, $v11, $v12, - $v13, $v14, $v15, $v16, $v17, $v18, - $v19, $v20, $v21, $v22, $v23, $v24, -) = map("v$_",(0..24)); $code .= <<___; .p2align 3 @@ -971,15 +1181,15 @@ $code .= <<___; .type rv64i_zvkned_decrypt,\@function rv64i_zvkned_decrypt: # Load number of rounds - lwu $rounds, 240($KEYP) + lwu $ROUNDS, 240($KEYP) # Get proper routine for key size li $T6, 14 - beq $rounds, $T6, L_dec_256 + beq $ROUNDS, $T6, L_dec_256 li $T6, 10 - beq $rounds, $T6, L_dec_128 + beq $ROUNDS, $T6, L_dec_128 li $T6, 12 - beq $rounds, $T6, L_dec_192 + beq $ROUNDS, $T6, L_dec_192 j L_fail_m2 .size rv64i_zvkned_decrypt,.-rv64i_zvkned_decrypt @@ -990,43 +1200,43 @@ $code .= <<___; L_dec_128: @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, ($INP)]} + @{[vle32_v $V1, $INP]} addi $KEYP, $KEYP, 160 - @{[vle32_v $v20, ($KEYP)]} - @{[vaesz_vs $v1, $v20]} # with round key w[40,43] + @{[vle32_v $V20, $KEYP]} + @{[vaesz_vs $V1, $V20]} # with round key w[40,43] addi $KEYP, $KEYP, -16 - @{[vle32_v $v19, ($KEYP)]} - @{[vaesdm_vs $v1, $v19]} # with round key w[36,39] + @{[vle32_v $V19, $KEYP]} + @{[vaesdm_vs $V1, $V19]} # with round key w[36,39] addi $KEYP, $KEYP, -16 - @{[vle32_v $v18, ($KEYP)]} - @{[vaesdm_vs $v1, $v18]} # with round key w[32,35] + @{[vle32_v $V18, $KEYP]} + @{[vaesdm_vs $V1, $V18]} # with round key w[32,35] addi $KEYP, $KEYP, -16 - @{[vle32_v $v17, ($KEYP)]} - @{[vaesdm_vs $v1, $v17]} # with round key 
w[28,31] + @{[vle32_v $V17, $KEYP]} + @{[vaesdm_vs $V1, $V17]} # with round key w[28,31] addi $KEYP, $KEYP, -16 - @{[vle32_v $v16, ($KEYP)]} - @{[vaesdm_vs $v1, $v16]} # with round key w[24,27] + @{[vle32_v $V16, $KEYP]} + @{[vaesdm_vs $V1, $V16]} # with round key w[24,27] addi $KEYP, $KEYP, -16 - @{[vle32_v $v15, ($KEYP)]} - @{[vaesdm_vs $v1, $v15]} # with round key w[20,23] + @{[vle32_v $V15, $KEYP]} + @{[vaesdm_vs $V1, $V15]} # with round key w[20,23] addi $KEYP, $KEYP, -16 - @{[vle32_v $v14, ($KEYP)]} - @{[vaesdm_vs $v1, $v14]} # with round key w[16,19] + @{[vle32_v $V14, $KEYP]} + @{[vaesdm_vs $V1, $V14]} # with round key w[16,19] addi $KEYP, $KEYP, -16 - @{[vle32_v $v13, ($KEYP)]} - @{[vaesdm_vs $v1, $v13]} # with round key w[12,15] + @{[vle32_v $V13, $KEYP]} + @{[vaesdm_vs $V1, $V13]} # with round key w[12,15] addi $KEYP, $KEYP, -16 - @{[vle32_v $v12, ($KEYP)]} - @{[vaesdm_vs $v1, $v12]} # with round key w[ 8,11] + @{[vle32_v $V12, $KEYP]} + @{[vaesdm_vs $V1, $V12]} # with round key w[ 8,11] addi $KEYP, $KEYP, -16 - @{[vle32_v $v11, ($KEYP)]} - @{[vaesdm_vs $v1, $v11]} # with round key w[ 4, 7] + @{[vle32_v $V11, $KEYP]} + @{[vaesdm_vs $V1, $V11]} # with round key w[ 4, 7] addi $KEYP, $KEYP, -16 - @{[vle32_v $v10, ($KEYP)]} - @{[vaesdf_vs $v1, $v10]} # with round key w[ 0, 3] + @{[vle32_v $V10, $KEYP]} + @{[vaesdf_vs $V1, $V10]} # with round key w[ 0, 3] - @{[vse32_v $v1, ($OUTP)]} + @{[vse32_v $V1, $OUTP]} ret .size L_dec_128,.-L_dec_128 @@ -1037,49 +1247,49 @@ $code .= <<___; L_dec_192: @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, ($INP)]} + @{[vle32_v $V1, $INP]} addi $KEYP, $KEYP, 192 - @{[vle32_v $v22, ($KEYP)]} - @{[vaesz_vs $v1, $v22]} # with round key w[48,51] + @{[vle32_v $V22, $KEYP]} + @{[vaesz_vs $V1, $V22]} # with round key w[48,51] addi $KEYP, $KEYP, -16 - @{[vle32_v $v21, ($KEYP)]} - @{[vaesdm_vs $v1, $v21]} # with round key w[44,47] + @{[vle32_v $V21, $KEYP]} + @{[vaesdm_vs $V1, $V21]} # with round key w[44,47] addi $KEYP, $KEYP, -16 - @{[vle32_v $v20, ($KEYP)]} - @{[vaesdm_vs $v1, $v20]} # with round key w[40,43] + @{[vle32_v $V20, $KEYP]} + @{[vaesdm_vs $V1, $V20]} # with round key w[40,43] addi $KEYP, $KEYP, -16 - @{[vle32_v $v19, ($KEYP)]} - @{[vaesdm_vs $v1, $v19]} # with round key w[36,39] + @{[vle32_v $V19, $KEYP]} + @{[vaesdm_vs $V1, $V19]} # with round key w[36,39] addi $KEYP, $KEYP, -16 - @{[vle32_v $v18, ($KEYP)]} - @{[vaesdm_vs $v1, $v18]} # with round key w[32,35] + @{[vle32_v $V18, $KEYP]} + @{[vaesdm_vs $V1, $V18]} # with round key w[32,35] addi $KEYP, $KEYP, -16 - @{[vle32_v $v17, ($KEYP)]} - @{[vaesdm_vs $v1, $v17]} # with round key w[28,31] + @{[vle32_v $V17, $KEYP]} + @{[vaesdm_vs $V1, $V17]} # with round key w[28,31] addi $KEYP, $KEYP, -16 - @{[vle32_v $v16, ($KEYP)]} - @{[vaesdm_vs $v1, $v16]} # with round key w[24,27] + @{[vle32_v $V16, $KEYP]} + @{[vaesdm_vs $V1, $V16]} # with round key w[24,27] addi $KEYP, $KEYP, -16 - @{[vle32_v $v15, ($KEYP)]} - @{[vaesdm_vs $v1, $v15]} # with round key w[20,23] + @{[vle32_v $V15, $KEYP]} + @{[vaesdm_vs $V1, $V15]} # with round key w[20,23] addi $KEYP, $KEYP, -16 - @{[vle32_v $v14, ($KEYP)]} - @{[vaesdm_vs $v1, $v14]} # with round key w[16,19] + @{[vle32_v $V14, $KEYP]} + @{[vaesdm_vs $V1, $V14]} # with round key w[16,19] addi $KEYP, $KEYP, -16 - @{[vle32_v $v13, ($KEYP)]} - @{[vaesdm_vs $v1, $v13]} # with round key w[12,15] + @{[vle32_v $V13, $KEYP]} + @{[vaesdm_vs $V1, $V13]} # with round key w[12,15] addi $KEYP, $KEYP, -16 - @{[vle32_v $v12, ($KEYP)]} - @{[vaesdm_vs $v1, $v12]} # 
with round key w[ 8,11] + @{[vle32_v $V12, $KEYP]} + @{[vaesdm_vs $V1, $V12]} # with round key w[ 8,11] addi $KEYP, $KEYP, -16 - @{[vle32_v $v11, ($KEYP)]} - @{[vaesdm_vs $v1, $v11]} # with round key w[ 4, 7] + @{[vle32_v $V11, $KEYP]} + @{[vaesdm_vs $V1, $V11]} # with round key w[ 4, 7] addi $KEYP, $KEYP, -16 - @{[vle32_v $v10, ($KEYP)]} - @{[vaesdf_vs $v1, $v10]} # with round key w[ 0, 3] + @{[vle32_v $V10, $KEYP]} + @{[vaesdf_vs $V1, $V10]} # with round key w[ 0, 3] - @{[vse32_v $v1, ($OUTP)]} + @{[vse32_v $V1, $OUTP]} ret .size L_dec_192,.-L_dec_192 @@ -1090,55 +1300,55 @@ $code .= <<___; L_dec_256: @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[vle32_v $v1, ($INP)]} + @{[vle32_v $V1, $INP]} addi $KEYP, $KEYP, 224 - @{[vle32_v $v24, ($KEYP)]} - @{[vaesz_vs $v1, $v24]} # with round key w[56,59] + @{[vle32_v $V24, $KEYP]} + @{[vaesz_vs $V1, $V24]} # with round key w[56,59] addi $KEYP, $KEYP, -16 - @{[vle32_v $v23, ($KEYP)]} - @{[vaesdm_vs $v1, $v23]} # with round key w[52,55] + @{[vle32_v $V23, $KEYP]} + @{[vaesdm_vs $V1, $V23]} # with round key w[52,55] addi $KEYP, $KEYP, -16 - @{[vle32_v $v22, ($KEYP)]} - @{[vaesdm_vs $v1, $v22]} # with round key w[48,51] + @{[vle32_v $V22, $KEYP]} + @{[vaesdm_vs $V1, $V22]} # with round key w[48,51] addi $KEYP, $KEYP, -16 - @{[vle32_v $v21, ($KEYP)]} - @{[vaesdm_vs $v1, $v21]} # with round key w[44,47] + @{[vle32_v $V21, $KEYP]} + @{[vaesdm_vs $V1, $V21]} # with round key w[44,47] addi $KEYP, $KEYP, -16 - @{[vle32_v $v20, ($KEYP)]} - @{[vaesdm_vs $v1, $v20]} # with round key w[40,43] + @{[vle32_v $V20, $KEYP]} + @{[vaesdm_vs $V1, $V20]} # with round key w[40,43] addi $KEYP, $KEYP, -16 - @{[vle32_v $v19, ($KEYP)]} - @{[vaesdm_vs $v1, $v19]} # with round key w[36,39] + @{[vle32_v $V19, $KEYP]} + @{[vaesdm_vs $V1, $V19]} # with round key w[36,39] addi $KEYP, $KEYP, -16 - @{[vle32_v $v18, ($KEYP)]} - @{[vaesdm_vs $v1, $v18]} # with round key w[32,35] + @{[vle32_v $V18, $KEYP]} + @{[vaesdm_vs $V1, $V18]} # with round key w[32,35] addi $KEYP, $KEYP, -16 - @{[vle32_v $v17, ($KEYP)]} - @{[vaesdm_vs $v1, $v17]} # with round key w[28,31] + @{[vle32_v $V17, $KEYP]} + @{[vaesdm_vs $V1, $V17]} # with round key w[28,31] addi $KEYP, $KEYP, -16 - @{[vle32_v $v16, ($KEYP)]} - @{[vaesdm_vs $v1, $v16]} # with round key w[24,27] + @{[vle32_v $V16, $KEYP]} + @{[vaesdm_vs $V1, $V16]} # with round key w[24,27] addi $KEYP, $KEYP, -16 - @{[vle32_v $v15, ($KEYP)]} - @{[vaesdm_vs $v1, $v15]} # with round key w[20,23] + @{[vle32_v $V15, $KEYP]} + @{[vaesdm_vs $V1, $V15]} # with round key w[20,23] addi $KEYP, $KEYP, -16 - @{[vle32_v $v14, ($KEYP)]} - @{[vaesdm_vs $v1, $v14]} # with round key w[16,19] + @{[vle32_v $V14, $KEYP]} + @{[vaesdm_vs $V1, $V14]} # with round key w[16,19] addi $KEYP, $KEYP, -16 - @{[vle32_v $v13, ($KEYP)]} - @{[vaesdm_vs $v1, $v13]} # with round key w[12,15] + @{[vle32_v $V13, $KEYP]} + @{[vaesdm_vs $V1, $V13]} # with round key w[12,15] addi $KEYP, $KEYP, -16 - @{[vle32_v $v12, ($KEYP)]} - @{[vaesdm_vs $v1, $v12]} # with round key w[ 8,11] + @{[vle32_v $V12, $KEYP]} + @{[vaesdm_vs $V1, $V12]} # with round key w[ 8,11] addi $KEYP, $KEYP, -16 - @{[vle32_v $v11, ($KEYP)]} - @{[vaesdm_vs $v1, $v11]} # with round key w[ 4, 7] + @{[vle32_v $V11, $KEYP]} + @{[vaesdm_vs $V1, $V11]} # with round key w[ 4, 7] addi $KEYP, $KEYP, -16 - @{[vle32_v $v10, ($KEYP)]} - @{[vaesdf_vs $v1, $v10]} # with round key w[ 0, 3] + @{[vle32_v $V10, $KEYP]} + @{[vaesdf_vs $V1, $V10]} # with round key w[ 0, 3] - @{[vse32_v $v1, ($OUTP)]} + @{[vse32_v $V1, $OUTP]} ret .size 
L_dec_256,.-L_dec_256
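
For illustration, here is a minimal caller-side sketch of the new ECB entry points, in C. The prototypes are copied from the perlasm comments above, and AES_KEY comes from <openssl/aes.h>; everything else (the wrapper name, the direct call, the omitted error handling) is an assumption for the sketch, not part of the patch. ECB requires length to be a multiple of the 16-byte AES block size, and the Zvkned extension must be available at runtime.

    #include <stddef.h>
    #include <openssl/aes.h>   /* AES_KEY */

    /* Prototypes as documented in the perlasm comments above. */
    int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey,
                                     const int bits, AES_KEY *key);
    void rv64i_zvkned_ecb_encrypt(const unsigned char *in, unsigned char *out,
                                  size_t length, const AES_KEY *key,
                                  const int enc);

    /* Hypothetical wrapper: encrypt a whole buffer with AES-128-ECB in one
     * call; the assembly strip-mines the buffer itself, so no per-block
     * loop is needed on the caller side. */
    static void ecb128_encrypt(const unsigned char key[16],
                               const unsigned char *pt, unsigned char *ct,
                               size_t len /* multiple of 16 */)
    {
        AES_KEY ks;
        rv64i_zvkned_set_encrypt_key(key, 128, &ks); /* returns 1 on success */
        rv64i_zvkned_ecb_encrypt(pt, ct, len, &ks, 1 /* enc */);
    }

In OpenSSL proper these routines are reached through CPU-capability dispatch rather than called directly; the sketch only shows the calling contract.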
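What distinguishes the new ECB paths from the existing CBC ones is the strip-mining loop: LEN is first converted from bytes to a count of 32-bit elements (srli $LEN32, $LEN, 2), and each iteration lets vsetvli pick how many of those elements the hardware processes at e32/m4. Below is a scalar C model of that loop; vlmax_e32_m4() and process_vl() are hypothetical stand-ins for vsetvli and for the vle32.v / AES-rounds / vse32.v body.

    #include <stddef.h>

    static size_t vlmax_e32_m4(void) { return 16; } /* hypothetical VLMAX */

    /* Stand-in for: vle32.v v24, (in); AES rounds; vse32.v v24, (out). */
    static void process_vl(const unsigned char *in, unsigned char *out,
                           size_t vl)
    {
        (void)in; (void)out; (void)vl;
    }

    static void ecb_strip_mine(const unsigned char *in, unsigned char *out,
                               size_t length)
    {
        size_t len32 = length >> 2;                    /* srli LEN32, LEN, 2 */
        while (len32 > 0) {                            /* bnez LEN32, 1b */
            size_t vlmax = vlmax_e32_m4();
            size_t vl = len32 < vlmax ? len32 : vlmax; /* vsetvli VL, LEN32 */
            process_vl(in, out, vl);
            in  += vl << 2;                            /* slli T0, VL, 2; add INP, INP, T0 */
            out += vl << 2;                            /* add OUTP, OUTP, T0 */
            len32 -= vl;                               /* sub LEN32, LEN32, VL */
        }
    }

Because every ECB block is independent, one vector group (m4, four registers) can hold several 16-byte blocks at once while the .vs instruction forms broadcast each round key from a single register; CBC encryption cannot be batched this way, since each block chains on the previous ciphertext.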