author    Phoebe Chen <phoebe.chen@sifive.com>  2023-09-24 20:07:48 -0700
committer Hugo Landau <hlandau@openssl.org>     2023-10-26 15:55:50 +0100
commit    751a22194e4dc52532d2aedd49fe62dbea0badc8 (patch)
tree      f4a29231180e7697370b5efd94feb912da375324 /crypto
parent    1c25bc2e3f5e9db90a1d7bc4f0bae1b59e5f2c4c (diff)
riscv: Provide vector crypto implementation of AES-ECB mode.
This patch provides stream and multi-block implementations for
AES-128-ECB, AES-192-ECB, and AES-256-ECB to accelerate AES-ECB.

Also, refactor functions to share the same variable declarations in
aes-riscv64-zvkned.pl.

Signed-off-by: Phoebe Chen <phoebe.chen@sifive.com>

Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Hugo Landau <hlandau@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/21923)
Diffstat (limited to 'crypto')
-rw-r--r--  crypto/aes/asm/aes-riscv64-zvkned.pl | 1130
 1 file changed, 670 insertions(+), 460 deletions(-)
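
As context for the routines this patch adds (not part of the patch itself), below is a minimal C sketch of how the new ECB entry points could be driven directly, assuming the prototypes quoted in the diff and a RISC-V CPU with the Zvkned vector-crypto extension. The helper name ecb_encrypt_example and the hard-coded key are illustrative only; real callers go through the EVP layer, which selects these routines at runtime.

    /*
     * Illustrative sketch only -- not part of this patch.  It exercises the
     * new ECB entry points directly, assuming the prototypes shown in the
     * diff below and Zvkned hardware support.
     */
    #include <stddef.h>
    #include <string.h>
    #include <openssl/aes.h>

    /* Prototypes as declared in the diff; internal symbols of libcrypto. */
    int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits,
                                     AES_KEY *key);
    void rv64i_zvkned_ecb_encrypt(const unsigned char *in, unsigned char *out,
                                  size_t length, const AES_KEY *key,
                                  const int enc);

    int ecb_encrypt_example(void)      /* hypothetical helper, for illustration */
    {
        static const unsigned char user_key[16] = {
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
        };
        unsigned char in[64], out[64]; /* length must be a multiple of 16 */
        AES_KEY key;

        memset(in, 0, sizeof(in));

        /* Key schedule; the assembly returns 1 on success. */
        if (rv64i_zvkned_set_encrypt_key(user_key, 128, &key) != 1)
            return 0;

        /* enc = 1 requests the encryption direction, as with the CBC routines. */
        rv64i_zvkned_ecb_encrypt(in, out, sizeof(in), &key, 1);
        return 1;
    }
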
diff --git a/crypto/aes/asm/aes-riscv64-zvkned.pl b/crypto/aes/asm/aes-riscv64-zvkned.pl
index 319808b51c..4c0292781d 100644
--- a/crypto/aes/asm/aes-riscv64-zvkned.pl
+++ b/crypto/aes/asm/aes-riscv64-zvkned.pl
@@ -58,44 +58,39 @@ my $code=<<___;
.text
___
-{
-###############################################################################
-# void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out,
-# size_t length, const AES_KEY *key,
-# unsigned char *ivec, const int enc);
-my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
-my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
-my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
- $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
- $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
- $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
+my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
+ $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
+ $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
+ $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));
# Load all 11 round keys to v1-v11 registers.
sub aes_128_load_key {
+ my $KEYP = shift;
+
my $code=<<___;
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
- @{[vle32_v $v1, $KEYP]}
+ @{[vle32_v $V1, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v2, $KEYP]}
+ @{[vle32_v $V2, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v3, $KEYP]}
+ @{[vle32_v $V3, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v4, $KEYP]}
+ @{[vle32_v $V4, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v5, $KEYP]}
+ @{[vle32_v $V5, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v6, $KEYP]}
+ @{[vle32_v $V6, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v7, $KEYP]}
+ @{[vle32_v $V7, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v8, $KEYP]}
+ @{[vle32_v $V8, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v9, $KEYP]}
+ @{[vle32_v $V9, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v10, $KEYP]}
+ @{[vle32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v11, $KEYP]}
+ @{[vle32_v $V11, $KEYP]}
___
return $code;
@@ -103,33 +98,35 @@ ___
# Load all 13 round keys to v1-v13 registers.
sub aes_192_load_key {
+ my $KEYP = shift;
+
my $code=<<___;
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
- @{[vle32_v $v1, $KEYP]}
+ @{[vle32_v $V1, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v2, $KEYP]}
+ @{[vle32_v $V2, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v3, $KEYP]}
+ @{[vle32_v $V3, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v4, $KEYP]}
+ @{[vle32_v $V4, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v5, $KEYP]}
+ @{[vle32_v $V5, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v6, $KEYP]}
+ @{[vle32_v $V6, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v7, $KEYP]}
+ @{[vle32_v $V7, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v8, $KEYP]}
+ @{[vle32_v $V8, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v9, $KEYP]}
+ @{[vle32_v $V9, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v10, $KEYP]}
+ @{[vle32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v11, $KEYP]}
+ @{[vle32_v $V11, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v12, $KEYP]}
+ @{[vle32_v $V12, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v13, $KEYP]}
+ @{[vle32_v $V13, $KEYP]}
___
return $code;
@@ -137,37 +134,39 @@ ___
# Load all 15 round keys to v1-v15 registers.
sub aes_256_load_key {
+ my $KEYP = shift;
+
my $code=<<___;
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
- @{[vle32_v $v1, $KEYP]}
+ @{[vle32_v $V1, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v2, $KEYP]}
+ @{[vle32_v $V2, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v3, $KEYP]}
+ @{[vle32_v $V3, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v4, $KEYP]}
+ @{[vle32_v $V4, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v5, $KEYP]}
+ @{[vle32_v $V5, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v6, $KEYP]}
+ @{[vle32_v $V6, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v7, $KEYP]}
+ @{[vle32_v $V7, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v8, $KEYP]}
+ @{[vle32_v $V8, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v9, $KEYP]}
+ @{[vle32_v $V9, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v10, $KEYP]}
+ @{[vle32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v11, $KEYP]}
+ @{[vle32_v $V11, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v12, $KEYP]}
+ @{[vle32_v $V12, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v13, $KEYP]}
+ @{[vle32_v $V13, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v14, $KEYP]}
+ @{[vle32_v $V14, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vle32_v $v15, $KEYP]}
+ @{[vle32_v $V15, $KEYP]}
___
return $code;
@@ -176,17 +175,17 @@ ___
# aes-128 encryption with round keys v1-v11
sub aes_128_encrypt {
my $code=<<___;
- @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
- @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
- @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
- @{[vaesem_vs $v24, $v4]} # with round key w[12,15]
- @{[vaesem_vs $v24, $v5]} # with round key w[16,19]
- @{[vaesem_vs $v24, $v6]} # with round key w[20,23]
- @{[vaesem_vs $v24, $v7]} # with round key w[24,27]
- @{[vaesem_vs $v24, $v8]} # with round key w[28,31]
- @{[vaesem_vs $v24, $v9]} # with round key w[32,35]
- @{[vaesem_vs $v24, $v10]} # with round key w[36,39]
- @{[vaesef_vs $v24, $v11]} # with round key w[40,43]
+ @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3]
+ @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7]
+ @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11]
+ @{[vaesem_vs $V24, $V4]} # with round key w[12,15]
+ @{[vaesem_vs $V24, $V5]} # with round key w[16,19]
+ @{[vaesem_vs $V24, $V6]} # with round key w[20,23]
+ @{[vaesem_vs $V24, $V7]} # with round key w[24,27]
+ @{[vaesem_vs $V24, $V8]} # with round key w[28,31]
+ @{[vaesem_vs $V24, $V9]} # with round key w[32,35]
+ @{[vaesem_vs $V24, $V10]} # with round key w[36,39]
+ @{[vaesef_vs $V24, $V11]} # with round key w[40,43]
___
return $code;
@@ -195,17 +194,17 @@ ___
# aes-128 decryption with round keys v1-v11
sub aes_128_decrypt {
my $code=<<___;
- @{[vaesz_vs $v24, $v11]} # with round key w[40,43]
- @{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
- @{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
- @{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
- @{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
- @{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
- @{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
- @{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
- @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
- @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
- @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
+ @{[vaesz_vs $V24, $V11]} # with round key w[40,43]
+ @{[vaesdm_vs $V24, $V10]} # with round key w[36,39]
+ @{[vaesdm_vs $V24, $V9]} # with round key w[32,35]
+ @{[vaesdm_vs $V24, $V8]} # with round key w[28,31]
+ @{[vaesdm_vs $V24, $V7]} # with round key w[24,27]
+ @{[vaesdm_vs $V24, $V6]} # with round key w[20,23]
+ @{[vaesdm_vs $V24, $V5]} # with round key w[16,19]
+ @{[vaesdm_vs $V24, $V4]} # with round key w[12,15]
+ @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11]
+ @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7]
+ @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3]
___
return $code;
@@ -214,19 +213,19 @@ ___
# aes-192 encryption with round keys v1-v13
sub aes_192_encrypt {
my $code=<<___;
- @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
- @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
- @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
- @{[vaesem_vs $v24, $v4]} # with round key w[12,15]
- @{[vaesem_vs $v24, $v5]} # with round key w[16,19]
- @{[vaesem_vs $v24, $v6]} # with round key w[20,23]
- @{[vaesem_vs $v24, $v7]} # with round key w[24,27]
- @{[vaesem_vs $v24, $v8]} # with round key w[28,31]
- @{[vaesem_vs $v24, $v9]} # with round key w[32,35]
- @{[vaesem_vs $v24, $v10]} # with round key w[36,39]
- @{[vaesem_vs $v24, $v11]} # with round key w[40,43]
- @{[vaesem_vs $v24, $v12]} # with round key w[44,47]
- @{[vaesef_vs $v24, $v13]} # with round key w[48,51]
+ @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3]
+ @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7]
+ @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11]
+ @{[vaesem_vs $V24, $V4]} # with round key w[12,15]
+ @{[vaesem_vs $V24, $V5]} # with round key w[16,19]
+ @{[vaesem_vs $V24, $V6]} # with round key w[20,23]
+ @{[vaesem_vs $V24, $V7]} # with round key w[24,27]
+ @{[vaesem_vs $V24, $V8]} # with round key w[28,31]
+ @{[vaesem_vs $V24, $V9]} # with round key w[32,35]
+ @{[vaesem_vs $V24, $V10]} # with round key w[36,39]
+ @{[vaesem_vs $V24, $V11]} # with round key w[40,43]
+ @{[vaesem_vs $V24, $V12]} # with round key w[44,47]
+ @{[vaesef_vs $V24, $V13]} # with round key w[48,51]
___
return $code;
@@ -235,19 +234,19 @@ ___
# aes-192 decryption with round keys v1-v13
sub aes_192_decrypt {
my $code=<<___;
- @{[vaesz_vs $v24, $v13]} # with round key w[48,51]
- @{[vaesdm_vs $v24, $v12]} # with round key w[44,47]
- @{[vaesdm_vs $v24, $v11]} # with round key w[40,43]
- @{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
- @{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
- @{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
- @{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
- @{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
- @{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
- @{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
- @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
- @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
- @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
+ @{[vaesz_vs $V24, $V13]} # with round key w[48,51]
+ @{[vaesdm_vs $V24, $V12]} # with round key w[44,47]
+ @{[vaesdm_vs $V24, $V11]} # with round key w[40,43]
+ @{[vaesdm_vs $V24, $V10]} # with round key w[36,39]
+ @{[vaesdm_vs $V24, $V9]} # with round key w[32,35]
+ @{[vaesdm_vs $V24, $V8]} # with round key w[28,31]
+ @{[vaesdm_vs $V24, $V7]} # with round key w[24,27]
+ @{[vaesdm_vs $V24, $V6]} # with round key w[20,23]
+ @{[vaesdm_vs $V24, $V5]} # with round key w[16,19]
+ @{[vaesdm_vs $V24, $V4]} # with round key w[12,15]
+ @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11]
+ @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7]
+ @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3]
___
return $code;
@@ -256,21 +255,21 @@ ___
# aes-256 encryption with round keys v1-v15
sub aes_256_encrypt {
my $code=<<___;
- @{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
- @{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
- @{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
- @{[vaesem_vs $v24, $v4]} # with round key w[12,15]
- @{[vaesem_vs $v24, $v5]} # with round key w[16,19]
- @{[vaesem_vs $v24, $v6]} # with round key w[20,23]
- @{[vaesem_vs $v24, $v7]} # with round key w[24,27]
- @{[vaesem_vs $v24, $v8]} # with round key w[28,31]
- @{[vaesem_vs $v24, $v9]} # with round key w[32,35]
- @{[vaesem_vs $v24, $v10]} # with round key w[36,39]
- @{[vaesem_vs $v24, $v11]} # with round key w[40,43]
- @{[vaesem_vs $v24, $v12]} # with round key w[44,47]
- @{[vaesem_vs $v24, $v13]} # with round key w[48,51]
- @{[vaesem_vs $v24, $v14]} # with round key w[52,55]
- @{[vaesef_vs $v24, $v15]} # with round key w[56,59]
+ @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3]
+ @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7]
+ @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11]
+ @{[vaesem_vs $V24, $V4]} # with round key w[12,15]
+ @{[vaesem_vs $V24, $V5]} # with round key w[16,19]
+ @{[vaesem_vs $V24, $V6]} # with round key w[20,23]
+ @{[vaesem_vs $V24, $V7]} # with round key w[24,27]
+ @{[vaesem_vs $V24, $V8]} # with round key w[28,31]
+ @{[vaesem_vs $V24, $V9]} # with round key w[32,35]
+ @{[vaesem_vs $V24, $V10]} # with round key w[36,39]
+ @{[vaesem_vs $V24, $V11]} # with round key w[40,43]
+ @{[vaesem_vs $V24, $V12]} # with round key w[44,47]
+ @{[vaesem_vs $V24, $V13]} # with round key w[48,51]
+ @{[vaesem_vs $V24, $V14]} # with round key w[52,55]
+ @{[vaesef_vs $V24, $V15]} # with round key w[56,59]
___
return $code;
@@ -279,26 +278,34 @@ ___
# aes-256 decryption with round keys v1-v15
sub aes_256_decrypt {
my $code=<<___;
- @{[vaesz_vs $v24, $v15]} # with round key w[56,59]
- @{[vaesdm_vs $v24, $v14]} # with round key w[52,55]
- @{[vaesdm_vs $v24, $v13]} # with round key w[48,51]
- @{[vaesdm_vs $v24, $v12]} # with round key w[44,47]
- @{[vaesdm_vs $v24, $v11]} # with round key w[40,43]
- @{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
- @{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
- @{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
- @{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
- @{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
- @{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
- @{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
- @{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
- @{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
- @{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
+ @{[vaesz_vs $V24, $V15]} # with round key w[56,59]
+ @{[vaesdm_vs $V24, $V14]} # with round key w[52,55]
+ @{[vaesdm_vs $V24, $V13]} # with round key w[48,51]
+ @{[vaesdm_vs $V24, $V12]} # with round key w[44,47]
+ @{[vaesdm_vs $V24, $V11]} # with round key w[40,43]
+ @{[vaesdm_vs $V24, $V10]} # with round key w[36,39]
+ @{[vaesdm_vs $V24, $V9]} # with round key w[32,35]
+ @{[vaesdm_vs $V24, $V8]} # with round key w[28,31]
+ @{[vaesdm_vs $V24, $V7]} # with round key w[24,27]
+ @{[vaesdm_vs $V24, $V6]} # with round key w[20,23]
+ @{[vaesdm_vs $V24, $V5]} # with round key w[16,19]
+ @{[vaesdm_vs $V24, $V4]} # with round key w[12,15]
+ @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11]
+ @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7]
+ @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3]
___
return $code;
}
+{
+###############################################################################
+# void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out,
+# size_t length, const AES_KEY *key,
+# unsigned char *ivec, const int enc);
+my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
+my ($T0, $T1, $ROUNDS) = ("t0", "t1", "t2");
+
$code .= <<___;
.p2align 3
.globl rv64i_zvkned_cbc_encrypt
@@ -311,17 +318,17 @@ rv64i_zvkned_cbc_encrypt:
bnez $T1, L_end
# Load number of rounds
- lwu $rounds, 240($KEYP)
+ lwu $ROUNDS, 240($KEYP)
# Get proper routine for key size
li $T0, 10
- beq $rounds, $T0, L_cbc_enc_128
+ beq $ROUNDS, $T0, L_cbc_enc_128
li $T0, 12
- beq $rounds, $T0, L_cbc_enc_192
+ beq $ROUNDS, $T0, L_cbc_enc_192
li $T0, 14
- beq $rounds, $T0, L_cbc_enc_256
+ beq $ROUNDS, $T0, L_cbc_enc_256
ret
.size rv64i_zvkned_cbc_encrypt,.-rv64i_zvkned_cbc_encrypt
@@ -331,24 +338,24 @@ $code .= <<___;
.p2align 3
L_cbc_enc_128:
# Load all 11 round keys to v1-v11 registers.
- @{[aes_128_load_key]}
+ @{[aes_128_load_key $KEYP]}
# Load IV.
- @{[vle32_v $v16, ($IVP)]}
+ @{[vle32_v $V16, $IVP]}
- @{[vle32_v $v24, ($INP)]}
- @{[vxor_vv $v24, $v24, $v16]}
+ @{[vle32_v $V24, $INP]}
+ @{[vxor_vv $V24, $V24, $V16]}
j 2f
1:
- @{[vle32_v $v17, ($INP)]}
- @{[vxor_vv $v24, $v24, $v17]}
+ @{[vle32_v $V17, $INP]}
+ @{[vxor_vv $V24, $V24, $V17]}
2:
# AES body
@{[aes_128_encrypt]}
- @{[vse32_v $v24, ($OUTP)]}
+ @{[vse32_v $V24, $OUTP]}
addi $INP, $INP, 16
addi $OUTP, $OUTP, 16
@@ -356,7 +363,7 @@ L_cbc_enc_128:
bnez $LEN, 1b
- @{[vse32_v $v24, ($IVP)]}
+ @{[vse32_v $V24, $IVP]}
ret
.size L_cbc_enc_128,.-L_cbc_enc_128
@@ -366,24 +373,24 @@ $code .= <<___;
.p2align 3
L_cbc_enc_192:
# Load all 13 round keys to v1-v13 registers.
- @{[aes_192_load_key]}
+ @{[aes_192_load_key $KEYP]}
# Load IV.
- @{[vle32_v $v16, ($IVP)]}
+ @{[vle32_v $V16, $IVP]}
- @{[vle32_v $v24, ($INP)]}
- @{[vxor_vv $v24, $v24, $v16]}
+ @{[vle32_v $V24, $INP]}
+ @{[vxor_vv $V24, $V24, $V16]}
j 2f
1:
- @{[vle32_v $v17, ($INP)]}
- @{[vxor_vv $v24, $v24, $v17]}
+ @{[vle32_v $V17, $INP]}
+ @{[vxor_vv $V24, $V24, $V17]}
2:
# AES body
@{[aes_192_encrypt]}
- @{[vse32_v $v24, ($OUTP)]}
+ @{[vse32_v $V24, $OUTP]}
addi $INP, $INP, 16
addi $OUTP, $OUTP, 16
@@ -391,7 +398,7 @@ L_cbc_enc_192:
bnez $LEN, 1b
- @{[vse32_v $v24, ($IVP)]}
+ @{[vse32_v $V24, $IVP]}
ret
.size L_cbc_enc_192,.-L_cbc_enc_192
@@ -401,24 +408,24 @@ $code .= <<___;
.p2align 3
L_cbc_enc_256:
# Load all 15 round keys to v1-v15 registers.
- @{[aes_256_load_key]}
+ @{[aes_256_load_key $KEYP]}
# Load IV.
- @{[vle32_v $v16, ($IVP)]}
+ @{[vle32_v $V16, $IVP]}
- @{[vle32_v $v24, ($INP)]}
- @{[vxor_vv $v24, $v24, $v16]}
+ @{[vle32_v $V24, $INP]}
+ @{[vxor_vv $V24, $V24, $V16]}
j 2f
1:
- @{[vle32_v $v17, ($INP)]}
- @{[vxor_vv $v24, $v24, $v17]}
+ @{[vle32_v $V17, $INP]}
+ @{[vxor_vv $V24, $V24, $V17]}
2:
# AES body
@{[aes_256_encrypt]}
- @{[vse32_v $v24, ($OUTP)]}
+ @{[vse32_v $V24, $OUTP]}
addi $INP, $INP, 16
addi $OUTP, $OUTP, 16
@@ -426,7 +433,7 @@ L_cbc_enc_256:
bnez $LEN, 1b
- @{[vse32_v $v24, ($IVP)]}
+ @{[vse32_v $V24, $IVP]}
ret
.size L_cbc_enc_256,.-L_cbc_enc_256
@@ -436,13 +443,6 @@ ___
# void rv64i_zvkned_cbc_decrypt(const unsigned char *in, unsigned char *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivec, const int enc);
-my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
-my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
-my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
- $v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
- $v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
- $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
-) = map("v$_",(0..31));
$code .= <<___;
.p2align 3
@@ -456,17 +456,17 @@ rv64i_zvkned_cbc_decrypt:
bnez $T1, L_end
# Load number of rounds
- lwu $rounds, 240($KEYP)
+ lwu $ROUNDS, 240($KEYP)
# Get proper routine for key size
li $T0, 10
- beq $rounds, $T0, L_cbc_dec_128
+ beq $ROUNDS, $T0, L_cbc_dec_128
li $T0, 12
- beq $rounds, $T0, L_cbc_dec_192
+ beq $ROUNDS, $T0, L_cbc_dec_192
li $T0, 14
- beq $rounds, $T0, L_cbc_dec_256
+ beq $ROUNDS, $T0, L_cbc_dec_256
ret
.size rv64i_zvkned_cbc_decrypt,.-rv64i_zvkned_cbc_decrypt
@@ -476,34 +476,34 @@ $code .= <<___;
.p2align 3
L_cbc_dec_128:
# Load all 11 round keys to v1-v11 registers.
- @{[aes_128_load_key]}
+ @{[aes_128_load_key $KEYP]}
# Load IV.
- @{[vle32_v $v16, ($IVP)]}
+ @{[vle32_v $V16, $IVP]}
- @{[vle32_v $v24, ($INP)]}
- @{[vmv_v_v $v17, $v24]}
+ @{[vle32_v $V24, $INP]}
+ @{[vmv_v_v $V17, $V24]}
j 2f
1:
- @{[vle32_v $v24, ($INP)]}
- @{[vmv_v_v $v17, $v24]}
+ @{[vle32_v $V24, $INP]}
+ @{[vmv_v_v $V17, $V24]}
addi $OUTP, $OUTP, 16
2:
# AES body
@{[aes_128_decrypt]}
- @{[vxor_vv $v24, $v24, $v16]}
- @{[vse32_v $v24, ($OUTP)]}
- @{[vmv_v_v $v16, $v17]}
+ @{[vxor_vv $V24, $V24, $V16]}
+ @{[vse32_v $V24, $OUTP]}
+ @{[vmv_v_v $V16, $V17]}
addi $LEN, $LEN, -16
addi $INP, $INP, 16
bnez $LEN, 1b
- @{[vse32_v $v16, ($IVP)]}
+ @{[vse32_v $V16, $IVP]}
ret
.size L_cbc_dec_128,.-L_cbc_dec_128
@@ -513,34 +513,34 @@ $code .= <<___;
.p2align 3
L_cbc_dec_192:
# Load all 13 round keys to v1-v13 registers.
- @{[aes_192_load_key]}
+ @{[aes_192_load_key $KEYP]}
# Load IV.
- @{[vle32_v $v16, ($IVP)]}
+ @{[vle32_v $V16, $IVP]}
- @{[vle32_v $v24, ($INP)]}
- @{[vmv_v_v $v17, $v24]}
+ @{[vle32_v $V24, $INP]}
+ @{[vmv_v_v $V17, $V24]}
j 2f
1:
- @{[vle32_v $v24, ($INP)]}
- @{[vmv_v_v $v17, $v24]}
+ @{[vle32_v $V24, $INP]}
+ @{[vmv_v_v $V17, $V24]}
addi $OUTP, $OUTP, 16
2:
# AES body
@{[aes_192_decrypt]}
- @{[vxor_vv $v24, $v24, $v16]}
- @{[vse32_v $v24, ($OUTP)]}
- @{[vmv_v_v $v16, $v17]}
+ @{[vxor_vv $V24, $V24, $V16]}
+ @{[vse32_v $V24, $OUTP]}
+ @{[vmv_v_v $V16, $V17]}
addi $LEN, $LEN, -16
addi $INP, $INP, 16
bnez $LEN, 1b
- @{[vse32_v $v16, ($IVP)]}
+ @{[vse32_v $V16, $IVP]}
ret
.size L_cbc_dec_192,.-L_cbc_dec_192
@@ -550,54 +550,278 @@ $code .= <<___;
.p2align 3
L_cbc_dec_256:
# Load all 15 round keys to v1-v15 registers.
- @{[aes_256_load_key]}
+ @{[aes_256_load_key $KEYP]}
# Load IV.
- @{[vle32_v $v16, ($IVP)]}
+ @{[vle32_v $V16, $IVP]}
- @{[vle32_v $v24, ($INP)]}
- @{[vmv_v_v $v17, $v24]}
+ @{[vle32_v $V24, $INP]}
+ @{[vmv_v_v $V17, $V24]}
j 2f
1:
- @{[vle32_v $v24, ($INP)]}
- @{[vmv_v_v $v17, $v24]}
+ @{[vle32_v $V24, $INP]}
+ @{[vmv_v_v $V17, $V24]}
addi $OUTP, $OUTP, 16
2:
# AES body
@{[aes_256_decrypt]}
- @{[vxor_vv $v24, $v24, $v16]}
- @{[vse32_v $v24, ($OUTP)]}
- @{[vmv_v_v $v16, $v17]}
+ @{[vxor_vv $V24, $V24, $V16]}
+ @{[vse32_v $V24, $OUTP]}
+ @{[vmv_v_v $V16, $V17]}
addi $LEN, $LEN, -16
addi $INP, $INP, 16
bnez $LEN, 1b
- @{[vse32_v $v16, ($IVP)]}
+ @{[vse32_v $V16, $IVP]}
ret
.size L_cbc_dec_256,.-L_cbc_dec_256
___
+}
+
+{
+###############################################################################
+# void rv64i_zvkned_ecb_encrypt(const unsigned char *in, unsigned char *out,
+# size_t length, const AES_KEY *key,
+# const int enc);
+my ($INP, $OUTP, $LEN, $KEYP, $ENC) = ("a0", "a1", "a2", "a3", "a4");
+my ($REMAIN_LEN) = ("a5");
+my ($VL) = ("a6");
+my ($T0, $T1, $ROUNDS) = ("t0", "t1", "t2");
+my ($LEN32) = ("t3");
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvkned_ecb_encrypt
+.type rv64i_zvkned_ecb_encrypt,\@function
+rv64i_zvkned_ecb_encrypt:
+ # Make the LEN become e32 length.
+ srli $LEN32, $LEN, 2
+
+ # Load number of rounds
+ lwu $ROUNDS, 240($KEYP)
+
+ # Get proper routine for key size
+ li $T0, 10
+ beq $ROUNDS, $T0, L_ecb_enc_128
+
+ li $T0, 12
+ beq $ROUNDS, $T0, L_ecb_enc_192
+
+ li $T0, 14
+ beq $ROUNDS, $T0, L_ecb_enc_256
+
+ ret
+.size rv64i_zvkned_ecb_encrypt,.-rv64i_zvkned_ecb_encrypt
+___
+
+$code .= <<___;
+.p2align 3
+L_ecb_enc_128:
+ # Load all 11 round keys to v1-v11 registers.
+ @{[aes_128_load_key $KEYP]}
+
+1:
+ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
+ slli $T0, $VL, 2
+ sub $LEN32, $LEN32, $VL
+
+ @{[vle32_v $V24, $INP]}
+
+ # AES body
+ @{[aes_128_encrypt]}
+
+ @{[vse32_v $V24, $OUTP]}
+
+ add $INP, $INP, $T0
+ add $OUTP, $OUTP, $T0
+
+ bnez $LEN32, 1b
+
+ ret
+.size L_ecb_enc_128,.-L_ecb_enc_128
+___
+
+$code .= <<___;
+.p2align 3
+L_ecb_enc_192:
+ # Load all 13 round keys to v1-v13 registers.
+ @{[aes_192_load_key $KEYP]}
+
+1:
+ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
+ slli $T0, $VL, 2
+ sub $LEN32, $LEN32, $VL
+
+ @{[vle32_v $V24, $INP]}
+
+ # AES body
+ @{[aes_192_encrypt]}
+
+ @{[vse32_v $V24, $OUTP]}
+
+ add $INP, $INP, $T0
+ add $OUTP, $OUTP, $T0
+
+ bnez $LEN32, 1b
+
+ ret
+.size L_ecb_enc_192,.-L_ecb_enc_192
+___
+
+$code .= <<___;
+.p2align 3
+L_ecb_enc_256:
+ # Load all 15 round keys to v1-v15 registers.
+ @{[aes_256_load_key $KEYP]}
+
+1:
+ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
+ slli $T0, $VL, 2
+ sub $LEN32, $LEN32, $VL
+
+ @{[vle32_v $V24, $INP]}
+
+ # AES body
+ @{[aes_256_encrypt]}
+
+ @{[vse32_v $V24, $OUTP]}
+
+ add $INP, $INP, $T0
+ add $OUTP, $OUTP, $T0
+
+ bnez $LEN32, 1b
+
+ ret
+.size L_ecb_enc_256,.-L_ecb_enc_256
+___
+
+###############################################################################
+# void rv64i_zvkned_ecb_decrypt(const unsigned char *in, unsigned char *out,
+# size_t length, const AES_KEY *key,
+# const int enc);
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvkned_ecb_decrypt
+.type rv64i_zvkned_ecb_decrypt,\@function
+rv64i_zvkned_ecb_decrypt:
+ # Make the LEN become e32 length.
+ srli $LEN32, $LEN, 2
+
+ # Load number of rounds
+ lwu $ROUNDS, 240($KEYP)
+
+ # Get proper routine for key size
+ li $T0, 10
+ beq $ROUNDS, $T0, L_ecb_dec_128
+
+ li $T0, 12
+ beq $ROUNDS, $T0, L_ecb_dec_192
+
+ li $T0, 14
+ beq $ROUNDS, $T0, L_ecb_dec_256
+
+ ret
+.size rv64i_zvkned_ecb_decrypt,.-rv64i_zvkned_ecb_decrypt
+___
+
+$code .= <<___;
+.p2align 3
+L_ecb_dec_128:
+ # Load all 11 round keys to v1-v11 registers.
+ @{[aes_128_load_key $KEYP]}
+
+1:
+ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
+ slli $T0, $VL, 2
+ sub $LEN32, $LEN32, $VL
+
+ @{[vle32_v $V24, $INP]}
+
+ # AES body
+ @{[aes_128_decrypt]}
+
+ @{[vse32_v $V24, $OUTP]}
+
+ add $INP, $INP, $T0
+ add $OUTP, $OUTP, $T0
+
+ bnez $LEN32, 1b
+
+ ret
+.size L_ecb_dec_128,.-L_ecb_dec_128
+___
+
+$code .= <<___;
+.p2align 3
+L_ecb_dec_192:
+ # Load all 13 round keys to v1-v13 registers.
+ @{[aes_192_load_key $KEYP]}
+
+1:
+ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
+ slli $T0, $VL, 2
+ sub $LEN32, $LEN32, $VL
+
+ @{[vle32_v $V24, $INP]}
+
+ # AES body
+ @{[aes_192_decrypt]}
+
+ @{[vse32_v $V24, $OUTP]}
+
+ add $INP, $INP, $T0
+ add $OUTP, $OUTP, $T0
+
+ bnez $LEN32, 1b
+
+ ret
+.size L_ecb_dec_192,.-L_ecb_dec_192
+___
+
+$code .= <<___;
+.p2align 3
+L_ecb_dec_256:
+ # Load all 15 round keys to v1-v15 registers.
+ @{[aes_256_load_key $KEYP]}
+
+1:
+ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
+ slli $T0, $VL, 2
+ sub $LEN32, $LEN32, $VL
+
+ @{[vle32_v $V24, $INP]}
+
+ # AES body
+ @{[aes_256_decrypt]}
+
+ @{[vse32_v $V24, $OUTP]}
+
+ add $INP, $INP, $T0
+ add $OUTP, $OUTP, $T0
+
+ bnez $LEN32, 1b
+
+ ret
+.size L_ecb_dec_256,.-L_ecb_dec_256
+___
}
+{
################################################################################
# int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits,
# AES_KEY *key)
# int rv64i_zvkned_set_decrypt_key(const unsigned char *userKey, const int bits,
# AES_KEY *key)
-{
my ($UKEY,$BITS,$KEYP) = ("a0", "a1", "a2");
my ($T0,$T1,$T4) = ("t1", "t2", "t4");
-my ($v0, $v1, $v2, $v3, $v4, $v5, $v6,
- $v7, $v8, $v9, $v10, $v11, $v12,
- $v13, $v14, $v15, $v16, $v17, $v18,
- $v19, $v20, $v21, $v22, $v23, $v24,
-) = map("v$_",(0..24));
$code .= <<___;
.p2align 3
@@ -647,42 +871,42 @@ L_set_key_128:
@{[vsetivli__x0_4_e32_m1_tu_mu]}
# Load the key
- @{[vle32_v $v10, ($UKEY)]}
+ @{[vle32_v $V10, ($UKEY)]}
# Generate keys for round 2-11 into registers v11-v20.
- @{[vaeskf1_vi $v11, $v10, 1]} # v11 <- rk2 (w[ 4, 7])
- @{[vaeskf1_vi $v12, $v11, 2]} # v12 <- rk3 (w[ 8,11])
- @{[vaeskf1_vi $v13, $v12, 3]} # v13 <- rk4 (w[12,15])
- @{[vaeskf1_vi $v14, $v13, 4]} # v14 <- rk5 (w[16,19])
- @{[vaeskf1_vi $v15, $v14, 5]} # v15 <- rk6 (w[20,23])
- @{[vaeskf1_vi $v16, $v15, 6]} # v16 <- rk7 (w[24,27])
- @{[vaeskf1_vi $v17, $v16, 7]} # v17 <- rk8 (w[28,31])
- @{[vaeskf1_vi $v18, $v17, 8]} # v18 <- rk9 (w[32,35])
- @{[vaeskf1_vi $v19, $v18, 9]} # v19 <- rk10 (w[36,39])
- @{[vaeskf1_vi $v20, $v19, 10]} # v20 <- rk11 (w[40,43])
+ @{[vaeskf1_vi $V11, $V10, 1]} # v11 <- rk2 (w[ 4, 7])
+ @{[vaeskf1_vi $V12, $V11, 2]} # v12 <- rk3 (w[ 8,11])
+ @{[vaeskf1_vi $V13, $V12, 3]} # v13 <- rk4 (w[12,15])
+ @{[vaeskf1_vi $V14, $V13, 4]} # v14 <- rk5 (w[16,19])
+ @{[vaeskf1_vi $V15, $V14, 5]} # v15 <- rk6 (w[20,23])
+ @{[vaeskf1_vi $V16, $V15, 6]} # v16 <- rk7 (w[24,27])
+ @{[vaeskf1_vi $V17, $V16, 7]} # v17 <- rk8 (w[28,31])
+ @{[vaeskf1_vi $V18, $V17, 8]} # v18 <- rk9 (w[32,35])
+ @{[vaeskf1_vi $V19, $V18, 9]} # v19 <- rk10 (w[36,39])
+ @{[vaeskf1_vi $V20, $V19, 10]} # v20 <- rk11 (w[40,43])
# Store the round keys
- @{[vse32_v $v10, ($KEYP)]}
+ @{[vse32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v11, ($KEYP)]}
+ @{[vse32_v $V11, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v12, ($KEYP)]}
+ @{[vse32_v $V12, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v13, ($KEYP)]}
+ @{[vse32_v $V13, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v14, ($KEYP)]}
+ @{[vse32_v $V14, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v15, ($KEYP)]}
+ @{[vse32_v $V15, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v16, ($KEYP)]}
+ @{[vse32_v $V16, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v17, ($KEYP)]}
+ @{[vse32_v $V17, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v18, ($KEYP)]}
+ @{[vse32_v $V18, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v19, ($KEYP)]}
+ @{[vse32_v $V19, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v20, ($KEYP)]}
+ @{[vse32_v $V20, $KEYP]}
li a0, 1
ret
@@ -699,66 +923,66 @@ L_set_key_256:
@{[vsetivli__x0_4_e32_m1_tu_mu]}
# Load the key
- @{[vle32_v $v10, ($UKEY)]}
+ @{[vle32_v $V10, ($UKEY)]}
addi $UKEY, $UKEY, 16
- @{[vle32_v $v11, ($UKEY)]}
+ @{[vle32_v $V11, ($UKEY)]}
- @{[vmv_v_v $v12, $v10]}
- @{[vaeskf2_vi $v12, $v11, 2]}
- @{[vmv_v_v $v13, $v11]}
- @{[vaeskf2_vi $v13, $v12, 3]}
- @{[vmv_v_v $v14, $v12]}
- @{[vaeskf2_vi $v14, $v13, 4]}
- @{[vmv_v_v $v15, $v13]}
- @{[vaeskf2_vi $v15, $v14, 5]}
- @{[vmv_v_v $v16, $v14]}
- @{[vaeskf2_vi $v16, $v15, 6]}
- @{[vmv_v_v $v17, $v15]}
- @{[vaeskf2_vi $v17, $v16, 7]}
- @{[vmv_v_v $v18, $v16]}
- @{[vaeskf2_vi $v18, $v17, 8]}
- @{[vmv_v_v $v19, $v17]}
- @{[vaeskf2_vi $v19, $v18, 9]}
- @{[vmv_v_v $v20, $v18]}
- @{[vaeskf2_vi $v20, $v19, 10]}
- @{[vmv_v_v $v21, $v19]}
- @{[vaeskf2_vi $v21, $v20, 11]}
- @{[vmv_v_v $v22, $v20]}
- @{[vaeskf2_vi $v22, $v21, 12]}
- @{[vmv_v_v $v23, $v21]}
- @{[vaeskf2_vi $v23, $v22, 13]}
- @{[vmv_v_v $v24, $v22]}
- @{[vaeskf2_vi $v24, $v23, 14]}
+ @{[vmv_v_v $V12, $V10]}
+ @{[vaeskf2_vi $V12, $V11, 2]}
+ @{[vmv_v_v $V13, $V11]}
+ @{[vaeskf2_vi $V13, $V12, 3]}
+ @{[vmv_v_v $V14, $V12]}
+ @{[vaeskf2_vi $V14, $V13, 4]}
+ @{[vmv_v_v $V15, $V13]}
+ @{[vaeskf2_vi $V15, $V14, 5]}
+ @{[vmv_v_v $V16, $V14]}
+ @{[vaeskf2_vi $V16, $V15, 6]}
+ @{[vmv_v_v $V17, $V15]}
+ @{[vaeskf2_vi $V17, $V16, 7]}
+ @{[vmv_v_v $V18, $V16]}
+ @{[vaeskf2_vi $V18, $V17, 8]}
+ @{[vmv_v_v $V19, $V17]}
+ @{[vaeskf2_vi $V19, $V18, 9]}
+ @{[vmv_v_v $V20, $V18]}
+ @{[vaeskf2_vi $V20, $V19, 10]}
+ @{[vmv_v_v $V21, $V19]}
+ @{[vaeskf2_vi $V21, $V20, 11]}
+ @{[vmv_v_v $V22, $V20]}
+ @{[vaeskf2_vi $V22, $V21, 12]}
+ @{[vmv_v_v $V23, $V21]}
+ @{[vaeskf2_vi $V23, $V22, 13]}
+ @{[vmv_v_v $V24, $V22]}
+ @{[vaeskf2_vi $V24, $V23, 14]}
- @{[vse32_v $v10, ($KEYP)]}
+ @{[vse32_v $V10, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v11, ($KEYP)]}
+ @{[vse32_v $V11, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v12, ($KEYP)]}
+ @{[vse32_v $V12, $KEYP]}
addi $KEYP, $KEYP, 16
- @{[vse32_v $v13, ($KEYP)]}
+ @{[vse32_v $V13, $KEYP]}