diff options
author | Yangyu Chen <cyy@cyyself.name> | 2024-04-19 21:49:44 +0800 |
---|---|---|
committer | Tomas Mraz <tomas@openssl.org> | 2024-05-08 11:10:45 +0200 |
commit | c857205407a0a074baf0db7fa7237a469f297c83 (patch) | |
tree | 801d032a44ef4867aa176a9f14fa55eae95a5888 /crypto | |
parent | 03ce37e11729bbe9964bd613c0eed6156b920208 (diff) | |
chacha-riscv64-v-zbb.pl: add comment about vector register allocation
Since RISC-V allows group operations on vector registers, some vector
registers are used without being explicitly referenced. Comments on
vector register allocation are therefore added to improve code
readability and maintainability.
Signed-off-by: Yangyu Chen <cyy@cyyself.name>
Reviewed-by: Paul Dale <ppzgs1@gmail.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/24069)
Diffstat (limited to 'crypto')
-rwxr-xr-x | crypto/chacha/asm/chacha-riscv64-v-zbb.pl | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/crypto/chacha/asm/chacha-riscv64-v-zbb.pl b/crypto/chacha/asm/chacha-riscv64-v-zbb.pl index 5252ae842a..8bc7fbc4ad 100755 --- a/crypto/chacha/asm/chacha-riscv64-v-zbb.pl +++ b/crypto/chacha/asm/chacha-riscv64-v-zbb.pl @@ -284,7 +284,7 @@ ChaCha20_ctr32@{[$isaext]}: 1: #### chacha block data - # init chacha const states + # init chacha const states into $V0~$V3 # "expa" little endian li $CONST_DATA0, 0x61707865 @{[vmv_v_x $V0, $CONST_DATA0]} @@ -299,7 +299,7 @@ ChaCha20_ctr32@{[$isaext]}: lw $KEY0, 0($KEY) @{[vmv_v_x $V3, $CONST_DATA3]} - # init chacha key states + # init chacha key states into $V4~$V11 lw $KEY1, 4($KEY) @{[vmv_v_x $V4, $KEY0]} lw $KEY2, 8($KEY) @@ -316,7 +316,7 @@ ChaCha20_ctr32@{[$isaext]}: @{[vmv_v_x $V10, $KEY6]} @{[vmv_v_x $V11, $KEY7]} - # init chacha key states + # init chacha key states into $V12~$V13 lw $COUNTER1, 4($COUNTER) @{[vid_v $V12]} lw $NONCE0, 8($COUNTER) @@ -325,17 +325,23 @@ ChaCha20_ctr32@{[$isaext]}: @{[vmv_v_x $V13, $COUNTER1]} add $COUNTER0, $CURRENT_COUNTER, $VL - # init chacha nonce states + # init chacha nonce states into $V14~$V15 @{[vmv_v_x $V14, $NONCE0]} @{[vmv_v_x $V15, $NONCE1]} li $T0, 64 - # load the top-half of input data + # load the top-half of input data into $V16~$V23 @{[vlsseg_nf_e32_v 8, $V16, $INPUT, $T0]} + # till now in block_loop, we used: + # - $V0~$V15 for chacha states. + # - $V16~$V23 for top-half of input data. + # - $V24~$V31 haven't been used yet. + # 20 round groups li $T0, 10 .Lround_loop: + # we can use $V24~$V31 as temporary registers in round_loop. addi $T0, $T0, -1 @{[chacha_quad_round_group $V0, $V4, $V8, $V12, @@ -360,10 +366,12 @@ ChaCha20_ctr32@{[$isaext]}: bnez $T0, .Lround_loop li $T0, 64 - # load the bottom-half of input data + # load the bottom-half of input data into $V24~$V31 addi $T1, $INPUT, 32 @{[vlsseg_nf_e32_v 8, $V24, $T1, $T0]} + # now, there are no free vector registers until the round_loop exits. 
+ # add chacha top-half initial block states # "expa" little endian li $T0, 0x61707865 @@ -415,7 +423,7 @@ ChaCha20_ctr32@{[$isaext]}: lw $T2, 24($KEY) @{[vxor_vv $V23, $V23, $V7]} - # save the top-half of output + # save the top-half of output from $V16~$V23 li $T3, 64 @{[vssseg_nf_e32_v 8, $V16, $OUTPUT, $T3]} @@ -461,7 +469,7 @@ ChaCha20_ctr32@{[$isaext]}: @{[vxor_vv $V31, $V31, $V15]} sw $STATE15, 60(sp) - # save the bottom-half of output + # save the bottom-half of output from $V24~$V31 li $T0, 64 addi $T1, $OUTPUT, 32 @{[vssseg_nf_e32_v 8, $V24, $T1, $T0]} @@ -482,6 +490,7 @@ ChaCha20_ctr32@{[$isaext]}: mv $T2, sp .Lscalar_data_loop: @{[vsetvli $VL, $T1, "e8", "m8", "ta", "ma"]} + # from this on, vector registers are grouped with lmul = 8 @{[vle8_v $V8, $INPUT]} @{[vle8_v $V16, $T2]} @{[vxor_vv $V8, $V8, $V16]} |