diff options
author | Hongren (Zenithal) Zheng <i@zenithal.me> | 2022-05-13 22:24:43 +0800 |
---|---|---|
committer | Pauli <pauli@openssl.org> | 2022-09-05 10:20:30 +1000 |
commit | b1b889d1b3fc92a56ead5536bee06f3415b78482 (patch) | |
tree | e89c78da40126d629f847f8086a78034e802cdef /crypto/aes | |
parent | 3e139f3d85396cab0bac5d263472b3223a51b76a (diff) |
Add AES implementation in riscv32 zkn asm
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/18308)
Diffstat (limited to 'crypto/aes')
-rw-r--r-- | crypto/aes/asm/aes-riscv32-zkn.pl | 1061 |
1 file changed, 1061 insertions, 0 deletions
#! /usr/bin/env perl
# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

use strict;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Three-argument open so the output filename can never be parsed as a mode.
$output and open STDOUT, '>', $output;

################################################################################
# Utility functions to help with keeping track of which registers to stack/
# unstack when entering / exiting routines.
################################################################################
{
    # Callee-saved registers
    my @callee_saved = map("x$_",(2,8,9,18..27));
    # Caller-saved registers
    my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
    my @must_save;

    # Record that $reg is used by the generated code.  Callee-saved registers
    # are remembered so that save_regs()/load_regs() stack and unstack them;
    # a register that is neither callee- nor caller-saved is rejected.
    sub use_reg {
        my $reg = shift;
        if (grep(/^$reg$/, @callee_saved)) {
            push(@must_save, $reg);
        } elsif (!grep(/^$reg$/, @caller_saved)) {
            # Register is not usable!
            die("Unusable register ".$reg);
        }
        return $reg;
    }

    # Record a list of register numbers as used and return their "xN" names.
    sub use_regs {
        return map(use_reg("x$_"), @_);
    }

    # Emit assembly that reserves a 16-byte-aligned stack frame (8 bytes per
    # recorded register) and stores each callee-saved register into it.
    sub save_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        $ret .= "    addi    sp,sp,-$stack_reservation\n";
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret .= "    sw  $_,$stack_offset(sp)\n";
        }
        return $ret;
    }

    # Emit assembly that reloads the recorded callee-saved registers and
    # releases the stack frame reserved by save_regs().
    sub load_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret .= "    lw  $_,$stack_offset(sp)\n";
        }
        $ret .= "    addi    sp,sp,$stack_reservation\n";
        return $ret;
    }

    # Reset the recorded register set (call between unrelated routines).
    sub clear_regs {
        @must_save = ();
    }
}

################################################################################
# util for encoding scalar crypto extension instructions
################################################################################

my @regs = map("x$_",(0..31));
my %reglookup;
@reglookup{@regs} = @regs;

# Takes a register name, possibly an alias, and converts it to a register index
# from 0 to 31
sub read_reg {
    my $reg = lc shift;
    if (!exists($reglookup{$reg})) {
        die("Unknown register ".$reg);
    }
    my $regstr = $reglookup{$reg};
    if (!($regstr =~ /^x([0-9]+)$/)) {
        die("Could not process register ".$reg);
    }
    return $1;
}

sub aes32dsi {
    # Encoding for aes32dsi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10101_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;

    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32dsmi {
    # Encoding for aes32dsmi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10111_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;

    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32esi {
    # Encoding for aes32esi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10001_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;

    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32esmi {
    # Encoding for aes32esmi rd, rs1, rs2, bs instruction on RV32
    # bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b00_10011_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;

    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub rori {
    # Encoding for rori rd, rs1, shamt instruction (Zbb/Zbkb).
    # Note: this is the RV32 form — shamt is 5 bits here (the original
    # comment said RV64, a copy-paste from the riscv64 variant).
    # XXXXXXX_shamt_ rs1 _XXX_ rd _XXXXXXX
    my $template = 0b0110000_00000_00000_101_00000_0010011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $shamt = shift;

    return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7));
}

################################################################################
# Register assignment for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################

# Registers initially to hold AES state (called s0-s3 or y0-y3 elsewhere)
my ($Q0,$Q1,$Q2,$Q3) = use_regs(6..9);

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Input block pointer, output block pointer, key pointer
my ($INP,$OUTP,$KEYP) = use_regs(10..12);

# Registers initially to hold Key
my ($T0,$T1,$T2,$T3) = use_regs(13..16);

# Loop counter
my ($loopcntr) = use_regs(30);

################################################################################
# Utility for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################

# outer product of whole state into one column of key
sub outer {
    my $inst = shift;
    my $key = shift;
    # state 0 to 3
    my $s0 = shift;
    my $s1 = shift;
    my $s2 = shift;
    my $s3 = shift;
    my $ret = '';
$ret .= <<___;
    @{[$inst->($key,$key,$s0,0)]}
    @{[$inst->($key,$key,$s1,1)]}
    @{[$inst->($key,$key,$s2,2)]}
    @{[$inst->($key,$key,$s3,3)]}
___
    return $ret;
}

sub aes32esmi4 {
    return outer(\&aes32esmi, @_)
}

sub aes32esi4 {
    return outer(\&aes32esi, @_)
}

sub aes32dsmi4 {
    return outer(\&aes32dsmi, @_)
}

sub aes32dsi4 {
    return outer(\&aes32dsi, @_)
}

################################################################################
# void rv32i_zkne_encrypt(const unsigned char *in, unsigned char *out,
#     const AES_KEY *key);
################################################################################
# Plain '=' here: the original used 'my $code .= <<___', which concatenates
# onto a freshly declared (undef) lexical and warns under 'use warnings'.
my $code = <<___;
.text
.balign 16
.globl rv32i_zkne_encrypt
.type   rv32i_zkne_encrypt,\@function
rv32i_zkne_encrypt:
___

$code .= save_regs();

$code .= <<___;
    # Load input to block cipher
    lw $Q0,0($INP)
    lw $Q1,4($INP)
    lw $Q2,8($INP)
    lw $Q3,12($INP)

    # Load key
    lw $T0,0($KEYP)
    lw $T1,4($KEYP)
    lw $T2,8($KEYP)
    lw $T3,12($KEYP)

    # Load number of rounds
    lw $loopcntr,240($KEYP)

    # initial transformation
    xor $Q0,$Q0,$T0
    xor $Q1,$Q1,$T1
    xor $Q2,$Q2,$T2
    xor $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add $loopcntr,$loopcntr,-2
    srli $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add $KEYP,$KEYP,16
    lw $T0,0($KEYP)
    lw $T1,4($KEYP)
    lw $T2,8($KEYP)
    lw $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add $KEYP,$KEYP,16
    lw $Q0,0($KEYP)
    lw $Q1,4($KEYP)
    lw $Q2,8($KEYP)
    lw $Q3,12($KEYP)

    @{[aes32esmi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esmi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esmi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esmi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    add $loopcntr,$loopcntr,-1
    bgtz $loopcntr,1b

# final two rounds
    # Grab next key in schedule
    add $KEYP,$KEYP,16
    lw $T0,0($KEYP)
    lw $T1,4($KEYP)
    lw $T2,8($KEYP)
    lw $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add $KEYP,$KEYP,16
    lw $Q0,0($KEYP)
    lw $Q1,4($KEYP)
    lw $Q2,8($KEYP)
    lw $Q3,12($KEYP)

    # no mix column now
    @{[aes32esi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    sw $Q0,0($OUTP)
    sw $Q1,4($OUTP)
    sw $Q2,8($OUTP)
    sw $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

################################################################################
# void rv32i_zknd_decrypt(const unsigned char *in, unsigned char *out,
#     const AES_KEY *key);
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_decrypt
.type   rv32i_zknd_decrypt,\@function
rv32i_zknd_decrypt:
___

$code .= save_regs();

$code .= <<___;
    # Load input to block cipher
    lw $Q0,0($INP)
    lw $Q1,4($INP)
    lw $Q2,8($INP)
    lw $Q3,12($INP)

    # Load number of rounds
    lw $loopcntr,240($KEYP)

    # Load the last key
    # use T0 as temporary now
    slli $T0,$loopcntr,4
    add $KEYP,$KEYP,$T0
    # Load key
    lw $T0,0($KEYP)
    lw $T1,4($KEYP)
    lw $T2,8($KEYP)
    lw $T3,12($KEYP)

    # initial transformation
    xor $Q0,$Q0,$T0
    xor $Q1,$Q1,$T1
    xor $Q2,$Q2,$T2
    xor $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add $loopcntr,$loopcntr,-2
    srli $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add $KEYP,$KEYP,-16
    lw $T0,0($KEYP)
    lw $T1,4($KEYP)
    lw $T2,8($KEYP)
    lw $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add $KEYP,$KEYP,-16
    lw $Q0,0($KEYP)
    lw $Q1,4($KEYP)
    lw $Q2,8($KEYP)
    lw $Q3,12($KEYP)

    @{[aes32dsmi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsmi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsmi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsmi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    add $loopcntr,$loopcntr,-1
    bgtz $loopcntr,1b

# final two rounds
    # Grab next key in schedule
    add $KEYP,$KEYP,-16
    lw $T0,0($KEYP)
    lw $T1,4($KEYP)
    lw $T2,8($KEYP)
    lw $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add $KEYP,$KEYP,-16
    lw $Q0,0($KEYP)
    lw $Q1,4($KEYP)
    lw $Q2,8($KEYP)
    lw $Q3,12($KEYP)

    # no mix column now
    @{[aes32dsi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    sw $Q0,0($OUTP)
    sw $Q1,4($OUTP)
    sw $Q2,8($OUTP)
    sw $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

clear_regs();

################################################################################
# Register assignment for rv32i_zkn[e/d]_set_[en/de]crypt
################################################################################

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Pointer to user key, number of bits in key, key pointer
my ($UKEY,$BITS,$KEYP) = use_regs(10..12);

# Temporaries
my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8) = use_regs(13..17,28..31);

################################################################################
# utility functions for rv32i_zkne_set_encrypt_key
################################################################################

# AES round constants, one per key-schedule round.
my @rcon = (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36);

# do 4 sbox on 4 bytes of rs, (possibly mix), then xor with rd
sub sbox4 {
    my $inst = shift;
    my $rd = shift;
    my $rs = shift;
    my $ret = <<___;
    @{[$inst->($rd,$rd,$rs,0)]}
    @{[$inst->($rd,$rd,$rs,1)]}
    @{[$inst->($rd,$rd,$rs,2)]}
    @{[$inst->($rd,$rd,$rs,3)]}
___
    return $ret;
}

sub fwdsbox4 {
    return sbox4(\&aes32esi, @_);
}

# 128-bit encrypt key expansion; $zbkb selects rori vs. srli/slli/or rotates.
sub ke128enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
$ret .= <<___;
    lw $T0,0($UKEY)
    lw $T1,4($UKEY)
    lw $T2,8($UKEY)
    lw $T3,12($UKEY)

    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
___
    while($rnum < 10) {
$ret .= <<___;
    # use T4 to store rcon
    li $T4,$rcon[$rnum]
    # as xor is associative and commutative
    # we first xor T0 with RCON, then use T0 to
    # xor the result of each SBOX result of T3
    xor $T0,$T0,$T4
    # use T4 to store rotated T3
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
$ret .= <<___;
    srli $T4,$T3,8
    slli $T5,$T3,24
    or $T4,$T4,$T5
___
        }
$ret .= <<___;
    # update T0
    @{[fwdsbox4 $T0,$T4]}

    # update new T1~T3
    xor $T1,$T1,$T0
    xor $T2,$T2,$T1
    xor $T3,$T3,$T2

    add $KEYP,$KEYP,16
    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
___
        $rnum++;
    }
    return $ret;
}

# 192-bit encrypt key expansion.
sub ke192enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
$ret .= <<___;
    lw $T0,0($UKEY)
    lw $T1,4($UKEY)
    lw $T2,8($UKEY)
    lw $T3,12($UKEY)
    lw $T4,16($UKEY)
    lw $T5,20($UKEY)

    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
    sw $T4,16($KEYP)
    sw $T5,20($KEYP)
___
    while($rnum < 8) {
$ret .= <<___;
    # see the comment in ke128enc
    li $T6,$rcon[$rnum]
    xor $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
$ret .= <<___;
    srli $T6,$T5,8
    slli $T7,$T5,24
    or $T6,$T6,$T7
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor $T1,$T1,$T0
    xor $T2,$T2,$T1
    xor $T3,$T3,$T2
___
        if ($rnum != 7) {
            # note that (8+1)*24 = 216, (12+1)*16 = 208
            # thus the last 8 bytes can be dropped
$ret .= <<___;
    xor $T4,$T4,$T3
    xor $T5,$T5,$T4
___
        }
$ret .= <<___;
    add $KEYP,$KEYP,24
    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
___
        if ($rnum != 7) {
$ret .= <<___;
    sw $T4,16($KEYP)
    sw $T5,20($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

# 256-bit encrypt key expansion.
sub ke256enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
$ret .= <<___;
    lw $T0,0($UKEY)
    lw $T1,4($UKEY)
    lw $T2,8($UKEY)
    lw $T3,12($UKEY)
    lw $T4,16($UKEY)
    lw $T5,20($UKEY)
    lw $T6,24($UKEY)
    lw $T7,28($UKEY)

    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
    sw $T4,16($KEYP)
    sw $T5,20($KEYP)
    sw $T6,24($KEYP)
    sw $T7,28($KEYP)
___
    while($rnum < 7) {
$ret .= <<___;
    # see the comment in ke128enc
    li $T8,$rcon[$rnum]
    xor $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
$ret .= <<___;
    srli $T8,$T7,8
    slli $BITS,$T7,24
    or $T8,$T8,$BITS
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor $T1,$T1,$T0
    xor $T2,$T2,$T1
    xor $T3,$T3,$T2

    add $KEYP,$KEYP,32
    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
___
        if ($rnum != 6) {
            # note that (7+1)*32 = 256, (14+1)*16 = 240
            # thus the last 16 bytes can be dropped
$ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor $T5,$T5,$T4
    xor $T6,$T6,$T5
    xor $T7,$T7,$T6
    sw $T4,16($KEYP)
    sw $T5,20($KEYP)
    sw $T6,24($KEYP)
    sw $T7,28($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

################################################################################
# void rv32i_zkne_set_encrypt_key(const unsigned char *userKey, const int bits,
#     AES_KEY *key)
################################################################################
# Shared set_key skeleton: validates the pointers, dispatches on the key size
# and stores key->rounds, splicing in the supplied per-size expansion code.
sub AES_set_common {
    my ($ke128, $ke192, $ke256) = @_;
    my $ret = '';
$ret .= <<___;
    # if (!userKey || !key) return -1;
    # BUGFIX: the original 'bnez UKEY,1f; bnez KEYP,1f' accepted the pair
    # whenever EITHER pointer was non-NULL; both must be non-NULL to proceed.
    beqz $UKEY,6f
    bnez $KEYP,1f
6:
    li a0,-1
    ret
1:
    # Determine number of rounds from key size in bits
    li $T0,128
    bne $BITS,$T0,1f
    li $T1,10 # key->rounds = 10 if bits == 128
    sw $T1,240($KEYP) # store key->rounds
$ke128
    j 4f
1:
    li $T0,192
    bne $BITS,$T0,2f
    li $T1,12 # key->rounds = 12 if bits == 192
    sw $T1,240($KEYP) # store key->rounds
$ke192
    j 4f
2:
    li $T1,14 # key->rounds = 14 if bits == 256
    li $T0,256
    beq $BITS,$T0,3f
    li a0,-2 # If bits != 128, 192, or 256, return -2
    j 5f
3:
    sw $T1,240($KEYP) # store key->rounds
$ke256
4: # return 0
    li a0,0
5: # return a0
___
    return $ret;
}

$code .= <<___;
.text
.balign 16
.globl rv32i_zkne_set_encrypt_key
.type   rv32i_zkne_set_encrypt_key,\@function
rv32i_zkne_set_encrypt_key:
___

$code .= save_regs();
$code .= AES_set_common(ke128enc(0), ke192enc(0), ke256enc(0));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# void rv32i_zbkb_zkne_set_encrypt_key(const unsigned char *userKey,
#     const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zkne_set_encrypt_key
.type   rv32i_zbkb_zkne_set_encrypt_key,\@function
rv32i_zbkb_zkne_set_encrypt_key:
___

$code .= save_regs();
$code .= AES_set_common(ke128enc(1), ke192enc(1), ke256enc(1));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# utility functions for rv32i_zknd_zkne_set_decrypt_key
################################################################################

sub invm4 {
    # fwd sbox then inv sbox then mix column
    # the result is only mix column
    # this simulates aes64im T0
    my $rd = shift;
    my $tmp = shift;
    my $rs = shift;
    my $ret = <<___;
    li $tmp,0
    li $rd,0
    @{[fwdsbox4 $tmp,$rs]}
    @{[sbox4(\&aes32dsmi, $rd,$tmp)]}
___
    return $ret;
}

# 128-bit decrypt key expansion (encrypt schedule + InvMixColumns transform).
sub ke128dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
$ret .= <<___;
    lw $T0,0($UKEY)
    lw $T1,4($UKEY)
    lw $T2,8($UKEY)
    lw $T3,12($UKEY)

    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
___
    while($rnum < 10) {
$ret .= <<___;
    # see comments in ke128enc
    li $T4,$rcon[$rnum]
    xor $T0,$T0,$T4
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
$ret .= <<___;
    srli $T4,$T3,8
    slli $T5,$T3,24
    or $T4,$T4,$T5
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T4]}
    xor $T1,$T1,$T0
    xor $T2,$T2,$T1
    xor $T3,$T3,$T2
    add $KEYP,$KEYP,16
___
        # need to mixcolumn only for [1:N-1] round keys
        # this is from the fact that aes32dsmi subwords first then mix column
        # intuitively decryption needs to first mix column then subwords
        # however, for merging datapaths (encryption first subwords then mix column)
        # aes32dsmi chooses to inverse the order of them, thus
        # transform should then be done on the round key
        if ($rnum < 9) {
$ret .= <<___;
    # T4 and T5 are temp variables
    @{[invm4 $T5,$T4,$T0]}
    sw $T5,0($KEYP)
    @{[invm4 $T5,$T4,$T1]}
    sw $T5,4($KEYP)
    @{[invm4 $T5,$T4,$T2]}
    sw $T5,8($KEYP)
    @{[invm4 $T5,$T4,$T3]}
    sw $T5,12($KEYP)
___
        } else {
$ret .= <<___;
    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

# 192-bit decrypt key expansion.
sub ke192dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
$ret .= <<___;
    lw $T0,0($UKEY)
    lw $T1,4($UKEY)
    lw $T2,8($UKEY)
    lw $T3,12($UKEY)
    lw $T4,16($UKEY)
    lw $T5,20($UKEY)

    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T4]}
    sw $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw $T7,20($KEYP)
___
    while($rnum < 8) {
$ret .= <<___;
    # see the comment in ke128enc
    li $T6,$rcon[$rnum]
    xor $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
$ret .= <<___;
    srli $T6,$T5,8
    slli $T7,$T5,24
    or $T6,$T6,$T7
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor $T1,$T1,$T0
    xor $T2,$T2,$T1
    xor $T3,$T3,$T2

    add $KEYP,$KEYP,24
___
        if ($rnum < 7) {
$ret .= <<___;
    xor $T4,$T4,$T3
    xor $T5,$T5,$T4

    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T0]}
    sw $T7,0($KEYP)
    @{[invm4 $T7,$T6,$T1]}
    sw $T7,4($KEYP)
    @{[invm4 $T7,$T6,$T2]}
    sw $T7,8($KEYP)
    @{[invm4 $T7,$T6,$T3]}
    sw $T7,12($KEYP)
    @{[invm4 $T7,$T6,$T4]}
    sw $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw $T7,20($KEYP)
___
        } else { # rnum == 7
$ret .= <<___;
    # the reason for dropping T4/T5 is in ke192enc
    # the reason for not invm4 is in ke128dec
    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

# 256-bit decrypt key expansion.
sub ke256dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
$ret .= <<___;
    lw $T0,0($UKEY)
    lw $T1,4($UKEY)
    lw $T2,8($UKEY)
    lw $T3,12($UKEY)
    lw $T4,16($UKEY)
    lw $T5,20($UKEY)
    lw $T6,24($UKEY)
    lw $T7,28($UKEY)

    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
    # see the comment in ke128dec
    # BITS and T8 are temp variables
    # BITS are not used anymore
    @{[invm4 $T8,$BITS,$T4]}
    sw $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw $T8,28($KEYP)
___
    while($rnum < 7) {
$ret .= <<___;
    # see the comment in ke128enc
    li $T8,$rcon[$rnum]
    xor $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
$ret .= <<___;
    srli $T8,$T7,8
    slli $BITS,$T7,24
    or $T8,$T8,$BITS
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor $T1,$T1,$T0
    xor $T2,$T2,$T1
    xor $T3,$T3,$T2

    add $KEYP,$KEYP,32
___
        if ($rnum < 6) {
$ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor $T5,$T5,$T4
    xor $T6,$T6,$T5
    xor $T7,$T7,$T6

    # see the comment in ke128dec
    # T8 and BITS are temp variables
    @{[invm4 $T8,$BITS,$T0]}
    sw $T8,0($KEYP)
    @{[invm4 $T8,$BITS,$T1]}
    sw $T8,4($KEYP)
    @{[invm4 $T8,$BITS,$T2]}
    sw $T8,8($KEYP)
    @{[invm4 $T8,$BITS,$T3]}
    sw $T8,12($KEYP)
    @{[invm4 $T8,$BITS,$T4]}
    sw $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw $T8,28($KEYP)
___
        } else {
$ret .= <<___;
    sw $T0,0($KEYP)
    sw $T1,4($KEYP)
    sw $T2,8($KEYP)
    sw $T3,12($KEYP)
    # last 16 bytes are dropped
    # see the comment in ke256enc
___
        }
        $rnum++;
    }
    return $ret;
}

################################################################################
# void rv32i_zknd_zkne_set_decrypt_key(const unsigned char *userKey, const int bits,
#     AES_KEY *key)
################################################################################
# a note on naming: set_decrypt_key needs aes32esi thus add zkne on name
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_zkne_set_decrypt_key
.type   rv32i_zknd_zkne_set_decrypt_key,\@function
rv32i_zknd_zkne_set_decrypt_key:
___
$code .= save_regs();
$code .= AES_set_common(ke128dec(0), ke192dec(0), ke256dec(0));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# void rv32i_zbkb_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
#     const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zknd_zkne_set_decrypt_key
.type   rv32i_zbkb_zknd_zkne_set_decrypt_key,\@function
rv32i_zbkb_zknd_zkne_set_decrypt_key:
___

$code .= save_regs();
$code .= AES_set_common(ke128dec(1), ke192dec(1), ke256dec(1));
$code .= load_regs();
$code .= <<___;
    ret
___

print $code;
close STDOUT or die "error closing STDOUT: $!";