summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2013-03-31 14:32:05 +0200
committerAndy Polyakov <appro@openssl.org>2013-03-31 14:32:05 +0200
commitc5d975a74313268a36b6a6103cd37221724137c2 (patch)
tree9307057b720536601f7c9981d7e7a82302d6cad9
parentd8f3ed23062539c4a6f7b4153633f921ce16a15f (diff)
Add support for SPARC T4 DES opcode.
-rwxr-xr-xConfigure2
-rw-r--r--TABLE18
-rw-r--r--crypto/des/Makefile2
-rw-r--r--crypto/des/asm/dest4-sparcv9.pl594
-rw-r--r--crypto/evp/e_des.c56
-rw-r--r--crypto/evp/e_des3.c108
-rw-r--r--crypto/perlasm/sparcv9_modes.pl60
-rwxr-xr-xtest/test_t43
8 files changed, 807 insertions, 36 deletions
diff --git a/Configure b/Configure
index 24c8dc2665..e5b8c3049b 100755
--- a/Configure
+++ b/Configure
@@ -130,7 +130,7 @@ my $x86_elf_asm="$x86_asm:elf";
my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o:e_padlock-x86_64.o";
my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void";
+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void";
my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";
diff --git a/TABLE b/TABLE
index a3ca775a61..a3a58a4c0a 100644
--- a/TABLE
+++ b/TABLE
@@ -175,7 +175,7 @@ $lflags =
$bn_ops = BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -2683,7 +2683,7 @@ $lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -2716,7 +2716,7 @@ $lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -4432,7 +4432,7 @@ $lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -4663,7 +4663,7 @@ $lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -5521,7 +5521,7 @@ $lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -5554,7 +5554,7 @@ $lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -5653,7 +5653,7 @@ $lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
@@ -5686,7 +5686,7 @@ $lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
-$des_obj = des_enc-sparc.o fcrypt_b.o
+$des_obj = des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
$md5_obj = md5-sparcv9.o
diff --git a/crypto/des/Makefile b/crypto/des/Makefile
index 868f6087e8..1c061a6879 100644
--- a/crypto/des/Makefile
+++ b/crypto/des/Makefile
@@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib
des_enc-sparc.S: asm/des_enc.m4
m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S
+dest4-sparcv9.s: asm/dest4-sparcv9.pl
+ $(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@
des-586.s: asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl
$(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@
diff --git a/crypto/des/asm/dest4-sparcv9.pl b/crypto/des/asm/dest4-sparcv9.pl
new file mode 100644
index 0000000000..5936a658e5
--- /dev/null
+++ b/crypto/des/asm/dest4-sparcv9.pl
@@ -0,0 +1,594 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
+# <appro@openssl.org>. The module is licensed under 2-clause BSD
+# license. March 2013. All rights reserved.
+# ====================================================================
+
+######################################################################
+# DES for SPARC T4.
+#
+# As with other hardware-assisted ciphers CBC encrypt results [for
+# aligned data] are virtually identical to critical path lengths:
+#
+# DES Triple-DES
+# CBC encrypt 4.14/4.15(*) 11.7/11.7
+# CBC decrypt 1.77/4.11(**) 6.42/7.47
+#
+# (*) numbers after slash are for
+# misaligned data;
+# (**) this is result for largest
+# block size, unlike all other
+# cases smaller blocks results
+# are better[?];
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "sparcv9_modes.pl";
+
+&asm_init(@ARGV);
+
+$code.=<<___ if ($::abibits==64);
+.register %g2,#scratch
+.register %g3,#scratch
+___
+
+$code.=<<___;
+.text
+___
+
+{ my ($inp,$out)=("%o0","%o1");
+
+$code.=<<___;
+.align 32
+.globl des_t4_key_expand
+.type des_t4_key_expand,#function
+des_t4_key_expand:
+ andcc $inp, 0x7, %g0
+ alignaddr $inp, %g0, $inp
+ bz,pt %icc, 1f
+ ldd [$inp + 0x00], %f0
+ ldd [$inp + 0x08], %f2
+ faligndata %f0, %f2, %f0
+1: des_kexpand %f0, 0, %f0
+ des_kexpand %f0, 1, %f2
+ std %f0, [$out + 0x00]
+ des_kexpand %f2, 3, %f6
+ std %f2, [$out + 0x08]
+ des_kexpand %f2, 2, %f4
+ des_kexpand %f6, 3, %f10
+ std %f6, [$out + 0x18]
+ des_kexpand %f6, 2, %f8
+ std %f4, [$out + 0x10]
+ des_kexpand %f10, 3, %f14
+ std %f10, [$out + 0x28]
+ des_kexpand %f10, 2, %f12
+ std %f8, [$out + 0x20]
+ des_kexpand %f14, 1, %f16
+ std %f14, [$out + 0x38]
+ des_kexpand %f16, 3, %f20
+ std %f12, [$out + 0x30]
+ des_kexpand %f16, 2, %f18
+ std %f16, [$out + 0x40]
+ des_kexpand %f20, 3, %f24
+ std %f20, [$out + 0x50]
+ des_kexpand %f20, 2, %f22
+ std %f18, [$out + 0x48]
+ des_kexpand %f24, 3, %f28
+ std %f24, [$out + 0x60]
+ des_kexpand %f24, 2, %f26
+ std %f22, [$out + 0x58]
+ des_kexpand %f28, 1, %f30
+ std %f28, [$out + 0x70]
+ std %f26, [$out + 0x68]
+ retl
+ std %f30, [$out + 0x78]
+.size des_t4_key_expand,.-des_t4_key_expand
+___
+}
+{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
+ my ($ileft,$iright,$omask) = map("%g$_",(1..3));
+
+$code.=<<___;
+.globl des_t4_cbc_encrypt
+.align 32
+des_t4_cbc_encrypt:
+ ld [$ivec + 0], %f0 ! load ivec
+ ld [$ivec + 4], %f1
+
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 0xff, $omask
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ sub %g0, $ileft, $iright
+ and $out, 7, %g4
+ alignaddrl $out, %g0, $out
+ srl $omask, %g4, $omask
+ srlx $len, 3, $len
+ movrz %g4, 0, $omask
+ prefetch [$out], 22
+
+ ldd [$key + 0x00], %f4 ! load key schedule
+ ldd [$key + 0x08], %f6
+ ldd [$key + 0x10], %f8
+ ldd [$key + 0x18], %f10
+ ldd [$key + 0x20], %f12
+ ldd [$key + 0x28], %f14
+ ldd [$key + 0x30], %f16
+ ldd [$key + 0x38], %f18
+ ldd [$key + 0x40], %f20
+ ldd [$key + 0x48], %f22
+ ldd [$key + 0x50], %f24
+ ldd [$key + 0x58], %f26
+ ldd [$key + 0x60], %f28
+ ldd [$key + 0x68], %f30
+ ldd [$key + 0x70], %f32
+ ldd [$key + 0x78], %f34
+
+.Ldes_cbc_enc_loop:
+ ldx [$inp + 0], %g4
+ brz,pt $ileft, 4f
+ nop
+
+ ldx [$inp + 8], %g5
+ sllx %g4, $ileft, %g4
+ srlx %g5, $iright, %g5
+ or %g5, %g4, %g4
+4:
+ movxtod %g4, %f2
+ prefetch [$inp + 8+63], 20
+ add $inp, 8, $inp
+ fxor %f2, %f0, %f0 ! ^= ivec
+ prefetch [$out + 63], 22
+
+ des_ip %f0, %f0
+ des_round %f4, %f6, %f0, %f0
+ des_round %f8, %f10, %f0, %f0
+ des_round %f12, %f14, %f0, %f0
+ des_round %f16, %f18, %f0, %f0
+ des_round %f20, %f22, %f0, %f0
+ des_round %f24, %f26, %f0, %f0
+ des_round %f28, %f30, %f0, %f0
+ des_round %f32, %f34, %f0, %f0
+ des_iip %f0, %f0
+
+ brnz,pn $omask, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ brnz,pt $len, .Ldes_cbc_enc_loop
+ add $out, 8, $out
+
+ st %f0, [$ivec + 0] ! write out ivec
+ retl
+ st %f1, [$ivec + 4]
+
+.align 16
+2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
+ ! and ~4x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f2 ! handle unaligned output
+
+ stda %f8, [$out + $omask]0xc0 ! partial store
+ add $out, 8, $out
+ orn %g0, $omask, $omask
+ stda %f8, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .Ldes_cbc_enc_loop+4
+ orn %g0, $omask, $omask
+
+ st %f0, [$ivec + 0] ! write out ivec
+ retl
+ st %f1, [$ivec + 4]
+.type des_t4_cbc_encrypt,#function
+.size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
+
+.globl des_t4_cbc_decrypt
+.align 32
+des_t4_cbc_decrypt:
+ ld [$ivec + 0], %f2 ! load ivec
+ ld [$ivec + 4], %f3
+
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 0xff, $omask
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ sub %g0, $ileft, $iright
+ and $out, 7, %g4
+ alignaddrl $out, %g0, $out
+ srl $omask, %g4, $omask
+ srlx $len, 3, $len
+ movrz %g4, 0, $omask
+ prefetch [$out], 22
+
+ ldd [$key + 0x78], %f4 ! load key schedule
+ ldd [$key + 0x70], %f6
+ ldd [$key + 0x68], %f8
+ ldd [$key + 0x60], %f10
+ ldd [$key + 0x58], %f12
+ ldd [$key + 0x50], %f14
+ ldd [$key + 0x48], %f16
+ ldd [$key + 0x40], %f18
+ ldd [$key + 0x38], %f20
+ ldd [$key + 0x30], %f22
+ ldd [$key + 0x28], %f24
+ ldd [$key + 0x20], %f26
+ ldd [$key + 0x18], %f28
+ ldd [$key + 0x10], %f30
+ ldd [$key + 0x08], %f32
+ ldd [$key + 0x00], %f34
+
+.Ldes_cbc_dec_loop:
+ ldx [$inp + 0], %g4
+ brz,pt $ileft, 4f
+ nop
+
+ ldx [$inp + 8], %g5
+ sllx %g4, $ileft, %g4
+ srlx %g5, $iright, %g5
+ or %g5, %g4, %g4
+4:
+ movxtod %g4, %f0
+ prefetch [$inp + 8+63], 20
+ add $inp, 8, $inp
+ prefetch [$out + 63], 22
+
+ des_ip %f0, %f0
+ des_round %f4, %f6, %f0, %f0
+ des_round %f8, %f10, %f0, %f0
+ des_round %f12, %f14, %f0, %f0
+ des_round %f16, %f18, %f0, %f0
+ des_round %f20, %f22, %f0, %f0
+ des_round %f24, %f26, %f0, %f0
+ des_round %f28, %f30, %f0, %f0
+ des_round %f32, %f34, %f0, %f0
+ des_iip %f0, %f0
+
+ fxor %f2, %f0, %f0 ! ^= ivec
+ movxtod %g4, %f2
+
+ brnz,pn $omask, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ brnz,pt $len, .Ldes_cbc_dec_loop
+ add $out, 8, $out
+
+ st %f2, [$ivec + 0] ! write out ivec
+ retl
+ st %f3, [$ivec + 4]
+
+.align 16
+2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
+ ! and ~4x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f0 ! handle unaligned output
+
+ stda %f0, [$out + $omask]0xc0 ! partial store
+ add $out, 8, $out
+ orn %g0, $omask, $omask
+ stda %f0, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .Ldes_cbc_dec_loop+4
+ orn %g0, $omask, $omask
+
+ st %f2, [$ivec + 0] ! write out ivec
+ retl
+ st %f3, [$ivec + 4]
+.type des_t4_cbc_decrypt,#function
+.size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
+
+.globl des_t4_ede3_cbc_encrypt
+.align 32
+des_t4_ede3_cbc_encrypt:
+ ld [$ivec + 0], %f0 ! load ivec
+ ld [$ivec + 4], %f1
+
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 0xff, $omask
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ sub %g0, $ileft, $iright
+ and $out, 7, %g4
+ alignaddrl $out, %g0, $out
+ srl $omask, %g4, $omask
+ srlx $len, 3, $len
+ movrz %g4, 0, $omask
+ prefetch [$out], 22
+
+ ldd [$key + 0x00], %f4 ! load key schedule
+ ldd [$key + 0x08], %f6
+ ldd [$key + 0x10], %f8
+ ldd [$key + 0x18], %f10
+ ldd [$key + 0x20], %f12
+ ldd [$key + 0x28], %f14
+ ldd [$key + 0x30], %f16
+ ldd [$key + 0x38], %f18
+ ldd [$key + 0x40], %f20
+ ldd [$key + 0x48], %f22
+ ldd [$key + 0x50], %f24
+ ldd [$key + 0x58], %f26
+ ldd [$key + 0x60], %f28
+ ldd [$key + 0x68], %f30
+ ldd [$key + 0x70], %f32
+ ldd [$key + 0x78], %f34
+
+.Ldes_ede3_cbc_enc_loop:
+ ldx [$inp + 0], %g4
+ brz,pt $ileft, 4f
+ nop
+
+ ldx [$inp + 8], %g5
+ sllx %g4, $ileft, %g4
+ srlx %g5, $iright, %g5
+ or %g5, %g4, %g4
+4:
+ movxtod %g4, %f2
+ prefetch [$inp + 8+63], 20
+ add $inp, 8, $inp
+ fxor %f2, %f0, %f0 ! ^= ivec
+ prefetch [$out + 63], 22
+
+ des_ip %f0, %f0
+ des_round %f4, %f6, %f0, %f0
+ des_round %f8, %f10, %f0, %f0
+ des_round %f12, %f14, %f0, %f0
+ des_round %f16, %f18, %f0, %f0
+ ldd [$key + 0x100-0x08], %f36
+ ldd [$key + 0x100-0x10], %f38
+ des_round %f20, %f22, %f0, %f0
+ ldd [$key + 0x100-0x18], %f40
+ ldd [$key + 0x100-0x20], %f42
+ des_round %f24, %f26, %f0, %f0
+ ldd [$key + 0x100-0x28], %f44
+ ldd [$key + 0x100-0x30], %f46
+ des_round %f28, %f30, %f0, %f0
+ ldd [$key + 0x100-0x38], %f48
+ ldd [$key + 0x100-0x40], %f50
+ des_round %f32, %f34, %f0, %f0
+ ldd [$key + 0x100-0x48], %f52
+ ldd [$key + 0x100-0x50], %f54
+ des_iip %f0, %f0
+
+ ldd [$key + 0x100-0x58], %f56
+ ldd [$key + 0x100-0x60], %f58
+ des_ip %f0, %f0
+ ldd [$key + 0x100-0x68], %f60
+ ldd [$key + 0x100-0x70], %f62
+ des_round %f36, %f38, %f0, %f0
+ ldd [$key + 0x100-0x78], %f36
+ ldd [$key + 0x100-0x80], %f38
+ des_round %f40, %f42, %f0, %f0
+ des_round %f44, %f46, %f0, %f0
+ des_round %f48, %f50, %f0, %f0
+ ldd [$key + 0x100+0x00], %f40
+ ldd [$key + 0x100+0x08], %f42
+ des_round %f52, %f54, %f0, %f0
+ ldd [$key + 0x100+0x10], %f44
+ ldd [$key + 0x100+0x18], %f46
+ des_round %f56, %f58, %f0, %f0
+ ldd [$key + 0x100+0x20], %f48
+ ldd [$key + 0x100+0x28], %f50
+ des_round %f60, %f62, %f0, %f0
+ ldd [$key + 0x100+0x30], %f52
+ ldd [$key + 0x100+0x38], %f54
+ des_round %f36, %f38, %f0, %f0
+ ldd [$key + 0x100+0x40], %f56
+ ldd [$key + 0x100+0x48], %f58
+ des_iip %f0, %f0
+
+ ldd [$key + 0x100+0x50], %f60
+ ldd [$key + 0x100+0x58], %f62
+ des_ip %f0, %f0
+ ldd [$key + 0x100+0x60], %f36
+ ldd [$key + 0x100+0x68], %f38
+ des_round %f40, %f42, %f0, %f0
+ ldd [$key + 0x100+0x70], %f40
+ ldd [$key + 0x100+0x78], %f42
+ des_round %f44, %f46, %f0, %f0
+ des_round %f48, %f50, %f0, %f0
+ des_round %f52, %f54, %f0, %f0
+ des_round %f56, %f58, %f0, %f0
+ des_round %f60, %f62, %f0, %f0
+ des_round %f36, %f38, %f0, %f0
+ des_round %f40, %f42, %f0, %f0
+ des_iip %f0, %f0
+
+ brnz,pn $omask, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ brnz,pt $len, .Ldes_ede3_cbc_enc_loop
+ add $out, 8, $out
+
+ st %f0, [$ivec + 0] ! write out ivec
+ retl
+ st %f1, [$ivec + 4]
+
+.align 16
+2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
+ ! and ~2x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f2 ! handle unaligned output
+
+ stda %f2, [$out + $omask]0xc0 ! partial store
+ add $out, 8, $out
+ orn %g0, $omask, $omask
+ stda %f2, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
+ orn %g0, $omask, $omask
+
+ st %f0, [$ivec + 0] ! write out ivec
+ retl
+ st %f1, [$ivec + 4]
+.type des_t4_ede3_cbc_encrypt,#function
+.size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
+
+.globl des_t4_ede3_cbc_decrypt
+.align 32
+des_t4_ede3_cbc_decrypt:
+ ld [$ivec + 0], %f2 ! load ivec
+ ld [$ivec + 4], %f3
+
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 0xff, $omask
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ sub %g0, $ileft, $iright
+ and $out, 7, %g4
+ alignaddrl $out, %g0, $out
+ srl $omask, %g4, $omask
+ srlx $len, 3, $len
+ movrz %g4, 0, $omask
+ prefetch [$out], 22
+
+ ldd [$key + 0x100+0x78], %f4 ! load key schedule
+ ldd [$key + 0x100+0x70], %f6
+ ldd [$key + 0x100+0x68], %f8
+ ldd [$key + 0x100+0x60], %f10
+ ldd [$key + 0x100+0x58], %f12
+ ldd [$key + 0x100+0x50], %f14
+ ldd [$key + 0x100+0x48], %f16
+ ldd [$key + 0x100+0x40], %f18
+ ldd [$key + 0x100+0x38], %f20
+ ldd [$key + 0x100+0x30], %f22
+ ldd [$key + 0x100+0x28], %f24
+ ldd [$key + 0x100+0x20], %f26
+ ldd [$key + 0x100+0x18], %f28
+ ldd [$key + 0x100+0x10], %f30
+ ldd [$key + 0x100+0x08], %f32
+ ldd [$key + 0x100+0x00], %f34
+
+.Ldes_ede3_cbc_dec_loop:
+ ldx [$inp + 0], %g4
+ brz,pt $ileft, 4f
+ nop
+
+ ldx [$inp + 8], %g5
+ sllx %g4, $ileft, %g4
+ srlx %g5, $iright, %g5
+ or %g5, %g4, %g4
+4:
+ movxtod %g4, %f0
+ prefetch [$inp + 8+63], 20
+ add $inp, 8, $inp
+ prefetch [$out + 63], 22
+
+ des_ip %f0, %f0
+ des_round %f4, %f6, %f0, %f0
+ des_round %f8, %f10, %f0, %f0
+ des_round %f12, %f14, %f0, %f0
+ des_round %f16, %f18, %f0, %f0
+ ldd [$key + 0x80+0x00], %f36
+ ldd [$key + 0x80+0x08], %f38
+ des_round %f20, %f22, %f0, %f0
+ ldd [$key + 0x80+0x10], %f40
+ ldd [$key + 0x80+0x18], %f42
+ des_round %f24, %f26, %f0, %f0
+ ldd [$key + 0x80+0x20], %f44
+ ldd [$key + 0x80+0x28], %f46
+ des_round %f28, %f30, %f0, %f0
+ ldd [$key + 0x80+0x30], %f48
+ ldd [$key + 0x80+0x38], %f50
+ des_round %f32, %f34, %f0, %f0
+ ldd [$key + 0x80+0x40], %f52
+ ldd [$key + 0x80+0x48], %f54
+ des_iip %f0, %f0
+
+ ldd [$key + 0x80+0x50], %f56
+ ldd [$key + 0x80+0x58], %f58
+ des_ip %f0, %f0
+ ldd [$key + 0x80+0x60], %f60
+ ldd [$key + 0x80+0x68], %f62
+ des_round %f36, %f38, %f0, %f0
+ ldd [$key + 0x80+0x70], %f36
+ ldd [$key + 0x80+0x78], %f38
+ des_round %f40, %f42, %f0, %f0
+ des_round %f44, %f46, %f0, %f0
+ des_round %f48, %f50, %f0, %f0
+ ldd [$key + 0x80-0x08], %f40
+ ldd [$key + 0x80-0x10], %f42
+ des_round %f52, %f54, %f0, %f0
+ ldd [$key + 0x80-0x18], %f44
+ ldd [$key + 0x80-0x20], %f46
+ des_round %f56, %f58, %f0, %f0
+ ldd [$key + 0x80-0x28], %f48
+ ldd [$key + 0x80-0x30], %f50
+ des_round %f60, %f62, %f0, %f0
+ ldd [$key + 0x80-0x38], %f52
+ ldd [$key + 0x80-0x40], %f54
+ des_round %f36, %f38, %f0, %f0
+ ldd [$key + 0x80-0x48], %f56
+ ldd [$key + 0x80-0x50], %f58
+ des_iip %f0, %f0
+
+ ldd [$key + 0x80-0x58], %f60
+ ldd [$key + 0x80-0x60], %f62
+ des_ip %f0, %f0
+ ldd [$key + 0x80-0x68], %f36
+ ldd [$key + 0x80-0x70], %f38
+ des_round %f40, %f42, %f0, %f0
+ ldd [$key + 0x80-0x78], %f40
+ ldd [$key + 0x80-0x80], %f42
+ des_round %f44, %f46, %f0, %f0
+ des_round %f48, %f50, %f0, %f0
+ des_round %f52, %f54, %f0, %f0
+ des_round %f56, %f58, %f0, %f0
+ des_round %f60, %f62, %f0, %f0
+ des_round %f36, %f38, %f0, %f0
+ des_round %f40, %f42, %f0, %f0
+ des_iip %f0, %f0
+
+ fxor %f2, %f0, %f0 ! ^= ivec
+ movxtod %g4, %f2
+
+ brnz,pn $omask, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ brnz,pt $len, .Ldes_ede3_cbc_dec_loop
+ add $out, 8, $out
+
+ st %f2, [$ivec + 0] ! write out ivec
+ retl
+ st %f3, [$ivec + 4]
+
+.align 16
+2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f0 ! handle unaligned output
+
+ stda %f0, [$out + $omask]0xc0 ! partial store
+ add $out, 8, $out
+ orn %g0, $omask, $omask
+ stda %f0, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
+ orn %g0, $omask, $omask
+
+ st %f2, [$ivec + 0] ! write out ivec
+ retl
+ st %f3, [$ivec + 4]
+.type des_t4_ede3_cbc_decrypt,#function
+.size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
+___
+}
+$code.=<<___;
+.asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
+.align 4
+___
+
+&emit_assembler();
+
+close STDOUT;
diff --git a/crypto/evp/e_des.c b/crypto/evp/e_des.c
index ca009f2c52..bf4366590f 100644
--- a/crypto/evp/e_des.c
+++ b/crypto/evp/e_des.c
@@ -65,6 +65,30 @@
#include <openssl/des.h>
#include <openssl/rand.h>
+typedef struct
+ {
+ union { double align; DES_key_schedule ks; } ks;
+ union {
+ void (*cbc)(const void *,void *,size_t,const void *,void *);
+ } stream;
+ } EVP_DES_KEY;
+
+#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
+/* ---------^^^ this is not a typo, just a way to detect that
+ * assembler support was in general requested... */
+#include "sparc_arch.h"
+
+extern unsigned int OPENSSL_sparcv9cap_P[];
+
+#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES)
+
+void des_t4_key_expand(const void *key, DES_key_schedule *ks);
+void des_t4_cbc_encrypt(const void *inp,void *out,size_t len,
+ DES_key_schedule *ks,unsigned char iv[8]);
+void des_t4_cbc_decrypt(const void *inp,void *out,size_t len,
+ DES_key_schedule *ks,unsigned char iv[8]);
+#endif
+
static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
const unsigned char *iv, int enc);
static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
@@ -99,6 +123,13 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
const unsigned char *in, size_t inl)
{
+ EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
+
+ if (dat->stream.cbc)
+ {
+ (*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv);
+ return 1;
+ }
while(inl>=EVP_MAXCHUNK)
{
DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data,
@@ -176,18 +207,18 @@ static int des_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
return 1;
}
-BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64,
+BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64,
EVP_CIPH_RAND_KEY, des_init_key, NULL,
EVP_CIPHER_set_asn1_iv,
EVP_CIPHER_get_asn1_iv,
des_ctrl)
-BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,1,
+BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,1,
EVP_CIPH_RAND_KEY, des_init_key,NULL,
EVP_CIPHER_set_asn1_iv,
EVP_CIPHER_get_asn1_iv,des_ctrl)
-BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,8,
+BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,8,
EVP_CIPH_RAND_KEY,des_init_key,NULL,
EVP_CIPHER_set_asn1_iv,
EVP_CIPHER_get_asn1_iv,des_ctrl)
@@ -196,8 +227,25 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
const unsigned char *iv, int enc)
{
DES_cblock *deskey = (DES_cblock *)key;
+ EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
+
+ dat->stream.cbc = NULL;
+#if defined(SPARC_DES_CAPABLE)
+ if (SPARC_DES_CAPABLE)
+ {
+ int mode = ctx->cipher->flags & EVP_CIPH_MODE;
+
+ if (mode == EVP_CIPH_CBC_MODE)
+ {
+ des_t4_key_expand(key,&dat->ks.ks);
+ dat->stream.cbc = enc ? des_t4_cbc_encrypt :
+ des_t4_cbc_decrypt;
+ return 1;
+ }
+ }
+#endif
#ifdef EVP_CHECK_DES_KEY
- if(DES_set_key_checked(deskey,ctx->cipher_data) != 0)
+ if(DES_set_key_checked(deskey,dat->ks.ks) != 0)
return 0;
#else
DES_set_key_unchecked(deskey,ctx->cipher_data);
diff --git a/crypto/evp/e_des3.c b/crypto/evp/e_des3.c
index 2f016f0431..6b0bd2b367 100644
--- a/crypto/evp/e_des3.c
+++ b/crypto/evp/e_des3.c
@@ -67,6 +67,33 @@
#include <openssl/des.h>
#include <openssl/rand.h>
+typedef struct
+ {
+ union { double align; DES_key_schedule ks[3]; } ks;
+ union {
+ void (*cbc)(const void *,void *,size_t,const void *,void *);
+ } stream;
+ } DES_EDE_KEY;
+#define ks1 ks.ks[0]
+#define ks2 ks.ks[1]
+#define ks3 ks.ks[2]
+
+#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
+/* ---------^^^ this is not a typo, just a way to detect that
+ * assembler support was in general requested... */
+#include "sparc_arch.h"
+
+extern unsigned int OPENSSL_sparcv9cap_P[];
+
+#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES)
+
+void des_t4_key_expand(const void *key, DES_key_schedule *ks);
+void des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len,
+ DES_key_schedule *ks,unsigned char iv[8]);
+void des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len,
+ DES_key_schedule *ks,unsigned char iv[8]);
+#endif
+
static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
const unsigned char *iv,int enc);
@@ -75,13 +102,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
-typedef struct
- {
- DES_key_schedule ks1;/* key schedule */
- DES_key_schedule ks2;/* key schedule (for ede) */
- DES_key_schedule ks3;/* key schedule (for ede3) */
- } DES_EDE_KEY;
-
#define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data)
/* Because of various casts and different args can't use IMPLEMENT_BLOCK_CIPHER */
@@ -121,6 +141,8 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
const unsigned char *in, size_t inl)
{
+ DES_EDE_KEY *dat = data(ctx);
+
#ifdef KSSL_DEBUG
{
int i;
@@ -132,10 +154,16 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
printf("\n");
}
#endif /* KSSL_DEBUG */
+ if (dat->stream.cbc)
+ {
+ (*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv);
+ return 1;
+ }
+
if (inl>=EVP_MAXCHUNK)
{
DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK,
- &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3,
+ &dat->ks1, &dat->ks2, &dat->ks3,
(DES_cblock *)ctx->iv, ctx->encrypt);
inl-=EVP_MAXCHUNK;
in +=EVP_MAXCHUNK;
@@ -143,7 +171,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
}
if (inl)
DES_ede3_cbc_encrypt(in, out, (long)inl,
- &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3,
+ &dat->ks1, &dat->ks2, &dat->ks3,
(DES_cblock *)ctx->iv, ctx->encrypt);
return 1;
}
@@ -236,16 +264,35 @@ static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
const unsigned char *iv, int enc)
{
DES_cblock *deskey = (DES_cblock *)key;
+ DES_EDE_KEY *dat = data(ctx);
+
+ dat->stream.cbc = NULL;
+#if defined(SPARC_DES_CAPABLE)
+ if (SPARC_DES_CAPABLE)
+ {
+ int mode = ctx->cipher->flags & EVP_CIPH_MODE;
+
+ if (mode == EVP_CIPH_CBC_MODE)
+ {
+ des_t4_key_expand(&deskey[0],&dat->ks1);
+ des_t4_key_expand(&deskey[1],&dat->ks2);
+ memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1));
+ dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
+ des_t4_ede3_cbc_decrypt;
+ return 1;
+ }
+ }
+#endif
#ifdef EVP_CHECK_DES_KEY
- if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1)
- !! DES_set_key_checked(&deskey[1],&data(ctx)->ks2))
+ if (DES_set_key_checked(&deskey[0],&dat->ks1)
+ !! DES_set_key_checked(&deskey[1],&dat->ks2))
return 0;
#else
- DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1);
- DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2);
+ DES_set_key_unchecked(&deskey[0],&dat->ks1);
+ DES_set_key_unchecked(&deskey[1],&dat->ks2);
#endif
- memcpy(&data(ctx)->ks3,&data(ctx)->ks1,
- sizeof(data(ctx)->ks1));
+ memcpy(&dat->ks3,&dat->ks1,
+ sizeof(dat->ks1));
return 1;
}
@@ -253,6 +300,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
const unsigned char *iv, int enc)
{
DES_cblock *deskey = (DES_cblock *)key;
+ DES_EDE_KEY *dat = data(ctx);
+
#ifdef KSSL_DEBUG
{
int i;
@@ -264,15 +313,32 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
}
#endif /* KSSL_DEBUG */
+ dat->stream.cbc = NULL;
+#if defined(SPARC_DES_CAPABLE)
+ if (SPARC_DES_CAPABLE)
+ {
+ int mode = ctx->cipher->flags & EVP_CIPH_MODE;
+
+ if (mode == EVP_CIPH_CBC_MODE)
+ {
+ des_t4_key_expand(&deskey[0],&dat->ks1);
+ des_t4_key_expand(&deskey[1],&dat->ks2);
+ des_t4_key_expand(&deskey[2],&dat->ks3);
+ dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
+ des_t4_ede3_cbc_decrypt;
+ return 1;
+ }
+ }
+#endif
#ifdef EVP_CHECK_DES_KEY
- if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1)
- || DES_set_key_checked(&deskey[1],&data(ctx)->ks2)
- || DES_set_key_checked(&deskey[2],&data(ctx)->ks3))
+ if (DES_set_key_checked(&deskey[0],&dat->ks1)
+ || DES_set_key_checked(&deskey[1],&dat->ks2)
+ || DES_set_key_checked(&deskey[2],&dat->ks3))
return 0;
#else
- DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1);
- DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2);
- DES_set_key_unchecked(&deskey[2],&data(ctx)->ks3);
+ DES_set_key_unchecked(&deskey[0],&dat->ks1);
+ DES_set_key_unchecked(&deskey[1],&dat->ks2);
+ DES_set_key_unchecked(&deskey[2],&dat->ks3);
#endif
return 1;
}
diff --git a/crypto/perlasm/sparcv9_modes.pl b/crypto/perlasm/sparcv9_modes.pl
index 363da32d59..a13ffcec2d 100644
--- a/crypto/perlasm/sparcv9_modes.pl
+++ b/crypto/perlasm/sparcv9_modes.pl
@@ -1582,6 +1582,63 @@ my %movxopf = ( "movdtox" => 0x110,
}
}
+sub undes {
+my ($mnemonic)=shift;
+my @args=@_;
+my ($ref,$opf);
+my %desopf = ( "des_round" => 0b1001,
+ "des_ip" => 0b100110100,
+ "des_iip" => 0b100110101,
+ "des_kexpand" => 0b100110110 );
+
+ $ref = "$mnemonic\t".join(",",@_);
+
+ if (defined($opf=$desopf{$mnemonic})) { # 4-arg
+ if ($mnemonic eq "des_round") {
+ foreach (@args[0..3]) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {