From 6f6a6130327bb033730bf36ad826797a1b074317 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Thu, 3 Oct 2013 23:08:31 +0200 Subject: aes/asm/bsaes-*.pl: improve decrypt performance. Improve decrypt performance by 10-20% depending on platform. Thanks to Jussi Kivilinna for providing valuable hint. Also thanks to Ard Biesheuvel. --- crypto/aes/asm/bsaes-x86_64.pl | 71 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 5 deletions(-) (limited to 'crypto/aes/asm/bsaes-x86_64.pl') diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl index ceb02b50dd..8cde67681e 100644 --- a/crypto/aes/asm/bsaes-x86_64.pl +++ b/crypto/aes/asm/bsaes-x86_64.pl @@ -83,9 +83,9 @@ # Add decryption procedure. Performance in CPU cycles spent to decrypt # one byte out of 4096-byte buffer with 128-bit key is: # -# Core 2 11.0 -# Nehalem 9.16 -# Atom 20.9 +# Core 2 9.83 +# Nehalem 7.74 +# Atom 18.9 (estimated, not measured yet) # # November 2011. # @@ -456,6 +456,7 @@ sub MixColumns { # modified to emit output in order suitable for feeding back to aesenc[last] my @x=@_[0..7]; my @t=@_[8..15]; +my $inv=@_[16]; # optional $code.=<<___; pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 pshufd \$0x93, @x[1], @t[1] @@ -497,7 +498,8 @@ $code.=<<___; pxor @t[4], @t[0] pshufd \$0x4E, @x[2], @x[6] pxor @t[5], @t[1] - +___ +$code.=<<___ if (!$inv); pxor @t[3], @x[4] pxor @t[7], @x[5] pxor @t[6], @x[3] @@ -505,9 +507,20 @@ $code.=<<___; pxor @t[2], @x[6] movdqa @t[1], @x[7] ___ +$code.=<<___ if ($inv); + pxor @x[4], @t[3] + pxor @t[7], @x[5] + pxor @x[3], @t[6] + movdqa @t[0], @x[3] + pxor @t[2], @x[6] + movdqa @t[6], @x[2] + movdqa @t[1], @x[7] + movdqa @x[6], @x[4] + movdqa @t[3], @x[6] +___ } -sub InvMixColumns { +sub InvMixColumns_orig { my @x=@_[0..7]; my @t=@_[8..15]; @@ -661,6 +674,54 @@ $code.=<<___; ___ } +sub InvMixColumns { +my @x=@_[0..7]; +my @t=@_[8..15]; + +# Thanks to Jussi Kivilinna for providing pointer to +# +# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | +# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | +# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | +# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + +$code.=<<___; + # multiplication by 0x05-0x00-0x04-0x00 + pshufd \$0x4E, @x[0], @t[0] + pshufd \$0x4E, @x[6], @t[6] + pxor @x[0], @t[0] + pshufd \$0x4E, @x[7], @t[7] + pxor @x[6], @t[6] + pshufd \$0x4E, @x[1], @t[1] + pxor @x[7], @t[7] + pshufd \$0x4E, @x[2], @t[2] + pxor @x[1], @t[1] + pshufd \$0x4E, @x[3], @t[3] + pxor @x[2], @t[2] + pxor @t[6], @x[0] + pxor @t[6], @x[1] + pshufd \$0x4E, @x[4], @t[4] + pxor @x[3], @t[3] + pxor @t[0], @x[2] + pxor @t[1], @x[3] + pshufd \$0x4E, @x[5], @t[5] + pxor @x[4], @t[4] + pxor @t[7], @x[1] + pxor @t[2], @x[4] + pxor @x[5], @t[5] + + pxor @t[7], @x[2] + pxor @t[6], @x[3] + pxor @t[6], @x[4] + pxor @t[3], @x[5] + pxor @t[4], @x[6] + pxor @t[7], @x[4] + pxor @t[7], @x[5] + pxor @t[5], @x[7] +___ + &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 +} + sub aesenc { # not used my @b=@_[0..7]; my @t=@_[8..15]; -- cgit v1.2.3