diff options
Diffstat (limited to 'crypto/aes')
-rw-r--r-- | crypto/aes/Makefile | 4 | ||||
-rw-r--r-- | crypto/aes/asm/vpaes-x86.pl | 7 | ||||
-rw-r--r-- | crypto/aes/asm/vpaes-x86_64.pl | 7 |
3 files changed, 12 insertions, 6 deletions
diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile index ae16e659e4..78d5984140 100644 --- a/crypto/aes/Makefile +++ b/crypto/aes/Makefile @@ -50,11 +50,15 @@ aes-ia64.s: asm/aes-ia64.S aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl $(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ +vpaes-x86.s: asm/vpaes-x86.pl ../perlasm/x86asm.pl + $(PERL) asm/vpaes-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ aes-x86_64.s: asm/aes-x86_64.pl $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@ +vpaes-x86_64.s: asm/vpaes-x86_64.pl + $(PERL) asm/vpaes-x86_64.pl $(PERLASM_SCHEME) > $@ aesni-x86_64.s: asm/aesni-x86_64.pl $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@ aesni-sha1-x86_64.s: asm/aesni-sha1-x86_64.pl diff --git a/crypto/aes/asm/vpaes-x86.pl b/crypto/aes/asm/vpaes-x86.pl index efe68dff41..f2414ccd23 100644 --- a/crypto/aes/asm/vpaes-x86.pl +++ b/crypto/aes/asm/vpaes-x86.pl @@ -21,7 +21,7 @@ # about its alignment... # # Performance summary. aes-586.pl column lists large-block CBC -# encrypt/decrypt/with-hypert-hreading-off(*) results in cycles per +# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per # byte processed with 128-bit key, and vpaes-x86.pl column - # encrypt/decrypt. # @@ -41,7 +41,8 @@ # # (***) Less impressive improvement on Core 2 and Atom is due to slow # pshufb, yet it's respectable +32%/65% improvement on Core 2 -# and +58%/40% on Atom. +# and +58%/40% on Atom (as implied, over "hyper-threading-safe" +# code path). # # <appro@openssl.org> @@ -51,7 +52,7 @@ require "x86asm.pl"; &asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); -$PREFIX="AES"; +$PREFIX="vpaes"; my ($round, $base, $magic, $key, $const, $inp, $out)= ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl index 3ea85d23e8..01011defbd 100644 --- a/crypto/aes/asm/vpaes-x86_64.pl +++ b/crypto/aes/asm/vpaes-x86_64.pl @@ -21,7 +21,7 @@ # about its alignment... # # Performance summary. aes-x86_64.pl column lists large-block CBC -# encrypt/decrypt/with-hypert-hreading-off(*) results in cycles per +# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per # byte processed with 128-bit key, and vpaes-x86_64.pl column - # encrypt/decrypt. # @@ -40,7 +40,8 @@ # (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe. # # (***) Less impressive improvement on Core 2 and Atom is due to slow -# pshufb, yet it's respectable +40%/78% improvement on Core 2. +# pshufb, yet it's respectable +40%/78% improvement on Core 2 +# (as implied, over "hyper-threading-safe" code path). # # <appro@openssl.org> @@ -57,7 +58,7 @@ die "can't locate x86_64-xlate.pl"; open STDOUT,"| $^X $xlate $flavour $output"; -$PREFIX="AES"; +$PREFIX="vpaes"; $code.=<<___; .text |