summary refs log tree commit diff stats
path: root/crypto/x86cpuid.pl
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2007-05-14 21:35:25 +0000
committer: Andy Polyakov <appro@openssl.org> 2007-05-14 21:35:25 +0000
commit: b2dba9bf1f8f73376b9c1f0904a86996c728b236 (patch)
tree: 8848a4f5efdf02d841b1ebea4969d879d6e61eb5 /crypto/x86cpuid.pl
parent: 932cc129ee61f5b72636eee6a7c3268e23967f7b (diff)
Profiling revealed that OPENSSL_cleanse consumes *more* CPU time than
sha1_block_data_order when hashing short messages. Move OPENSSL_cleanse to "cpuid" assembler module and gain 2x.
Diffstat (limited to 'crypto/x86cpuid.pl')
-rw-r--r-- crypto/x86cpuid.pl 31
1 files changed, 31 insertions, 0 deletions
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index 7d924a60b7..13828d5633 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -216,6 +216,37 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
}
&function_end_B("OPENSSL_indirect_call");
+# OPENSSL_cleanse(ptr, len): overwrite len bytes starting at ptr with zeros.
+#
+# In:    wparam(0) = destination pointer, wparam(1) = byte count
+# Out:   no result; eax is left at 0 on every path
+# Clobb: eax, ecx, edx, flags (no callee-saved register is touched)
+#
+# Counts below 7 are cleared one byte at a time.  Larger buffers are first
+# byte-stepped up to a 4-byte boundary, then cleared a dword at a time, and
+# any 1..3 byte tail is handed back to the byte loop.  A zero count returns
+# immediately without storing anything.
+&function_begin_B("OPENSSL_cleanse");
+	&mov	("edx",&wparam(0));		# edx = write cursor
+	&mov	("ecx",&wparam(1));		# ecx = bytes remaining
+	&xor	("eax","eax");			# eax = 0, the fill value
+	&cmp	("ecx",7);
+	&jae	(&label("lot"));		# unsigned compare: count is a size
+	# The byte loop below stores before it decrements, so entering it with
+	# ecx == 0 would wrap the counter and run far past the buffer.
+	&cmp	("ecx",0);
+	&je	(&label("done"));
+&set_label("little");				# invariant: ecx > 0 here
+	&mov	(&BP(0,"edx"),"al");
+	&sub	("ecx",1);			# sets ZF for the jnz below
+	&lea	("edx",&DWP(1,"edx"));		# lea: advance without touching flags
+	&jnz	(&label("little"));
+&set_label("done");
+	&ret	();
+
+&set_label("lot",16);				# ecx >= 7 on first entry
+	&test	("edx",3);
+	&jz	(&label("aligned"));
+	# At most 3 single-byte stores are needed to reach a 4-byte boundary,
+	# so at least 4 bytes remain when the dword loop is entered.
+	&mov	(&BP(0,"edx"),"al");
+	&lea	("ecx",&DWP(-1,"ecx"));
+	&lea	("edx",&DWP(1,"edx"));
+	&jmp	(&label("lot"));
+&set_label("aligned");				# invariant: ecx >= 4 here
+	&mov	(&DWP(0,"edx"),"eax");
+	&lea	("ecx",&DWP(-4,"ecx"));
+	&test	("ecx",-4);			# non-zero iff 4 or more bytes remain
+	&lea	("edx",&DWP(4,"edx"));		# lea keeps the flags from test
+	&jnz	(&label("aligned"));
+	&cmp	("ecx",0);
+	&jne	(&label("little"));		# 1..3 byte tail
+	&ret	();
+&function_end_B("OPENSSL_cleanse");
+
# NOTE(review): presumably registers OPENSSL_cpuid_setup to run at load time;
# &initseg is defined outside this view - confirm against the perlasm helper.
&initseg("OPENSSL_cpuid_setup");
# NOTE(review): presumably finalizes/emits the generated assembly; &asm_finish
# is defined outside this view - confirm against the perlasm helper.
&asm_finish();