summary | refs | log | tree | commit | diff | stats
path: root/engines/asm/e_padlock-x86.pl
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2011-10-11 21:07:53 +0000
committer: Andy Polyakov <appro@openssl.org> 2011-10-11 21:07:53 +0000
commit: 6c8ce3c2ffd8aee6d0db6e37a369f64586ad8f31 (patch)
tree: b2b31586bbde95134ae2436377aecaf6aa28913c /engines/asm/e_padlock-x86.pl
parent: 3231e42d726dcb1c9fd064ea8350d4f362718443 (diff)
e_padlock-x86[_64].pl: protection against prefetch errata.
Diffstat (limited to 'engines/asm/e_padlock-x86.pl')
-rw-r--r-- engines/asm/e_padlock-x86.pl | 40
1 files changed, 32 insertions, 8 deletions
diff --git a/engines/asm/e_padlock-x86.pl b/engines/asm/e_padlock-x86.pl
index e211706ae1..1b2ba52253 100644
--- a/engines/asm/e_padlock-x86.pl
+++ b/engines/asm/e_padlock-x86.pl
@@ -37,6 +37,7 @@ require "x86asm.pl";
&asm_init($ARGV[0],$0);
+%PADLOCK_MARGIN=(ecb=>128, cbc=>64); # prefetch errata
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
$ctx="edx";
@@ -187,6 +188,10 @@ my ($mode,$opcode) = @_;
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
} else {
&xor ("ebx","ebx");
+ if ($PADLOCK_MARGIN{$mode}) {
+ &cmp ($len,$PADLOCK_MARGIN{$mode});
+ &jbe (&label("${mode}_short"));
+ }
&test (&DWP(0,$ctx),1<<5); # align bit in control word
&jnz (&label("${mode}_aligned"));
&test ($out,0x0f);
@@ -285,20 +290,39 @@ my ($mode,$opcode) = @_;
&mov ($chunk,$PADLOCK_CHUNK);
&jnz (&label("${mode}_loop"));
if ($mode ne "ctr32") {
- &test ($out,0x0f); # out_misaligned
- &jz (&label("${mode}_done"));
+ &cmp ("esp","ebp");
+ &je (&label("${mode}_done"));
}
- &mov ($len,"ebp");
- &mov ($out,"esp");
- &sub ($len,"esp");
- &xor ("eax","eax");
- &shr ($len,2);
- &data_byte(0xf3,0xab); # rep stosl
+ &pxor ("xmm0","xmm0");
+ &lea ("eax",&DWP(0,"esp"));
+&set_label("${mode}_bzero");
+ &movaps (&QWP(0,"eax"),"xmm0");
+ &lea ("eax",&DWP(16,"eax"));
+ &cmp ("ebp","eax");
+ &ja (&label("${mode}_bzero"));
+
&set_label("${mode}_done");
&lea ("esp",&DWP(24,"ebp"));
if ($mode ne "ctr32") {
&jmp (&label("${mode}_exit"));
+&set_label("${mode}_short",16);
+ &xor ("eax","eax");
+ &lea ("ebp",&DWP(-24,"esp"));
+ &sub ("eax",$len);
+ &lea ("esp",&DWP(0,"eax","ebp"));
+ &and ("esp",-16);
+ &xor ($chunk,$chunk);
+&set_label("${mode}_short_copy");
+ &movups ("xmm0",&QWP(0,$inp,$chunk));
+ &lea ($chunk,&DWP(16,$chunk));
+ &cmp ($len,$chunk);
+ &movaps (&QWP(-16,"esp",$chunk),"xmm0");
+ &ja (&label("${mode}_short_copy"));
+ &mov ($inp,"esp");
+ &mov ($chunk,$len);
+ &jmp (&label("${mode}_loop"));
+
&set_label("${mode}_aligned",16);
&lea ("eax",&DWP(-16,$ctx)); # ivp
&lea ("ebx",&DWP(16,$ctx)); # key