author     Andy Polyakov <appro@openssl.org>  2011-10-11 21:07:53 +0000
committer  Andy Polyakov <appro@openssl.org>  2011-10-11 21:07:53 +0000
commit     6c8ce3c2ffd8aee6d0db6e37a369f64586ad8f31 (patch)
tree       b2b31586bbde95134ae2436377aecaf6aa28913c /engines
parent     3231e42d726dcb1c9fd064ea8350d4f362718443 (diff)
e_padlock-x86[_64].pl: protection against prefetch errata.
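
The erratum being worked around: PadLock's xcrypt instructions prefetch a fixed distance beyond the end of their input, so a buffer that ends close enough to an unmapped page can trigger a spurious fault. The patch introduces per-mode safety margins (ecb=>128, cbc=>64, plus ctr32=>64 in the x86_64 version) and routes any input no longer than its margin through an aligned scratch buffer on the stack, wiping that buffer afterwards. A rough C sketch of the resulting control flow, assuming a hypothetical padlock_xcrypt() in place of the emitted REP XCRYPT sequence:

#include <stddef.h>
#include <string.h>

#define MARGIN 128                      /* ecb; cbc (and ctr32 on x86_64) use 64 */

/* Hypothetical stand-in for the REP XCRYPT sequence the Perl code emits. */
void padlock_xcrypt(void *out, const void *inp, size_t len);

void padlock_encrypt(void *out, const void *inp, size_t len)
{
    if (len <= MARGIN) {
        /*
         * Input ends within prefetch reach of a potentially unmapped
         * page: bounce it through stack memory, which is always
         * followed by more readable stack, so the prefetch cannot
         * fault. (The real code also 16-byte aligns the buffer.)
         */
        unsigned char tmp[MARGIN + 16];
        memcpy(tmp, inp, len);
        padlock_xcrypt(out, tmp, len);
        memset(tmp, 0, sizeof(tmp));    /* wipe plaintext from the stack */
    } else {
        padlock_xcrypt(out, inp, len);  /* long inputs are processed in place */
    }
}

The assembly versions additionally round the copy up to whole 16-byte movups/movaps transfers and jump back into the main processing loop instead of making a call, which the sketch elides.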
Diffstat (limited to 'engines')
-rw-r--r--  engines/asm/e_padlock-x86.pl    | 40
-rw-r--r--  engines/asm/e_padlock-x86_64.pl | 50
2 files changed, 73 insertions(+), 17 deletions(-)
diff --git a/engines/asm/e_padlock-x86.pl b/engines/asm/e_padlock-x86.pl
index e211706ae1..1b2ba52253 100644
--- a/engines/asm/e_padlock-x86.pl
+++ b/engines/asm/e_padlock-x86.pl
@@ -37,6 +37,7 @@ require "x86asm.pl";
&asm_init($ARGV[0],$0);
+%PADLOCK_MARGIN=(ecb=>128, cbc=>64); # prefetch errata
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
$ctx="edx";
@@ -187,6 +188,10 @@ my ($mode,$opcode) = @_;
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
} else {
&xor ("ebx","ebx");
+ if ($PADLOCK_MARGIN{$mode}) {
+ &cmp ($len,$PADLOCK_MARGIN{$mode});
+ &jbe (&label("${mode}_short"));
+ }
&test (&DWP(0,$ctx),1<<5); # align bit in control word
&jnz (&label("${mode}_aligned"));
&test ($out,0x0f);
@@ -285,20 +290,39 @@ my ($mode,$opcode) = @_;
&mov ($chunk,$PADLOCK_CHUNK);
&jnz (&label("${mode}_loop"));
if ($mode ne "ctr32") {
- &test ($out,0x0f); # out_misaligned
- &jz (&label("${mode}_done"));
+ &cmp ("esp","ebp");
+ &je (&label("${mode}_done"));
}
- &mov ($len,"ebp");
- &mov ($out,"esp");
- &sub ($len,"esp");
- &xor ("eax","eax");
- &shr ($len,2);
- &data_byte(0xf3,0xab); # rep stosl
+ &pxor ("xmm0","xmm0");
+ &lea ("eax",&DWP(0,"esp"));
+&set_label("${mode}_bzero");
+ &movaps (&QWP(0,"eax"),"xmm0");
+ &lea ("eax",&DWP(16,"eax"));
+ &cmp ("ebp","eax");
+ &ja (&label("${mode}_bzero"));
+
&set_label("${mode}_done");
&lea ("esp",&DWP(24,"ebp"));
if ($mode ne "ctr32") {
&jmp (&label("${mode}_exit"));
+&set_label("${mode}_short",16);
+ &xor ("eax","eax");
+ &lea ("ebp",&DWP(-24,"esp"));
+ &sub ("eax",$len);
+ &lea ("esp",&DWP(0,"eax","ebp"));
+ &and ("esp",-16);
+ &xor ($chunk,$chunk);
+&set_label("${mode}_short_copy");
+ &movups ("xmm0",&QWP(0,$inp,$chunk));
+ &lea ($chunk,&DWP(16,$chunk));
+ &cmp ($len,$chunk);
+ &movaps (&QWP(-16,"esp",$chunk),"xmm0");
+ &ja (&label("${mode}_short_copy"));
+ &mov ($inp,"esp");
+ &mov ($chunk,$len);
+ &jmp (&label("${mode}_loop"));
+
&set_label("${mode}_aligned",16);
&lea ("eax",&DWP(-16,$ctx)); # ivp
&lea ("ebx",&DWP(16,$ctx)); # key
diff --git a/engines/asm/e_padlock-x86_64.pl b/engines/asm/e_padlock-x86_64.pl
index db79a62ad6..5091c7aaca 100644
--- a/engines/asm/e_padlock-x86_64.pl
+++ b/engines/asm/e_padlock-x86_64.pl
@@ -27,6 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
$code=".text\n";
+%PADLOCK_MARGIN=(ecb=>128, cbc=>64, ctr32=>64); # prefetch errata
$PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
$ctx="%rdx";
@@ -284,6 +285,17 @@ padlock_${mode}_encrypt:
lea 16($ctx),$ctx # control word
xor %eax,%eax
xor %ebx,%ebx
+___
+# Formally speaking, the correct condition is $len<=$margin and $inp+$margin
+# crosses a page boundary [and the next page is unreadable]. But $inp can
+# be unaligned, in which case data can be copied to $out if the latter is
+# aligned, and then $out+$margin has to be checked instead. Covering all
+# cases appears more complicated than just copying short input...
+$code.=<<___ if ($PADLOCK_MARGIN{$mode});
+ cmp \$$PADLOCK_MARGIN{$mode},$len
+ jbe .L${mode}_short
+___
+$code.=<<___;
testl \$`1<<5`,($ctx) # align bit in control word
jnz .L${mode}_aligned
test \$0x0f,$out
@@ -305,6 +317,7 @@ padlock_${mode}_encrypt:
lea (%rax,%rbp),%rsp
___
$code.=<<___ if ($mode eq "ctr32");
+.L${mode}_reenter:
mov -4($ctx),%eax # pull 32-bit counter
bswap %eax
neg %eax
@@ -373,19 +386,38 @@ $code.=<<___;
mov \$$PADLOCK_CHUNK,$chunk
jnz .L${mode}_loop
- test \$0x0f,$out
- jz .L${mode}_done
+ cmp %rsp,%rbp
+ je .L${mode}_done
+
+ pxor %xmm0,%xmm0
+ lea (%rsp),%rax
+.L${mode}_bzero:
+ movaps %xmm0,(%rax)
+ lea 16(%rax),%rax
+ cmp %rax,%rbp
+ ja .L${mode}_bzero
- mov %rbp,$len
- mov %rsp,$out
- sub %rsp,$len
- xor %rax,%rax
- shr \$3,$len
- .byte 0xf3,0x48,0xab # rep stosq
.L${mode}_done:
lea (%rbp),%rsp
jmp .L${mode}_exit
-
+___
+$code.=<<___ if ($PADLOCK_MARGIN{$mode});
+.align 16
+.L${mode}_short:
+ mov %rsp,%rbp
+ sub $len,%rsp
+ xor $chunk,$chunk
+.L${mode}_short_copy:
+ movups ($inp,$chunk),%xmm0
+ lea 16($chunk),$chunk
+ cmp $chunk,$len
+ movaps %xmm0,-16(%rsp,$chunk)
+ ja .L${mode}_short_copy
+ mov %rsp,$inp
+ mov $len,$chunk
+ jmp .L${mode}_`${mode} eq "ctr32"?"reenter":"loop"`
+___
+$code.=<<___;
.align 16
.L${mode}_aligned:
___
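
As the added comment concedes, copying every short input is coarser than strictly necessary. The precise trigger it describes is sketched below, assuming 4 KiB pages (needs_bounce() and PAGE_MASK are hypothetical names); the same test would have to be repeated against $out whenever misaligned input is rerouted through the output buffer, which is why the patch settles for the simpler length check:

#include <stddef.h>
#include <stdint.h>

#define PAGE_MASK (~(uintptr_t)0xfff)   /* assumes 4 KiB pages */

/*
 * The "formally correct" condition from the comment: the input is
 * short and reading margin bytes past it would cross into the next,
 * possibly unreadable, page.
 */
static int needs_bounce(const void *inp, size_t len, size_t margin)
{
    uintptr_t p = (uintptr_t)inp;

    return len <= margin && ((p + margin) & PAGE_MASK) != (p & PAGE_MASK);
}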