summary | refs | log | tree | commit | diff | stats
path: root/engines/asm
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2011-10-01 10:16:13 +0000
committer: Andy Polyakov <appro@openssl.org> 2011-10-01 10:16:13 +0000
commit: 3edc26a2563c607273f36a253744ed41ba2ff116 (patch)
tree: 6bf9a4da1bf54e753d8ea91532bc0e56c27cdefb /engines/asm
parent: 10465aca60ca13866fdb83f7a866c3167ae1cea9 (diff)
e_padlock-x86.pl: make it work on VIA C3 (which doesn't support SSE2).
Diffstat (limited to 'engines/asm')
-rw-r--r-- engines/asm/e_padlock-x86.pl | 37
1 file changed, 17 insertions, 20 deletions
diff --git a/engines/asm/e_padlock-x86.pl b/engines/asm/e_padlock-x86.pl
index 1ee622ff88..47f05dc9f1 100644
--- a/engines/asm/e_padlock-x86.pl
+++ b/engines/asm/e_padlock-x86.pl
@@ -177,7 +177,7 @@ my ($mode,$opcode) = @_;
&lea ($ctx,&DWP(16,$ctx)); # control word
&xor ("eax","eax");
if ($mode eq "ctr16") {
- &movdqa ("xmm0",&QWP(-16,$ctx));# load iv
+ &movq ("xmm0",&QWP(-16,$ctx));# load [upper part of] counter
} else {
&xor ("ebx","ebx");
&test (&DWP(0,$ctx),1<<5); # align bit in control word
@@ -210,20 +210,21 @@ my ($mode,$opcode) = @_;
&mov ($len,$chunk);
&mov (&DWP(12,"ebp"),$chunk); # chunk
if ($mode eq "ctr16") {
- &pextrw ("ecx","xmm0",7); # borrow $len
- &mov ($inp,1);
+ &mov ("ecx",&DWP(-4,$ctx));
&xor ($out,$out);
- &xchg ("ch","cl");
+ &mov ("eax",&DWP(-8,$ctx)); # borrow $len
&set_label("${mode}_prepare");
- &movdqa (&QWP(0,"esp",$out),"xmm0");
- &lea ("eax",&DWP(0,"ecx",$inp));
- &xchg ("ah","al");
+ &mov (&DWP(12,"esp",$out),"ecx");
+ &bswap ("ecx");
+ &movq (&QWP(0,"esp",$out),"xmm0");
+ &inc ("ecx");
+ &mov (&DWP(8,"esp",$out),"eax");
+ &bswap ("ecx");
&lea ($out,&DWP(16,$out));
- &pinsrw ("xmm0","eax",7);
- &lea ($inp,&DWP(1,$inp));
&cmp ($out,$chunk);
&jb (&label("${mode}_prepare"));
+ &mov (&DWP(-4,$ctx),"ecx");
&lea ($inp,&DWP(0,"esp"));
&lea ($out,&DWP(0,"esp"));
&mov ($len,$chunk);
@@ -244,8 +245,8 @@ my ($mode,$opcode) = @_;
&shr ($len,4); # len/=AES_BLOCK_SIZE
&data_byte(0xf3,0x0f,0xa7,$opcode); # rep xcrypt*
if ($mode !~ /ecb|ctr/) {
- &movdqa ("xmm0",&QWP(0,"eax"));
- &movdqa (&QWP(-16,$ctx),"xmm0"); # copy [or refresh] iv
+ &movaps ("xmm0",&QWP(0,"eax"));
+ &movaps (&QWP(-16,$ctx),"xmm0"); # copy [or refresh] iv
}
&mov ($out,&DWP(0,"ebp")); # restore parameters
&mov ($chunk,&DWP(12,"ebp"));
@@ -253,10 +254,10 @@ my ($mode,$opcode) = @_;
&mov ($inp,&DWP(4,"ebp"));
&xor ($len,$len);
&set_label("${mode}_xor");
- &movdqu ("xmm1",&QWP(0,$inp,$len));
+ &movups ("xmm1",&QWP(0,$inp,$len));
&lea ($len,&DWP(16,$len));
&pxor ("xmm1",&QWP(-16,"esp",$len));
- &movdqu (&QWP(-16,$out,$len),"xmm1");
+ &movups (&QWP(-16,$out,$len),"xmm1");
&cmp ($len,$chunk);
&jb (&label("${mode}_xor"));
} else {
@@ -276,11 +277,7 @@ my ($mode,$opcode) = @_;
&sub ($len,$chunk);
&mov ($chunk,$PADLOCK_CHUNK);
&jnz (&label("${mode}_loop"));
- if ($mode eq "ctr16") {
- &movdqa (&QWP(-16,$ctx),"xmm0"); # write out iv
- &pxor ("xmm0","xmm0");
- &pxor ("xmm1","xmm1");
- } else {
+ if ($mode ne "ctr16") {
&test ($out,0x0f); # out_misaligned
&jz (&label("${mode}_done"));
}
@@ -301,8 +298,8 @@ my ($mode,$opcode) = @_;
&shr ($len,4); # len/=AES_BLOCK_SIZE
&data_byte(0xf3,0x0f,0xa7,$opcode); # rep xcrypt*
if ($mode ne "ecb") {
- &movdqa ("xmm0",&QWP(0,"eax"));
- &movdqa (&QWP(-16,$ctx),"xmm0"); # copy [or refresh] iv
+ &movaps ("xmm0",&QWP(0,"eax"));
+ &movaps (&QWP(-16,$ctx),"xmm0"); # copy [or refresh] iv
}
&set_label("${mode}_exit"); }
&mov ("eax",1);