summary | refs | log | tree | commit | diff | stats
path: root/crypto
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2014-06-06 21:27:18 +0200
committer: Andy Polyakov <appro@openssl.org> 2014-06-10 22:51:15 +0200
commit: 3a97ebb16b0d1eaab83171b9220280dacd99bf04 (patch)
tree: 89717093f8561e28fc5e6c40e02c8411d8eb0f48 /crypto
parent: 16f4d2e32f1ea6cfa668ae64493998ae51db7e51 (diff)
ARM assembly pack: get ARMv7 instruction endianness right.
Pointed out and suggested by: Ard Biesheuvel. (cherry picked from commit 5dcf70a1c57c2019bfad640fe14fd4a73212860a)
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/aes/asm/aes-armv4.pl 5
-rw-r--r-- crypto/armv4cpuid.S 70
-rw-r--r-- crypto/bn/asm/armv4-gf2m.pl 3
-rw-r--r-- crypto/bn/asm/armv4-mont.pl 10
-rw-r--r-- crypto/modes/asm/ghash-armv4.pl 5
-rw-r--r-- crypto/sha/asm/sha1-armv4-large.pl 20
-rw-r--r-- crypto/sha/asm/sha256-armv4.pl 20
-rw-r--r-- crypto/sha/asm/sha512-armv4.pl 3
8 files changed, 89 insertions, 47 deletions
diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl
index a5d97ce053..4f8917089f 100644
--- a/crypto/aes/asm/aes-armv4.pl
+++ b/crypto/aes/asm/aes-armv4.pl
@@ -715,8 +715,8 @@ _armv4_AES_set_encrypt_key:
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
.Labrt:
-#if defined(__thumb2__) && __ARM_ARCH__>=7
- .short 0x4770 @ bx lr in Thumb2 encoding
+#if __ARM_ARCH__>=5
+ ret @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
@@ -1203,6 +1203,7 @@ _armv4_AES_decrypt:
___
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx\tlr/gm;
open SELF,$0;
while(<SELF>) {
diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S
index 4f6ae17232..add11d405e 100644
--- a/crypto/armv4cpuid.S
+++ b/crypto/armv4cpuid.S
@@ -7,42 +7,46 @@
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
- .word 0xf26ee1fe @ vorr q15,q15,q15
- .word 0xe12fff1e @ bx lr
+ .byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
- mrrc p15,1,r0,r1,c14 @ CNTVCT
- .word 0xe12fff1e @ bx lr
+ mrrc p15,1,r0,r1,c14 @ CNTVCT
+#if __ARM_ARCH__>=5
+ bx lr
+#else
+ .word 0xe12fff1e @ bx lr
+#endif
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
- .word 0xf3b00300 @ aese.8 q0,q0
- .word 0xe12fff1e @ bx lr
+ .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
- .word 0xf2000c40 @ sha1c.32 q0,q0,q0
- .word 0xe12fff1e @ bx lr
+ .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
- .word 0xf3000c40 @ sha256h.32 q0,q0,q0
- .word 0xe12fff1e @ bx lr
+ .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
- .word 0xf2a00e00 @ vmull.p64 q0,d0,d0
- .word 0xe12fff1e @ bx lr
+ .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
+ .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
.align 5
@@ -56,7 +60,7 @@ OPENSSL_atomic_add:
cmp r2,#0
bne .Ladd
mov r0,r3
- .word 0xe12fff1e @ bx lr
+ bx lr
#else
stmdb sp!,{r4-r6,lr}
ldr r2,.Lspinlock
@@ -109,9 +113,13 @@ OPENSSL_cleanse:
adds r1,r1,#4
bne .Little
.Lcleanse_done:
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.global OPENSSL_wipe_cpu
@@ -125,41 +133,53 @@ OPENSSL_wipe_cpu:
eor ip,ip,ip
tst r0,#1
beq .Lwipe_done
- .word 0xf3000150 @ veor q0, q0, q0
- .word 0xf3022152 @ veor q1, q1, q1
- .word 0xf3044154 @ veor q2, q2, q2
- .word 0xf3066156 @ veor q3, q3, q3
- .word 0xf34001f0 @ veor q8, q8, q8
- .word 0xf34221f2 @ veor q9, q9, q9
- .word 0xf34441f4 @ veor q10, q10, q10
- .word 0xf34661f6 @ veor q11, q11, q11
- .word 0xf34881f8 @ veor q12, q12, q12
- .word 0xf34aa1fa @ veor q13, q13, q13
- .word 0xf34cc1fc @ veor q14, q14, q14
- .word 0xf34ee1fe @ veor q15, q15, q15
+ .byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
+ .byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
+ .byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
+ .byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
+ .byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
+ .byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
+ .byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
+ .byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
+ .byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
+ .byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
+ .byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
+ .byte 0xfe,0xe1,0x4e,0xf3 @ veor q15, q15, q15
.Lwipe_done:
mov r0,sp
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.global OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,%function
OPENSSL_instrument_bus:
eor r0,r0,r0
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.global OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,%function
OPENSSL_instrument_bus2:
eor r0,r0,r0
+#if __ARM_ARCH__>=5
+ bx lr
+#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
+#endif
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl
index c66495040c..b781afbf89 100644
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -202,7 +202,7 @@ bn_GF2m_mul_2x2:
veor $r, $r, $t2
vst1.32 {$r}, [r0]
- bx lr
+ ret @ bx lr
.align 4
.Lialu:
#endif
@@ -273,6 +273,7 @@ foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/geo;
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";
diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl
index fe81f9b6f6..72bad8e308 100644
--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -230,9 +230,14 @@ bn_mul_mont:
ldmia sp!,{r4-r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
-.Labrt: tst lr,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
+#endif
.size bn_mul_mont,.-bn_mul_mont
___
{
@@ -650,7 +655,7 @@ bn_mul8x_mont_neon:
sub sp,ip,#96
vldmia sp!,{d8-d15}
ldmia sp!,{r4-r11}
- bx lr
+ ret @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
___
@@ -665,5 +670,6 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx lr/gm;
print $code;
close STDOUT;
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index 0b0dcc8a68..0023bf994b 100644
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -386,7 +386,7 @@ gcm_init_neon:
veor $IN,$IN,$t0 @ twisted H
vstmia r0,{$IN}
- bx lr
+ ret @ bx lr
.size gcm_init_neon,.-gcm_init_neon
.global gcm_gmult_neon
@@ -470,7 +470,7 @@ $code.=<<___;
vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi
vst1.64 $Xl#lo,[$Xi,:64]
- bx lr
+ ret @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
___
@@ -484,6 +484,7 @@ foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/geo;
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
index 43a1b9fd7f..50bd07b331 100644
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -631,7 +631,7 @@ $code.=<<___;
vst1.32 {$E\[0]},[$ctx]
vldmia sp!,{d8-d15}
- bx lr
+ ret @ bx lr
.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
#endif
___
@@ -648,13 +648,18 @@ ___
sub unsha1 {
my ($mnemonic,$arg)=@_;
- $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o
- &&
- sprintf ".long\t0x%08x\t@ %s %s",
- $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
- |(($2&7)<<17)|(($2&8)<<4)
- |(($3&7)<<1) |(($3&8)<<2),
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
+ }
}
}
@@ -664,6 +669,7 @@ foreach (split($/,$code)) {
s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo;
+ s/\bret\b/bx lr/o or
s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4
print $_,$/;
diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl
index 5e5c54ec18..505ca8f350 100644
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -608,7 +608,7 @@ $code.=<<___;
vst1.32 {$ABCD,$EFGH},[$ctx]
- bx lr
+ ret @ bx lr
.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
@@ -626,13 +626,18 @@ ___
sub unsha256 {
my ($mnemonic,$arg)=@_;
- $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o
- &&
- sprintf ".long\t0x%08x\t@ %s %s",
- $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
- |(($2&7)<<17)|(($2&8)<<4)
- |(($3&7)<<1) |(($3&8)<<2),
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
+ }
}
}
@@ -642,6 +647,7 @@ foreach (split($/,$code)) {
s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
+ s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
index d3065794b3..1d5275b917 100644
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -584,7 +584,7 @@ $code.=<<___;
bne .Loop_neon
vldmia sp!,{d8-d15} @ epilogue
- bx lr
+ ret @ bx lr
#endif
___
}
@@ -597,5 +597,6 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx lr/gm;
print $code;
close STDOUT; # enforce flush