summaryrefslogtreecommitdiffstats
path: root/crypto/md5/asm
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2005-07-19 22:33:03 +0000
committerAndy Polyakov <appro@openssl.org>2005-07-19 22:33:03 +0000
commit0f04379d9cd08107e2915d6121b3831f8df08e70 (patch)
tree37772f222ef58ced569b1940e3d0fcc3534354dc /crypto/md5/asm
parent7e4d335943839b54e6f25625c7b12c263ec6a2d4 (diff)
This update gets endianness-neutrality right and adds the second required
entry point, md5_block_asm_data_order.
Diffstat (limited to 'crypto/md5/asm')
-rw-r--r--crypto/md5/asm/md5-ia64.S249
1 files changed, 133 insertions, 116 deletions
diff --git a/crypto/md5/asm/md5-ia64.S b/crypto/md5/asm/md5-ia64.S
index 900263224f..73273fa828 100644
--- a/crypto/md5/asm/md5-ia64.S
+++ b/crypto/md5/asm/md5-ia64.S
@@ -86,6 +86,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define pPad2 p12
#define pPad3 p13
#define pSkip p8
+// These two below shall remain constant throughout the whole routine
+#define pDataOrder p14
+#define pHostOrder p15
#define A_ out24
#define B_ out25
@@ -159,6 +162,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define _NOUTPUT 0
#define _NROTATE 24 /* this must be <= _NINPUTS */
+#if defined(_HPUX_SOURCE) && !defined(_LP64)
+#define ADDP addp4
+#else
+#define ADDP add
+#endif
// Macros for getting the left and right portions of little-endian words
@@ -225,78 +233,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define LCSave r21
#define PFSSave r20
#define PRSave r22
-#define pAgain p14
-#define pOff p14
-
- .rodata
- // Values are specified as bytes to ensure they are
- // in little-endian byte-order.
- .align 4
-md5_round_constants:
- data1 0x78, 0xa4, 0x6a, 0xd7 // 0
- data1 0x56, 0xb7, 0xc7, 0xe8 // 1
- data1 0xdb, 0x70, 0x20, 0x24 // 2
- data1 0xee, 0xce, 0xbd, 0xc1 // 3
- data1 0xaf, 0x0f, 0x7c, 0xf5 // 4
- data1 0x2a, 0xc6, 0x87, 0x47 // 5
- data1 0x13, 0x46, 0x30, 0xa8 // 6
- data1 0x01, 0x95, 0x46, 0xfd // 7
- data1 0xd8, 0x98, 0x80, 0x69 // 8
- data1 0xaf, 0xf7, 0x44, 0x8b // 9
- data1 0xb1, 0x5b, 0xff, 0xff // 10
- data1 0xbe, 0xd7, 0x5c, 0x89 // 11
- data1 0x22, 0x11, 0x90, 0x6b // 12
- data1 0x93, 0x71, 0x98, 0xfd // 13
- data1 0x8e, 0x43, 0x79, 0xa6 // 14
- data1 0x21, 0x08, 0xb4, 0x49 // 15
- data1 0x62, 0x25, 0x1e, 0xf6 // 16
- data1 0x40, 0xb3, 0x40, 0xc0 // 17
- data1 0x51, 0x5a, 0x5e, 0x26 // 18
- data1 0xaa, 0xc7, 0xb6, 0xe9 // 19
- data1 0x5d, 0x10, 0x2f, 0xd6 // 20
- data1 0x53, 0x14, 0x44, 0x02 // 21
- data1 0x81, 0xe6, 0xa1, 0xd8 // 22
- data1 0xc8, 0xfb, 0xd3, 0xe7 // 23
- data1 0xe6, 0xcd, 0xe1, 0x21 // 24
- data1 0xd6, 0x07, 0x37, 0xc3 // 25
- data1 0x87, 0x0d, 0xd5, 0xf4 // 26
- data1 0xed, 0x14, 0x5a, 0x45 // 27
- data1 0x05, 0xe9, 0xe3, 0xa9 // 28
- data1 0xf8, 0xa3, 0xef, 0xfc // 29
- data1 0xd9, 0x02, 0x6f, 0x67 // 30
- data1 0x8a, 0x4c, 0x2a, 0x8d // 31
- data1 0x42, 0x39, 0xfa, 0xff // 32
- data1 0x81, 0xf6, 0x71, 0x87 // 33
- data1 0x22, 0x61, 0x9d, 0x6d // 34
- data1 0x0c, 0x38, 0xe5, 0xfd // 35
- data1 0x44, 0xea, 0xbe, 0xa4 // 36
- data1 0xa9, 0xcf, 0xde, 0x4b // 37
- data1 0x60, 0x4b, 0xbb, 0xf6 // 38
- data1 0x70, 0xbc, 0xbf, 0xbe // 39
- data1 0xc6, 0x7e, 0x9b, 0x28 // 40
- data1 0xfa, 0x27, 0xa1, 0xea // 41
- data1 0x85, 0x30, 0xef, 0xd4 // 42
- data1 0x05, 0x1d, 0x88, 0x04 // 43
- data1 0x39, 0xd0, 0xd4, 0xd9 // 44
- data1 0xe5, 0x99, 0xdb, 0xe6 // 45
- data1 0xf8, 0x7c, 0xa2, 0x1f // 46
- data1 0x65, 0x56, 0xac, 0xc4 // 47
- data1 0x44, 0x22, 0x29, 0xf4 // 48
- data1 0x97, 0xff, 0x2a, 0x43 // 49
- data1 0xa7, 0x23, 0x94, 0xab // 50
- data1 0x39, 0xa0, 0x93, 0xfc // 51
- data1 0xc3, 0x59, 0x5b, 0x65 // 52
- data1 0x92, 0xcc, 0x0c, 0x8f // 53
- data1 0x7d, 0xf4, 0xef, 0xff // 54
- data1 0xd1, 0x5d, 0x84, 0x85 // 55
- data1 0x4f, 0x7e, 0xa8, 0x6f // 56
- data1 0xe0, 0xe6, 0x2c, 0xfe // 57
- data1 0x14, 0x43, 0x01, 0xa3 // 58
- data1 0xa1, 0x11, 0x08, 0x4e // 59
- data1 0x82, 0x7e, 0x53, 0xf7 // 60
- data1 0x35, 0xf2, 0x3a, 0xbd // 61
- data1 0xbb, 0xd2, 0xd7, 0x2a // 62
- data1 0x91, 0xd3, 0x86, 0xeb // 63
+#define pAgain p63
+#define pOff p63
.text
@@ -320,53 +258,48 @@ md5_round_constants:
*/
+ .type md5_block_asm_data_order, @function
+ .global md5_block_asm_data_order
+ .align 32
+ .proc md5_block_asm_data_order
+md5_block_asm_data_order: // Entry point that selects the data-order path (pDataOrder)
+{ .mib
+ cmp.eq pDataOrder,pHostOrder = r0,r0 // r0 == r0 always holds: pDataOrder = 1, pHostOrder = 0
+ br.sptk.many .md5_block // continue in the body shared with md5_block_asm_host_order
+};;
+ .endp md5_block_asm_data_order
+
.type md5_block_asm_host_order, @function
.global md5_block_asm_host_order
- .align 32
.proc md5_block_asm_host_order
md5_block_asm_host_order:
.prologue
-#ifndef __LP64__
+{ .mib
+ cmp.eq pHostOrder,pDataOrder = r0,r0
+};;
+.md5_block:
{ .mmi
- .save ar.pfs, PFSSave
+ .save ar.pfs, PFSSave
alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
- addp4 DPtrIn = 0, DPtrIn
- addp4 CtxPtr0 = 0, CtxPtr0
+ ADDP CtxPtr1 = 8, CtxPtr0
+ mov CTable = ip
}
-;;
{ .mmi
- nop 0x0
- and InAlign = 0x3, DPtrIn
- .save ar.lc, LCSave
+ ADDP DPtrIn = 0, DPtrIn
+ ADDP CtxPtr0 = 0, CtxPtr0
+ .save ar.lc, LCSave
mov LCSave = ar.lc
}
-#else
+;;
+.pred.rel "mutex",pDataOrder,pHostOrder
{ .mmi
- .save ar.pfs, PFSSave
- alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
+(pDataOrder) add CTable = .md5_tbl_data_order#-.md5_block#, CTable
+(pHostOrder) add CTable = .md5_tbl_host_order#-.md5_block#, CTable
and InAlign = 0x3, DPtrIn
- .save ar.lc, LCSave
- mov LCSave = ar.lc
}
-#endif
{ .mmi
- addl CTable = @ltoffx(md5_round_constants), gp
- ;;
- ld8.mov CTable = [CTable], md5_round_constants // native byte-order
- add CtxPtr1 = 8, CtxPtr0
-}
-#ifdef B_ENDIAN
-{
- .mmi
- rum psr.be // switch to little-endian mode
- nop.m 0x0
- nop.i 0x0
-}
-#endif
-;;
-{ .mmi
ld4 AccumA = [CtxPtr0], 4
ld4 AccumC = [CtxPtr1], 4
.save pr, PRSave
@@ -379,15 +312,12 @@ md5_block_asm_host_order:
ld4 AccumD = [CtxPtr1]
dep DPtr_ = 0, DPtrIn, 0, 2
} ;;
-
-{ .mmi
+#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
+(pDataOrder) rum psr.be;; // switch to little-endian
+#endif
+{ .mmb
ld4 CTable0 = [CTable], 4
cmp.ne pOff, p0 = 0, InAlign
-} ;;
-
-{ .mib
- nop.m 0x0
- nop.i 0x0
(pOff) br.cond.spnt.many .md5_unaligned
} ;;
@@ -431,9 +361,9 @@ md5_block_asm_host_order:
} ;;
.md5_exit:
-// Note that we switch back to the entry endianess AFTER storing so
-// that the memory image of the hash is preserved.
-
+#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
+(pDataOrder) sum psr.be;; // switch back to big-endian mode
+#endif
{ .mmi
st4 [CtxPtr0] = AccumB, -4
st4 [CtxPtr1] = AccumD, -4
@@ -445,9 +375,6 @@ md5_block_asm_host_order:
mov ar.lc = LCSave
} ;;
{ .mib
-#ifdef B_ENDIAN
- sum psr.be // switch back to big-endian mode
-#endif
mov ar.pfs = PFSSave
br.ret.sptk.few rp
} ;;
@@ -1001,9 +928,99 @@ md5_digest_block##offset: \
nop 0x0 ; \
nop 0x0 ; \
br.cond.sptk.many md5_digest_GHI ; \
-} ; \
+} ;; \
.endp md5digestBlock ## offset
MD5FBLOCK(1)
MD5FBLOCK(2)
MD5FBLOCK(3)
+
+ .align 64
+ .type md5_constants, @object
+md5_constants: // Two 64-entry tables of 32-bit MD5 round constants; CTable is pointed at one of them
+.md5_tbl_data_order: // Coded byte by byte so that the table is
+ // little-endian in memory on any host.
+ data1 0x78, 0xa4, 0x6a, 0xd7 // 0
+ data1 0x56, 0xb7, 0xc7, 0xe8 // 1
+ data1 0xdb, 0x70, 0x20, 0x24 // 2
+ data1 0xee, 0xce, 0xbd, 0xc1 // 3
+ data1 0xaf, 0x0f, 0x7c, 0xf5 // 4
+ data1 0x2a, 0xc6, 0x87, 0x47 // 5
+ data1 0x13, 0x46, 0x30, 0xa8 // 6
+ data1 0x01, 0x95, 0x46, 0xfd // 7
+ data1 0xd8, 0x98, 0x80, 0x69 // 8
+ data1 0xaf, 0xf7, 0x44, 0x8b // 9
+ data1 0xb1, 0x5b, 0xff, 0xff // 10
+ data1 0xbe, 0xd7, 0x5c, 0x89 // 11
+ data1 0x22, 0x11, 0x90, 0x6b // 12
+ data1 0x93, 0x71, 0x98, 0xfd // 13
+ data1 0x8e, 0x43, 0x79, 0xa6 // 14
+ data1 0x21, 0x08, 0xb4, 0x49 // 15
+ data1 0x62, 0x25, 0x1e, 0xf6 // 16
+ data1 0x40, 0xb3, 0x40, 0xc0 // 17
+ data1 0x51, 0x5a, 0x5e, 0x26 // 18
+ data1 0xaa, 0xc7, 0xb6, 0xe9 // 19
+ data1 0x5d, 0x10, 0x2f, 0xd6 // 20
+ data1 0x53, 0x14, 0x44, 0x02 // 21
+ data1 0x81, 0xe6, 0xa1, 0xd8 // 22
+ data1 0xc8, 0xfb, 0xd3, 0xe7 // 23
+ data1 0xe6, 0xcd, 0xe1, 0x21 // 24
+ data1 0xd6, 0x07, 0x37, 0xc3 // 25
+ data1 0x87, 0x0d, 0xd5, 0xf4 // 26
+ data1 0xed, 0x14, 0x5a, 0x45 // 27
+ data1 0x05, 0xe9, 0xe3, 0xa9 // 28
+ data1 0xf8, 0xa3, 0xef, 0xfc // 29
+ data1 0xd9, 0x02, 0x6f, 0x67 // 30
+ data1 0x8a, 0x4c, 0x2a, 0x8d // 31
+ data1 0x42, 0x39, 0xfa, 0xff // 32
+ data1 0x81, 0xf6, 0x71, 0x87 // 33
+ data1 0x22, 0x61, 0x9d, 0x6d // 34
+ data1 0x0c, 0x38, 0xe5, 0xfd // 35
+ data1 0x44, 0xea, 0xbe, 0xa4 // 36
+ data1 0xa9, 0xcf, 0xde, 0x4b // 37
+ data1 0x60, 0x4b, 0xbb, 0xf6 // 38
+ data1 0x70, 0xbc, 0xbf, 0xbe // 39
+ data1 0xc6, 0x7e, 0x9b, 0x28 // 40
+ data1 0xfa, 0x27, 0xa1, 0xea // 41
+ data1 0x85, 0x30, 0xef, 0xd4 // 42
+ data1 0x05, 0x1d, 0x88, 0x04 // 43
+ data1 0x39, 0xd0, 0xd4, 0xd9 // 44
+ data1 0xe5, 0x99, 0xdb, 0xe6 // 45
+ data1 0xf8, 0x7c, 0xa2, 0x1f // 46
+ data1 0x65, 0x56, 0xac, 0xc4 // 47
+ data1 0x44, 0x22, 0x29, 0xf4 // 48
+ data1 0x97, 0xff, 0x2a, 0x43 // 49
+ data1 0xa7, 0x23, 0x94, 0xab // 50
+ data1 0x39, 0xa0, 0x93, 0xfc // 51
+ data1 0xc3, 0x59, 0x5b, 0x65 // 52
+ data1 0x92, 0xcc, 0x0c, 0x8f // 53
+ data1 0x7d, 0xf4, 0xef, 0xff // 54
+ data1 0xd1, 0x5d, 0x84, 0x85 // 55
+ data1 0x4f, 0x7e, 0xa8, 0x6f // 56
+ data1 0xe0, 0xe6, 0x2c, 0xfe // 57
+ data1 0x14, 0x43, 0x01, 0xa3 // 58
+ data1 0xa1, 0x11, 0x08, 0x4e // 59
+ data1 0x82, 0x7e, 0x53, 0xf7 // 60
+ data1 0x35, 0xf2, 0x3a, 0xbd // 61
+ data1 0xbb, 0xd2, 0xd7, 0x2a // 62
+ data1 0x91, 0xd3, 0x86, 0xeb // 63
+
+.md5_tbl_host_order: // OS (host) data order, which may well be
+ // little-endian too; same 64 values as above.
+ data4 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee // 0
+ data4 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 // 4
+ data4 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be // 8
+ data4 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 // 12
+ data4 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa // 16
+ data4 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 // 20
+ data4 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed // 24
+ data4 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a // 28
+ data4 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c // 32
+ data4 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 // 36
+ data4 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 // 40
+ data4 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 // 44
+ data4 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 // 48
+ data4 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 // 52
+ data4 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 // 56
+ data4 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 // 60
+.size md5_constants#,64*4*2 // 64 entries * 4 bytes * 2 tables