.ident "md5-sparcv9.S, Version 1.0"
.ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
.file "md5-sparcv9.S"
/*
* ====================================================================
* Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>.
*
* Rights for redistribution and usage in source and binary forms are
* granted as long as above copyright notices are retained. Warranty
* of any kind is (of course:-) disclaimed.
* ====================================================================
*/
/*
* This is my modest contribution to OpenSSL project (see
* http://www.openssl.org/ for more information about it) and is an
* assembler implementation of MD5 block hash function. I've hand-coded
* this for the sole reason to reach UltraSPARC-specific "load in
* little-endian byte order" instruction. This gives up to 15%
* performance improvement for cases when input message is aligned at
* 32 bits boundary. The module was tested under both 32 *and* 64 bit
* kernels. For updates see http://fy.chalmers.se/~appro/hpe/.
*
* To compile with SC4.x/SC5.x:
*
* cc -xarch=v[9|8plus] -DOPENSSL_SYSNAME_ULTRASPARC -DMD5_BLOCK_DATA_ORDER \
* -c md5-sparcv9.S
*
* and with gcc:
*
* gcc -mcpu=ultrasparc -DOPENSSL_SYSNAME_ULTRASPARC -DMD5_BLOCK_DATA_ORDER \
* -c md5-sparcv9.S
*
* or if above fails (it does if you have gas):
*
* gcc -E -DOPENSSL_SYSNAME_ULTRASPARC -DMD5_BLOCK_DATA_ORDER md5-sparcv9.S | \
* as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o
*/
#include <openssl/e_os2.h>
/*
 * Register allocation for md5_block.
 *
 * A..D    working copies of the four 32-bit state words; md5_block loads
 *         them from [Aptr]..[Dptr] right after its `save`.
 * T1, T2  temporaries of each step: T1 accumulates the step's sum, T2
 *         receives the step constant (sethi/or) and then the left-shifted
 *         half of the rotate.
 */
#define A %o0
#define B %o1
#define C %o2
#define D %o3
#define T1 %o4
#define T2 %o5
/*
 * Input-word registers: word i is fetched with `LOAD X(i),Ri`. They are
 * spread over %l0-%l7, %i3-%i5 and %g1-%g4.
 * NOTE(review): only 15 names (R0..R13 plus RX) are defined here; check the
 * round code for how the remaining input word(s) are handled via RX.
 */
#define R0 %l0
#define R1 %l1
#define R2 %l2
#define R3 %l3
#define R4 %l4
#define R5 %l5
#define R6 %l6
#define R7 %l7
#define R8 %i3
#define R9 %i4
#define R10 %i5
#define R11 %g1
#define R12 %g2
#define R13 %g3
#define RX %g4
/*
 * Addresses of the four state words, as byte offsets from %i0. They are
 * meant to be used inside brackets, e.g. `ld [Aptr],A`.
 * Presumably %i0 is the caller's pointer to the MD5 state -- confirm
 * against the C prototype.
 */
#define Aptr %i0+0
#define Bptr %i0+4
#define Cptr %i0+8
#define Dptr %i0+12
/*
 * Aval..Dval are second names for R5..R8 (so Dval is %i3). Per the original
 * author's note they were picked because those registers are not used at
 * the end of the last round.
 */
#define Aval R5 /* those not used at the end of the last round */
#define Bval R6
#define Cval R7
#define Dval R8
/*
 * Select how input words are loaded and under which name the function is
 * exported:
 *
 *   LOAD      load instruction used for input words
 *   X(i)      address operand of 32-bit input word i, i.e. %i1 + 4*i
 *   md5_block exported symbol name
 */
#if defined(MD5_BLOCK_DATA_ORDER)
# if defined(OPENSSL_SYSNAME_ULTRASPARC)
/*
 * Data-order variant: words are read with `lda` through the %asi register,
 * which md5_block sets to ASI_PRIMARY_LITTLE so that the loads are done in
 * little-endian byte order. Defining ASI_PRIMARY_LITTLE also acts as the
 * switch (#ifdef in md5_block) that enables the %asi save/setup code.
 */
# define LOAD lda
# define X(i) [%i1+i*4]%asi
# define md5_block md5_block_asm_data_order_aligned
# define ASI_PRIMARY_LITTLE 0x88
# else
/* The little-endian load path is only provided for UltraSPARC builds. */
# error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!"
# endif
#else
/* Host-order variant: plain `ld`, %asi is left untouched. */
# define LOAD ld
# define X(i) [%i1+i*4]
# define md5_block md5_block_asm_host_order
#endif
/* Code goes into an allocatable, executable ".text" section. */
.section ".text",#alloc,#execinstr
/*
 * FRAME is the (negative) stack adjustment handed to `save %sp,FRAME,%sp`
 * at function entry: -192 for 64-bit builds, -96 otherwise.
 *
 * On 64-bit builds %g2 and %g3 (used here as R12 and R13) are additionally
 * declared as scratch registers via `.register`, so the assembler accepts
 * their use.
 */
#if defined(__SUNPRO_C) && defined(__sparcv9)
/* They've said -xarch=v9 at command line */
.register %g2,#scratch
.register %g3,#scratch
# define FRAME -192
#elif defined(__GNUC__) && defined(__arch64__)
/* They've said -m64 at command line */
.register %g2,#scratch
.register %g3,#scratch
# define FRAME -192
#else
/* 32-bit (v8/v8plus) build: no .register declarations, smaller frame. */
# define FRAME -96
#endif
.align 32
.global md5_block
md5_block:
save %sp,FRAME,%sp
ld [Dptr],D
ld [Cptr],C
ld [Bptr],B
ld [Aptr],A
#ifdef ASI_PRIMARY_LITTLE
rd %asi,%o7 ! How dare I? Well, I just do:-)
wr %g0,ASI_PRIMARY_LITTLE,%asi
#endif
LOAD X(0),R0
.Lmd5_block_loop:
!!!!!!!!Round 0
xor C,D,T1
sethi %hi(0xd76aa478),T2
and T1,B,T1
or T2,%lo(0xd76aa478),T2 !=
xor T1,D,T1
add T1,R0,T1
LOAD X(1),R1
add T1,T2,T1 !=
add A,T1,A
sll A,7,T2
srl A,32-7,A
or A,T2,A !=
xor B,C,T1
add A,B,A
sethi %hi(0xe8c7b756),T2
and T1,A,T1 !=
or T2,%lo(0xe8c7b756),T2
xor T1,C,T1
LOAD X(2),R2
add T1,R1,T1 !=
add T1,T2,T1
add D,T1,D
sll D,12,T2
srl D,32-12,D !=
or D,T2,D
xor A,B,T1
add D,A,D
sethi %hi(0x242070db),T2 !=
and T1,D,T1
or T2,%lo(0x242070db),T2
xor T1,B,T1
add T1,R2,T1 !=
LOAD X(3),R3
add T1,T2,T1
add C,T1,C
sll C,17,T2 !=
srl C,32-17,C
or C,T2,C
xor D,A,T1
add C,D,C !=
sethi %hi(0xc1bdceee),T2
and T1,C,T1
or T2,%lo(0xc1bdceee),T2
xor T1,A,T1 !=
add T1,R3,T1
LOAD X(4),R4
add T1,T2,T1
add B,T1,B !=
sll B,22,T2
srl B,32-22,B
or B,T2,B
xor C,D,T1