diff options
Diffstat (limited to 'crypto/bn/asm/x86_64-mont5.pl')
-rwxr-xr-x | crypto/bn/asm/x86_64-mont5.pl | 196 |
1 files changed, 0 insertions, 196 deletions
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index c03473f13a..1faea0bcf8 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -2103,193 +2103,6 @@ __bn_post4x_internal: .size __bn_post4x_internal,.-__bn_post4x_internal ___ } -{ -$code.=<<___; -.globl bn_from_montgomery -.type bn_from_montgomery,\@abi-omnipotent -.align 32 -bn_from_montgomery: -.cfi_startproc - testl \$7,`($win64?"48(%rsp)":"%r9d")` - jz bn_from_mont8x - xor %eax,%eax - ret -.cfi_endproc -.size bn_from_montgomery,.-bn_from_montgomery - -.type bn_from_mont8x,\@function,6 -.align 32 -bn_from_mont8x: -.cfi_startproc - .byte 0x67 - mov %rsp,%rax -.cfi_def_cfa_register %rax - push %rbx -.cfi_push %rbx - push %rbp -.cfi_push %rbp - push %r12 -.cfi_push %r12 - push %r13 -.cfi_push %r13 - push %r14 -.cfi_push %r14 - push %r15 -.cfi_push %r15 -.Lfrom_prologue: - - shl \$3,${num}d # convert $num to bytes - lea ($num,$num,2),%r10 # 3*$num in bytes - neg $num - mov ($n0),$n0 # *n0 - - ############################################################## - # Ensure that stack frame doesn't alias with $rptr+3*$num - # modulo 4096, which covers ret[num], am[num] and n[num] - # (see bn_exp.c). The stack is allocated to aligned with - # bn_power5's frame, and as bn_from_montgomery happens to be - # last operation, we use the opportunity to cleanse it. - # - lea -320(%rsp,$num,2),%r11 - mov %rsp,%rbp - sub $rptr,%r11 - and \$4095,%r11 - cmp %r11,%r10 - jb .Lfrom_sp_alt - sub %r11,%rbp # align with $aptr - lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256) - jmp .Lfrom_sp_done - -.align 32 -.Lfrom_sp_alt: - lea 4096-320(,$num,2),%r10 - lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256) - sub %r10,%r11 - mov \$0,%r10 - cmovc %r10,%r11 - sub %r11,%rbp -.Lfrom_sp_done: - and \$-64,%rbp - mov %rsp,%r11 - sub %rbp,%r11 - and \$-4096,%r11 - lea (%rbp,%r11),%rsp - mov (%rsp),%r10 - cmp %rbp,%rsp - ja .Lfrom_page_walk - jmp .Lfrom_page_walk_done - -.Lfrom_page_walk: - lea -4096(%rsp),%rsp - mov (%rsp),%r10 - cmp %rbp,%rsp - ja .Lfrom_page_walk -.Lfrom_page_walk_done: - - mov $num,%r10 - neg $num - - ############################################################## - # Stack layout - # - # +0 saved $num, used in reduction section - # +8 &t[2*$num], used in reduction section - # +32 saved *n0 - # +40 saved %rsp - # +48 t[2*$num] - # - mov $n0, 32(%rsp) - mov %rax, 40(%rsp) # save original %rsp -.cfi_cfa_expression %rsp+40,deref,+8 -.Lfrom_body: - mov $num,%r11 - lea 48(%rsp),%rax - pxor %xmm0,%xmm0 - jmp .Lmul_by_1 - -.align 32 -.Lmul_by_1: - movdqu ($aptr),%xmm1 - movdqu 16($aptr),%xmm2 - movdqu 32($aptr),%xmm3 - movdqa %xmm0,(%rax,$num) - movdqu 48($aptr),%xmm4 - movdqa %xmm0,16(%rax,$num) - .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 # lea 64($aptr),$aptr - movdqa %xmm1,(%rax) - movdqa %xmm0,32(%rax,$num) - movdqa %xmm2,16(%rax) - movdqa %xmm0,48(%rax,$num) - movdqa %xmm3,32(%rax) - movdqa %xmm4,48(%rax) - lea 64(%rax),%rax - sub \$64,%r11 - jnz .Lmul_by_1 - - movq $rptr,%xmm1 - movq $nptr,%xmm2 - .byte 0x67 - mov $nptr,%rbp - movq %r10, %xmm3 # -num -___ -$code.=<<___ if ($addx); - mov OPENSSL_ia32cap_P+8(%rip),%r11d - and \$0x80108,%r11d - cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 - jne .Lfrom_mont_nox - - lea (%rax,$num),$rptr - call __bn_sqrx8x_reduction - call __bn_postx4x_internal - - pxor %xmm0,%xmm0 - lea 48(%rsp),%rax - jmp .Lfrom_mont_zero - -.align 32 -.Lfrom_mont_nox: -___ -$code.=<<___; - call __bn_sqr8x_reduction - call __bn_post4x_internal - - pxor %xmm0,%xmm0 - lea 48(%rsp),%rax - jmp .Lfrom_mont_zero - -.align 32 -.Lfrom_mont_zero: - mov 40(%rsp),%rsi # restore %rsp -.cfi_def_cfa %rsi,8 - movdqa %xmm0,16*0(%rax) - movdqa %xmm0,16*1(%rax) - movdqa %xmm0,16*2(%rax) - movdqa %xmm0,16*3(%rax) - lea 16*4(%rax),%rax - sub \$32,$num - jnz .Lfrom_mont_zero - - mov \$1,%rax - mov -48(%rsi),%r15 -.cfi_restore %r15 - mov -40(%rsi),%r14 -.cfi_restore %r14 - mov -32(%rsi),%r13 -.cfi_restore %r13 - mov -24(%rsi),%r12 -.cfi_restore %r12 - mov -16(%rsi),%rbp -.cfi_restore %rbp - mov -8(%rsi),%rbx -.cfi_restore %rbx - lea (%rsi),%rsp -.cfi_def_cfa_register %rsp -.Lfrom_epilogue: - ret -.cfi_endproc -.size bn_from_mont8x,.-bn_from_mont8x -___ -} }}} if ($addx) {{{ @@ -3896,10 +3709,6 @@ mul_handler: .rva .LSEH_begin_bn_power5 .rva .LSEH_end_bn_power5 .rva .LSEH_info_bn_power5 - - .rva .LSEH_begin_bn_from_mont8x - .rva .LSEH_end_bn_from_mont8x - .rva .LSEH_info_bn_from_mont8x ___ $code.=<<___ if ($addx); .rva .LSEH_begin_bn_mulx4x_mont_gather5 @@ -3931,11 +3740,6 @@ $code.=<<___; .byte 9,0,0,0 .rva mul_handler .rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # HandlerData[] -.align 8 -.LSEH_info_bn_from_mont8x: - .byte 9,0,0,0 - .rva mul_handler - .rva .Lfrom_prologue,.Lfrom_body,.Lfrom_epilogue # HandlerData[] ___ $code.=<<___ if ($addx); .align 8 |