Always end BN_mod_exp_mont_consttime with normal Montgomery reduction.

This partially fixes a bug where, on x86_64, BN_mod_exp_mont_consttime would sometimes return m, the modulus, when it should have returned zero. Thanks to Guido Vranken for reporting it. It is only a partial fix because the same bug also exists in the "rsaz" codepath. The bug only affects zero outputs (with non-zero inputs), so we believe it has no security impact on our cryptographic functions. The fx is to delete lowercase bn_from_montgomery altogether, and have the mont5 path use the same BN_from_montgomery ending as the non-mont5 path. This only impacts the final step of the whole exponentiation and has no measurable perf impact. See the original BoringSSL commit https://boringssl.googlesource.com/boringssl/+/13c9d5c69d04485a7a8840c12185c832026c8315 for further analysis. Original-author: David Benjamin <davidben@google.com> Reviewed-by: Matt Caswell <matt@openssl.org> Reviewed-by: Paul Dale <pauli@openssl.org> (Merged from https://github.com/openssl/openssl/pull/18510)
author: Tomas Mraz <tomas@openssl.org> 2022-06-09 12:34:55 +0200
committer: Tomas Mraz <tomas@openssl.org> 2022-06-16 15:22:35 +0200
commit: 0ae365e1f80648f4c52aa3ac9bbc279b6192b23e (patch)
tree: 3c64a03999de0210c8b171207d79a5d119c3022b /crypto/bn/asm/x86_64-mont5.pl
parent: b2feb9f0e394da6570346598837f1b01eb58c028 (diff)
1 files changed, 0 insertions, 196 deletions
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index c03473f13a..1faea0bcf8 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -2103,193 +2103,6 @@ __bn_post4x_internal:
 .size	__bn_post4x_internal,.-__bn_post4x_internal
 ___
 }
-{
-$code.=<<___;
-.globl	bn_from_montgomery
-.type	bn_from_montgomery,\@abi-omnipotent
-.align	32
-bn_from_montgomery:
-.cfi_startproc
-	testl	\$7,`($win64?"48(%rsp)":"%r9d")`
-	jz	bn_from_mont8x
-	xor	%eax,%eax
-	ret
-.cfi_endproc
-.size	bn_from_montgomery,.-bn_from_montgomery
-
-.type	bn_from_mont8x,\@function,6
-.align	32
-bn_from_mont8x:
-.cfi_startproc
-	.byte	0x67
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
-	push	%rbx
-.cfi_push	%rbx
-	push	%rbp
-.cfi_push	%rbp
-	push	%r12
-.cfi_push	%r12
-	push	%r13
-.cfi_push	%r13
-	push	%r14
-.cfi_push	%r14
-	push	%r15
-.cfi_push	%r15
-.Lfrom_prologue:
-
-	shl	\$3,${num}d		# convert $num to bytes
-	lea	($num,$num,2),%r10	# 3*$num in bytes
-	neg	$num
-	mov	($n0),$n0		# *n0
-
-	##############################################################
-	# Ensure that stack frame doesn't alias with $rptr+3*$num
-	# modulo 4096, which covers ret[num], am[num] and n[num]
-	# (see bn_exp.c). The stack is allocated to aligned with
-	# bn_power5's frame, and as bn_from_montgomery happens to be
-	# last operation, we use the opportunity to cleanse it.
-	#
-	lea	-320(%rsp,$num,2),%r11
-	mov	%rsp,%rbp
-	sub	$rptr,%r11
-	and	\$4095,%r11
-	cmp	%r11,%r10
-	jb	.Lfrom_sp_alt
-	sub	%r11,%rbp		# align with $aptr
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*$num*8+256)
-	jmp	.Lfrom_sp_done
-
-.align	32
-.Lfrom_sp_alt:
-	lea	4096-320(,$num,2),%r10
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*$num*8+256)
-	sub	%r10,%r11
-	mov	\$0,%r10
-	cmovc	%r10,%r11
-	sub	%r11,%rbp
-.Lfrom_sp_done:
-	and	\$-64,%rbp
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lfrom_page_walk
-	jmp	.Lfrom_page_walk_done
-
-.Lfrom_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lfrom_page_walk
-.Lfrom_page_walk_done:
-
-	mov	$num,%r10
-	neg	$num
-
-	##############################################################
-	# Stack layout
-	#
-	# +0	saved $num, used in reduction section
-	# +8	&t[2*$num], used in reduction section
-	# +32	saved *n0
-	# +40	saved %rsp
-	# +48	t[2*$num]
-	#
-	mov	$n0,  32(%rsp)
-	mov	%rax, 40(%rsp)		# save original %rsp
-.cfi_cfa_expression	%rsp+40,deref,+8
-.Lfrom_body:
-	mov	$num,%r11
-	lea	48(%rsp),%rax
-	pxor	%xmm0,%xmm0
-	jmp	.Lmul_by_1
-
-.align	32
-.Lmul_by_1:
-	movdqu	($aptr),%xmm1
-	movdqu	16($aptr),%xmm2
-	movdqu	32($aptr),%xmm3
-	movdqa	%xmm0,(%rax,$num)
-	movdqu	48($aptr),%xmm4
-	movdqa	%xmm0,16(%rax,$num)
-	.byte	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	# lea	64($aptr),$aptr
-	movdqa	%xmm1,(%rax)
-	movdqa	%xmm0,32(%rax,$num)
-	movdqa	%xmm2,16(%rax)
-	movdqa	%xmm0,48(%rax,$num)
-	movdqa	%xmm3,32(%rax)
-	movdqa	%xmm4,48(%rax)
-	lea	64(%rax),%rax
-	sub	\$64,%r11
-	jnz	.Lmul_by_1
-
-	movq	$rptr,%xmm1
-	movq	$nptr,%xmm2
-	.byte	0x67
-	mov	$nptr,%rbp
-	movq	%r10, %xmm3		# -num
-___
-$code.=<<___ if ($addx);
-	mov	OPENSSL_ia32cap_P+8(%rip),%r11d
-	and	\$0x80108,%r11d
-	cmp	\$0x80108,%r11d		# check for AD*X+BMI2+BMI1
-	jne	.Lfrom_mont_nox
-
-	lea	(%rax,$num),$rptr
-	call	__bn_sqrx8x_reduction
-	call	__bn_postx4x_internal
-
-	pxor	%xmm0,%xmm0
-	lea	48(%rsp),%rax
-	jmp	.Lfrom_mont_zero
-
-.align	32
-.Lfrom_mont_nox:
-___
-$code.=<<___;
-	call	__bn_sqr8x_reduction
-	call	__bn_post4x_internal
-
-	pxor	%xmm0,%xmm0
-	lea	48(%rsp),%rax
-	jmp	.Lfrom_mont_zero
-
-.align	32
-.Lfrom_mont_zero:
-	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
-	movdqa	%xmm0,16*0(%rax)
-	movdqa	%xmm0,16*1(%rax)
-	movdqa	%xmm0,16*2(%rax)
-	movdqa	%xmm0,16*3(%rax)
-	lea	16*4(%rax),%rax
-	sub	\$32,$num
-	jnz	.Lfrom_mont_zero
-
-	mov	\$1,%rax
-	mov	-48(%rsi),%r15
-.cfi_restore	%r15
-	mov	-40(%rsi),%r14
-.cfi_restore	%r14
-	mov	-32(%rsi),%r13
-.cfi_restore	%r13
-	mov	-24(%rsi),%r12
-.cfi_restore	%r12
-	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
-	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
-	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
-.Lfrom_epilogue:
-	ret
-.cfi_endproc
-.size	bn_from_mont8x,.-bn_from_mont8x
-___
-}
 }}}
 
 if ($addx) {{{
@@ -3896,10 +3709,6 @@ mul_handler:
 	.rva	.LSEH_begin_bn_power5
 	.rva	.LSEH_end_bn_power5
 	.rva	.LSEH_info_bn_power5
-
-	.rva	.LSEH_begin_bn_from_mont8x
-	.rva	.LSEH_end_bn_from_mont8x
-	.rva	.LSEH_info_bn_from_mont8x
 ___
 $code.=<<___ if ($addx);
 	.rva	.LSEH_begin_bn_mulx4x_mont_gather5
@@ -3931,11 +3740,6 @@ $code.=<<___;
 	.byte	9,0,0,0
 	.rva	mul_handler
 	.rva	.Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue	# HandlerData[]
-.align	8
-.LSEH_info_bn_from_mont8x:
-	.byte	9,0,0,0
-	.rva	mul_handler
-	.rva	.Lfrom_prologue,.Lfrom_body,.Lfrom_epilogue		# HandlerData[]
 ___
 $code.=<<___ if ($addx);
 .align	8
author	Tomas Mraz <tomas@openssl.org>	2022-06-09 12:34:55 +0200
committer	Tomas Mraz <tomas@openssl.org>	2022-06-16 15:22:35 +0200
commit	0ae365e1f80648f4c52aa3ac9bbc279b6192b23e (patch)
tree	3c64a03999de0210c8b171207d79a5d119c3022b /crypto/bn/asm/x86_64-mont5.pl
parent	b2feb9f0e394da6570346598837f1b01eb58c028 (diff)