diff options
author | Andy Polyakov <appro@openssl.org> | 2017-11-24 11:35:50 +0100 |
---|---|---|
committer | Matt Caswell <matt@openssl.org> | 2017-12-06 15:36:08 +0000 |
commit | 5630661aecbea5fe3c4740f5fea744a1f07a6253 (patch) | |
tree | 547a39110fe9dc93d5a700034af6ee93c4aecec3 /crypto/bn/asm | |
parent | e1dd8fa00a1e06d27c8b024dac7657a8d8a9b451 (diff) |
bn/asm/rsaz-avx2.pl: fix digit correction bug in rsaz_1024_mul_avx2.
Credit to OSS-Fuzz for finding this.
CVE-2017-3738
Reviewed-by: Rich Salz <rsalz@openssl.org>
Diffstat (limited to 'crypto/bn/asm')
-rwxr-xr-x | crypto/bn/asm/rsaz-avx2.pl | 15 |
1 files changed, 7 insertions, 8 deletions
diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index f26390155c..8c56a91484 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -217,7 +217,7 @@ $code.=<<___; vmovdqu 32*8-128($ap), $ACC8 lea 192(%rsp), $tp0 # 64+128=192 - vpbroadcastq .Land_mask(%rip), $AND_MASK + vmovdqu .Land_mask(%rip), $AND_MASK jmp .LOOP_GRANDE_SQR_1024 .align 32 @@ -1067,10 +1067,10 @@ $code.=<<___; vpmuludq 32*6-128($np),$Yi,$TEMP1 vpaddq $TEMP1,$ACC6,$ACC6 vpmuludq 32*7-128($np),$Yi,$TEMP2 - vpblendd \$3, $ZERO, $ACC9, $ACC9 # correct $ACC3 + vpblendd \$3, $ZERO, $ACC9, $TEMP1 # correct $ACC3 vpaddq $TEMP2,$ACC7,$ACC7 vpmuludq 32*8-128($np),$Yi,$TEMP0 - vpaddq $ACC9, $ACC3, $ACC3 # correct $ACC3 + vpaddq $TEMP1, $ACC3, $ACC3 # correct $ACC3 vpaddq $TEMP0,$ACC8,$ACC8 mov %rbx, %rax @@ -1083,7 +1083,9 @@ $code.=<<___; vmovdqu -8+32*2-128($ap),$TEMP2 mov $r1, %rax + vpblendd \$0xfc, $ZERO, $ACC9, $ACC9 # correct $ACC3 imull $n0, %eax + vpaddq $ACC9,$ACC4,$ACC4 # correct $ACC3 and \$0x1fffffff, %eax imulq 16-128($ap),%rbx @@ -1319,15 +1321,12 @@ ___ # But as we underutilize resources, it's possible to correct in # each iteration with marginal performance loss. But then, as # we do it in each iteration, we can correct less digits, and -# avoid performance penalties completely. Also note that we -# correct only three digits out of four. This works because -# most significant digit is subjected to less additions. +# avoid performance penalties completely. $TEMP0 = $ACC9; $TEMP3 = $Bi; $TEMP4 = $Yi; $code.=<<___; - vpermq \$0, $AND_MASK, $AND_MASK vpaddq (%rsp), $TEMP1, $ACC0 vpsrlq \$29, $ACC0, $TEMP1 @@ -1774,7 +1773,7 @@ $code.=<<___; .align 64 .Land_mask: - .quad 0x1fffffff,0x1fffffff,0x1fffffff,-1 + .quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff .Lscatter_permd: .long 0,2,4,6,7,7,7,7 .Lgather_permd: |