72 files changed, 139 insertions, 139 deletions
diff --git a/crypto/aes/asm/aes-586.pl b/crypto/aes/asm/aes-586.pl
index 20c19e98bf..29059edf8b 100755
--- a/crypto/aes/asm/aes-586.pl
+++ b/crypto/aes/asm/aes-586.pl
@@ -55,8 +55,8 @@
 # better performance on most recent µ-archs...
 #
 # Third version adds AES_cbc_encrypt implementation, which resulted in
-# up to 40% performance imrovement of CBC benchmark results. 40% was
-# observed on P4 core, where "overall" imrovement coefficient, i.e. if
+# up to 40% performance improvement of CBC benchmark results. 40% was
+# observed on P4 core, where "overall" improvement coefficient, i.e. if
 # compared to PIC generated by GCC and in CBC mode, was observed to be
 # as large as 4x:-) CBC performance is virtually identical to ECB now
 # and on some platforms even better, e.g. 17.6 "small" cycles/byte on
@@ -159,7 +159,7 @@
 # combinations then attack becomes infeasible. This is why revised
 # AES_cbc_encrypt "dares" to switch to larger S-box when larger chunk
 # of data is to be processed in one stroke. The current size limit of
-# 512 bytes is chosen to provide same [diminishigly low] probability
+# 512 bytes is chosen to provide same [diminishingly low] probability
 # for cache-line to remain untouched in large chunk operation with
 # large S-box as for single block operation with compact S-box and
 # surely needs more careful consideration...
@@ -171,12 +171,12 @@
 # yield execution to process performing AES just before timer fires
 # off the scheduler, immediately regain control of CPU and analyze the
 # cache state. For this attack to be efficient attacker would have to
-# effectively slow down the operation by several *orders* of magnitute,
+# effectively slow down the operation by several *orders* of magnitude,
 # by ratio of time slice to duration of handful of AES rounds, which
 # unlikely to remain unnoticed. Not to mention that this also means
-# that he would spend correspondigly more time to collect enough
+# that he would spend correspondingly more time to collect enough
 # statistical data to mount the attack. It's probably appropriate to
-# say that if adeversary reckons that this attack is beneficial and
+# say that if adversary reckons that this attack is beneficial and
 # risks to be noticed, you probably have larger problems having him
 # mere opportunity. In other words suggested code design expects you
 # to preclude/mitigate this attack by overall system security design.
@@ -240,7 +240,7 @@ $small_footprint=1;	# $small_footprint=1 code is ~5% slower [on
 			# contention and in hope to "collect" 5% back
 			# in real-life applications...
 
-$vertical_spin=0;	# shift "verticaly" defaults to 0, because of
+$vertical_spin=0;	# shift "vertically" defaults to 0, because of
 			# its proof-of-concept status...
 # Note that there is no decvert(), as well as last encryption round is
 # performed with "horizontal" shifts. This is because this "vertical"
@@ -1606,7 +1606,7 @@ sub decstep()
 	# no instructions are reordered, as performance appears
 	# optimal... or rather that all attempts to reorder didn't
 	# result in better performance [which by the way is not a
-	# bit lower than ecryption].
+	# bit lower than encryption].
 	if($i==3)   {	&mov	($key,$__key);			}
 	else        {	&mov	($out,$s[0]);			}
 			&and	($out,0xFF);
diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl
index ba3e4545df..d81d76d77a 100644
--- a/crypto/aes/asm/aes-mips.pl
+++ b/crypto/aes/asm/aes-mips.pl
@@ -120,7 +120,7 @@ my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
 my ($key0,$cnt)=($gp,$fp);
 
-# instuction ordering is "stolen" from output from MIPSpro assembler
+# instruction ordering is "stolen" from output from MIPSpro assembler
 # invoked with -mips3 -O3 arguments...
 $code.=<<___;
 .align	5
diff --git a/crypto/aes/asm/aes-parisc.pl b/crypto/aes/asm/aes-parisc.pl
index fb754eb460..b688ab3050 100644
--- a/crypto/aes/asm/aes-parisc.pl
+++ b/crypto/aes/asm/aes-parisc.pl
@@ -1015,7 +1015,7 @@ ___
 foreach (split("\n",$code)) {
 	s/\`([^\`]*)\`/eval $1/ge;
 
-	# translate made up instructons: _ror, _srm
+	# translate made up instructions: _ror, _srm
 	s/_ror(\s+)(%r[0-9]+),/shd$1$2,$2,/				or
 
 	s/_srm(\s+%r[0-9]+),([0-9]+),/
diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl
index 1495917d26..0ef1f6b50a 100644
--- a/crypto/aes/asm/aes-s390x.pl
+++ b/crypto/aes/asm/aes-s390x.pl
@@ -44,7 +44,7 @@
 # minimize/avoid Address Generation Interlock hazard and to favour
 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
 # almost 50% on z9. The gain is smaller on z10, because being dual-
-# issue z10 makes it improssible to eliminate the interlock condition:
+# issue z10 makes it impossible to eliminate the interlock condition:
 # critial path is not long enough. Yet it spends ~24 cycles per byte
 # processed with 128-bit key.
 #
diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl
index c24a5510bd..4d1dc9c701 100755
--- a/crypto/aes/asm/aes-x86_64.pl
+++ b/crypto/aes/asm/aes-x86_64.pl
@@ -2018,7 +2018,7 @@ AES_cbc_encrypt:
 	lea	($key,%rax),%rax
 	mov	%rax,$keyend
 
-	# pick Te4 copy which can't "overlap" with stack frame or key scdedule
+	# pick Te4 copy which can't "overlap" with stack frame or key schedule
 	lea	2048($sbox),$sbox
 	lea	768-8(%rsp),%rax
 	sub	$sbox,%rax
diff --git a/crypto/aes/asm/aesfx-sparcv9.pl b/crypto/aes/asm/aesfx-sparcv9.pl
index 04b3cf7116..9ddf0b4b00 100644
--- a/crypto/aes/asm/aesfx-sparcv9.pl
+++ b/crypto/aes/asm/aesfx-sparcv9.pl
@@ -22,7 +22,7 @@
 # April 2016
 #
 # Add "teaser" CBC and CTR mode-specific subroutines. "Teaser" means
-# that parallelizeable nature of CBC decrypt and CTR is not utilized
+# that parallelizable nature of CBC decrypt and CTR is not utilized
 # yet. CBC encrypt on the other hand is as good as it can possibly
 # get processing one byte in 4.1 cycles with 128-bit key on SPARC64 X.
 # This is ~6x faster than pure software implementation...
diff --git a/crypto/aes/asm/aesni-mb-x86_64.pl b/crypto/aes/asm/aesni-mb-x86_64.pl
index c4151feb7e..1f356d2d3f 100644
--- a/crypto/aes/asm/aesni-mb-x86_64.pl
+++ b/crypto/aes/asm/aesni-mb-x86_64.pl
@@ -1325,10 +1325,10 @@ se_handler:
 	mov	-48(%rax),%r15
 	mov	%rbx,144($context)	# restore context->Rbx
 	mov	%rbp,160($context)	# restore context->Rbp
-	mov	%r12,216($context)	# restore cotnext->R12
-	mov	%r13,224($context)	# restore cotnext->R13
-	mov	%r14,232($context)	# restore cotnext->R14
-	mov	%r15,240($context)	# restore cotnext->R15
+	mov	%r12,216($context)	# restore context->R12
+	mov	%r13,224($context)	# restore context->R13
+	mov	%r14,232($context)	# restore context->R14
+	mov	%r15,240($context)	# restore context->R15
 
 	lea	-56-10*16(%rax),%rsi
 	lea	512($context),%rdi	# &context.Xmm6
diff --git a/crypto/aes/asm/aesni-x86.pl b/crypto/aes/asm/aesni-x86.pl
index 81c1cd651b..b351fca28e 100644
--- a/crypto/aes/asm/aesni-x86.pl
+++ b/crypto/aes/asm/aesni-x86.pl
@@ -239,7 +239,7 @@ sub aesni_generate1	# fully unrolled loop
 # can schedule aes[enc|dec] every cycle optimal interleave factor
 # equals to corresponding instructions latency. 8x is optimal for
 # * Bridge, but it's unfeasible to accommodate such implementation
-# in XMM registers addreassable in 32-bit mode and therefore maximum
+# in XMM registers addressable in 32-bit mode and therefore maximum
 # of 6x is used instead...
 
 sub aesni_generate2
diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl
index c5c3614eee..2a202c53e5 100644
--- a/crypto/aes/asm/aesni-x86_64.pl
+++ b/crypto/aes/asm/aesni-x86_64.pl
@@ -60,7 +60,7 @@
 # identical to CBC, because CBC-MAC is essentially CBC encrypt without
 # saving output. CCM CTR "stays invisible," because it's neatly
 # interleaved wih CBC-MAC. This provides ~30% improvement over
-# "straghtforward" CCM implementation with CTR and CBC-MAC performed
+# "straightforward" CCM implementation with CTR and CBC-MAC performed
 # disjointly. Parallelizable modes practically achieve the theoretical
 # limit.
 #
@@ -143,14 +143,14 @@
 # asymptotic, if it can be surpassed, isn't it? What happens there?
 # Rewind to CBC paragraph for the answer. Yes, out-of-order execution
 # magic is responsible for this. Processor overlaps not only the
-# additional instructions with AES ones, but even AES instuctions
+# additional instructions with AES ones, but even AES instructions
 # processing adjacent triplets of independent blocks. In the 6x case
 # additional instructions  still claim disproportionally small amount
 # of additional cycles, but in 8x case number of instructions must be
 # a tad too high for out-of-order logic to cope with, and AES unit
 # remains underutilized... As you can see 8x interleave is hardly
 # justifiable, so there no need to feel bad that 32-bit aesni-x86.pl
-# utilizies 6x interleave because of limited register bank capacity.
+# utilizes 6x interleave because of limited register bank capacity.
 #
 # Higher interleave factors do have negative impact on Westmere
 # performance. While for ECB mode it's negligible ~1.5%, other
@@ -1550,7 +1550,7 @@ $code.=<<___;
 	sub	\$8,$len
 	jnc	.Lctr32_loop8			# loop if $len-=8 didn't borrow
 
-	add	\$8,$len			# restore real remainig $len
+	add	\$8,$len			# restore real remaining $len
 	jz	.Lctr32_done			# done if ($len==0)
 	lea	-0x80($key),$key
 
@@ -1667,7 +1667,7 @@ $code.=<<___;
 	movups	$inout2,0x20($out)		# $len was 3, stop store
 
 .Lctr32_done:
-	xorps	%xmm0,%xmm0			# clear regiser bank
+	xorps	%xmm0,%xmm0			# clear register bank
 	xor	$key0,$key0
 	pxor	%xmm1,%xmm1
 	pxor	%xmm2,%xmm2
@@ -1856,7 +1856,7 @@ $code.=<<___;
 	lea	`16*6`($inp),$inp
 	pxor	$twmask,$inout5
 
-	 pxor	$twres,@tweak[0]		# calclulate tweaks^round[last]
+	 pxor	$twres,@tweak[0]		# calculate tweaks^round[last]
 	aesenc		$rndkey1,$inout4
 	 pxor	$twres,@tweak[1]
 	 movdqa	@tweak[0],`16*0`(%rsp)		# put aside tweaks^round[last]
@@ -2342,7 +2342,7 @@ $code.=<<___;
 	lea	`16*6`($inp),$inp
 	pxor	$twmask,$inout5
 
-	 pxor	$twres,@tweak[0]		# calclulate tweaks^round[last]
+	 pxor	$twres,@tweak[0]		# calculate tweaks^round[last]
 	aesdec		$rndkey1,$inout4
 	 pxor	$twres,@tweak[1]
 	 movdqa	@tweak[0],`16*0`(%rsp)		# put aside tweaks^last round key
@@ -4515,7 +4515,7 @@ __aesni_set_encrypt_key:
 
 .align	16
 .L14rounds:
-	movups	16($inp),%xmm2			# remaning half of *userKey
+	movups	16($inp),%xmm2			# remaining half of *userKey
 	mov	\$13,$bits			# 14 rounds for 256
 	lea	16(%rax),%rax
 	cmp	\$`1<<28`,%r10d			# AVX, but no XOP
diff --git a/crypto/aes/asm/aest4-sparcv9.pl b/crypto/aes/asm/aest4-sparcv9.pl
index 8d2b33d73e..54d0c58821 100644
--- a/crypto/aes/asm/aest4-sparcv9.pl
+++ b/crypto/aes/asm/aest4-sparcv9.pl
@@ -44,7 +44,7 @@
 # instructions with those on critical path. Amazing!
 #
 # As with Intel AES-NI, question is if it's possible to improve
-# performance of parallelizeable modes by interleaving round
+# performance of parallelizable modes by interleaving round
 # instructions. Provided round instruction latency and throughput
 # optimal interleave factor is 2. But can we expect 2x performance
 # improvement? Well, as round instructions can be issued one per
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index a7947af486..385b31fd54 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -929,7 +929,7 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	s/^(\s+)v/$1/o		or	# strip off v prefix
 	s/\bbx\s+lr\b/ret/o;
 
-	# fix up remainig legacy suffixes
+	# fix up remaining legacy suffixes
 	s/\.[ui]?8//o;
 	m/\],#8/o and s/\.16b/\.8b/go;
 	s/\.[ui]?32//o and s/\.16b/\.4s/go;
@@ -988,7 +988,7 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
 	s/\/\/\s?/@ /o;				# new->old style commentary
 
-	# fix up remainig new-style suffixes
+	# fix up remaining new-style suffixes
 	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
 	s/\],#[0-9]+/]!/o;
 
diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl
index 2e704a2124..5131e13a09 100755
--- a/crypto/aes/asm/vpaes-armv8.pl
+++ b/crypto/aes/asm/vpaes-armv8.pl
@@ -31,7 +31,7 @@
 # Apple A7(***)     22.7(**)    10.9/14.3        [8.45/10.0         ]
 # Mongoose(***)     26.3(**)    21.0/25.0(**)    [13.3/16.8         ]
 #
-# (*)	ECB denotes approximate result for parallelizeable modes
+# (*)	ECB denotes approximate result for parallelizable modes
 #	such as CBC decrypt, CTR, etc.;
 # (**)	these results are worse than scalar compiler-generated
 #	code, but it's constant-time and therefore preferred;
@@ -137,7 +137,7 @@ _vpaes_consts:
 	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
 	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
 
-.asciz  "Vector Permutaion AES for ARMv8, Mike Hamburg (Stanford University)"
+.asciz  "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
 .size	_vpaes_consts,.-_vpaes_consts
 .align	6
 ___
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index 3fc9e69b1c..8b41408296 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -28,7 +28,7 @@
 #    endif
   /*
    * Why doesn't gcc define __ARM_ARCH__? Instead it defines
-   * bunch of below macros. See all_architectires[] table in
+   * bunch of below macros. See all_architectures[] table in
    * gcc/config/arm/arm.c. On a side note it defines
    * __ARMEL__/__ARMEB__ for little-/big-endian.
    */
diff --git a/crypto/asn1/standard_methods.h b/crypto/asn1/standard_methods.h
index f5fa128346..d366aa090a 100644
--- a/crypto/asn1/standard_methods.h
+++ b/crypto/asn1/standard_methods.h
@@ -8,7 +8,7 @@
  */
 
 /*
- * This table MUST be kept in ascening order of the NID each method
+ * This table MUST be kept in ascending order of the NID each method
  * represents (corresponding to the pkey_id field) as OBJ_bsearch
  * is used to search it.
  */
diff --git a/crypto/bn/asm/c64xplus-gf2m.pl b/crypto/bn/asm/c64xplus-gf2m.pl
index c0e5400807..9c46da3af8 100644
--- a/crypto/bn/asm/c64xplus-gf2m.pl
+++ b/crypto/bn/asm/c64xplus-gf2m.pl
@@ -43,7 +43,7 @@ $code.=<<___;
 	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
 ||	EXTU	$A,16,16,$Alo
 
-	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits muliplication
+	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits multiplication
 ||	XORMPY	$Ahi,$B_2,$Ahix2
 ||	EXTU	$B,16,24,$B_1
 	XORMPY	$Alo,$B_0,$Alox0
diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S
index 2bd4209d61..58f7628e81 100644
--- a/crypto/bn/asm/ia64.S
+++ b/crypto/bn/asm/ia64.S
@@ -20,7 +20,7 @@
 // disclaimed.
 // ====================================================================
 //
-// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is
+// Version 2.x is Itanium2 re-tune. Few words about how Itanium2 is
 // different from Itanium to this module viewpoint. Most notably, is it
 // "wider" than Itanium? Can you experience loop scalability as
 // discussed in commentary sections? Not really:-( Itanium2 has 6
@@ -141,7 +141,7 @@
 //	User Mask I want to excuse the kernel from preserving upper
 //	(f32-f128) FP register bank over process context switch, thus
 //	minimizing bus bandwidth consumption during the switch (i.e.
-//	after PKI opration completes and the program is off doing
+//	after PKI operation completes and the program is off doing
 //	something else like bulk symmetric encryption). Having said
 //	this, I also want to point out that it might be good idea
 //	to compile the whole toolkit (as well as majority of the
@@ -162,7 +162,7 @@
 //
 // bn_[add|sub]_words routines.
 //
-// Loops are spinning in 2*(n+5) ticks on Itanuim (provided that the
+// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the
 // data reside in L1 cache, i.e. 2 ticks away). It's possible to
 // compress the epilogue and get down to 2*n+6, but at the cost of
 // scalability (the neat feature of this implementation is that it
@@ -500,7 +500,7 @@ bn_sqr_words:
 // possible to compress the epilogue (I'm getting tired to write this
 // comment over and over) and get down to 2*n+16 at the cost of
 // scalability. The decision will very likely be reconsidered after the
-// benchmark program is profiled. I.e. if perfomance gain on Itanium
+// benchmark program is profiled. I.e. if performance gain on Itanium
 // will appear larger than loss on "wider" IA-64, then the loop should
 // be explicitly split and the epilogue compressed.
 .L_bn_sqr_words_ctop:
@@ -936,7 +936,7 @@ bn_mul_comba8:
 		xma.hu	f118=f39,f127,f117	}
 { .mfi;		xma.lu	f117=f39,f127,f117	};;//
 //-------------------------------------------------//
-// Leaving muliplier's heaven... Quite a ride, huh?
+// Leaving multiplier's heaven... Quite a ride, huh?
 
 { .mii;	getf.sig	r31=f47
 	add		r25=r25,r24
diff --git a/crypto/bn/asm/parisc-mont.pl b/crypto/bn/asm/parisc-mont.pl
index 5b1c5eacca..a705e16392 100644
--- a/crypto/bn/asm/parisc-mont.pl
+++ b/crypto/bn/asm/parisc-mont.pl
@@ -21,7 +21,7 @@
 # optimal in respect to instruction set capabilities. Fair comparison
 # with vendor compiler is problematic, because OpenSSL doesn't define
 # BN_LLONG [presumably] for historical reasons, which drives compiler
-# toward 4 times 16x16=32-bit multiplicatons [plus complementary
+# toward 4 times 16x16=32-bit multiplications [plus complementary
 # shifts and additions] instead. This means that you should observe
 # several times improvement over code generated by vendor compiler
 # for PA-RISC 1.1, but the "baseline" is far from optimal. The actual
diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl
index 213c7ec777..9aa96c8e87 100644
--- a/crypto/bn/asm/ppc-mont.pl
+++ b/crypto/bn/asm/ppc-mont.pl
@@ -37,7 +37,7 @@
 # and squaring procedure operating on lengths divisible by 8. Length
 # is expressed in number of limbs. RSA private key operations are
 # ~35-50% faster (more for longer keys) on contemporary high-end POWER
-#