summaryrefslogtreecommitdiffstats
path: root/crypto/bn/bn_lcl.h
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/bn_lcl.h')
-rw-r--r--crypto/bn/bn_lcl.h35
1 files changed, 14 insertions, 21 deletions
diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h
index 9171879520..bab2c9d705 100644
--- a/crypto/bn/bn_lcl.h
+++ b/crypto/bn/bn_lcl.h
@@ -349,29 +349,22 @@ struct bn_gencb_st {
# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
/*
* BN_UMULT_HIGH section.
- *
- * No, I'm not trying to overwhelm you when stating that the
- * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
- * you to be impressed when I say that if the compiler doesn't
- * support 2*N integer type, then you have to replace every N*N
- * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
- * and additions which unavoidably results in severe performance
- * penalties. Of course provided that the hardware is capable of
- * producing 2*N result... That's when you normally start
- * considering assembler implementation. However! It should be
- * pointed out that some CPUs (most notably Alpha, PowerPC and
- * upcoming IA-64 family:-) provide *separate* instruction
- * calculating the upper half of the product placing the result
- * into a general purpose register. Now *if* the compiler supports
- * inline assembler, then it's not impossible to implement the
- * "bignum" routines (and have the compiler optimize 'em)
- * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
- * macro is about:-)
- *
- * <appro@fy.chalmers.se>
+ * If the compiler doesn't support 2*N integer type, then you have to
+ * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some
+ * shifts and additions which unavoidably results in severe performance
+ * penalties. Of course provided that the hardware is capable of producing
+ * 2*N result... That's when you normally start considering assembler
+ * implementation. However! It should be pointed out that some CPUs (e.g.,
+ * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating
+ * the upper half of the product placing the result into a general
+ * purpose register. Now *if* the compiler supports inline assembler,
+ * then it's not impossible to implement the "bignum" routines (and have
+ * the compiler optimize 'em) exhibiting "native" performance in C. That's
+ * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do
+ * support 2*64 integer type, which is also used here.
*/
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \
- (defined(SIXRY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
+ (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64)
# define BN_UMULT_LOHI(low,high,a,b) ({ \
__uint128_t ret=(__uint128_t)(a)*(b); \