diff options
Diffstat (limited to 'crypto/bn/bn_lcl.h')
-rw-r--r-- | crypto/bn/bn_lcl.h | 35 |
1 files changed, 14 insertions, 21 deletions
diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h index 9171879520..bab2c9d705 100644 --- a/crypto/bn/bn_lcl.h +++ b/crypto/bn/bn_lcl.h @@ -349,29 +349,22 @@ struct bn_gencb_st { # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) /* * BN_UMULT_HIGH section. - * - * No, I'm not trying to overwhelm you when stating that the - * product of N-bit numbers is 2*N bits wide:-) No, I don't expect - * you to be impressed when I say that if the compiler doesn't - * support 2*N integer type, then you have to replace every N*N - * multiplication with 4 (N/2)*(N/2) accompanied by some shifts - * and additions which unavoidably results in severe performance - * penalties. Of course provided that the hardware is capable of - * producing 2*N result... That's when you normally start - * considering assembler implementation. However! It should be - * pointed out that some CPUs (most notably Alpha, PowerPC and - * upcoming IA-64 family:-) provide *separate* instruction - * calculating the upper half of the product placing the result - * into a general purpose register. Now *if* the compiler supports - * inline assembler, then it's not impossible to implement the - * "bignum" routines (and have the compiler optimize 'em) - * exhibiting "native" performance in C. That's what BN_UMULT_HIGH - * macro is about:-) - * - * <appro@fy.chalmers.se> + * If the compiler doesn't support 2*N integer type, then you have to + * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some + * shifts and additions which unavoidably results in severe performance + * penalties. Of course provided that the hardware is capable of producing + * 2*N result... That's when you normally start considering assembler + * implementation. However! It should be pointed out that some CPUs (e.g., + * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating + * the upper half of the product placing the result into a general + * purpose register. Now *if* the compiler supports inline assembler, + * then it's not impossible to implement the "bignum" routines (and have + * the compiler optimize 'em) exhibiting "native" performance in C. That's + * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do + * support 2*64 integer type, which is also used here. */ # if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \ - (defined(SIXRY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) + (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) # define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) # define BN_UMULT_LOHI(low,high,a,b) ({ \ __uint128_t ret=(__uint128_t)(a)*(b); \ |