author    Andy Polyakov <appro@openssl.org>  2005-10-06 09:12:39 +0000
committer Andy Polyakov <appro@openssl.org>  2005-10-06 09:12:39 +0000
commit    ca04d7a20842b2de39264b6c1605c3443fc09d16 (patch)
tree      855379855c1710b7d5026f14bbe92439af145705 /crypto/bn/bn_asm.c
parent    40a3c1230543c39d03a06e7b00e726313f9dd262 (diff)
Leave the decision to call/implement bn_sqr_mont to assembler developer.
Diffstat (limited to 'crypto/bn/bn_asm.c')
-rw-r--r--  crypto/bn/bn_asm.c | 45
1 file changed, 26 insertions(+), 19 deletions(-)
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index 741cf813db..9b8e380c4f 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -831,13 +831,14 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
#ifdef OPENSSL_BN_ASM_MONT
/*
* This is essentially reference implementation, which may or may not
- * result in performance improvement. E.g. on IA-32 this does give 40%
- * faster rsa1024 private key operations and 10% faster rsa4096 ones,
- * while on AMD64 it improves rsa1024 sign only by 10% and *worsens*
- * rsa4096 sign by 15%. Once again, it's a reference implementation,
- * one to be used as start-point for platform-specific assembler.
+ * result in performance improvement. E.g. on IA-32 this routine was
+ * observed to give 40% faster rsa1024 private key operations and 10%
+ * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
+ * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
+ * reference implementation, one to be used as start-point for
+ * platform-specific assembler.
*/
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
{
BN_ULONG c0,c1,ml,*tp;
#ifdef mul64
@@ -846,6 +847,9 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_
volatile BN_ULONG *vp;
int i=0,j;
+#if 0 /* template for platform-specific implementation */
+ if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num);
+#endif
vp = tp = alloca((num+2)*sizeof(BN_ULONG));
tp[num] = bn_mul_words(tp,ap,num,bp[0]);
@@ -890,18 +894,22 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_
if (tp[num]!=0 || c0==0)
{
for(i=0;i<num+2;i++) vp[i] = 0;
- return;
+ return 1;
}
}
for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
vp[num] = 0;
vp[num+1] = 0;
+ return 1;
}
-
-void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num)
- {
- bn_mul_mont(rp,ap,ap,np,n0,num);
- }
+#else
+/*
+ * Return value of 0 indicates that multiplication/convolution was not
+ * performed to signal the caller to fall down to alternative/original
+ * code-path.
+ */
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+{ return 0; }
#endif /* OPENSSL_BN_ASM_MONT */
#else /* !BN_MUL_COMBA */
@@ -942,7 +950,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
}
#ifdef OPENSSL_BN_ASM_MONT
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
{
BN_ULONG c0,c1,*tp;
volatile BN_ULONG *vp;
@@ -972,18 +980,17 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_
if (tp[num]!=0 || c0==0)
{
for(i=0;i<num+2;i++) vp[i] = 0;
- return;
+ return 1;
}
}
for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
vp[num] = 0;
vp[num+1] = 0;
+ return 1;
}
-
-void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num)
- {
- bn_mul_mont(rp,ap,ap,np,n0,num);
- }
+#else
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+{ return 0; }
#endif /* OPENSSL_BN_ASM_MONT */
#endif /* !BN_MUL_COMBA */
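A minimal caller-side sketch of the new contract (hypothetical names:
mont_mul and mont_mul_generic are not from this commit, and the
bn_mul_mont stub below simply mirrors the "#else" branch added above).
It shows why the return type changed from void to int: a build without
a working implementation returns 0, and the caller falls down to the
alternative/original code-path.

/* fallback_sketch.c -- illustrative only; assumes BN_ULONG is unsigned long */
#include <stdio.h>

typedef unsigned long BN_ULONG;

/* Stub standing in for a platform with no assembler bn_mul_mont; per
 * the new convention, returning 0 means "multiplication was not
 * performed". A real implementation could also dispatch squarings,
 * as in the "#if 0" template in the diff:
 *     if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num);
 */
static int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                       const BN_ULONG *np, BN_ULONG n0, int num)
{
    (void)rp; (void)ap; (void)bp; (void)np; (void)n0; (void)num;
    return 0;
}

/* Hypothetical stand-in for the pre-existing generic Montgomery path;
 * reduced to a trace so the sketch stays self-contained. */
static void mont_mul_generic(BN_ULONG *rp, const BN_ULONG *ap,
                             const BN_ULONG *bp, const BN_ULONG *np,
                             BN_ULONG n0, int num)
{
    (void)rp; (void)ap; (void)bp; (void)np; (void)n0; (void)num;
    printf("fell back to the generic code-path\n");
}

static void mont_mul(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                     const BN_ULONG *np, BN_ULONG n0, int num)
{
    if (bn_mul_mont(rp, ap, bp, np, n0, num))
        return;                      /* fast path handled everything */
    mont_mul_generic(rp, ap, bp, np, n0, num);
}

int main(void)
{
    BN_ULONG r[4] = {0}, a[4] = {1,0,0,0}, b[4] = {2,0,0,0};
    BN_ULONG n[4] = {0xfffffffbUL, 0, 0, 1};   /* arbitrary odd modulus */
    mont_mul(r, a, b, n, /* n0 */ 5, /* num */ 4);
    return 0;
}

The "#if 0" template in the first hunk is the other half of the change
named in the commit message: a platform-specific bn_mul_mont that also
implements bn_sqr_mont may detect ap==bp and dispatch squarings to it,
but that decision now rests with the assembler developer instead of
the mandatory C-level bn_sqr_mont wrapper this commit deletes.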