From b6be13d5de6dd7d8aad5fd871eb6b0b30fc7d7f6 Mon Sep 17 00:00:00 2001
From: "Leonid S. Usov" <leonid@practi.net>
Date: Sat, 27 Oct 2018 00:04:35 +0300
Subject: Add decNumber library

The library adds support for decimal numbers of arbitrary length.
Downloaded from ICU, under ICU 1.8.1 license
http://download.icu-project.org/files/decNumber/decNumber-icu-368.zip
---
 COPYING                           |   38 +
 docs/content/download/default.yml |    4 +
 src/decNumber/ICU-license.html    |   45 +
 src/decNumber/decBasic.c          | 3908 ++++++++++++++++++
 src/decNumber/decCommon.c         | 1835 +++++++++
 src/decNumber/decContext.c        |  437 ++
 src/decNumber/decContext.h        |  254 ++
 src/decNumber/decDPD.h            | 1185 ++++++
 src/decNumber/decDouble.c         |  140 +
 src/decNumber/decDouble.h         |  155 +
 src/decNumber/decNumber.c         | 8141 +++++++++++++++++++++++++++++++++++++
 src/decNumber/decNumber.h         |  182 +
 src/decNumber/decNumberLocal.h    |  757 ++++
 src/decNumber/decPacked.c         |  220 +
 src/decNumber/decPacked.h         |   52 +
 src/decNumber/decQuad.c           |  135 +
 src/decNumber/decQuad.h           |  177 +
 src/decNumber/decSingle.c         |   71 +
 src/decNumber/decSingle.h         |   86 +
 src/decNumber/decimal128.c        |  553 +++
 src/decNumber/decimal128.h        |   81 +
 src/decNumber/decimal32.c         |  476 +++
 src/decNumber/decimal32.h         |   81 +
 src/decNumber/decimal64.c         |  839 ++++
 src/decNumber/decimal64.h         |   83 +
 src/decNumber/decnumber.pdf       |  Bin 0 -> 1416382 bytes
 src/decNumber/example1.c          |   38 +
 src/decNumber/example2.c          |   52 +
 src/decNumber/example3.c          |   64 +
 src/decNumber/example4.c          |   61 +
 src/decNumber/example5.c          |   36 +
 src/decNumber/example6.c          |   61 +
 src/decNumber/example7.c          |   35 +
 src/decNumber/example8.c          |   39 +
 src/decNumber/readme.txt          |   81 +
 tests.out                         |  101 +
 36 files changed, 20503 insertions(+)
 create mode 100644 src/decNumber/ICU-license.html
 create mode 100644 src/decNumber/decBasic.c
 create mode 100644 src/decNumber/decCommon.c
 create mode 100644 src/decNumber/decContext.c
 create mode 100644 src/decNumber/decContext.h
 create mode 100644 src/decNumber/decDPD.h
 create mode 100644 src/decNumber/decDouble.c
 create mode 100644 src/decNumber/decDouble.h
 create mode 100644 src/decNumber/decNumber.c
 create mode 100644 src/decNumber/decNumber.h
 create mode 100644 src/decNumber/decNumberLocal.h
 create mode 100644 src/decNumber/decPacked.c
 create mode 100644 src/decNumber/decPacked.h
 create mode 100644 src/decNumber/decQuad.c
 create mode 100644 src/decNumber/decQuad.h
 create mode 100644 src/decNumber/decSingle.c
 create mode 100644 src/decNumber/decSingle.h
 create mode 100644 src/decNumber/decimal128.c
 create mode 100644 src/decNumber/decimal128.h
 create mode 100644 src/decNumber/decimal32.c
 create mode 100644 src/decNumber/decimal32.h
 create mode 100644 src/decNumber/decimal64.c
 create mode 100644 src/decNumber/decimal64.h
 create mode 100644 src/decNumber/decnumber.pdf
 create mode 100644 src/decNumber/example1.c
 create mode 100644 src/decNumber/example2.c
 create mode 100644 src/decNumber/example3.c
 create mode 100644 src/decNumber/example4.c
 create mode 100644 src/decNumber/example5.c
 create mode 100644 src/decNumber/example6.c
 create mode 100644 src/decNumber/example7.c
 create mode 100644 src/decNumber/example8.c
 create mode 100644 src/decNumber/readme.txt
 create mode 100644 tests.out

diff --git a/COPYING b/COPYING
index 7222ff0b..3e1dd1fa 100644
--- a/COPYING
+++ b/COPYING
@@ -68,3 +68,41 @@ WARRANTY.  IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY
 REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 
+
+
+jq uses parts of the open source C library "decNumber", which is distribured
+under the following license:
+
+
+ICU License - ICU 1.8.1 and later
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2005 International Business Machines Corporation and others
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.
+
diff --git a/docs/content/download/default.yml b/docs/content/download/default.yml
index 8fb1216b..8ff45804 100644
--- a/docs/content/download/default.yml
+++ b/docs/content/download/default.yml
@@ -13,6 +13,10 @@ body:
       jq is licensed under the MIT license. For all of the gory
       details, read the file `COPYING` in the source distribution.
 
+      jq uses a C library for decimal number support. This is an ICU 1.8.1
+      licensed code obtained from the ICU downloads archive
+      http://download.icu-project.org/files/decNumber/decNumber-icu-368.zip.
+
 
       ### Linux
 
diff --git a/src/decNumber/ICU-license.html b/src/decNumber/ICU-license.html
new file mode 100644
index 00000000..bc99e8bf
--- /dev/null
+++ b/src/decNumber/ICU-license.html
@@ -0,0 +1,45 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
+<title>ICU License - ICU 1.8.1 and later</title>
+</head>
+
+<body>
+<h1>ICU License - ICU 1.8.1 and later</h1>
+<pre>
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2005 International Business Machines Corporation and others
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.
+
+--------------------------------------------------------------------------------
+All trademarks and registered trademarks mentioned herein are the property of their respective owners.
+</pre>
+</body>
+</html>
diff --git a/src/decNumber/decBasic.c b/src/decNumber/decBasic.c
new file mode 100644
index 00000000..1f680c4a
--- /dev/null
+++ b/src/decNumber/decBasic.c
@@ -0,0 +1,3908 @@
+/* ------------------------------------------------------------------ */
+/* decBasic.c -- common base code for Basic decimal types             */
+/* ------------------------------------------------------------------ */
+/* Copyright (c) IBM Corporation, 2000, 2010.  All rights reserved.   */
+/*                                                                    */
+/* This software is made available under the terms of the             */
+/* ICU License -- ICU 1.8.1 and later.                                */
+/*                                                                    */
+/* The description and User's Guide ("The decNumber C Library") for   */
+/* this software is included in the package as decNumber.pdf.  This   */
+/* document is also available in HTML, together with specifications,  */
+/* testcases, and Web links, on the General Decimal Arithmetic page.  */
+/*                                                                    */
+/* Please send comments, suggestions, and corrections to the author:  */
+/*   mfc@uk.ibm.com                                                   */
+/*   Mike Cowlishaw, IBM Fellow                                       */
+/*   IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK         */
+/* ------------------------------------------------------------------ */
+/* This module comprises code that is shared between decDouble and    */
+/* decQuad (but not decSingle).  The main arithmetic operations are   */
+/* here (Add, Subtract, Multiply, FMA, and Division operators).       */
+/*                                                                    */
+/* Unlike decNumber, parameterization takes place at compile time     */
+/* rather than at runtime.  The parameters are set in the decDouble.c */
+/* (etc.) files, which then include this one to produce the compiled  */
+/* code.  The functions here, therefore, are code shared between      */
+/* multiple formats.                                                  */
+/*                                                                    */
+/* This must be included after decCommon.c.                           */
+/* ------------------------------------------------------------------ */
+// Names here refer to decFloat rather than to decDouble, etc., and
+// the functions are in strict alphabetical order.
+
+// The compile-time flags SINGLE, DOUBLE, and QUAD are set up in
+// decCommon.c
+#if !defined(QUAD)
+  #error decBasic.c must be included after decCommon.c
+#endif
+#if SINGLE
+  #error Routines in decBasic.c are for decDouble and decQuad only
+#endif
+
+/* Private constants */
+#define DIVIDE      0x80000000     // Divide operations [as flags]
+#define REMAINDER   0x40000000     // ..
+#define DIVIDEINT   0x20000000     // ..
+#define REMNEAR     0x10000000     // ..
+
+/* Private functions (local, used only by routines in this module) */
+static decFloat *decDivide(decFloat *, const decFloat *,
+                              const decFloat *, decContext *, uInt);
+static decFloat *decCanonical(decFloat *, const decFloat *);
+static void      decFiniteMultiply(bcdnum *, uByte *, const decFloat *,
+                              const decFloat *);
+static decFloat *decInfinity(decFloat *, const decFloat *);
+static decFloat *decInvalid(decFloat *, decContext *);
+static decFloat *decNaNs(decFloat *, const decFloat *, const decFloat *,
+                              decContext *);
+static Int       decNumCompare(const decFloat *, const decFloat *, Flag);
+static decFloat *decToIntegral(decFloat *, const decFloat *, decContext *,
+                              enum rounding, Flag);
+static uInt      decToInt32(const decFloat *, decContext *, enum rounding,
+                              Flag, Flag);
+
+/* ------------------------------------------------------------------ */
+/* decCanonical -- copy a decFloat, making canonical                  */
+/*                                                                    */
+/*   result gets the canonicalized df                                 */
+/*   df     is the decFloat to copy and make canonical                */
+/*   returns result                                                   */
+/*                                                                    */
+/* This is exposed via decFloatCanonical for Double and Quad only.    */
+/* This works on specials, too; no error or exception is possible.    */
+/* ------------------------------------------------------------------ */
+static decFloat * decCanonical(decFloat *result, const decFloat *df) {
+  uInt encode, precode, dpd;       // work
+  uInt inword, uoff, canon;        // ..
+  Int  n;                          // counter (down)
+  if (df!=result) *result=*df;     // effect copy if needed
+  if (DFISSPECIAL(result)) {
+    if (DFISINF(result)) return decInfinity(result, df); // clean Infinity
+    // is a NaN
+    DFWORD(result, 0)&=~ECONNANMASK;    // clear ECON except selector
+    if (DFISCCZERO(df)) return result;  // coefficient continuation is 0
+    // drop through to check payload
+    }
+  // return quickly if the coefficient continuation is canonical
+  { // declare block
+  #if DOUBLE
+    uInt sourhi=DFWORD(df, 0);
+    uInt sourlo=DFWORD(df, 1);
+    if (CANONDPDOFF(sourhi, 8)
+     && CANONDPDTWO(sourhi, sourlo, 30)
+     && CANONDPDOFF(sourlo, 20)
+     && CANONDPDOFF(sourlo, 10)
+     && CANONDPDOFF(sourlo, 0)) return result;
+  #elif QUAD
+    uInt sourhi=DFWORD(df, 0);
+    uInt sourmh=DFWORD(df, 1);
+    uInt sourml=DFWORD(df, 2);
+    uInt sourlo=DFWORD(df, 3);
+    if (CANONDPDOFF(sourhi, 4)
+     && CANONDPDTWO(sourhi, sourmh, 26)
+     && CANONDPDOFF(sourmh, 16)
+     && CANONDPDOFF(sourmh, 6)
+     && CANONDPDTWO(sourmh, sourml, 28)
+     && CANONDPDOFF(sourml, 18)
+     && CANONDPDOFF(sourml, 8)
+     && CANONDPDTWO(sourml, sourlo, 30)
+     && CANONDPDOFF(sourlo, 20)
+     && CANONDPDOFF(sourlo, 10)
+     && CANONDPDOFF(sourlo, 0)) return result;
+  #endif
+  } // block
+
+  // Loop to repair a non-canonical coefficent, as needed
+  inword=DECWORDS-1;               // current input word
+  uoff=0;                          // bit offset of declet
+  encode=DFWORD(result, inword);
+  for (n=DECLETS-1; n>=0; n--) {   // count down declets of 10 bits
+    dpd=encode>>uoff;
+    uoff+=10;
+    if (uoff>32) {                 // crossed uInt boundary
+      inword--;
+      encode=DFWORD(result, inword);
+      uoff-=32;
+      dpd|=encode<<(10-uoff);      // get pending bits
+      }
+    dpd&=0x3ff;                    // clear uninteresting bits
+    if (dpd<0x16e) continue;       // must be canonical
+    canon=BIN2DPD[DPD2BIN[dpd]];   // determine canonical declet
+    if (canon==dpd) continue;      // have canonical declet
+    // need to replace declet
+    if (uoff>=10) {                // all within current word
+      encode&=~(0x3ff<<(uoff-10)); // clear the 10 bits ready for replace
+      encode|=canon<<(uoff-10);    // insert the canonical form
+      DFWORD(result, inword)=encode;    // .. and save
+      continue;
+      }
+    // straddled words
+    precode=DFWORD(result, inword+1);   // get previous
+    precode&=0xffffffff>>(10-uoff);     // clear top bits
+    DFWORD(result, inword+1)=precode|(canon<<(32-(10-uoff)));
+    encode&=0xffffffff<<uoff;           // clear bottom bits
+    encode|=canon>>(10-uoff);           // insert canonical
+    DFWORD(result, inword)=encode;      // .. and save
+    } // n
+  return result;
+  } // decCanonical
+
+/* ------------------------------------------------------------------ */
+/* decDivide -- divide operations                                     */
+/*                                                                    */
+/*   result gets the result of dividing dfl by dfr:                   */
+/*   dfl    is the first decFloat (lhs)                               */
+/*   dfr    is the second decFloat (rhs)                              */
+/*   set    is the context                                            */
+/*   op     is the operation selector                                 */
+/*   returns result                                                   */
+/*                                                                    */
+/* op is one of DIVIDE, REMAINDER, DIVIDEINT, or REMNEAR.             */
+/* ------------------------------------------------------------------ */
+#define DIVCOUNT  0                // 1 to instrument subtractions counter
+#define DIVBASE   ((uInt)BILLION)  // the base used for divide
+#define DIVOPLEN  DECPMAX9         // operand length ('digits' base 10**9)
+#define DIVACCLEN (DIVOPLEN*3)     // accumulator length (ditto)
+static decFloat * decDivide(decFloat *result, const decFloat *dfl,
+                            const decFloat *dfr, decContext *set, uInt op) {
+  decFloat quotient;               // for remainders
+  bcdnum num;                      // for final conversion
+  uInt   acc[DIVACCLEN];           // coefficent in base-billion ..
+  uInt   div[DIVOPLEN];            // divisor in base-billion ..
+  uInt   quo[DIVOPLEN+1];          // quotient in base-billion ..
+  uByte  bcdacc[(DIVOPLEN+1)*9+2]; // for quotient in BCD, +1, +1
+  uInt   *msua, *msud, *msuq;      // -> msu of acc, div, and quo
+  Int    divunits, accunits;       // lengths
+  Int    quodigits;                // digits in quotient
+  uInt   *lsua, *lsuq;             // -> current acc and quo lsus
+  Int    length, multiplier;       // work
+  uInt   carry, sign;              // ..
+  uInt   *ua, *ud, *uq;            // ..
+  uByte  *ub;                      // ..
+  uInt   uiwork;                   // for macros
+  uInt   divtop;                   // top unit of div adjusted for estimating
+  #if DIVCOUNT
+  static uInt maxcount=0;          // worst-seen subtractions count
+  uInt   divcount=0;               // subtractions count [this divide]
+  #endif
+
+  // calculate sign
+  num.sign=(DFWORD(dfl, 0)^DFWORD(dfr, 0)) & DECFLOAT_Sign;
+
+  if (DFISSPECIAL(dfl) || DFISSPECIAL(dfr)) { // either is special?
+    // NaNs are handled as usual
+    if (DFISNAN(dfl) || DFISNAN(dfr)) return decNaNs(result, dfl, dfr, set);
+    // one or two infinities
+    if (DFISINF(dfl)) {
+      if (DFISINF(dfr)) return decInvalid(result, set); // Two infinities bad
+      if (op&(REMAINDER|REMNEAR)) return decInvalid(result, set); // as is rem
+      // Infinity/x is infinite and quiet, even if x=0
+      DFWORD(result, 0)=num.sign;
+      return decInfinity(result, result);
+      }
+    // must be x/Infinity -- remainders are lhs
+    if (op&(REMAINDER|REMNEAR)) return decCanonical(result, dfl);
+    // divides: return zero with correct sign and exponent depending
+    // on op (Etiny for divide, 0 for divideInt)
+    decFloatZero(result);
+    if (op==DIVIDEINT) DFWORD(result, 0)|=num.sign; // add sign
+     else DFWORD(result, 0)=num.sign;        // zeros the exponent, too
+    return result;
+    }
+  // next, handle zero operands (x/0 and 0/x)
+  if (DFISZERO(dfr)) {                       // x/0
+    if (DFISZERO(dfl)) {                     // 0/0 is undefined
+      decFloatZero(result);
+      DFWORD(result, 0)=DECFLOAT_qNaN;
+      set->status|=DEC_Division_undefined;
+      return result;
+      }
+    if (op&(REMAINDER|REMNEAR)) return decInvalid(result, set); // bad rem
+    set->status|=DEC_Division_by_zero;
+    DFWORD(result, 0)=num.sign;
+    return decInfinity(result, result);      // x/0 -> signed Infinity
+    }
+  num.exponent=GETEXPUN(dfl)-GETEXPUN(dfr);  // ideal exponent
+  if (DFISZERO(dfl)) {                       // 0/x (x!=0)
+    // if divide, result is 0 with ideal exponent; divideInt has
+    // exponent=0, remainders give zero with lower exponent
+    if (op&DIVIDEINT) {
+      decFloatZero(result);
+      DFWORD(result, 0)|=num.sign;           // add sign
+      return result;
+      }
+    if (!(op&DIVIDE)) {                      // a remainder
+      // exponent is the minimum of the operands
+      num.exponent=MINI(GETEXPUN(dfl), GETEXPUN(dfr));
+      // if the result is zero the sign shall be sign of dfl
+      num.sign=DFWORD(dfl, 0)&DECFLOAT_Sign;
+      }
+    bcdacc[0]=0;
+    num.msd=bcdacc;                          // -> 0
+    num.lsd=bcdacc;                          // ..
+    return decFinalize(result, &num, set);   // [divide may clamp exponent]
+    } // 0/x
+  // [here, both operands are known to be finite and non-zero]
+
+  // extract the operand coefficents into 'units' which are
+  // base-billion; the lhs is high-aligned in acc and the msu of both
+  // acc and div is at the right-hand end of array (offset length-1);
+  // the quotient can need one more unit than the operands as digits
+  // in it are not necessarily aligned neatly; further, the quotient
+  // may not start accumulating until after the end of the initial
+  // operand in acc if that is small (e.g., 1) so the accumulator
+  // must have at least that number of units extra (at the ls end)
+  GETCOEFFBILL(dfl, acc+DIVACCLEN-DIVOPLEN);
+  GETCOEFFBILL(dfr, div);
+  // zero the low uInts of acc
+  acc[0]=0;
+  acc[1]=0;
+  acc[2]=0;
+  acc[3]=0;
+  #if DOUBLE
+    #if DIVOPLEN!=2
+      #error Unexpected Double DIVOPLEN
+    #endif
+  #elif QUAD
+  acc[4]=0;
+  acc[5]=0;
+  acc[6]=0;
+  acc[7]=0;
+    #if DIVOPLEN!=4
+      #error Unexpected Quad DIVOPLEN
+    #endif
+  #endif
+
+  // set msu and lsu pointers
+  msua=acc+DIVACCLEN-1;       // [leading zeros removed below]
+  msuq=quo+DIVOPLEN;
+  //[loop for div will terminate because operands are non-zero]
+  for (msud=div+DIVOPLEN-1; *msud==0;) msud--;
+  // the initial least-significant unit of acc is set so acc appears
+  // to have the same length as div.
+  // This moves one position towards the least possible for each
+  // iteration
+  divunits=(Int)(msud-div+1); // precalculate
+  lsua=msua-divunits+1;       // initial working lsu of acc
+  lsuq=msuq;                  // and of quo
+
+  // set up the estimator for the multiplier; this is the msu of div,
+  // plus two bits from the unit below (if any) rounded up by one if
+  // there are any non-zero bits or units below that [the extra two
+  // bits makes for a much better estimate when the top unit is small]
+  divtop=*msud<<2;
+  if (divunits>1) {
+    uInt *um=msud-1;
+    uInt d=*um;
+    if (d>=750000000) {divtop+=3; d-=750000000;}
+     else if (d>=500000000) {divtop+=2; d-=500000000;}
+     else if (d>=250000000) {divtop++; d-=250000000;}
+    if (d) divtop++;
+     else for (um--; um>=div; um--) if (*um) {
+      divtop++;
+      break;
+      }
+    } // >1 unit
+
+  #if DECTRACE
+  {Int i;
+  printf("----- div=");
+  for (i=divunits-1; i>=0; i--) printf("%09ld ", (LI)div[i]);
+  printf("\n");}
+  #endif
+
+  // now collect up to DECPMAX+1 digits in the quotient (this may
+  // need OPLEN+1 uInts if unaligned)
+  quodigits=0;                // no digits yet
+  for (;; lsua--) {           // outer loop -- each input position
+    #if DECCHECK
+    if (lsua<acc) {
+      printf("Acc underrun...\n");
+      break;
+      }
+    #endif
+    #if DECTRACE
+    printf("Outer: quodigits=%ld acc=", (LI)quodigits);
+    for (ua=msua; ua>=lsua; ua--) printf("%09ld ", (LI)*ua);
+    printf("\n");
+    #endif
+    *lsuq=0;                  // default unit result is 0
+    for (;;) {                // inner loop -- calculate quotient unit
+      // strip leading zero units from acc (either there initially or
+      // from subtraction below); this may strip all if exactly 0
+      for (; *msua==0 && msua>=lsua;) msua--;
+      accunits=(Int)(msua-lsua+1);                // [maybe 0]
+      // subtraction is only necessary and possible if there are as
+      // least as many units remaining in acc for this iteration as
+      // there are in div
+      if (accunits<divunits) {
+        if (accunits==0) msua++;                  // restore
+        break;
+        }
+
+      // If acc is longer than div then subtraction is definitely
+      // possible (as msu of both is non-zero), but if they are the
+      // same length a comparison is needed.
+      // If a subtraction is needed then a good estimate of the
+      // multiplier for the subtraction is also needed in order to
+      // minimise the iterations of this inner loop because the
+      // subtractions needed dominate division performance.
+      if (accunits==divunits) {
+        // compare the high divunits of acc and div:
+        // acc<div:  this quotient unit is unchanged; subtraction
+        //           will be possible on the next iteration
+        // acc==div: quotient gains 1, set acc=0
+        // acc>div:  subtraction necessary at this position
+        for (ud=msud, ua=msua; ud>div; ud--, ua--) if (*ud!=*ua) break;
+        // [now at first mismatch or lsu]
+        if (*ud>*ua) break;                       // next time...
+        if (*ud==*ua) {                           // all compared equal
+          *lsuq+=1;                               // increment result
+          msua=lsua;                              // collapse acc units
+          *msua=0;                                // .. to a zero
+          break;
+          }
+
+        // subtraction necessary; estimate multiplier [see above]
+        // if both *msud and *msua are small it is cost-effective to
+        // bring in part of the following units (if any) to get a
+        // better estimate (assume some other non-zero in div)
+        #define DIVLO 1000000U
+        #define DIVHI (DIVBASE/DIVLO)
+        #if DECUSE64
+          if (divunits>1) {
+            // there cannot be a *(msud-2) for DECDOUBLE so next is
+            // an exact calculation unless DECQUAD (which needs to
+            // assume bits out there if divunits>2)
+            uLong mul=(uLong)*msua * DIVBASE + *(msua-1);
+            uLong div=(uLong)*msud * DIVBASE + *(msud-1);
+            #if QUAD
+            if (divunits>2) div++;
+            #endif
+            mul/=div;
+            multiplier=(Int)mul;
+            }
+           else multiplier=*msua/(*msud);
+        #else
+          if (divunits>1 && *msua<DIVLO && *msud<DIVLO) {
+            multiplier=(*msua*DIVHI + *(msua-1)/DIVLO)
+                      /(*msud*DIVHI + *(msud-1)/DIVLO +1);
+            }
+           else multiplier=(*msua<<2)/divtop;
+        #endif
+        }
+       else {                                     // accunits>divunits
+        // msud is one unit 'lower' than msua, so estimate differently
+        #if DECUSE64
+          uLong mul;
+          // as before, bring in extra digits if possible
+          if (divunits>1 && *msua<DIVLO && *msud<DIVLO) {
+            mul=((uLong)*msua * DIVHI * DIVBASE) + *(msua-1) * DIVHI
+               + *(msua-2)/DIVLO;
+            mul/=(*msud*DIVHI + *(msud-1)/DIVLO +1);
+            }
+           else if (divunits==1) {
+            mul=(uLong)*msua * DIVBASE + *(msua-1);
+            mul/=*msud;       // no more to the right
+            }
+           else {
+            mul=(uLong)(*msua) * (uInt)(DIVBASE<<2)
+                + (*(msua-1)<<2);
+            mul/=divtop;      // [divtop already allows for sticky bits]
+            }
+          multiplier=(Int)mul;
+        #else
+          multiplier=*msua * ((DIVBASE<<2)/divtop);
+        #endif
+        }
+      if (multiplier==0) multiplier=1;            // marginal case
+      *lsuq+=multiplier;
+
+      #if DIVCOUNT
+      // printf("Multiplier: %ld\n", (LI)multiplier);
+      divcount++;
+      #endif
+
+      // Carry out the subtraction  acc-(div*multiplier); for each
+      // unit in div, do the multiply, split to units (see
+      // decFloatMultiply for the algorithm), and subtract from acc
+      #define DIVMAGIC  2305843009U               // 2**61/10**9
+      #define DIVSHIFTA 29
+      #define DIVSHIFTB 32
+      carry=0;
+      for (ud=div, ua=lsua; ud<=msud; ud++, ua++) {
+        uInt lo, hop;
+        #if DECUSE64
+          uLong sub=(uLong)multiplier*(*ud)+carry;
+          if (sub<DIVBASE) {
+            carry=0;
+            lo=(uInt)sub;
+            }
+           else {
+            hop=(uInt)(sub>>DIVSHIFTA);
+            carry=(uInt)(((uLong)hop*DIVMAGIC)>>DIVSHIFTB);
+            // the estimate is now in hi; now calculate sub-hi*10**9
+            // to get the remainder (which will be <DIVBASE))
+            lo=(uInt)sub;
+            lo-=carry*DIVBASE;                    // low word of result
+            if (lo>=DIVBASE) {
+              lo-=DIVBASE;                        // correct by +1
+              carry++;
+              }
+            }
+        #else // 32-bit
+          uInt hi;
+          // calculate multiplier*(*ud) into hi and lo
+          LONGMUL32HI(hi, *ud, multiplier);       // get the high word
+          lo=multiplier*(*ud);                    // .. and the low
+          lo+=carry;                              // add the old hi
+          carry=hi+(lo<carry);                    // .. with any carry
+          if (carry || lo>=DIVBASE) {             // split is needed
+            hop=(carry<<3)+(lo>>DIVSHIFTA);       // hi:lo/2**29
+            LONGMUL32HI(carry, hop, DIVMAGIC);    // only need the high word
+            // [DIVSHIFTB is 32, so carry can be used directly]
+            // the estimate is now in carry; now calculate hi:lo-est*10**9;
+            // happily the top word of the result is irrelevant because it
+            // will always be zero so this needs only one multiplication
+            lo-=(carry*DIVBASE);
+            // the correction here will be at most +1; do it
+            if (lo>=DIVBASE) {
+              lo-=DIVBASE;
+              carry++;
+              }
+            }
+        #endif
+        if (lo>*ua) {              // borrow needed
+          *ua+=DIVBASE;
+          carry++;
+          }
+        *ua-=lo;
+        } // ud loop
+      if (carry) *ua-=carry;       // accdigits>divdigits [cannot borrow]
+      } // inner loop
+
+    // the outer loop terminates when there is either an exact result
+    // or enough digits; first update the quotient digit count and
+    // pointer (if any significant digits)
+    #if DECTRACE
+    if (*lsuq || quodigits) printf("*lsuq=%09ld\n", (LI)*lsuq);
+    #endif
+    if (quodigits) {
+      quodigits+=9;                // had leading unit earlier
+      lsuq--;
+      if (quodigits>DECPMAX+1) break;   // have enough
+      }
+     else if (*lsuq) {             // first quotient digits
+      const uInt *pow;
+      for (pow=DECPOWERS; *lsuq>=*pow; pow++) quodigits++;
+      lsuq--;
+      // [cannot have >DECPMAX+1 on first unit]
+      }
+
+    if (*msua!=0) continue;        // not an exact result
+    // acc is zero iff used all of original units and zero down to lsua
+    // (must also continue to original lsu for correct quotient length)
+    if (lsua>acc+DIVACCLEN-DIVOPLEN) continue;
+    for (; msua>lsua && *msua==0;) msua--;
+    if (*msua==0 && msua==lsua) break;
+    } // outer loop
+
+  // all of the original operand in acc has been covered at this point
+  // quotient now has at least DECPMAX+2 digits
+  // *msua is now non-0 if inexact and sticky bits
+  // lsuq is one below the last uint of the quotient
+  lsuq++;                          // set -> true lsu of quo
+  if (*msua) *lsuq|=1;             // apply sticky bit
+
+  // quo now holds the (unrounded) quotient in base-billion; one
+  // base-billion 'digit' per uInt.
+  #if DECTRACE
+  printf("DivQuo:");
+  for (uq=msuq; uq>=lsuq; uq--) printf(" %09ld", (LI)*uq);
+  printf("\n");
+  #endif
+
+  // Now convert to BCD for rounding and cleanup, starting from the
+  // most significant end [offset by one into bcdacc to leave room
+  // for a possible carry digit if rounding for REMNEAR is needed]
+  for (uq=msuq, ub=bcdacc+1; uq>=lsuq; uq--, ub+=9) {
+    uInt top, mid, rem;                 // work
+    if (*uq==0) {                       // no split needed
+      UBFROMUI(ub, 0);                  // clear 9 BCD8s
+      UBFROMUI(ub+4, 0);                // ..
+      *(ub+8)=0;                        // ..
+      continue;
+      }
+    // *uq is non-zero -- split the base-billion digit into
+    // hi, mid, and low three-digits
+    #define divsplit9 1000000           // divisor
+    #define divsplit6 1000              // divisor
+    // The splitting is done by simple divides and remainders,
+    // assuming the compiler will optimize these [GCC does]
+    top=*uq/divsplit9;
+    rem=*uq%divsplit9;
+    mid=rem/divsplit6;
+    rem=rem%divsplit6;
+    // lay out the nine BCD digits (plus one unwanted byte)
+    UBFROMUI(ub,   UBTOUI(&BIN2BCD8[top*4]));
+    UBFROMUI(ub+3, UBTOUI(&BIN2BCD8[mid*4]));
+    UBFROMUI(ub+6, UBTOUI(&BIN2BCD8[rem*4]));
+    } // BCD conversion loop
+  ub--;                                 // -> lsu
+
+  // complete the bcdnum; quodigits is correct, so the position of
+  // the first non-zero is known
+  num.msd=bcdacc+1+(msuq-lsuq+1)*9-quodigits;
+  num.lsd=ub;
+
+  // make exponent adjustments, etc
+  if (lsua<acc+DIVACCLEN-DIVOPLEN) {    // used extra digits
+    num.exponent-=(Int)((acc+DIVACCLEN-DIVOPLEN-lsua)*9);
+    // if the result was exact then there may be up to 8 extra
+    // trailing zeros in the overflowed quotient final unit
+    if (*msua==0) {
+      for (; *ub==0;) ub--;             // drop zeros
+      num.exponent+=(Int)(num.lsd-ub);  // and adjust exponent
+      num.lsd=ub;
+      }
+    } // adjustment needed
+
+  #if DIVCOUNT
+  if (divcount>maxcount) {              // new high-water nark
+    maxcount=divcount;
+    printf("DivNewMaxCount: %ld\n", (LI)maxcount);
+    }
+  #endif
+
+  if (op&DIVIDE) return decFinalize(result, &num, set); // all done
+
+  // Is DIVIDEINT or a remainder; there is more to do -- first form
+  // the integer (this is done 'after the fact', unlike as in
+  // decNumber, so as not to tax DIVIDE)
+
+  // The first non-zero digit will be in the first 9 digits, known
+  // from quodigits and num.msd, so there is always space for DECPMAX
+  // digits
+
+  length=(Int)(num.lsd-num.msd+1);
+  //printf("Length exp: %ld %ld\n", (LI)length, (LI)num.exponent);
+
+  if (length+num.exponent>DECPMAX) { // cannot fit
+    decFloatZero(result);
+    DFWORD(result, 0)=DECFLOAT_qNaN;
+    set->status|=DEC_Division_impossible;
+    return result;
+    }
+
+  if (num.exponent>=0) {           // already an int, or need pad zeros
+    for (ub=num.lsd+1; ub<=num.lsd+num.exponent; ub++) *ub=0;
+    num.lsd+=num.exponent;
+    }
+   else {                          // too long: round or truncate needed
+    Int drop=-num.exponent;
+    if (!(op&REMNEAR)) {           // simple truncate
+      num.lsd-=drop;
+      if (num.lsd<num.msd) {       // truncated all
+        num.lsd=num.msd;           // make 0
+        *num.lsd=0;                // .. [sign still relevant]
+        }
+      }
+     else {                        // round to nearest even [sigh]
+      // round-to-nearest, in-place; msd is at or to right of bcdacc+1
+      // (this is a special case of Quantize -- q.v. for commentary)
+      uByte *roundat;              // -> re-round digit
+      uByte reround;               // reround value
+      *(num.msd-1)=0;              // in case of left carry, or make 0
+      if (drop<length) roundat=num.lsd-drop+1;
+       else if (drop==length) roundat=num.msd;
+       else roundat=num.msd-1;     // [-> 0]
+      reround=*roundat;
+      for (ub=roundat+1; ub<=num.lsd; ub++) {
+        if (*ub!=0) {
+          reround=DECSTICKYTAB[reround];
+          break;
+          }
+        } // check stickies
+      if (roundat>num.msd) num.lsd=roundat-1;
+       else {
+        num.msd--;                           // use the 0 ..
+        num.lsd=num.msd;                     // .. at the new MSD place
+        }
+      if (reround!=0) {                      // discarding non-zero
+        uInt bump=0;
+        // rounding is DEC_ROUND_HALF_EVEN always
+        if (reround>5) bump=1;               // >0.5 goes up
+         else if (reround==5)                // exactly 0.5000 ..
+          bump=*(num.lsd) & 0x01;            // .. up iff [new] lsd is odd
+        if (bump!=0) {                       // need increment
+          // increment the coefficient; this might end up with 1000...
+          ub=num.lsd;
+          for (; UBTOUI(ub-3)==0x09090909; ub-=4) UBFROMUI(ub-3, 0);
+          for (; *ub==9; ub--) *ub=0;        // at most 3 more
+          *ub+=1;
+          if (ub<num.msd) num.msd--;         // carried
+          } // bump needed
+        } // reround!=0
+      } // remnear
+    } // round or truncate needed
+  num.exponent=0;                            // all paths
+  //decShowNum(&num, "int");
+
+  if (op&DIVIDEINT) return decFinalize(result, &num, set); // all done
+
+  // Have a remainder to calculate
+  decFinalize(&quotient, &num, set);         // lay out the integer so far
+  DFWORD(&quotient, 0)^=DECFLOAT_Sign;       // negate it
+  sign=DFWORD(dfl, 0);                       // save sign of dfl
+  decFloatFMA(result, &quotient, dfr, dfl, set);
+  if (!DFISZERO(result)) return result;
+  // if the result is zero the sign shall be sign of dfl
+  DFWORD(&quotient, 0)=sign;                 // construct decFloat of sign
+  return decFloatCopySign(result, result, &quotient);
+  } // decDivide
+
+/* ------------------------------------------------------------------ */
+/* decFiniteMultiply -- multiply two finite decFloats                 */
+/*                                                                    */
+/*   num    gets the result of multiplying dfl and dfr                */
+/*   bcdacc .. with the coefficient in this array                     */
+/*   dfl    is the first decFloat (lhs)                               */
+/*   dfr    is the second decFloat (rhs)                              */
+/*                                                                    */
+/* This effects the multiplication of two decFloats, both known to be */
+/* finite, leaving the result in a bcdnum ready for decFinalize (for  */
+/* use in Multiply) or in a following addition (FMA).                 */
+/*                                                                    */
+/* bcdacc must have space for at least DECPMAX9*18+1 bytes.           */
+/* No error is possible and no status is set.                         */
+/* ------------------------------------------------------------------ */
+// This routine has two separate implementations of the core
+// multiplication; both using base-billion.  One uses only 32-bit
+// variables (Ints and uInts) or smaller; the other uses uLongs (for
+// multiplication and addition only).  Both implementations cover
+// both arithmetic sizes (DOUBLE and QUAD) in order to allow timing
+// comparisons.  In any one compilation only one implementation for
+// each size can be used, and if DECUSE64 is 0 then use of the 32-bit
+// version is forced.
+//
+// Historical note: an earlier version of this code also supported the
+// 256-bit format and has been preserved.  That is somewhat trickier
+// during lazy carry splitting because the initial quotient estimate
+// (est) can exceed 32 bits.
+
+#define MULTBASE  ((uInt)BILLION)  // the base used for multiply
+#define MULOPLEN  DECPMAX9         // operand length ('digits' base 10**9)
+#define MULACCLEN (MULOPLEN*2)              // accumulator length (ditto)
+#define LEADZEROS (MULACCLEN*9 - DECPMAX*2) // leading zeros always
+
+// Assertions: exponent not too large and MULACCLEN is a multiple of 4
+#if DECEMAXD>9
+  #error Exponent may overflow when doubled for Multiply
+#endif
+#if MULACCLEN!=(MULACCLEN/4)*4
+  // This assumption is used below only for initialization
+  #error MULACCLEN is not a multiple of 4
+#endif
+
+static void decFiniteMultiply(bcdnum *num, uByte *bcdacc,
+                              const decFloat *dfl, const decFloat *dfr) {
+  uInt   bufl[MULOPLEN];           // left  coefficient (base-billion)
+  uInt   bufr[MULOPLEN];           // right coefficient (base-billion)
+  uInt   *ui, *uj;                 // work
+  uByte  *ub;                      // ..
+  uInt   uiwork;                   // for macros
+
+  #if DECUSE64
+  uLong  accl[MULACCLEN];          // lazy accumulator (base-billion+)
+  uLong  *pl;                      // work -> lazy accumulator
+  uInt   acc[MULACCLEN];           // coefficent in base-billion ..
+  #else
+  uInt   acc[MULACCLEN*2];         // accumulator in base-billion ..
+  #endif
+  uInt   *pa;                      // work -> accumulator
+  //printf("Base10**9: OpLen=%d MulAcclen=%d\n", OPLEN, MULACCLEN);
+
+  /* Calculate sign and exponent */
+  num->sign=(DFWORD(dfl, 0)^DFWORD(dfr, 0)) & DECFLOAT_Sign;
+  num->exponent=GETEXPUN(dfl)+GETEXPUN(dfr); // [see assertion above]
+
+  /* Extract the coefficients and prepare the accumulator */
+  // the coefficients of the operands are decoded into base-billion
+  // numbers in uInt arrays (bufl and bufr, LSD at offset 0) of the
+  // appropriate size.
+  GETCOEFFBILL(dfl, bufl);
+  GETCOEFFBILL(dfr, bufr);
+  #if DECTRACE && 0
+    printf("CoeffbL:");
+    for (ui=bufl+MULOPLEN-1; ui>=bufl; ui--) printf(" %08lx", (LI)*ui);
+    printf("\n");
+    printf("CoeffbR:");
+    for (uj=bufr+MULOPLEN-1; uj>=bufr; uj--) printf(" %08lx", (LI)*uj);
+    printf("\n");
+  #endif
+
+  // start the 64-bit/32-bit differing paths...
+#if DECUSE64
+
+  // zero the accumulator
+  #if MULACCLEN==4
+    accl[0]=0; accl[1]=0; accl[2]=0; accl[3]=0;
+  #else                                      // use a loop
+    // MULACCLEN is a multiple of four, asserted above
+    for (pl=accl; pl<accl+MULACCLEN; pl+=4) {
+      *pl=0; *(pl+1)=0; *(pl+2)=0; *(pl+3)=0;// [reduce overhead]
+      } // pl
+  #endif
+
+  /* Effect the multiplication */
+  // The multiplcation proceeds using MFC's lazy-carry resolution
+  // algorithm from decNumber.  First, the multiplication is
+  // effected, allowing accumulation of the partial products (which
+  // are in base-billion at each column position) into 64 bits
+  // without resolving back to base=billion after each addition.
+  // These 64-bit numbers (which may contain up to 19 decimal digits)
+  // are then split using the Clark & Cowlishaw algorithm (see below).
+  // [Testing for 0 in the inner loop is not really a 'win']
+  for (ui=bufr; ui<bufr+MULOPLEN; ui++) { // over each item in rhs
+    if (*ui==0) continue;                 // product cannot affect result
+    pl=accl+(ui-bufr);                    // where to add the lhs
+    for (uj=bufl; uj<bufl+MULOPLEN; uj++, pl++) { // over each item in lhs
+      // if (*uj==0) continue;            // product cannot affect result
+      *pl+=((uLong)*ui)*(*uj);
+      } // uj
+    } // ui
+
+  // The 64-bit carries must now be resolved; this means that a
+  // quotient/remainder has to be calculated for base-billion (1E+9).
+  // For this, Clark & Cowlishaw's quotient estimation approach (also
+  // used in decNumber) is needed, because 64-bit divide is generally
+  // extremely slow on 32-bit machines, and may be slower than this
+  // approach even on 64-bit machines.  This algorithm splits X
+  // using:
+  //
+  //   magic=2**(A+B)/1E+9;   // 'magic number'
+  //   hop=X/2**A;            // high order part of X (by shift)
+  //   est=magic*hop/2**B     // quotient estimate (may be low by 1)
+  //
+  // A and B are quite constrained; hop and magic must fit in 32 bits,
+  // and 2**(A+B) must be as large as possible (which is 2**61 if
+  // magic is to fit).  Further, maxX increases with the length of
+  // the operands (and hence the number of partial products
+  // accumulated); maxX is OPLEN*(10**18), which is up to 19 digits.
+  //
+  // It can be shown that when OPLEN is 2 then the maximum error in
+  // the estimated quotient is <1, but for larger maximum x the
+  // maximum error is above 1 so a correction that is >1 may be
+  // needed.  Values of A and B are chosen to satisfy the constraints
+  // just mentioned while minimizing the maximum error (and hence the
+  // maximum correction), as shown in the following table:
+  //
+  //   Type    OPLEN   A   B     maxX    maxError  maxCorrection
+  //   ---------------------------------------------------------
+  //   DOUBLE    2    29  32  <2*10**18    0.63       1
+  //   QUAD      4    30  31  <4*10**18    1.17       2
+  //
+  // In the OPLEN==2 case there is most choice, but the value for B
+  // of 32 has a big advantage as then the calculation of the
+  // estimate requires no shifting; the compiler can extract the high
+  // word directly after multiplying magic*hop.
+  #define MULMAGIC 2305843009U          // 2**61/10**9  [both cases]
+  #if DOUBLE
+    #define MULSHIFTA 29
+    #define MULSHIFTB 32
+  #elif QUAD
+    #define MULSHIFTA 30
+    #define MULSHIFTB 31
+  #else
+    #error Unexpected type
+  #endif
+
+  #if DECTRACE
+  printf("MulAccl:");
+  for (pl=accl+MULACCLEN-1; pl>=accl; pl--)
+    printf(" %08lx:%08lx", (LI)(*pl>>32), (LI)(*pl&0xffffffff));
+  printf("\n");
+  #endif
+
+  for (pl=accl, pa=acc; pl<accl+MULACCLEN; pl++, pa++) { // each column position
+    uInt lo, hop;                       // work
+    uInt est;                           // cannot exceed 4E+9
+    if (*pl>=MULTBASE) {
+      // *pl holds a binary number which needs to be split
+      hop=(uInt)(*pl>>MULSHIFTA);
+      est=(uInt)(((uLong)hop*MULMAGIC)>>MULSHIFTB);
+      // the estimate is now in est; now calculate hi:lo-est*10**9;
+      // happily the top word of the result is irrelevant because it
+      // will always be zero so this needs only one multiplication
+      lo=(uInt)(*pl-((uLong)est*MULTBASE));  // low word of result
+      // If QUAD, the correction here could be +2
+      if (lo>=MULTBASE) {
+        lo-=MULTBASE;                   // correct by +1
+        est++;
+        #if QUAD
+        // may need to correct by +2
+        if (lo>=MULTBASE) {
+          lo-=MULTBASE;
+          est++;
+          }
+        #endif
+        }
+      // finally place lo as the new coefficient 'digit' and add est to
+      // the next place up [this is safe because this path is never
+      // taken on the final iteration as *pl will fit]
+      *pa=lo;
+      *(pl+1)+=est;
+      } // *pl needed split
+     else {                             // *pl<MULTBASE
+      *pa=(uInt)*pl;                    // just copy across
+      }
+    } // pl loop
+
+#else  // 32-bit
+  for (pa=acc;; pa+=4) {                     // zero the accumulator
+    *pa=0; *(pa+1)=0; *(pa+2)=0; *(pa+3)=0;  // [reduce overhead]
+    if (pa==acc+MULACCLEN*2-4) break;        // multiple of 4 asserted
+    } // pa
+
+  /* Effect the multiplication */
+  // uLongs are not available (and in particular, there is no uLong
+  // divide) but it is still possible to use MFC's lazy-carry
+  // resolution algorithm from decNumber.  First, the multiplication
+  // is effected, allowing accumulation of the partial products
+  // (which are in base-billion at each column position) into 64 bits
+  // [with the high-order 32 bits in each position being held at
+  // offset +ACCLEN from the low-order 32 bits in the accumulator].
+  // These 64-bit numbers (which may contain up to 19 decimal digits)
+  // are then split using the Clark & Cowlishaw algorithm (see
+  // below).
+  for (ui=bufr;; ui++) {                // over each item in rhs
+    uInt hi, lo;                        // words of exact multiply result
+    pa=acc+(ui-bufr);                   // where to add the lhs
+    for (uj=bufl;; uj++, pa++) {        // over each item in lhs
+      LONGMUL32HI(hi, *ui, *uj);        // calculate product of digits
+      lo=(*ui)*(*uj);                   // ..
+      *pa+=lo;                          // accumulate low bits and ..
+      *(pa+MULACCLEN)+=hi+(*pa<lo);     // .. high bits with any carry
+      if (uj==bufl+MULOPLEN-1) break;
+      }
+    if (ui==bufr+MULOPLEN-1) break;
+    }
+
+  // The 64-bit carries must now be resolved; this means that a
+  // quotient/remainder has to be calculated for base-billion (1E+9).
+  // For this, Clark & Cowlishaw's quotient estimation approach (also
+  // used in decNumber) is needed, because 64-bit divide is generally
+  // extremely slow on 32-bit machines.  This algorithm splits X
+  // using:
+  //
+  //   magic=2**(A+B)/1E+9;   // 'magic number'
+  //   hop=X/2**A;            // high order part of X (by shift)
+  //   est=magic*hop/2**B     // quotient estimate (may be low by 1)
+  //
+  // A and B are quite constrained; hop and magic must fit in 32 bits,
+  // and 2**(A+B) must be as large as possible (which is 2**61 if
+  // magic is to fit).  Further, maxX increases with the length of
+  // the operands (and hence the number of partial products
+  // accumulated); maxX is OPLEN*(10**18), which is up to 19 digits.
+  //
+  // It can be shown that when OPLEN is 2 then the maximum error in
+  // the estimated quotient is <1, but for larger maximum x the
+  // maximum error is above 1 so a correction that is >1 may be
+  // needed.  Values of A and B are chosen to satisfy the constraints
+  // just mentioned while minimizing the maximum error (and hence the
+  // maximum correction), as shown in the following table:
+  //
+  //   Type    OPLEN   A   B     maxX    maxError  maxCorrection
+  //   ---------------------------------------------------------
+  //   DOUBLE    2    29  32  <2*10**18    0.63       1
+  //   QUAD      4    30  31  <4*10**18    1.17       2
+  //
+  // In the OPLEN==2 case there is most choice, but the value for B
+  // of 32 has a big advantage as then the calculation of the
+  // estimate requires no shifting; the high word is simply
+  // calculated from multiplying magic*hop.
+  #define MULMAGIC 2305843009U          // 2**61/10**9  [both cases]
+  #if DOUBLE
+    #define MULSHIFTA 29
+    #define MULSHIFTB 32
+  #elif QUAD
+    #define MULSHIFTA 30
+    #define MULSHIFTB 31
+  #else
+    #error Unexpected type
+  #endif
+
+  #if DECTRACE
+  printf("MulHiLo:");
+  for (pa=acc+MULACCLEN-1; pa>=acc; pa--)
+    printf(" %08lx:%08lx", (LI)*(pa+MULACCLEN), (LI)*pa);
+  printf("\n");
+  #endif
+
+  for (pa=acc;; pa++) {                 // each low uInt
+    uInt hi, lo;                        // words of exact multiply result
+    uInt hop, estlo;                    // work
+    #if QUAD
+    uInt esthi;                         // ..
+    #endif
+
+    lo=*pa;
+    hi=*(pa+MULACCLEN);                 // top 32 bits
+    // hi and lo now hold a binary number which needs to be split
+
+    #if DOUBLE
+      hop=(hi<<3)+(lo>>MULSHIFTA);      // hi:lo/2**29
+      LONGMUL32HI(estlo, hop, MULMAGIC);// only need the high word
+      // [MULSHIFTB is 32, so estlo can be used directly]
+      // the estimate is now in estlo; now calculate hi:lo-est*10**9;
+      // happily the top word of the result is irrelevant because it
+      // will always be zero so this needs only one multiplication
+      lo-=(estlo*MULTBASE);
+      // esthi=0;                       // high word is ignored below
+      // the correction here will be at most +1; do it
+      if (lo>=MULTBASE) {
+        lo-=MULTBASE;
+        estlo++;
+        }
+    #elif QUAD
+      hop=(hi<<2)+(lo>>MULSHIFTA);      // hi:lo/2**30
+      LONGMUL32HI(esthi, hop, MULMAGIC);// shift will be 31 ..
+      estlo=hop*MULMAGIC;               // .. so low word needed
+      estlo=(esthi<<1)+(estlo>>MULSHIFTB); // [just the top bit]
+      // esthi=0;                       // high word is ignored below
+      lo-=(estlo*MULTBASE);             // as above
+      // the correction here could be +1 or +2
+      if (lo>=MULTBASE) {
+        lo-=MULTBASE;
+        estlo++;
+        }
+      if (lo>=MULTBASE) {
+        lo-=MULTBASE;
+        estlo++;
+        }
+    #else
+      #error Unexpected type
+    #endif
+
+    // finally place lo as the new accumulator digit and add est to
+    // the next place up; this latter add could cause a carry of 1
+    // to the high word of the next place
+    *pa=lo;
+    *(pa+1)+=estlo;
+    // esthi is always 0 for DOUBLE and QUAD so this is skipped
+    // *(pa+1+MULACCLEN)+=esthi;
+    if (*(pa+1)<estlo) *(pa+1+MULACCLEN)+=1; // carry
+    if (pa==acc+MULACCLEN-2) break;          // [MULACCLEN-1 will never need split]
+    } // pa loop
+#endif
+
+  // At this point, whether using the 64-bit or the 32-bit paths, the
+  // accumulator now holds the (unrounded) result in base-billion;
+  // one base-billion 'digit' per uInt.
+  #if DECTRACE
+  printf("MultAcc:");
+  for (pa=acc+MULACCLEN-1; pa>=acc; pa--) printf(" %09ld", (LI)*pa);
+  printf("\n");
+  #endif
+
+  // Now convert to BCD for rounding and cleanup, starting from the
+  // most significant end
+  pa=acc+MULACCLEN-1;
+  if (*pa!=0) num->msd=bcdacc+LEADZEROS;// drop known lead zeros
+   else {                               // >=1 word of leading zeros
+    num->msd=bcdacc;                    // known leading zeros are gone
+    pa--;                               // skip first word ..
+    for (; *pa==0; pa--) if (pa==acc) break; // .. and any more leading 0s
+    }
+  for (ub=bcdacc;; pa--, ub+=9) {
+    if (*pa!=0) {                       // split(s) needed
+      uInt top, mid, rem;               // work
+      // *pa is non-zero -- split the base-billion acc digit into
+      // hi, mid, and low three-digits
+      #define mulsplit9 1000000         // divisor
+      #define mulsplit6 1000            // divisor
+      // The splitting is done by simple divides and remainders,
+      // assuming the compiler will optimize these where useful
+      // [GCC does]
+      top=*pa/mulsplit9;
+      rem=*pa%mulsplit9;
+      mid=rem/mulsplit6;
+      rem=rem%mulsplit6;
+      // lay out the nine BCD digits (plus one unwanted byte)
+      UBFROMUI(ub,   UBTOUI(&BIN2BCD8[top*4]));
+      UBFROMUI(ub+3, UBTOUI(&BIN2BCD8[mid*4]));
+      UBFROMUI(ub+6, UBTOUI(&BIN2BCD8[rem*4]));
+      }
+     else {                             // *pa==0
+      UBFROMUI(ub, 0);                  // clear 9 BCD8s
+      UBFROMUI(ub+4, 0);                // ..
+      *(ub+8)=0;                        // ..
+      }
+    if (pa==acc) break;
+    } // BCD conversion loop
+
+  num->lsd=ub+8;                        // complete the bcdnum ..
+
+  #if DECTRACE
+  decShowNum(num, "postmult");
+  decFloatShow(dfl, "dfl");
+  decFloatShow(dfr, "dfr");
+  #endif
+  return;
+  } // decFiniteMultiply
+
+/* ------------------------------------------------------------------ */
+/* decFloatAbs -- absolute value, heeding NaNs, etc.                  */
+/*                                                                    */
+/*   result gets the canonicalized df with sign 0                     */
+/*   df     is the decFloat to abs                                    */
+/*   set    is the context                                            */
+/*   returns result                                                   */
+/*                                                                    */
+/* This has the same effect as decFloatPlus unless df is negative,    */
+/* in which case it has the same effect as decFloatMinus.  The        */
+/* effect is also the same as decFloatCopyAbs except that NaNs are    */
+/* handled normally (the sign of a NaN is not affected, and an sNaN   */
+/* will signal) and the result will be canonical.                     */
+/* ------------------------------------------------------------------ */
+decFloat * decFloatAbs(decFloat *result, const decFloat *df,
+                       decContext *set) {
+  if (DFISNAN(df)) return decNaNs(result, df, NULL, set);
+  decCanonical(result, df);             // copy and check
+  DFBYTE(result, 0)&=~0x80;             // zero sign bit
+  return result;
+  } // decFloatAbs
+
+/* ------------------------------------------------------------------ */
+/* decFloatAdd -- add two decFloats                                   */
+/*                                                                    */
+/*   result gets the result of adding dfl and dfr:                    */
+/*   dfl    is the first decFloat (lhs)                               */
+/*   dfr    is the second decFloat (rhs)                              */
+/*   set    is the context                                            */
+/*   returns result                                                   */
+/*                                                                    */
+/* ------------------------------------------------------------------ */
+#if QUAD
+// Table for testing MSDs for fastpath elimination; returns the MSD of
+// a decDouble or decQuad (top 6 bits tested) ignoring the sign.
+// Infinities return -32 and NaNs return -128 so that summing the two
+// MSDs also allows rapid tests for the Specials (see code below).
+const Int DECTESTMSD[64]={
+  0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,   6,    7,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 9, 8, 9, -32, -128,
+  0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,   6,    7,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 9, 8, 9, -32, -128};
+#else
+// The table for testing MSDs is shared between the modules
+extern const Int DECTESTMSD[64];
+#endif
+
+decFloat * decFloatAdd(decFloat *result,
+                       const decFloat *dfl, const decFloat *dfr,
+                       decContext *set) {
+  bcdnum num;                      // for final conversion
+  Int    bexpl, bexpr;             // left and right biased exponents
+  uByte  *ub, *us, *ut;            // work
+  uInt   uiwork;                   // for macros
+  #if QUAD
+  uShort uswork;                   // ..
+  #endif
+
+  uInt sourhil, sourhir;           // top words from source decFloats
+                                   // [valid only through end of
+                                   // fastpath code -- before swap]
+  uInt diffsign;                   // non-zero if signs differ
+  uInt carry;                      // carry: 0 or 1 before add loop
+  Int  overlap;                    // coefficient overlap (if full)
+  Int  summ;                       // sum of the MSDs
+  // the following buffers hold coefficients with various alignments
+  // (see commentary and diagrams below)
+  uByte acc[4+2+DECPMAX*3+8];
+  uByte buf[4+2+DECPMAX*2];
+  uByte *umsd, *ulsd;              // local MSD and LSD pointers
+
+  #if DECLITEND
+    #define CARRYPAT 0x01000000    // carry=1 pattern
+  #else
+    #define CARRYPAT 0x00000001    // carry=1 pattern
+  #endif
+
+  /* Start decoding the arguments */
+  // The initial exponents are placed into the opposite Ints to
+  // that which might be expected; there are two sets of data to
+  // keep track of (each decFloat and the corresponding exponent),
+  // and this scheme means that at the swap point (after comparing
+  // exponents) only one pair of words needs to be swapped
+  // whichever path is taken (thereby minimising worst-case path).
+  // The calculated exponents will be nonsense when the arguments are
+  // Special, but are not used in that path
+  sourhil=DFWORD(dfl, 0);          // LHS top word
+  summ=DECTESTMSD[sourhil>>26];    // get first MSD for testing
+  bexpr=DECCOMBEXP[sourhil>>26];   // get exponent high bits (in place)
+  bexpr+=GETECON(dfl);             // .. + continuation
+
+  sourhir=DFWORD(dfr, 0);          // RHS top word
+  summ+=DECTESTMSD[sourhir>>26];   // sum MSDs for testing
+  bexpl=DECCOMBEXP[sourhir>>26];
+  bexpl+=GETECON(dfr);
+
+  // here bexpr has biased exponent from lhs, and vice versa
+
+  diffsign=(sourhil^sourhir)&DECFLOAT_Sign;
+
+  // now determine whether to take a fast path or the full-function
+  // slow path.  The slow path must be taken when:
+  //   -- both numbers are finite, and:
+  //         the exponents are different, or
+  //         the signs are different, or
+  //         the sum of the MSDs is >8 (hence might overflow)
+  // specialness and the sum of the MSDs can be tested at once using
+  // the summ value just calculated, so the test for specials is no
+  // longer on the worst-case path (as of 3.60)
+
+  if (summ<=8) {                   // MSD+MSD is good, or there is a special
+    if (summ<0) {                  // there is a special
+      // Inf+Inf would give -64; Inf+finite is -32 or higher
+      if (summ<-64) return decNaNs(result, dfl, dfr, set);  // one or two NaNs
+      // two infinities with different signs is invalid
+      if (summ==-64 && diffsign) return decInvalid(result, set);
+      if (DFISINF(dfl)) return decInfinity(result, dfl);    // LHS is infinite
+      return decInfinity(result, dfr);                      // RHS must be Inf
+      }
+    // Here when both arguments are finite; fast path is possible
+    // (currently only for aligned and same-sign)
+    if (bexpr==bexpl && !diffsign) {
+      uInt tac[DECLETS+1];              // base-1000 coefficient
+      uInt encode;                      // work
+
+      // Get one coefficient as base-1000 and add the other
+      GETCOEFFTHOU(dfl, tac);           // least-significant goes to [0]
+      ADDCOEFFTHOU(dfr, tac);
+      // here the sum of the MSDs (plus any carry) will be <10 due to
+      // the fastpath test earlier
+
+      // construct the result; low word is the same for both formats
+      encode =BIN2DPD[tac[0]];
+      encode|=BIN2DPD[tac[1]]<<10;
+      encode|=BIN2DPD[tac[2]]<<20;
+      encode|=BIN2DPD[tac[3]]<<30;
+      DFWORD(result, (DECBYTES/4)-1)=encode;
+
+      // collect next two declets (all that remains, for Double)
+      encode =BIN2DPD[tac[3]]>>2;
+      encode|=BIN2DPD[tac[4]]<<8;
+
+      #if QUAD
+      // complete and lay out middling words
+      encode|=BIN2DPD[tac[5]]<<18;
+      encode|=BIN2DPD[tac[6]]<<28;
+      DFWORD(result, 2)=encode;
+
+      encode =BIN2DPD[tac[6]]>>4;
+      encode|=BIN2DPD[tac[7]]<<6;
+      encode|=BIN2DPD[tac[8]]<<16;
+      encode|=BIN2DPD[tac[9]]<<26;
+      DFWORD(result, 1)=encode;
+
+      // and final two declets
+      encode =BIN2DPD[tac[9]]>>6;
+      encode|=BIN2DPD[tac[10]]<<4;
+      #endif
+
+      // add exponent continuation and sign (from either argument)
+      encode|=sourhil & (ECONMASK | DECFLOAT_Sign);
+
+      // create lookup index = MSD + top two bits of biased exponent <<4
+      tac[DECLETS]|=(bexpl>>DECECONL)<<4;
+      encode|=DECCOMBFROM[tac[DECLETS]]; // add constructed combination field
+      DFWORD(result, 0)=encode;          // complete
+
+      // decFloatShow(result, ">");
+      return result;
+      } // fast path OK
+    // drop through to slow path
+    } // low sum or Special(s)
+
+  /* Slow path required -- arguments are finite and might overflow,   */
+  /* or require alignment, or might have different signs              */
+
+  // now swap either exponents or argument pointers
+  if (bexpl<=bexpr) {
+    // original left is bigger
+    Int bexpswap=bexpl;
+    bexpl=bexpr;
+    bexpr=bexpswap;
+    // printf("left bigger\n");
+    }
+   else {
+    const decFloat *dfswap=dfl;
+    dfl=dfr;
+    dfr=dfswap;
+    // printf("right bigger\n");
+    }
+  // [here dfl and bexpl refer to the datum with the larger exponent,
+  // of if the exponents are equal then the orig