mono/metadata/decimal-ms.c

   1 //
   2 // Copyright (c) Microsoft. All rights reserved.
   3 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
   4 //
   5 // Copyright 2015 Xamarin Inc
   6 //
   7 // File: decimal.c
   8 //
   9 // Ported from C++ to C and adjusted to Mono runtime
  10 //
  11 // Pending:
  12 //   DoToCurrency (they look like new methods we do not have)
  13 //
  14 #ifndef DISABLE_DECIMAL
  15 #include "config.h"
  16 #include <stdint.h>
  17 #include <glib.h>
  18 #include <mono/utils/mono-compiler.h>
  19 #include <mono/metadata/exception.h>
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <math.h>
  24 #ifdef HAVE_MEMORY_H
  25 #include <memory.h>
  26 #endif
  27 #ifdef _MSC_VER
  28 #include <intrin.h>
  29 #endif
  30 #include "decimal-ms.h"
  31
  32 #define min(a, b) (((a) < (b)) ? (a) : (b))
  33
  34 typedef enum {
  35         MONO_DECIMAL_OK,
  36         MONO_DECIMAL_OVERFLOW,
  37         MONO_DECIMAL_INVALID_ARGUMENT,
  38         MONO_DECIMAL_DIVBYZERO,
  39         MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
  40 } MonoDecimalStatus;
  41
  42 #ifndef FC_GC_POLL
  43 #   define FC_GC_POLL()
  44 #endif
  45
  46 static const uint32_t ten_to_nine    = 1000000000U;
  47 static const uint32_t ten_to_ten_div_4 = 2500000000U;
  48 #define POWER10_MAX     9
  49 #define DECIMAL_NEG ((uint8_t)0x80)
  50 #define DECMAX 28
  51 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
  52 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
  53 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
  54 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
  55 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
  56 #define DECIMAL_HI32(dec)        ((dec).Hi32)
  57 #define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
  58 #define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
  59
  60 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
  61 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
  62     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
  63
  64 #define DEC_SCALE_MAX   28
  65 #define POWER10_MAX     9
  66
  67 #define OVFL_MAX_9_HI   4
  68 #define OVFL_MAX_9_MID  1266874889
  69 #define OVFL_MAX_9_LO   3047500985u
  70
  71 #define OVFL_MAX_5_HI   42949
  72 #define OVFL_MAX_5_MID  2890341191
  73
  74 #define OVFL_MAX_1_HI   429496729
  75
  76 typedef union {
  77         uint64_t int64;
  78         struct {
  79 #if BYTE_ORDER == G_BIG_ENDIAN
  80         uint32_t Hi;
  81         uint32_t Lo;
  82 #else
  83         uint32_t Lo;
  84         uint32_t Hi;
  85 #endif
  86     } u;
  87 } SPLIT64;
  88
  89 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
  90 // Double Bias
  91 #define DBLBIAS 1022
  92
  93 // Structure to access an encoded double floating point
  94 typedef union{
  95     struct {
  96 #if BYTE_ORDER == G_BIG_ENDIAN
  97       unsigned int sign:1;
  98       unsigned int exp:11;
  99       unsigned int mantHi:20;
 100       unsigned int mantLo;
 101 #else // BIGENDIAN
 102       unsigned int mantLo;
 103       unsigned int mantHi:20;
 104       unsigned int exp:11;
 105       unsigned int sign:1;
 106 #endif
 107     } u;
 108     double dbl;
 109 } DoubleStructure;
 110
 111 #if BYTE_ORDER == G_BIG_ENDIAN
 112 #define DEFDS(Lo, Hi, exp, sign) { {sign, exp, Hi, Lo } }
 113 #else
 114 #define DEFDS(Lo, Hi, exp, sign) { {Lo, Hi, exp, sign} }
 115 #endif
 116
 117 const DoubleStructure ds2to64 = DEFDS(0, 0, DBLBIAS + 65, 0);
 118
 119 // Single floating point Bias
 120 #define SNGBIAS 126
 121
 122 // Structure to access an encoded single floating point
 123 typedef struct {
 124 #if BYTE_ORDER == G_BIG_ENDIAN
 125     unsigned int sign:1;
 126     unsigned int exp:8;
 127     unsigned int mant:23;
 128 #else
 129     unsigned int mant:23;
 130     unsigned int exp:8;
 131     unsigned int sign:1;
 132 #endif
 133 } SingleStructure;
 134
 135 //
 136 // Data tables
 137 //
 138
 139 static const uint32_t power10 [POWER10_MAX+1] = {
 140         1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
 141 };
 142
 143
 144 static const double double_power10[] = {
 145         1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
 146         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
 147         1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
 148         1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
 149         1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
 150         1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
 151         1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
 152         1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
 153         1e80 };
 154
 155 const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
 156                                 {100000000000ULL},         // 1E11
 157                                 {1000000000000ULL},        // 1E12
 158                                 {10000000000000ULL},       // 1E13
 159                                 {100000000000000ULL} };    // 1E14
 160
 161 static const uint64_t long_power10[] = {
 162         1,
 163         10ULL,
 164         100ULL,
 165         1000ULL,
 166         10000ULL,
 167         100000ULL,
 168         1000000ULL,
 169         10000000ULL,
 170         100000000ULL,
 171         1000000000ULL,
 172         10000000000ULL,
 173         100000000000ULL,
 174         1000000000000ULL,
 175         10000000000000ULL,
 176         100000000000000ULL,
 177         1000000000000000ULL,
 178         10000000000000000ULL,
 179         100000000000000000ULL,
 180         1000000000000000000ULL,
 181         10000000000000000000ULL};
 182
 183 typedef struct  {
 184         uint32_t Hi, Mid, Lo;
 185 } DECOVFL;
 186
 187 const DECOVFL power_overflow[] = {
 188 // This is a table of the largest values that can be in the upper two
 189 // ULONGs of a 96-bit number that will not overflow when multiplied
 190 // by a given power.  For the upper word, this is a table of
 191 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
 192 // remaining fraction part * 2^32.  2^32 = 4294967296.
 193 //
 194     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
 195     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
 196     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
 197     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
 198     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
 199     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
 200     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
 201     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
 202     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
 203 };
 204
 205
 206 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
 207 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
 208 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
 209
 210 static double
 211 fnDblPower10(int ix)
 212 {
 213     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
 214     g_assert(ix >= 0);
 215     if (ix < maxIx)
 216         return double_power10[ix];
 217     return pow(10.0, ix);
 218 } // double fnDblPower10()
 219
 220
 221 static inline int64_t
 222 DivMod32by32(int32_t num, int32_t den)
 223 {
 224     SPLIT64  sdl;
 225
 226     sdl.u.Lo = num / den;
 227     sdl.u.Hi = num % den;
 228     return sdl.int64;
 229 }
 230
 231 static inline int64_t
 232 DivMod64by32(int64_t num, int32_t den)
 233 {
 234     SPLIT64  sdl;
 235
 236     sdl.u.Lo = Div64by32(num, den);
 237     sdl.u.Hi = Mod64by32(num, den);
 238     return sdl.int64;
 239 }
 240
 241 static uint64_t
 242 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
 243 {
 244         SPLIT64  tmp1;
 245         SPLIT64  tmp2;
 246         SPLIT64  tmp3;
 247
 248         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
 249         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
 250         tmp1.u.Hi += tmp2.u.Lo;
 251         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 252                 tmp2.u.Hi++;
 253         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
 254         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
 255         tmp1.u.Hi += tmp2.u.Lo;
 256         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 257                 tmp2.u.Hi++;
 258         tmp3.int64 += (uint64_t)tmp2.u.Hi;
 259
 260         *hi = tmp3.int64;
 261         return tmp1.int64;
 262 }
 263
 264 /**
 265 * FullDiv64By32:
 266 *
 267 * Entry:
 268 *   pdlNum  - Pointer to 64-bit dividend
 269 *   ulDen   - 32-bit divisor
 270 *
 271 * Purpose:
 272 *   Do full divide, yielding 64-bit result and 32-bit remainder.
 273 *
 274 * Exit:
 275 *   Quotient overwrites dividend.
 276 *   Returns remainder.
 277 *
 278 * Exceptions:
 279 *   None.
 280 */
 281 // Was: FullDiv64By32
 282 static uint32_t
 283 FullDiv64By32 (uint64_t *num, uint32_t den)
 284 {
 285         SPLIT64  tmp;
 286         SPLIT64  res;
 287
 288         tmp.int64 = *num;
 289         res.u.Hi = 0;
 290
 291         if (tmp.u.Hi >= den) {
 292                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
 293                 //
 294                 res.u.Lo = tmp.u.Hi;
 295                 res.int64 = DivMod64by32(res.int64, den);
 296                 tmp.u.Hi = res.u.Hi;
 297                 res.u.Hi = res.u.Lo;
 298         }
 299
 300         tmp.int64 = DivMod64by32(tmp.int64, den);
 301         res.u.Lo = tmp.u.Lo;
 302         *num = res.int64;
 303         return tmp.u.Hi;
 304 }
 305
 306 /***
 307  * SearchScale
 308  *
 309  * Entry:
 310  *   res_hi - Top uint32_t of quotient
 311  *   res_mid - Middle uint32_t of quotient
 312  *   res_lo - Bottom uint32_t of quotient
 313  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 314  *
 315  * Purpose:
 316  *   Determine the max power of 10, <= 9, that the quotient can be scaled
 317  *   up by and still fit in 96 bits.
 318  *
 319  * Exit:
 320  *   Returns power of 10 to scale by, -1 if overflow error.
 321  *
 322  ***********************************************************************/
 323
 324 static int
 325 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
 326 {
 327         int   cur_scale;
 328
 329         // Quick check to stop us from trying to scale any more.
 330         //
 331         if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
 332                 cur_scale = 0;
 333                 goto HaveScale;
 334         }
 335
 336         if (scale > DEC_SCALE_MAX - 9) {
 337                 // We can't scale by 10^9 without exceeding the max scale factor.
 338                 // See if we can scale to the max.  If not, we'll fall into
 339                 // standard search for scale factor.
 340                 //
 341                 cur_scale = DEC_SCALE_MAX - scale;
 342                 if (res_hi < power_overflow[cur_scale - 1].Hi)
 343                         goto HaveScale;
 344
 345                 if (res_hi == power_overflow[cur_scale - 1].Hi) {
 346                 UpperEq:
 347                         if (res_mid > power_overflow[cur_scale - 1].Mid ||
 348                             (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
 349                                 cur_scale--;
 350                         }
 351                         goto HaveScale;
 352                 }
 353         } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
 354                 return 9;
 355
 356         // Search for a power to scale by < 9.  Do a binary search
 357         // on power_overflow[].
 358         //
 359         cur_scale = 5;
 360         if (res_hi < OVFL_MAX_5_HI)
 361                 cur_scale = 7;
 362         else if (res_hi > OVFL_MAX_5_HI)
 363                 cur_scale = 3;
 364         else
 365                 goto UpperEq;
 366
 367         // cur_scale is 3 or 7.
 368         //
 369         if (res_hi < power_overflow[cur_scale - 1].Hi)
 370                 cur_scale++;
 371         else if (res_hi > power_overflow[cur_scale - 1].Hi)
 372                 cur_scale--;
 373         else
 374                 goto UpperEq;
 375
 376         // cur_scale is 2, 4, 6, or 8.
 377         //
 378         // In all cases, we already found we could not use the power one larger.
 379         // So if we can use this power, it is the biggest, and we're done.  If
 380         // we can't use this power, the one below it is correct for all cases
 381         // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
 382         //
 383         if (res_hi > power_overflow[cur_scale - 1].Hi)
 384                 cur_scale--;
 385
 386         if (res_hi == power_overflow[cur_scale - 1].Hi)
 387                 goto UpperEq;
 388
 389 HaveScale:
 390         // cur_scale = largest power of 10 we can scale by without overflow,
 391         // cur_scale < 9.  See if this is enough to make scale factor
 392         // positive if it isn't already.
 393         //
 394         if (cur_scale + scale < 0)
 395                 cur_scale = -1;
 396
 397         return cur_scale;
 398 }
 399
 400
 401 /**
 402 * Div96By32
 403 *
 404 * Entry:
 405 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
 406 *   ulDen   - 32-bit divisor.
 407 *
 408 * Purpose:
 409 *   Do full divide, yielding 96-bit result and 32-bit remainder.
 410 *
 411 * Exit:
 412 *   Quotient overwrites dividend.
 413 *   Returns remainder.
 414 *
 415 * Exceptions:
 416 *   None.
 417 *
 418 */
 419 static uint32_t
 420 Div96By32(uint32_t *num, uint32_t den)
 421 {
 422         SPLIT64  tmp;
 423
 424         tmp.u.Hi = 0;
 425
 426         if (num[2] != 0)
 427                 goto Div3Word;
 428
 429         if (num[1] >= den)
 430                 goto Div2Word;
 431
 432         tmp.u.Hi = num[1];
 433         num[1] = 0;
 434         goto Div1Word;
 435
 436 Div3Word:
 437         tmp.u.Lo = num[2];
 438         tmp.int64 = DivMod64by32(tmp.int64, den);
 439         num[2] = tmp.u.Lo;
 440 Div2Word:
 441         tmp.u.Lo = num[1];
 442         tmp.int64 = DivMod64by32(tmp.int64, den);
 443         num[1] = tmp.u.Lo;
 444 Div1Word:
 445         tmp.u.Lo = num[0];
 446         tmp.int64 = DivMod64by32(tmp.int64, den);
 447         num[0] = tmp.u.Lo;
 448         return tmp.u.Hi;
 449 }
 450
 451 /***
 452  * DecFixInt
 453  *
 454  * Entry:
 455  *   pdecRes - Pointer to Decimal result location
 456  *   operand  - Pointer to Decimal operand
 457  *
 458  * Purpose:
 459  *   Chop the value to integer.  Return remainder so Int() function
 460  *   can round down if non-zero.
 461  *
 462  * Exit:
 463  *   Returns remainder.
 464  *
 465  * Exceptions:
 466  *   None.
 467  *
 468  ***********************************************************************/
 469
 470 static uint32_t
 471 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
 472 {
 473         uint32_t   num[3];
 474         uint32_t   rem;
 475         uint32_t   pwr;
 476         int     scale;
 477
 478         if (operand->u.u.scale > 0) {
 479                 num[0] = operand->v.v.Lo32;
 480                 num[1] = operand->v.v.Mid32;
 481                 num[2] = operand->Hi32;
 482                 scale = operand->u.u.scale;
 483                 result->u.u.sign = operand->u.u.sign;
 484                 rem = 0;
 485
 486                 do {
 487                         if (scale > POWER10_MAX)
 488                                 pwr = ten_to_nine;
 489                         else
 490                                 pwr = power10[scale];
 491
 492                         rem |= Div96By32(num, pwr);
 493                         scale -= 9;
 494                 }while (scale > 0);
 495
 496                 result->v.v.Lo32 = num[0];
 497                 result->v.v.Mid32 = num[1];
 498                 result->Hi32 = num[2];
 499                 result->u.u.scale = 0;
 500
 501                 return rem;
 502         }
 503
 504         COPYDEC(*result, *operand);
 505         // Odd, the Microsoft code does not set result->reserved to zero on this case
 506         return 0;
 507 }
 508
 509 /**
 510  * ScaleResult:
 511  *
 512  * Entry:
 513  *   res - Array of uint32_ts with value, least-significant first.
 514  *   hi_res  - Index of last non-zero value in res.
 515  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 516  *
 517  * Purpose:
 518  *   See if we need to scale the result to fit it in 96 bits.
 519  *   Perform needed scaling.  Adjust scale factor accordingly.
 520  *
 521  * Exit:
 522  *   res updated in place, always 3 uint32_ts.
 523  *   New scale factor returned, -1 if overflow error.
 524  *
 525  */
 526 static int
 527 ScaleResult(uint32_t *res, int hi_res, int scale)
 528 {
 529         int     new_scale;
 530         int     cur;
 531         uint32_t   pwr;
 532         uint32_t   tmp;
 533         uint32_t   sticky;
 534         SPLIT64 sdlTmp;
 535
 536         // See if we need to scale the result.  The combined scale must
 537         // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
 538         //
 539         // Start by figuring a lower bound on the scaling needed to make
 540         // the upper 96 bits zero.  hi_res is the index into res[]
 541         // of the highest non-zero uint32_t.
 542         //
 543         new_scale =   hi_res * 32 - 64 - 1;
 544         if (new_scale > 0) {
 545
 546                 // Find the MSB.
 547                 //
 548                 tmp = res[hi_res];
 549                 if (!(tmp & 0xFFFF0000)) {
 550                         new_scale -= 16;
 551                         tmp <<= 16;
 552                 }
 553                 if (!(tmp & 0xFF000000)) {
 554                         new_scale -= 8;
 555                         tmp <<= 8;
 556                 }
 557                 if (!(tmp & 0xF0000000)) {
 558                         new_scale -= 4;
 559                         tmp <<= 4;
 560                 }
 561                 if (!(tmp & 0xC0000000)) {
 562                         new_scale -= 2;
 563                         tmp <<= 2;
 564                 }
 565                 if (!(tmp & 0x80000000)) {
 566                         new_scale--;
 567                         tmp <<= 1;
 568                 }
 569
 570                 // Multiply bit position by log10(2) to figure it's power of 10.
 571                 // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
 572                 // with a multiply saves a 96-byte lookup table.  The power returned
 573                 // is <= the power of the number, so we must add one power of 10
 574                 // to make it's integer part zero after dividing by 256.
 575                 //
 576                 // Note: the result of this multiplication by an approximation of
 577                 // log10(2) have been exhaustively checked to verify it gives the
 578                 // correct result.  (There were only 95 to check...)
 579                 //
 580                 new_scale = ((new_scale * 77) >> 8) + 1;
 581
 582                 // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
 583                 // This reduces the scale factor of the result.  If it exceeds the
 584                 // current scale of the result, we'll overflow.
 585                 //
 586                 if (new_scale > scale)
 587                         return -1;
 588         }
 589         else
 590                 new_scale = 0;
 591
 592         // Make sure we scale by enough to bring the current scale factor
 593         // into valid range.
 594         //
 595         if (new_scale < scale - DEC_SCALE_MAX)
 596                 new_scale = scale - DEC_SCALE_MAX;
 597
 598         if (new_scale != 0) {
 599                 // Scale by the power of 10 given by new_scale.  Note that this is
 600                 // NOT guaranteed to bring the number within 96 bits -- it could
 601                 // be 1 power of 10 short.
 602                 //
 603                 scale -= new_scale;
 604                 sticky = 0;
 605                 sdlTmp.u.Hi = 0; // initialize remainder
 606
 607                 for (;;) {
 608
 609                         sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
 610
 611                         if (new_scale > POWER10_MAX)
 612                                 pwr = ten_to_nine;
 613                         else
 614                                 pwr = power10[new_scale];
 615
 616                         // Compute first quotient.
 617                         // DivMod64by32 returns quotient in Lo, remainder in Hi.
 618                         //
 619                         sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
 620                         res[hi_res] = sdlTmp.u.Lo;
 621                         cur = hi_res - 1;
 622
 623                         if (cur >= 0) {
 624                                 // If first quotient was 0, update hi_res.
 625                                 //
 626                                 if (sdlTmp.u.Lo == 0)
 627                                         hi_res--;
 628
 629                                 // Compute subsequent quotients.
 630                                 //
 631                                 do {
 632                                         sdlTmp.u.Lo = res[cur];
 633                                         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
 634                                         res[cur] = sdlTmp.u.Lo;
 635                                         cur--;
 636                                 } while (cur >= 0);
 637
 638                         }
 639
 640                         new_scale -= POWER10_MAX;
 641                         if (new_scale > 0)
 642                                 continue; // scale some more
 643
 644                         // If we scaled enough, hi_res would be 2 or less.  If not,
 645                         // divide by 10 more.
 646                         //
 647                         if (hi_res > 2) {
 648                                 new_scale = 1;
 649                                 scale--;
 650                                 continue; // scale by 10
 651                         }
 652
 653                         // Round final result.  See if remainder >= 1/2 of divisor.
 654                         // If remainder == 1/2 divisor, round up if odd or sticky bit set.
 655                         //
 656                         pwr >>= 1;  // power of 10 always even
 657                         if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
 658                                                     ((res[0] & 1) | sticky)) ) {
 659                                 cur = -1;
 660                                 while (++res[++cur] == 0);
 661
 662                                 if (cur > 2) {
 663                                         // The rounding caused us to carry beyond 96 bits.
 664                                         // Scale by 10 more.
 665                                         //
 666                                         hi_res = cur;
 667                                         sticky = 0;  // no sticky bit
 668                                         sdlTmp.u.Hi = 0; // or remainder
 669                                         new_scale = 1;
 670                                         scale--;
 671                                         continue; // scale by 10
 672                                 }
 673                         }
 674
 675                         // We may have scaled it more than we planned.  Make sure the scale
 676                         // factor hasn't gone negative, indicating overflow.
 677                         //
 678                         if (scale < 0)
 679                                 return -1;
 680
 681                         return scale;
 682                 } // for(;;)
 683         }
 684         return scale;
 685 }
 686
 687 // Decimal multiply
 688 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
 689 static MonoDecimalStatus
 690 VarDecMul(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
 691 {
 692         SPLIT64 tmp;
 693         SPLIT64 tmp2;
 694         SPLIT64 tmp3;
 695         int     scale;
 696         int     hi_prod;
 697         uint32_t   pwr;
 698         uint32_t   rem_lo;
 699         uint32_t   rem_hi;
 700         uint32_t   prod[6];
 701
 702         scale = left->u.u.scale + right->u.u.scale;
 703
 704         if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
 705                 // Upper 64 bits are zero.
 706                 //
 707                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 708                 if (scale > DEC_SCALE_MAX)
 709                 {
 710                         // Result scale is too big.  Divide result by power of 10 to reduce it.
 711                         // If the amount to divide by is > 19 the result is guaranteed
 712                         // less than 1/2.  [max value in 64 bits = 1.84E19]
 713                         //
 714                         scale -= DEC_SCALE_MAX;
 715                         if (scale > 19) {
 716                         ReturnZero:
 717                                 DECIMAL_SETZERO(*result);
 718                                 return MONO_DECIMAL_OK;
 719                         }
 720
 721                         if (scale > POWER10_MAX) {
 722                                 // Divide by 1E10 first, to get the power down to a 32-bit quantity.
 723                                 // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
 724                                 // then multiply the next divisor by 4 (which will be a max of 4E9).
 725                                 //
 726                                 rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
 727                                 pwr = power10[scale - 10] << 2;
 728                         } else {
 729                                 pwr = power10[scale];
 730                                 rem_lo = 0;
 731                         }
 732
 733                         // Power to divide by fits in 32 bits.
 734                         //
 735                         rem_hi = FullDiv64By32(&tmp.int64, pwr);
 736
 737                         // Round result.  See if remainder >= 1/2 of divisor.
 738                         // Divisor is a power of 10, so it is always even.
 739                         //
 740                         pwr >>= 1;
 741                         if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
 742                                 tmp.int64++;
 743
 744                         scale = DEC_SCALE_MAX;
 745                 }
 746                 DECIMAL_LO32(*result) = tmp.u.Lo;
 747                 DECIMAL_MID32(*result) = tmp.u.Hi;
 748                 DECIMAL_HI32(*result) = 0;
 749         } else {
 750                 // At least one operand has bits set in the upper 64 bits.
 751                 //
 752                 // Compute and accumulate the 9 partial products into a
 753                 // 192-bit (24-byte) result.
 754                 //
 755                 //                [l-h][l-m][l-l]   left high, middle, low
 756                 //             x  [r-h][r-m][r-l]   right high, middle, low
 757                 // ------------------------------
 758                 //
 759                 //                     [0-h][0-l]   l-l * r-l
 760                 //                [1ah][1al]        l-l * r-m
 761                 //                [1bh][1bl]        l-m * r-l
 762                 //           [2ah][2al]             l-m * r-m
 763                 //           [2bh][2bl]             l-l * r-h
 764                 //           [2ch][2cl]             l-h * r-l
 765                 //      [3ah][3al]                  l-m * r-h
 766                 //      [3bh][3bl]                  l-h * r-m
 767                 // [4-h][4-l]                       l-h * r-h
 768                 // ------------------------------
 769                 // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
 770                 //
 771                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 772                 prod[0] = tmp.u.Lo;
 773
 774                 tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
 775
 776                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
 777                 tmp.int64 += tmp2.int64; // this could generate carry
 778                 prod[1] = tmp.u.Lo;
 779                 if (tmp.int64 < tmp2.int64) // detect carry
 780                         tmp2.u.Hi = 1;
 781                 else
 782                         tmp2.u.Hi = 0;
 783                 tmp2.u.Lo = tmp.u.Hi;
 784
 785                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
 786
 787                 if (left->Hi32 | right->Hi32) {
 788                         // Highest 32 bits is non-zero.  Calculate 5 more partial products.
 789                         //
 790                         tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
 791                         tmp.int64 += tmp2.int64; // this could generate carry
 792                         if (tmp.int64 < tmp2.int64) // detect carry
 793                                 tmp3.u.Hi = 1;
 794                         else
 795                                 tmp3.u.Hi = 0;
 796
 797                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
 798                         tmp.int64 += tmp2.int64; // this could generate carry
 799                         prod[2] = tmp.u.Lo;
 800                         if (tmp.int64 < tmp2.int64) // detect carry
 801                                 tmp3.u.Hi++;
 802                         tmp3.u.Lo = tmp.u.Hi;
 803
 804                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
 805                         tmp.int64 += tmp3.int64; // this could generate carry
 806                         if (tmp.int64 < tmp3.int64) // detect carry
 807                                 tmp3.u.Hi = 1;
 808                         else
 809                                 tmp3.u.Hi = 0;
 810
 811                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
 812                         tmp.int64 += tmp2.int64; // this could generate carry
 813                         prod[3] = tmp.u.Lo;
 814                         if (tmp.int64 < tmp2.int64) // detect carry
 815                                 tmp3.u.Hi++;
 816                         tmp3.u.Lo = tmp.u.Hi;
 817
 818                         tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
 819                         prod[4] = tmp.u.Lo;
 820                         prod[5] = tmp.u.Hi;
 821
 822                         hi_prod = 5;
 823                 }
 824                 else {
 825                         prod[2] = tmp.u.Lo;
 826                         prod[3] = tmp.u.Hi;
 827                         hi_prod = 3;
 828                 }
 829
 830                 // Check for leading zero uint32_ts on the product
 831                 //
 832                 while (prod[hi_prod] == 0) {
 833                         hi_prod--;
 834                         if (hi_prod < 0)
 835                                 goto ReturnZero;
 836                 }
 837
 838                 scale = ScaleResult(prod, hi_prod, scale);
 839                 if (scale == -1)
 840                         return MONO_DECIMAL_OVERFLOW;
 841
 842                 result->v.v.Lo32 = prod[0];
 843                 result->v.v.Mid32 = prod[1];
 844                 result->Hi32 = prod[2];
 845         }
 846
 847         result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
 848         result->u.u.scale = (char)scale;
 849         return MONO_DECIMAL_OK;
 850 }
 851
 852 // Addition and subtraction
 853 static MonoDecimalStatus
 854 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
 855 {
 856         uint32_t     num[6];
 857         uint32_t     pwr;
 858         int       scale;
 859         int       hi_prod;
 860         int       cur;
 861         SPLIT64   tmp;
 862         MonoDecimal decRes;
 863         MonoDecimal decTmp;
 864         MonoDecimal *pdecTmp;
 865
 866         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
 867
 868         if (right->u.u.scale == left->u.u.scale) {
 869                 // Scale factors are equal, no alignment necessary.
 870                 //
 871                 decRes.u.signscale = left->u.signscale;
 872
 873         AlignedAdd:
 874                 if (sign) {
 875                         // Signs differ - subtract
 876                         //
 877                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
 878                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
 879
 880                         // Propagate carry
 881                         //
 882                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
 883                                 decRes.Hi32--;
 884                                 if (decRes.Hi32 >= left->Hi32)
 885                                         goto SignFlip;
 886                         } else if (decRes.Hi32 > left->Hi32) {
 887                                 // Got negative result.  Flip its sign.
 888                                 //
 889                         SignFlip:
 890                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
 891                                 decRes.Hi32 = ~decRes.Hi32;
 892                                 if (DECIMAL_LO64_GET(decRes) == 0)
 893                                         decRes.Hi32++;
 894                                 decRes.u.u.sign ^= DECIMAL_NEG;
 895                         }
 896
 897                 } else {
 898                         // Signs are the same - add
 899                         //
 900                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
 901                         decRes.Hi32 = left->Hi32 + right->Hi32;
 902
 903                         // Propagate carry
 904                         //
 905                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
 906                                 decRes.Hi32++;
 907                                 if (decRes.Hi32 <= left->Hi32)
 908                                         goto AlignedScale;
 909                         } else if (decRes.Hi32 < left->Hi32) {
 910                         AlignedScale:
 911                                 // The addition carried above 96 bits.  Divide the result by 10,
 912                                 // dropping the scale factor.
 913                                 //
 914                                 if (decRes.u.u.scale == 0)
 915                                         return MONO_DECIMAL_OVERFLOW;
 916                                 decRes.u.u.scale--;
 917
 918                                 tmp.u.Lo = decRes.Hi32;
 919                                 tmp.u.Hi = 1;
 920                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 921                                 decRes.Hi32 = tmp.u.Lo;
 922
 923                                 tmp.u.Lo = decRes.v.v.Mid32;
 924                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 925                                 decRes.v.v.Mid32 = tmp.u.Lo;
 926
 927                                 tmp.u.Lo = decRes.v.v.Lo32;
 928                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 929                                 decRes.v.v.Lo32 = tmp.u.Lo;
 930
 931                                 // See if we need to round up.
 932                                 //
 933                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
 934                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
 935                                                 if (DECIMAL_LO64_GET(decRes) == 0)
 936                                                         decRes.Hi32++;
 937                                 }
 938                         }
 939                 }
 940         }
 941         else {
 942                 // Scale factors are not equal.  Assume that a larger scale
 943                 // factor (more decimal places) is likely to mean that number
 944                 // is smaller.  Start by guessing that the right operand has
 945                 // the larger scale factor.  The result will have the larger
 946                 // scale factor.
 947                 //
 948                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
 949                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
 950                 scale = decRes.u.u.scale - left->u.u.scale;
 951
 952                 if (scale < 0) {
 953                         // Guessed scale factor wrong. Swap operands.
 954                         //
 955                         scale = -scale;
 956                         decRes.u.u.scale = left->u.u.scale;
 957                         decRes.u.u.sign ^= sign;
 958                         pdecTmp = right;
 959                         right = left;
 960                         left = pdecTmp;
 961                 }
 962
 963                 // *left will need to be multiplied by 10^scale so
 964                 // it will have the same scale as *right.  We could be
 965                 // extending it to up to 192 bits of precision.
 966                 //
 967                 if (scale <= POWER10_MAX) {
 968                         // Scaling won't make it larger than 4 uint32_ts
 969                         //
 970                         pwr = power10[scale];
 971                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
 972                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
 973                         tmp.int64 += decTmp.v.v.Mid32;
 974                         decTmp.v.v.Mid32 = tmp.u.Lo;
 975                         decTmp.Hi32 = tmp.u.Hi;
 976                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
 977                         tmp.int64 += decTmp.Hi32;
 978                         if (tmp.u.Hi == 0) {
 979                                 // Result fits in 96 bits.  Use standard aligned add.
 980                                 //
 981                                 decTmp.Hi32 = tmp.u.Lo;
 982                                 left = &decTmp;
 983                                 goto AlignedAdd;
 984                         }
 985                         num[0] = decTmp.v.v.Lo32;
 986                         num[1] = decTmp.v.v.Mid32;
 987                         num[2] = tmp.u.Lo;
 988                         num[3] = tmp.u.Hi;
 989                         hi_prod = 3;
 990                 }
 991                 else {
 992                         // Have to scale by a bunch.  Move the number to a buffer
 993                         // where it has room to grow as it's scaled.
 994                         //
 995                         num[0] = left->v.v.Lo32;
 996                         num[1] = left->v.v.Mid32;
 997                         num[2] = left->Hi32;
 998                         hi_prod = 2;
 999
1000                         // Scan for zeros in the upper words.
1001                         //
1002                         if (num[2] == 0) {
1003                                 hi_prod = 1;
1004                                 if (num[1] == 0) {
1005                                         hi_prod = 0;
1006                                         if (num[0] == 0) {
1007                                                 // Left arg is zero, return right.
1008                                                 //
1009                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
1010                                                 decRes.Hi32 = right->Hi32;
1011                                                 decRes.u.u.sign ^= sign;
1012                                                 goto RetDec;
1013                                         }
1014                                 }
1015                         }
1016
1017                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
1018                         // with index of highest non-zero uint32_t.
1019                         //
1020                         for (; scale > 0; scale -= POWER10_MAX) {
1021                                 if (scale > POWER10_MAX)
1022                                         pwr = ten_to_nine;
1023                                 else
1024                                         pwr = power10[scale];
1025
1026                                 tmp.u.Hi = 0;
1027                                 for (cur = 0; cur <= hi_prod; cur++) {
1028                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
1029                                         num[cur] = tmp.u.Lo;
1030                                 }
1031
1032                                 if (tmp.u.Hi != 0)
1033                                         // We're extending the result by another uint32_t.
1034                                         num[++hi_prod] = tmp.u.Hi;
1035                         }
1036                 }
1037
1038                 // Scaling complete, do the add.  Could be subtract if signs differ.
1039                 //
1040                 tmp.u.Lo = num[0];
1041                 tmp.u.Hi = num[1];
1042
1043                 if (sign) {
1044                         // Signs differ, subtract.
1045                         //
1046                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1047                         decRes.Hi32 = num[2] - right->Hi32;
1048
1049                         // Propagate carry
1050                         //
1051                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1052                                 decRes.Hi32--;
1053                                 if (decRes.Hi32 >= num[2])
1054                                         goto LongSub;
1055                         }
1056                         else if (decRes.Hi32 > num[2]) {
1057                         LongSub:
1058                                 // If num has more than 96 bits of precision, then we need to
1059                                 // carry the subtraction into the higher bits.  If it doesn't,
1060                                 // then we subtracted in the wrong order and have to flip the
1061                                 // sign of the result.
1062                                 //
1063                                 if (hi_prod <= 2)
1064                                         goto SignFlip;
1065
1066                                 cur = 3;
1067                                 while(num[cur++]-- == 0);
1068                                 if (num[hi_prod] == 0)
1069                                         hi_prod--;
1070                         }
1071                 }
1072                 else {
1073                         // Signs the same, add.
1074                         //
1075                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1076                         decRes.Hi32 = num[2] + right->Hi32;
1077
1078                         // Propagate carry
1079                         //
1080                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1081                                 decRes.Hi32++;
1082                                 if (decRes.Hi32 <= num[2])
1083                                         goto LongAdd;
1084                         }
1085                         else if (decRes.Hi32 < num[2]) {
1086                         LongAdd:
1087                                 // Had a carry above 96 bits.
1088                                 //
1089                                 cur = 3;
1090                                 do {
1091                                         if (hi_prod < cur) {
1092                                                 num[cur] = 1;
1093                                                 hi_prod = cur;
1094                                                 break;
1095                                         }
1096                                 }while (++num[cur++] == 0);
1097                         }
1098                 }
1099
1100                 if (hi_prod > 2) {
1101                         num[0] = decRes.v.v.Lo32;
1102                         num[1] = decRes.v.v.Mid32;
1103                         num[2] = decRes.Hi32;
1104                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1105                         if (decRes.u.u.scale == (uint8_t) -1)
1106                                 return MONO_DECIMAL_OVERFLOW;
1107
1108                         decRes.v.v.Lo32 = num[0];
1109                         decRes.v.v.Mid32 = num[1];
1110                         decRes.Hi32 = num[2];
1111                 }
1112         }
1113
1114 RetDec:
1115         COPYDEC(*result, decRes);
1116         // Odd, the Microsoft code does not set result->reserved to zero on this case
1117         return MONO_DECIMAL_OK;
1118 }
1119
1120 // Decimal addition
1121 static MonoDecimalStatus
1122 VarDecAdd(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1123 {
1124     return DecAddSub (left, right, result, 0);
1125 }
1126
1127 // Decimal subtraction
1128 static MonoDecimalStatus
1129 VarDecSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1130 {
1131     return DecAddSub (left, right, result, DECIMAL_NEG);
1132 }
1133
1134 /**
1135  * IncreaseScale:
1136  *
1137  * Entry:
1138  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1139  *   pwr   - Scale factor to multiply by
1140  *
1141  * Purpose:
1142  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1143  *   the input.  The last 32 bits of the product are the return value.
1144  *
1145  * Exit:
1146  *   Returns highest 32 bits of product.
1147  *
1148  * Exceptions:
1149  *   None.
1150  *
1151  */
1152 static uint32_t
1153 IncreaseScale(uint32_t *num, uint32_t pwr)
1154 {
1155         SPLIT64   sdlTmp;
1156
1157         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1158         num[0] = sdlTmp.u.Lo;
1159         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1160         num[1] = sdlTmp.u.Lo;
1161         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1162         num[2] = sdlTmp.u.Lo;
1163         return sdlTmp.u.Hi;
1164 }
1165
1166 /**
1167  * Div96By64:
1168  *
1169  * Entry:
1170  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1171  *   sdlDen  - 64-bit divisor.
1172  *
1173  * Purpose:
1174  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1175  *   Divisor must be larger than upper 64 bits of dividend.
1176  *
1177  * Exit:
1178  *   Remainder overwrites lower 64-bits of dividend.
1179  *   Returns quotient.
1180  *
1181  * Exceptions:
1182  *   None.
1183  *
1184  */
1185 static uint32_t
1186 Div96By64(uint32_t *num, SPLIT64 den)
1187 {
1188         SPLIT64 quo;
1189         SPLIT64 sdlNum;
1190         SPLIT64 prod;
1191
1192         sdlNum.u.Lo = num[0];
1193
1194         if (num[2] >= den.u.Hi) {
1195                 // Divide would overflow.  Assume a quotient of 2^32, and set
1196                 // up remainder accordingly.  Then jump to loop which reduces
1197                 // the quotient.
1198                 //
1199                 sdlNum.u.Hi = num[1] - den.u.Lo;
1200                 quo.u.Lo = 0;
1201                 goto NegRem;
1202         }
1203
1204         // Hardware divide won't overflow
1205         //
1206         if (num[2] == 0 && num[1] < den.u.Hi)
1207                 // Result is zero.  Entire dividend is remainder.
1208                 //
1209                 return 0;
1210
1211         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1212         //
1213         quo.u.Lo = num[1];
1214         quo.u.Hi = num[2];
1215         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1216         sdlNum.u.Hi = quo.u.Hi; // remainder
1217
1218         // Compute full remainder, rem = dividend - (quo * divisor).
1219         //
1220         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1221         sdlNum.int64 -= prod.int64;
1222
1223         if (sdlNum.int64 > ~prod.int64) {
1224         NegRem:
1225                 // Remainder went negative.  Add divisor back in until it's positive,
1226                 // a max of 2 times.
1227                 //
1228                 do {
1229                         quo.u.Lo--;
1230                         sdlNum.int64 += den.int64;
1231                 }while (sdlNum.int64 >= den.int64);
1232         }
1233
1234         num[0] = sdlNum.u.Lo;
1235         num[1] = sdlNum.u.Hi;
1236         return quo.u.Lo;
1237 }
1238
1239 /***
1240 * Div128By96
1241 *
1242 * Entry:
1243 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1244 *   den - Pointer to 96-bit divisor.
1245 *
1246 * Purpose:
1247 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1248 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1249 *   assured in the initial call because the divisor is normalized
1250 *   and the dividend can't be.  In subsequent calls, the remainder
1251 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1252 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1253 *
1254 * Exit:
1255 *   Remainder overwrites lower 96-bits of dividend.
1256 *   Returns quotient.
1257 *
1258 * Exceptions:
1259 *   None.
1260 *
1261 ***********************************************************************/
1262
1263 static uint32_t
1264 Div128By96(uint32_t *num, uint32_t *den)
1265 {
1266         SPLIT64 sdlQuo;
1267         SPLIT64 sdlNum;
1268         SPLIT64 sdlProd1;
1269         SPLIT64 sdlProd2;
1270
1271         sdlNum.u.Lo = num[0];
1272         sdlNum.u.Hi = num[1];
1273
1274         if (num[3] == 0 && num[2] < den[2]){
1275                 // Result is zero.  Entire dividend is remainder.
1276                 //
1277                 return 0;
1278         }
1279
1280         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1281         //
1282         sdlQuo.u.Lo = num[2];
1283         sdlQuo.u.Hi = num[3];
1284         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1285
1286         // Compute full remainder, rem = dividend - (quo * divisor).
1287         //
1288         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1289         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1290         sdlProd2.int64 += sdlProd1.u.Hi;
1291         sdlProd1.u.Hi = sdlProd2.u.Lo;
1292
1293         sdlNum.int64 -= sdlProd1.int64;
1294         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1295
1296         // Propagate carries
1297         //
1298         if (sdlNum.int64 > ~sdlProd1.int64) {
1299                 num[2]--;
1300                 if (num[2] >= ~sdlProd2.u.Hi)
1301                         goto NegRem;
1302         } else if (num[2] > ~sdlProd2.u.Hi) {
1303         NegRem:
1304                 // Remainder went negative.  Add divisor back in until it's positive,
1305                 // a max of 2 times.
1306                 //
1307                 sdlProd1.u.Lo = den[0];
1308                 sdlProd1.u.Hi = den[1];
1309
1310                 for (;;) {
1311                         sdlQuo.u.Lo--;
1312                         sdlNum.int64 += sdlProd1.int64;
1313                         num[2] += den[2];
1314
1315                         if (sdlNum.int64 < sdlProd1.int64) {
1316                                 // Detected carry. Check for carry out of top
1317                                 // before adding it in.
1318                                 //
1319                                 if (num[2]++ < den[2])
1320                                         break;
1321                         }
1322                         if (num[2] < den[2])
1323                                 break; // detected carry
1324                 }
1325         }
1326
1327         num[0] = sdlNum.u.Lo;
1328         num[1] = sdlNum.u.Hi;
1329         return sdlQuo.u.Lo;
1330 }
1331
1332 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1333 // Returns FALSE if there is an overflow
1334 static gboolean
1335 Add32To96(uint32_t *num, uint32_t value)
1336 {
1337         num[0] += value;
1338         if (num[0] < value) {
1339                 if (++num[1] == 0) {
1340                         if (++num[2] == 0) {
1341                                 return FALSE;
1342                         }
1343                 }
1344         }
1345         return TRUE;
1346 }
1347
1348 static void
1349 OverflowUnscale (uint32_t *quo, gboolean remainder)
1350 {
1351         SPLIT64  sdlTmp;
1352
1353         // We have overflown, so load the high bit with a one.
1354         sdlTmp.u.Hi = 1u;
1355         sdlTmp.u.Lo = quo[2];
1356         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1357         quo[2] = sdlTmp.u.Lo;
1358         sdlTmp.u.Lo = quo[1];
1359         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1360         quo[1] = sdlTmp.u.Lo;
1361         sdlTmp.u.Lo = quo[0];
1362         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1363         quo[0] = sdlTmp.u.Lo;
1364         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1365         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1366                 Add32To96(quo, 1u);
1367         }
1368 }
1369
1370 // VarDecDiv - Decimal divide
1371 static MonoDecimalStatus
1372 VarDecDiv(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1373 {
1374         uint32_t   quo[3];
1375         uint32_t   quoSave[3];
1376         uint32_t   rem[4];
1377         uint32_t   divisor[3];
1378         uint32_t   pwr;
1379         uint32_t   utmp;
1380         uint32_t   utmp1;
1381         SPLIT64 sdlTmp;
1382         SPLIT64 sdlDivisor;
1383         int     scale;
1384         int     cur_scale;
1385
1386         scale = left->u.u.scale - right->u.u.scale;
1387         divisor[0] = right->v.v.Lo32;
1388         divisor[1] = right->v.v.Mid32;
1389         divisor[2] = right->Hi32;
1390
1391         if (divisor[1] == 0 && divisor[2] == 0) {
1392                 // Divisor is only 32 bits.  Easy divide.
1393                 //
1394                 if (divisor[0] == 0)
1395                         return MONO_DECIMAL_DIVBYZERO;
1396
1397                 quo[0] = left->v.v.Lo32;
1398                 quo[1] = left->v.v.Mid32;
1399                 quo[2] = left->Hi32;
1400                 rem[0] = Div96By32(quo, divisor[0]);
1401
1402                 for (;;) {
1403                         if (rem[0] == 0) {
1404                                 if (scale < 0) {
1405                                         cur_scale = min(9, -scale);
1406                                         goto HaveScale;
1407                                 }
1408                                 break;
1409                         }
1410
1411                         // We have computed a quotient based on the natural scale
1412                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1413                         // remainder, so now we should increase the scale if possible to
1414                         // include more quotient bits.
1415                         //
1416                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1417                         // computing more quotient bits as long as the remainder stays
1418                         // non-zero.  If scaling by that much would cause overflow, we'll
1419                         // drop out of the loop and scale by as much as we can.
1420                         //
1421                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1422                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1423                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1424                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
1425                         // is the largest value in quo[1] (when quo[2] == 4) that is
1426                         // assured not to overflow.
1427                         //
1428                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1429                         if (cur_scale == 0) {
1430                                 // No more scaling to be done, but remainder is non-zero.
1431                                 // Round quotient.
1432                                 //
1433                                 utmp = rem[0] << 1;
1434                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1435                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1436                                 RoundUp:
1437                                         if (++quo[0] == 0)
1438                                                 if (++quo[1] == 0)
1439                                                         quo[2]++;
1440                                 }
1441                                 break;
1442                         }
1443
1444                         if (cur_scale == -1)
1445                                 return MONO_DECIMAL_OVERFLOW;
1446
1447                 HaveScale:
1448                         pwr = power10[cur_scale];
1449                         scale += cur_scale;
1450
1451                         if (IncreaseScale(quo, pwr) != 0)
1452                                 return MONO_DECIMAL_OVERFLOW;
1453
1454                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1455                         rem[0] = sdlTmp.u.Hi;
1456
1457                         quo[0] += sdlTmp.u.Lo;
1458                         if (quo[0] < sdlTmp.u.Lo) {
1459                                 if (++quo[1] == 0)
1460                                         quo[2]++;
1461                         }
1462                 } // for (;;)
1463         }
1464         else {
1465                 // Divisor has bits set in the upper 64 bits.
1466                 //
1467                 // Divisor must be fully normalized (shifted so bit 31 of the most
1468                 // significant uint32_t is 1).  Locate the MSB so we know how much to
1469                 // normalize by.  The dividend will be shifted by the same amount so
1470                 // the quotient is not changed.
1471                 //
1472                 if (divisor[2] == 0)
1473                         utmp = divisor[1];
1474                 else
1475                         utmp = divisor[2];
1476
1477                 cur_scale = 0;
1478                 if (!(utmp & 0xFFFF0000)) {
1479                         cur_scale += 16;
1480                         utmp <<= 16;
1481                 }
1482                 if (!(utmp & 0xFF000000)) {
1483                         cur_scale += 8;
1484                         utmp <<= 8;
1485                 }
1486                 if (!(utmp & 0xF0000000)) {
1487                         cur_scale += 4;
1488                         utmp <<= 4;
1489                 }
1490                 if (!(utmp & 0xC0000000)) {
1491                         cur_scale += 2;
1492                         utmp <<= 2;
1493                 }
1494                 if (!(utmp & 0x80000000)) {
1495                         cur_scale++;
1496                         utmp <<= 1;
1497                 }
1498
1499                 // Shift both dividend and divisor left by cur_scale.
1500                 //
1501                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1502                 rem[0] = sdlTmp.u.Lo;
1503                 rem[1] = sdlTmp.u.Hi;
1504                 sdlTmp.u.Lo = left->v.v.Mid32;
1505                 sdlTmp.u.Hi = left->Hi32;
1506                 sdlTmp.int64 <<= cur_scale;
1507                 rem[2] = sdlTmp.u.Hi;
1508                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1509
1510                 sdlDivisor.u.Lo = divisor[0];
1511                 sdlDivisor.u.Hi = divisor[1];
1512                 sdlDivisor.int64 <<= cur_scale;
1513
1514                 if (divisor[2] == 0) {
1515                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1516                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1517                         //
1518                         sdlTmp.u.Lo = rem[2];
1519                         sdlTmp.u.Hi = rem[3];
1520
1521                         quo[2] = 0;
1522                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1523                         quo[0] = Div96By64(rem, sdlDivisor);
1524
1525                         for (;;) {
1526                                 if ((rem[0] | rem[1]) == 0) {
1527                                         if (scale < 0) {
1528                                                 cur_scale = min(9, -scale);
1529                                                 goto HaveScale64;
1530                                         }
1531                                         break;
1532                                 }
1533
1534                                 // Remainder is non-zero.  Scale up quotient and remainder by
1535                                 // powers of 10 so we can compute more significant bits.
1536                                 //
1537                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1538                                 if (cur_scale == 0) {
1539                                         // No more scaling to be done, but remainder is non-zero.
1540                                         // Round quotient.
1541                                         //
1542                                         sdlTmp.u.Lo = rem[0];
1543                                         sdlTmp.u.Hi = rem[1];
1544                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1545                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1546                                                 goto RoundUp;
1547                                         break;
1548                                 }
1549
1550                                 if (cur_scale == -1)
1551                                         return MONO_DECIMAL_OVERFLOW;
1552
1553                         HaveScale64:
1554                                 pwr = power10[cur_scale];
1555                                 scale += cur_scale;
1556
1557                                 if (IncreaseScale(quo, pwr) != 0)
1558                                         return MONO_DECIMAL_OVERFLOW;
1559
1560                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1561                                 IncreaseScale(rem, pwr);
1562                                 utmp = Div96By64(rem, sdlDivisor);
1563                                 quo[0] += utmp;
1564                                 if (quo[0] < utmp)
1565                                         if (++quo[1] == 0)
1566                                                 quo[2]++;
1567
1568                         } // for (;;)
1569                 }
1570                 else {
1571                         // Have a 96-bit divisor in divisor[].
1572                         //
1573                         // Start by finishing the shift left by cur_scale.
1574                         //
1575                         sdlTmp.u.Lo = divisor[1];
1576                         sdlTmp.u.Hi = divisor[2];
1577                         sdlTmp.int64 <<= cur_scale;
1578                         divisor[0] = sdlDivisor.u.Lo;
1579                         divisor[1] = sdlDivisor.u.Hi;
1580                         divisor[2] = sdlTmp.u.Hi;
1581
1582                         // The remainder (currently 96 bits spread over 4 uint32_ts)
1583                         // will be < divisor.
1584                         //
1585                         quo[2] = 0;
1586                         quo[1] = 0;
1587                         quo[0] = Div128By96(rem, divisor);
1588
1589                         for (;;) {
1590                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1591                                         if (scale < 0) {
1592                                                 cur_scale = min(9, -scale);
1593                                                 goto HaveScale96;
1594                                         }
1595                                         break;
1596                                 }
1597
1598                                 // Remainder is non-zero.  Scale up quotient and remainder by
1599                                 // powers of 10 so we can compute more significant bits.
1600                                 //
1601                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1602                                 if (cur_scale == 0) {
1603                                         // No more scaling to be done, but remainder is non-zero.
1604                                         // Round quotient.
1605                                         //
1606                                         if (rem[2] >= 0x80000000)
1607                                                 goto RoundUp;
1608
1609                                         utmp = rem[0] > 0x80000000;
1610                                         utmp1 = rem[1] > 0x80000000;
1611                                         rem[0] <<= 1;
1612                                         rem[1] = (rem[1] << 1) + utmp;
1613                                         rem[2] = (rem[2] << 1) + utmp1;
1614
1615                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1616                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1617                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1618                                               (quo[0] & 1))))
1619                                                 goto RoundUp;
1620                                         break;
1621                                 }
1622
1623                                 if (cur_scale == -1)
1624                                         return MONO_DECIMAL_OVERFLOW;
1625
1626                         HaveScale96:
1627                                 pwr = power10[cur_scale];
1628                                 scale += cur_scale;
1629
1630                                 if (IncreaseScale(quo, pwr) != 0)
1631                                         return MONO_DECIMAL_OVERFLOW;
1632
1633                                 rem[3] = IncreaseScale(rem, pwr);
1634                                 utmp = Div128By96(rem, divisor);
1635                                 quo[0] += utmp;
1636                                 if (quo[0] < utmp)
1637                                         if (++quo[1] == 0)
1638                                                 quo[2]++;
1639
1640                         } // for (;;)
1641                 }
1642         }
1643
1644         // No more remainder.  Try extracting any extra powers of 10 we may have
1645         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1646         // If a division by one of these powers returns a zero remainder, then
1647         // we keep the quotient.  If the remainder is not zero, then we restore
1648         // the previous value.
1649         //
1650         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1651         // we can extract.  We use this as a quick test on whether to try a
1652         // given power.
1653         //
1654         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1655                 quoSave[0] = quo[0];
1656                 quoSave[1] = quo[1];
1657                 quoSave[2] = quo[2];
1658
1659                 if (Div96By32(quoSave, 100000000) == 0) {
1660                         quo[0] = quoSave[0];
1661                         quo[1] = quoSave[1];
1662                         quo[2] = quoSave[2];
1663                         scale -= 8;
1664                 }
1665                 else
1666                         break;
1667         }
1668
1669         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1670                 quoSave[0] = quo[0];
1671                 quoSave[1] = quo[1];
1672                 quoSave[2] = quo[2];
1673
1674                 if (Div96By32(quoSave, 10000) == 0) {
1675                         quo[0] = quoSave[0];
1676                         quo[1] = quoSave[1];
1677                         quo[2] = quoSave[2];
1678                         scale -= 4;
1679                 }
1680         }
1681
1682         if ((quo[0] & 3) == 0 && scale >= 2) {
1683                 quoSave[0] = quo[0];
1684                 quoSave[1] = quo[1];
1685                 quoSave[2] = quo[2];
1686
1687                 if (Div96By32(quoSave, 100) == 0) {
1688                         quo[0] = quoSave[0];
1689                         quo[1] = quoSave[1];
1690                         quo[2] = quoSave[2];
1691                         scale -= 2;
1692                 }
1693         }
1694
1695         if ((quo[0] & 1) == 0 && scale >= 1) {
1696                 quoSave[0] = quo[0];
1697                 quoSave[1] = quo[1];
1698                 quoSave[2] = quo[2];
1699
1700                 if (Div96By32(quoSave, 10) == 0) {
1701                         quo[0] = quoSave[0];
1702                         quo[1] = quoSave[1];
1703                         quo[2] = quoSave[2];
1704                         scale -= 1;
1705                 }
1706         }
1707
1708         result->Hi32 = quo[2];
1709         result->v.v.Mid32 = quo[1];
1710         result->v.v.Lo32 = quo[0];
1711         result->u.u.scale = scale;
1712         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1713         return MONO_DECIMAL_OK;
1714 }
1715
1716 // VarDecAbs - Decimal Absolute Value
1717 static void
1718 VarDecAbs (MonoDecimal *pdecOprd, MonoDecimal *result)
1719 {
1720         COPYDEC(*result, *pdecOprd);
1721         result->u.u.sign &= ~DECIMAL_NEG;
1722         // Microsoft does not set reserved here
1723 }
1724
1725 // VarDecFix - Decimal Fix (chop to integer)
1726 static void
1727 VarDecFix (MonoDecimal *pdecOprd, MonoDecimal *result)
1728 {
1729         DecFixInt(result, pdecOprd);
1730 }
1731
1732
1733 // VarDecInt - Decimal Int (round down to integer)
1734 static void
1735 VarDecInt (MonoDecimal *pdecOprd, MonoDecimal *result)
1736 {
1737         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1738                 // We have chopped off a non-zero amount from a negative value.  Since
1739                 // we round toward -infinity, we must increase the integer result by
1740                 // 1 to make it more negative.  This will never overflow because
1741                 // in order to have a remainder, we must have had a non-zero scale factor.
1742                 // Our scale factor is back to zero now.
1743                 //
1744                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1745                 if (DECIMAL_LO64_GET(*result) == 0)
1746                         result->Hi32++;
1747         }
1748 }
1749
1750
1751 // VarDecNeg - Decimal Negate
1752 static void
1753 VarDecNeg (MonoDecimal *pdecOprd, MonoDecimal *result)
1754 {
1755         COPYDEC(*result, *pdecOprd);
1756         // Microsoft does not set result->reserved to zero on this case.
1757         result->u.u.sign ^= DECIMAL_NEG;
1758 }
1759
1760 //
1761 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1762 //
1763 static MonoDecimalStatus
1764 VarDecRound(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1765 {
1766         uint32_t num[3];
1767         uint32_t rem;
1768         uint32_t sticky;
1769         uint32_t pwr;
1770         int scale;
1771
1772         if (cDecimals < 0)
1773                 return MONO_DECIMAL_INVALID_ARGUMENT;
1774
1775         scale = input->u.u.scale - cDecimals;
1776         if (scale > 0) {
1777                 num[0] = input->v.v.Lo32;
1778                 num[1] = input->v.v.Mid32;
1779                 num[2] = input->Hi32;
1780                 result->u.u.sign = input->u.u.sign;
1781                 rem = sticky = 0;
1782
1783                 do {
1784                         sticky |= rem;
1785                         if (scale > POWER10_MAX)
1786                                 pwr = ten_to_nine;
1787                         else
1788                                 pwr = power10[scale];
1789
1790                         rem = Div96By32(num, pwr);
1791                         scale -= 9;
1792                 }while (scale > 0);
1793
1794                 // Now round.  rem has last remainder, sticky has sticky bits.
1795                 // To do IEEE rounding, we add LSB of result to sticky bits so
1796                 // either causes round up if remainder * 2 == last divisor.
1797                 //
1798                 sticky |= num[0] & 1;
1799                 rem = (rem << 1) + (sticky != 0);
1800                 if (pwr < rem &&
1801                     ++num[0] == 0 &&
1802                     ++num[1] == 0
1803                         )
1804                         ++num[2];
1805
1806                 result->v.v.Lo32 = num[0];
1807                 result->v.v.Mid32 = num[1];
1808                 result->Hi32 = num[2];
1809                 result->u.u.scale = cDecimals;
1810                 return MONO_DECIMAL_OK;
1811         }
1812
1813         COPYDEC(*result, *input);
1814         // Odd, the Microsoft source does not set the result->reserved to zero here.
1815         return MONO_DECIMAL_OK;
1816 }
1817
1818 //
1819 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1820 static MonoDecimalStatus
1821 VarDecFromR4 (float input, MonoDecimal* result)
1822 {
1823         int         exp;    // number of bits to left of binary point
1824         int         power;
1825         uint32_t       mant;
1826         double      dbl;
1827         SPLIT64     sdlLo;
1828         SPLIT64     sdlHi;
1829         int         lmax, cur;  // temps used during scale reduction
1830
1831         // The most we can scale by is 10^28, which is just slightly more
1832         // than 2^93.  So a float with an exponent of -94 could just
1833         // barely reach 0.5, but smaller exponents will always round to zero.
1834         //
1835         if ((exp = ((SingleStructure *)&input)->exp - SNGBIAS) < -94 ) {
1836                 DECIMAL_SETZERO(*result);
1837                 return MONO_DECIMAL_OK;
1838         }
1839
1840         if (exp > 96)
1841                 return MONO_DECIMAL_OVERFLOW;
1842
1843         // Round the input to a 7-digit integer.  The R4 format has
1844         // only 7 digits of precision, and we want to keep garbage digits
1845         // out of the Decimal were making.
1846         //
1847         // Calculate max power of 10 input value could have by multiplying
1848         // the exponent by log10(2).  Using scaled integer multiplcation,
1849         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1850         //
1851         dbl = fabs(input);
1852         power = 6 - ((exp * 19728) >> 16);
1853
1854         if (power >= 0) {
1855                 // We have less than 7 digits, scale input up.
1856                 //
1857                 if (power > DECMAX)
1858                         power = DECMAX;
1859
1860                 dbl = dbl * double_power10[power];
1861         } else {
1862                 if (power != -1 || dbl >= 1E7)
1863                         dbl = dbl / fnDblPower10(-power);
1864                 else
1865                         power = 0; // didn't scale it
1866         }
1867
1868         g_assert (dbl < 1E7);
1869         if (dbl < 1E6 && power < DECMAX) {
1870                 dbl *= 10;
1871                 power++;
1872                 g_assert(dbl >= 1E6);
1873         }
1874
1875         // Round to integer
1876         //
1877         mant = (int32_t)dbl;
1878         dbl -= (double)mant;  // difference between input & integer
1879         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1880                 mant++;
1881
1882         if (mant == 0) {
1883                 DECIMAL_SETZERO(*result);
1884                 return MONO_DECIMAL_OK;
1885         }
1886
1887         if (power < 0) {
1888                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1889                 //
1890                 power = -power;
1891                 if (power < 10) {
1892                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1893
1894                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1895                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1896                         DECIMAL_HI32(*result) = 0;
1897                 } else {
1898                         // Have a big power of 10.
1899                         //
1900                         if (power > 18) {
1901                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1902                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1903
1904                                 if (sdlHi.u.Hi != 0)
1905                                         return MONO_DECIMAL_OVERFLOW;
1906                         }
1907                         else {
1908                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1909                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1910                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1911                                 sdlHi.int64 += sdlLo.u.Hi;
1912                                 sdlLo.u.Hi = sdlHi.u.Lo;
1913                                 sdlHi.u.Lo = sdlHi.u.Hi;
1914                         }
1915                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1916                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1917                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1918                 }
1919                 DECIMAL_SCALE(*result) = 0;
1920         } else {
1921                 // Factor out powers of 10 to reduce the scale, if possible.
1922                 // The maximum number we could factor out would be 6.  This
1923                 // comes from the fact we have a 7-digit number, and the
1924                 // MSD must be non-zero -- but the lower 6 digits could be
1925                 // zero.  Note also the scale factor is never negative, so
1926                 // we can't scale by any more than the power we used to
1927                 // get the integer.
1928                 //
1929                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1930                 //
1931                 lmax = min(power, 6);
1932
1933                 // lmax is the largest power of 10 to try, lmax <= 6.
1934                 // We'll try powers 4, 2, and 1 unless they're too big.
1935                 //
1936                 for (cur = 4; cur > 0; cur >>= 1)
1937                 {
1938                         if (cur > lmax)
1939                                 continue;
1940
1941                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1942
1943                         if (sdlLo.u.Hi == 0) {
1944                                 mant = sdlLo.u.Lo;
1945                                 power -= cur;
1946                                 lmax -= cur;
1947                         }
1948                 }
1949                 DECIMAL_LO32(*result) = mant;
1950                 DECIMAL_MID32(*result) = 0;
1951                 DECIMAL_HI32(*result) = 0;
1952                 DECIMAL_SCALE(*result) = power;
1953         }
1954
1955         DECIMAL_SIGN(*result) = (char)((SingleStructure *)&input)->sign << 7;
1956         return MONO_DECIMAL_OK;
1957 }
1958
1959 //
1960 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1961 static MonoDecimalStatus
1962 VarDecFromR8 (double input, MonoDecimal *result)
1963 {
1964         int         exp;    // number of bits to left of binary point
1965         int         power;  // power-of-10 scale factor
1966         SPLIT64     sdlMant;
1967         SPLIT64     sdlLo;
1968         double      dbl;
1969         int         lmax, cur;  // temps used during scale reduction
1970         uint32_t       pwr_cur;
1971         uint32_t       quo;
1972
1973
1974         // The most we can scale by is 10^28, which is just slightly more
1975         // than 2^93.  So a float with an exponent of -94 could just
1976         // barely reach 0.5, but smaller exponents will always round to zero.
1977         //
1978         if ((exp = ((DoubleStructure *)&input)->u.exp - DBLBIAS) < -94) {
1979                 DECIMAL_SETZERO(*result);
1980                 return MONO_DECIMAL_OK;
1981         }
1982
1983         if (exp > 96)
1984                 return MONO_DECIMAL_OVERFLOW;
1985
1986         // Round the input to a 15-digit integer.  The R8 format has
1987         // only 15 digits of precision, and we want to keep garbage digits
1988         // out of the Decimal were making.
1989         //
1990         // Calculate max power of 10 input value could have by multiplying
1991         // the exponent by log10(2).  Using scaled integer multiplcation,
1992         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1993         //
1994         dbl = fabs(input);
1995         power = 14 - ((exp * 19728) >> 16);
1996
1997         if (power >= 0) {
1998                 // We have less than 15 digits, scale input up.
1999                 //
2000                 if (power > DECMAX)
2001                         power = DECMAX;
2002
2003                 dbl = dbl * double_power10[power];
2004         } else {
2005                 if (power != -1 || dbl >= 1E15)
2006                         dbl = dbl / fnDblPower10(-power);
2007                 else
2008                         power = 0; // didn't scale it
2009         }
2010
2011         g_assert (dbl < 1E15);
2012         if (dbl < 1E14 && power < DECMAX) {
2013                 dbl *= 10;
2014                 power++;
2015                 g_assert(dbl >= 1E14);
2016         }
2017
2018         // Round to int64
2019         //
2020         sdlMant.int64 = (int64_t)dbl;
2021         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
2022         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
2023                 sdlMant.int64++;
2024
2025         if (sdlMant.int64 == 0) {
2026                 DECIMAL_SETZERO(*result);
2027                 return MONO_DECIMAL_OK;
2028         }
2029
2030         if (power < 0) {
2031                 // Add -power factors of 10, -power <= (29 - 15) = 14.
2032                 //
2033                 power = -power;
2034                 if (power < 10) {
2035                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2036                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2037                         sdlMant.int64 += sdlLo.u.Hi;
2038                         sdlLo.u.Hi = sdlMant.u.Lo;
2039                         sdlMant.u.Lo = sdlMant.u.Hi;
2040                 }
2041                 else {
2042                         // Have a big power of 10.
2043                         //
2044                         g_assert(power <= 14);
2045                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2046
2047                         if (sdlMant.u.Hi != 0)
2048                                 return MONO_DECIMAL_OVERFLOW;
2049                 }
2050                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2051                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2052                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2053                 DECIMAL_SCALE(*result) = 0;
2054         }
2055         else {
2056                 // Factor out powers of 10 to reduce the scale, if possible.
2057                 // The maximum number we could factor out would be 14.  This
2058                 // comes from the fact we have a 15-digit number, and the
2059                 // MSD must be non-zero -- but the lower 14 digits could be
2060                 // zero.  Note also the scale factor is never negative, so
2061                 // we can't scale by any more than the power we used to
2062                 // get the integer.
2063                 //
2064                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2065                 //
2066                 lmax = min(power, 14);
2067
2068                 // lmax is the largest power of 10 to try, lmax <= 14.
2069                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2070                 //
2071                 for (cur = 8; cur > 0; cur >>= 1)
2072                 {
2073                         if (cur > lmax)
2074                                 continue;
2075
2076                         pwr_cur = (uint32_t)long_power10[cur];
2077
2078                         if (sdlMant.u.Hi >= pwr_cur) {
2079                                 // Overflow if we try to divide in one step.
2080                                 //
2081                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2082                                 quo = sdlLo.u.Lo;
2083                                 sdlLo.u.Lo = sdlMant.u.Lo;
2084                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2085                         }
2086                         else {
2087                                 quo = 0;
2088                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2089                         }
2090
2091                         if (sdlLo.u.Hi == 0) {
2092                                 sdlMant.u.Hi = quo;
2093                                 sdlMant.u.Lo = sdlLo.u.Lo;
2094                                 power -= cur;
2095                                 lmax -= cur;
2096                         }
2097                 }
2098
2099                 DECIMAL_HI32(*result) = 0;
2100                 DECIMAL_SCALE(*result) = power;
2101                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2102                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2103         }
2104
2105         DECIMAL_SIGN(*result) = (char)((DoubleStructure *)&input)->u.sign << 7;
2106         return MONO_DECIMAL_OK;
2107 }
2108
2109 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2110 static MonoDecimalStatus
2111 VarR8FromDec(MonoDecimal *input, double *result)
2112 {
2113         SPLIT64  tmp;
2114         double   dbl;
2115
2116         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2117                 return MONO_DECIMAL_INVALID_ARGUMENT;
2118
2119         tmp.u.Lo = DECIMAL_LO32(*input);
2120         tmp.u.Hi = DECIMAL_MID32(*input);
2121
2122         if ((int32_t)DECIMAL_MID32(*input) < 0)
2123                 dbl = (ds2to64.dbl + (double)(int64_t)tmp.int64 +
2124                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2125         else
2126                 dbl = ((double)(int64_t)tmp.int64 +
2127                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input));
2128
2129         if (DECIMAL_SIGN(*input))
2130                 dbl = -dbl;
2131
2132         *result = dbl;
2133         return MONO_DECIMAL_OK;
2134 }
2135
2136 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2137 static MonoDecimalStatus
2138 VarR4FromDec(MonoDecimal *input, float *result)
2139 {
2140         double   dbl;
2141
2142         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2143                 return MONO_DECIMAL_INVALID_ARGUMENT;
2144
2145         // Can't overflow; no errors possible.
2146         //
2147         VarR8FromDec(input, &dbl);
2148         *result = (float)dbl;
2149         return MONO_DECIMAL_OK;
2150 }
2151
2152 static void
2153 DecShiftLeft(MonoDecimal* value)
2154 {
2155         unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2156     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2157     g_assert(value != NULL);
2158
2159     DECIMAL_LO32(*value) <<= 1;
2160     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2161     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2162 }
2163
2164 static int
2165 D32AddCarry(uint32_t* value, uint32_t i)
2166 {
2167     uint32_t v = *value;
2168     uint32_t sum = v + i;
2169     *value = sum;
2170     return sum < v || sum < i? 1: 0;
2171 }
2172
2173 static void
2174 DecAdd(MonoDecimal *value, MonoDecimal* d)
2175 {
2176         g_assert(value != NULL && d != NULL);
2177
2178         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2179                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2180                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2181                 }
2182         }
2183         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2184                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2185         }
2186         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2187 }
2188
2189 static void
2190 DecMul10(MonoDecimal* value)
2191 {
2192         MonoDecimal d = *value;
2193         g_assert (value != NULL);
2194
2195         DecShiftLeft(value);
2196         DecShiftLeft(value);
2197         DecAdd(value, &d);
2198         DecShiftLeft(value);
2199 }
2200
2201 static void
2202 DecAddInt32(MonoDecimal* value, unsigned int i)
2203 {
2204         g_assert(value != NULL);
2205
2206         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2207                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2208                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2209                 }
2210         }
2211 }
2212
2213 MonoDecimalCompareResult
2214 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2215 {
2216         uint32_t   left_sign;
2217         uint32_t   right_sign;
2218         MonoDecimal result;
2219
2220         // First check signs and whether either are zero.  If both are
2221         // non-zero and of the same sign, just use subtraction to compare.
2222         //
2223         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2224         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2225         if (left_sign != 0)
2226                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2227
2228         if (right_sign != 0)
2229                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2230
2231         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2232         // operand is +, 0, or -.
2233         //
2234         if (left_sign == right_sign) {
2235                 if (left_sign == 0)    // both are zero
2236                         return MONO_DECIMAL_CMP_EQ; // return equal
2237
2238                 DecAddSub(left, right, &result, DECIMAL_NEG);
2239                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2240                         return MONO_DECIMAL_CMP_EQ;
2241                 if (result.u.u.sign & DECIMAL_NEG)
2242                         return MONO_DECIMAL_CMP_LT;
2243                 return MONO_DECIMAL_CMP_GT;
2244         }
2245
2246         //
2247         // Signs are different.  Used signed byte compares
2248         //
2249         if ((char)left_sign > (char)right_sign)
2250                 return MONO_DECIMAL_CMP_GT;
2251         return MONO_DECIMAL_CMP_LT;
2252 }
2253
2254 void
2255 mono_decimal_init_single (MonoDecimal *_this, float value)
2256 {
2257         if (VarDecFromR4 (value, _this) == MONO_DECIMAL_OVERFLOW)
2258                 mono_raise_exception (mono_get_exception_overflow ());
2259         _this->reserved = 0;
2260 }
2261
2262 void
2263 mono_decimal_init_double (MonoDecimal *_this, double value)
2264 {
2265         if (VarDecFromR8 (value, _this) == MONO_DECIMAL_OVERFLOW)
2266                 mono_raise_exception (mono_get_exception_overflow ());
2267         _this->reserved = 0;
2268 }
2269
2270 void
2271 mono_decimal_floor (MonoDecimal *d)
2272 {
2273         MonoDecimal decRes;
2274
2275         VarDecInt(d, &decRes);
2276
2277         // copy decRes into d
2278         COPYDEC(*d, decRes);
2279         d->reserved = 0;
2280         FC_GC_POLL ();
2281 }
2282
2283 int32_t
2284 mono_decimal_get_hash_code (MonoDecimal *d)
2285 {
2286         double dbl;
2287
2288         if (VarR8FromDec(d, &dbl) != MONO_DECIMAL_OK)
2289                 return 0;
2290
2291         if (dbl == 0.0) {
2292                 // Ensure 0 and -0 have the same hash code
2293                 return 0;
2294         }
2295         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2296         //
2297         // For example these two numerically equal decimals with different internal representations produce
2298         // slightly different results when converted to double:
2299         //
2300         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2301         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2302         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2303         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2304         //
2305         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2306
2307 }
2308
2309 void
2310 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2311 {
2312         MonoDecimal decRes;
2313
2314         MonoDecimalStatus status = VarDecMul(d1, d2, &decRes);
2315         if (status != MONO_DECIMAL_OK)
2316                 mono_raise_exception (mono_get_exception_overflow ());
2317
2318         COPYDEC(*d1, decRes);
2319         d1->reserved = 0;
2320
2321         FC_GC_POLL ();
2322 }
2323
2324 void
2325 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2326 {
2327         MonoDecimal decRes;
2328
2329         // GC is only triggered for throwing, no need to protect result
2330         if (decimals < 0 || decimals > 28)
2331                 mono_raise_exception (mono_get_exception_argument_out_of_range ("d"));
2332
2333         VarDecRound(d, decimals, &decRes);
2334
2335         // copy decRes into d
2336         COPYDEC(*d, decRes);
2337         d->reserved = 0;
2338
2339         FC_GC_POLL();
2340 }
2341
2342 void
2343 mono_decimal_tocurrency (MonoDecimal *decimal)
2344 {
2345         // TODO
2346 }
2347
2348 double
2349 mono_decimal_to_double (MonoDecimal d)
2350 {
2351         double result = 0.0;
2352         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2353         VarR8FromDec(&d, &result);
2354         return result;
2355 }
2356
2357 int32_t
2358 mono_decimal_to_int32 (MonoDecimal d)
2359 {
2360         MonoDecimal result;
2361
2362         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2363         VarDecRound(&d, 0, &result);
2364
2365         if (DECIMAL_SCALE(result) != 0) {
2366                 d = result;
2367                 VarDecFix (&d, &result);
2368         }
2369
2370         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2371                 int32_t i = DECIMAL_LO32(result);
2372                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2373                         if (i >= 0)
2374                                 return i;
2375                 } else {
2376                         i = -i;
2377                         if (i <= 0)
2378                                 return i;
2379                 }
2380         }
2381
2382         mono_raise_exception (mono_get_exception_overflow ());
2383         // Not reachable
2384         return 0;
2385 }
2386
2387 float
2388 mono_decimal_to_float (MonoDecimal d)
2389 {
2390         float result = 0.0f;
2391         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2392         VarR4FromDec(&d, &result);
2393         return result;
2394 }
2395
2396 void
2397 mono_decimal_truncate (MonoDecimal *d)
2398 {
2399         MonoDecimal decRes;
2400
2401         VarDecFix(d, &decRes);
2402
2403         // copy decRes into d
2404         COPYDEC(*d, decRes);
2405         d->reserved = 0;
2406         FC_GC_POLL();
2407 }
2408
2409 void
2410 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2411 {
2412         MonoDecimal result, decTmp;
2413         MonoDecimal *pdecTmp, *leftOriginal;
2414         uint32_t    num[6], pwr;
2415         int         scale, hi_prod, cur;
2416         SPLIT64     sdlTmp;
2417
2418         g_assert(sign == 0 || sign == DECIMAL_NEG);
2419
2420         leftOriginal = left;
2421
2422         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2423
2424         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2425                 // Scale factors are equal, no alignment necessary.
2426                 //
2427                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2428
2429         AlignedAdd:
2430                 if (sign) {
2431                         // Signs differ - subtract
2432                         //
2433                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2434                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2435
2436                         // Propagate carry
2437                         //
2438                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2439                                 DECIMAL_HI32(result)--;
2440                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2441                                         goto SignFlip;
2442                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2443                                 // Got negative result.  Flip its sign.
2444                                 //
2445                         SignFlip:
2446                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2447                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2448                                 if (DECIMAL_LO64_GET(result) == 0)
2449                                         DECIMAL_HI32(result)++;
2450                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2451                         }
2452
2453                 } else {
2454                         // Signs are the same - add
2455                         //
2456                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2457                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2458
2459                         // Propagate carry
2460                         //
2461                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2462                                 DECIMAL_HI32(result)++;
2463                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2464                                         goto AlignedScale;
2465                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2466                         AlignedScale:
2467                                 // The addition carried above 96 bits.  Divide the result by 10,
2468                                 // dropping the scale factor.
2469                                 //
2470                                 if (DECIMAL_SCALE(result) == 0)
2471                                         mono_raise_exception (mono_get_exception_overflow ());
2472                                 DECIMAL_SCALE(result)--;
2473
2474                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2475                                 sdlTmp.u.Hi = 1;
2476                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2477                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2478
2479                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2480                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2481                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2482
2483                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2484                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2485                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2486
2487                                 // See if we need to round up.
2488                                 //
2489                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2490                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2491                                         if (DECIMAL_LO64_GET(result) == 0)
2492                                                 DECIMAL_HI32(result)++;
2493                                 }
2494                         }
2495                 }
2496         } else {
2497                 // Scale factors are not equal.  Assume that a larger scale
2498                 // factor (more decimal places) is likely to mean that number
2499                 // is smaller.  Start by guessing that the right operand has
2500                 // the larger scale factor.  The result will have the larger
2501                 // scale factor.
2502                 //
2503                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2504                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2505                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2506
2507                 if (scale < 0) {
2508                         // Guessed scale factor wrong. Swap operands.
2509                         //
2510                         scale = -scale;
2511                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2512                         DECIMAL_SIGN(result) ^= sign;
2513                         pdecTmp = right;
2514                         right = left;
2515                         left = pdecTmp;
2516                 }
2517
2518                 // *left will need to be multiplied by 10^scale so
2519                 // it will have the same scale as *right.  We could be
2520                 // extending it to up to 192 bits of precision.
2521                 //
2522                 if (scale <= POWER10_MAX) {
2523                         // Scaling won't make it larger than 4 uint32_ts
2524                         //
2525                         pwr = power10[scale];
2526                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2527                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2528                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2529                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2530                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2531                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2532                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2533                         if (sdlTmp.u.Hi == 0) {
2534                                 // Result fits in 96 bits.  Use standard aligned add.
2535                                 //
2536                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2537                                 left = &decTmp;
2538                                 goto AlignedAdd;
2539                         }
2540                         num[0] = DECIMAL_LO32(decTmp);
2541                         num[1] = DECIMAL_MID32(decTmp);
2542                         num[2] = sdlTmp.u.Lo;
2543                         num[3] = sdlTmp.u.Hi;
2544                         hi_prod = 3;
2545                 } else {
2546                         // Have to scale by a bunch.  Move the number to a buffer
2547                         // where it has room to grow as it's scaled.
2548                         //
2549                         num[0] = DECIMAL_LO32(*left);
2550                         num[1] = DECIMAL_MID32(*left);
2551                         num[2] = DECIMAL_HI32(*left);
2552                         hi_prod = 2;
2553
2554                         // Scan for zeros in the upper words.
2555                         //
2556                         if (num[2] == 0) {
2557                                 hi_prod = 1;
2558                                 if (num[1] == 0) {
2559                                         hi_prod = 0;
2560                                         if (num[0] == 0) {
2561                                                 // Left arg is zero, return right.
2562                                                 //
2563                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2564                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2565                                                 DECIMAL_SIGN(result) ^= sign;
2566                                                 goto RetDec;
2567                                         }
2568                                 }
2569                         }
2570
2571                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2572                         // with index of highest non-zero uint32_t.
2573                         //
2574                         for (; scale > 0; scale -= POWER10_MAX) {
2575                                 if (scale > POWER10_MAX)
2576                                         pwr = ten_to_nine;
2577                                 else
2578                                         pwr = power10[scale];
2579
2580                                 sdlTmp.u.Hi = 0;
2581                                 for (cur = 0; cur <= hi_prod; cur++) {
2582                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2583                                         num[cur] = sdlTmp.u.Lo;
2584                                 }
2585
2586                                 if (sdlTmp.u.Hi != 0)
2587                                         // We're extending the result by another uint32_t.
2588                                         num[++hi_prod] = sdlTmp.u.Hi;
2589                         }
2590                 }
2591
2592                 // Scaling complete, do the add.  Could be subtract if signs differ.
2593                 //
2594                 sdlTmp.u.Lo = num[0];
2595                 sdlTmp.u.Hi = num[1];
2596
2597                 if (sign) {
2598                         // Signs differ, subtract.
2599                         //
2600                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2601                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2602
2603                         // Propagate carry
2604                         //
2605                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2606                                 DECIMAL_HI32(result)--;
2607                                 if (DECIMAL_HI32(result) >= num[2])
2608                                         goto LongSub;
2609                         } else if (DECIMAL_HI32(result) > num[2]) {
2610                         LongSub:
2611                                 // If num has more than 96 bits of precision, then we need to
2612                                 // carry the subtraction into the higher bits.  If it doesn't,
2613                                 // then we subtracted in the wrong order and have to flip the
2614                                 // sign of the result.
2615                                 //
2616                                 if (hi_prod <= 2)
2617                                         goto SignFlip;
2618
2619                                 cur = 3;
2620                                 while(num[cur++]-- == 0);
2621                                 if (num[hi_prod] == 0)
2622                                         hi_prod--;
2623                         }
2624                 } else {
2625                         // Signs the same, add.
2626                         //
2627                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2628                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2629
2630                         // Propagate carry
2631                         //
2632                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2633                                 DECIMAL_HI32(result)++;
2634                                 if (DECIMAL_HI32(result) <= num[2])
2635                                         goto LongAdd;
2636                         } else if (DECIMAL_HI32(result) < num[2]) {
2637                         LongAdd:
2638                                 // Had a carry above 96 bits.
2639                                 //
2640                                 cur = 3;
2641                                 do {
2642                                         if (hi_prod < cur) {
2643                                                 num[cur] = 1;
2644                                                 hi_prod = cur;
2645                                                 break;
2646                                         }
2647                                 }while (++num[cur++] == 0);
2648                         }
2649                 }
2650
2651                 if (hi_prod > 2) {
2652                         num[0] = DECIMAL_LO32(result);
2653                         num[1] = DECIMAL_MID32(result);
2654                         num[2] = DECIMAL_HI32(result);
2655                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2656                         if (DECIMAL_SCALE(result) == (uint8_t)-1)
2657                                 mono_raise_exception (mono_get_exception_overflow ());
2658
2659                         DECIMAL_LO32(result) = num[0];
2660                         DECIMAL_MID32(result) = num[1];
2661                         DECIMAL_HI32(result) = num[2];
2662                 }
2663         }
2664
2665 RetDec:
2666         left = leftOriginal;
2667         COPYDEC(*left, result);
2668         left->reserved = 0;
2669 }
2670
2671 void
2672 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2673 {
2674         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2675         uint32_t pwr, tmp, tmp1;
2676         SPLIT64  sdlTmp, sdlDivisor;
2677         int      scale, cur_scale;
2678         gboolean unscale;
2679
2680         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2681         unscale = FALSE;
2682         divisor[0] = DECIMAL_LO32(*right);
2683         divisor[1] = DECIMAL_MID32(*right);
2684         divisor[2] = DECIMAL_HI32(*right);
2685
2686         if (divisor[1] == 0 && divisor[2] == 0) {
2687                 // Divisor is only 32 bits.  Easy divide.
2688                 //
2689                 if (divisor[0] == 0)
2690                         mono_raise_exception (mono_get_exception_divide_by_zero ());
2691
2692                 quo[0] = DECIMAL_LO32(*left);
2693                 quo[1] = DECIMAL_MID32(*left);
2694                 quo[2] = DECIMAL_HI32(*left);
2695                 rem[0] = Div96By32(quo, divisor[0]);
2696
2697                 for (;;) {
2698                         if (rem[0] == 0) {
2699                                 if (scale < 0) {
2700                                         cur_scale = min(9, -scale);
2701                                         goto HaveScale;
2702                                 }
2703                                 break;
2704                         }
2705                         // We need to unscale if and only if we have a non-zero remainder
2706                         unscale = TRUE;
2707
2708                         // We have computed a quotient based on the natural scale
2709                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
2710                         // remainder, so now we should increase the scale if possible to
2711                         // include more quotient bits.
2712                         //
2713                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
2714                         // computing more quotient bits as long as the remainder stays
2715                         // non-zero.  If scaling by that much would cause overflow, we'll
2716                         // drop out of the loop and scale by as much as we can.
2717                         //
2718                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2719                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2720                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2721                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
2722                         // is the largest value in quo[1] (when quo[2] == 4) that is
2723                         // assured not to overflow.
2724                         //
2725                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2726                         if (cur_scale == 0) {
2727                                 // No more scaling to be done, but remainder is non-zero.
2728                                 // Round quotient.
2729                                 //
2730                                 tmp = rem[0] << 1;
2731                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2732                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2733                                 RoundUp:
2734                                         if (!Add32To96(quo, 1)) {
2735                                                 if (scale == 0)
2736                                                         mono_raise_exception (mono_get_exception_overflow ());
2737                                                 scale--;
2738                                                 OverflowUnscale(quo, TRUE);
2739                                                 break;
2740                                         }
2741                                 }
2742                                 break;
2743                         }
2744
2745                         if (cur_scale < 0)
2746                                 mono_raise_exception (mono_get_exception_overflow ());
2747
2748                 HaveScale:
2749                         pwr = power10[cur_scale];
2750                         scale += cur_scale;
2751
2752                         if (IncreaseScale(quo, pwr) != 0)
2753                                 mono_raise_exception (mono_get_exception_overflow ());
2754
2755
2756                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2757                         rem[0] = sdlTmp.u.Hi;
2758
2759                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2760                                 if (scale == 0)
2761                                         mono_raise_exception (mono_get_exception_overflow ());
2762                                 scale--;
2763                                 OverflowUnscale(quo, (rem[0] != 0));
2764                                 break;
2765                         }
2766                 } // for (;;)
2767         } else {
2768                 // Divisor has bits set in the upper 64 bits.
2769                 //
2770                 // Divisor must be fully normalized (shifted so bit 31 of the most
2771                 // significant uint32_t is 1).  Locate the MSB so we know how much to
2772                 // normalize by.  The dividend will be shifted by the same amount so
2773                 // the quotient is not changed.
2774                 //
2775                 if (divisor[2] == 0)
2776                         tmp = divisor[1];
2777                 else
2778                         tmp = divisor[2];
2779
2780                 cur_scale = 0;
2781                 if (!(tmp & 0xFFFF0000)) {
2782                         cur_scale += 16;
2783                         tmp <<= 16;
2784                 }
2785                 if (!(tmp & 0xFF000000)) {
2786                         cur_scale += 8;
2787                         tmp <<= 8;
2788                 }
2789                 if (!(tmp & 0xF0000000)) {
2790                         cur_scale += 4;
2791                         tmp <<= 4;
2792                 }
2793                 if (!(tmp & 0xC0000000)) {
2794                         cur_scale += 2;
2795                         tmp <<= 2;
2796                 }
2797                 if (!(tmp & 0x80000000)) {
2798                         cur_scale++;
2799                         tmp <<= 1;
2800                 }
2801
2802                 // Shift both dividend and divisor left by cur_scale.
2803                 //
2804                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2805                 rem[0] = sdlTmp.u.Lo;
2806                 rem[1] = sdlTmp.u.Hi;
2807                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2808                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2809                 sdlTmp.int64 <<= cur_scale;
2810                 rem[2] = sdlTmp.u.Hi;
2811                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2812
2813                 sdlDivisor.u.Lo = divisor[0];
2814                 sdlDivisor.u.Hi = divisor[1];
2815                 sdlDivisor.int64 <<= cur_scale;
2816
2817                 if (divisor[2] == 0) {
2818                         // Have a 64-bit divisor in sdlDivisor.  The remainder
2819                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2820                         //
2821                         sdlTmp.u.Lo = rem[2];
2822                         sdlTmp.u.Hi = rem[3];
2823
2824                         quo[2] = 0;
2825                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2826                         quo[0] = Div96By64(rem, sdlDivisor);
2827
2828                         for (;;) {
2829                                 if ((rem[0] | rem[1]) == 0) {
2830                                         if (scale < 0) {
2831                                                 cur_scale = min(9, -scale);
2832                                                 goto HaveScale64;
2833                                         }
2834                                         break;
2835                                 }
2836
2837                                 // We need to unscale if and only if we have a non-zero remainder
2838                                 unscale = TRUE;
2839
2840                                 // Remainder is non-zero.  Scale up quotient and remainder by
2841                                 // powers of 10 so we can compute more significant bits.
2842                                 //
2843                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2844                                 if (cur_scale == 0) {
2845                                         // No more scaling to be done, but remainder is non-zero.
2846                                         // Round quotient.
2847                                         //
2848                                         sdlTmp.u.Lo = rem[0];
2849                                         sdlTmp.u.Hi = rem[1];
2850                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2851                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2852                                                 goto RoundUp;
2853                                         break;
2854                                 }
2855
2856                                 if (cur_scale < 0)
2857                                         mono_raise_exception (mono_get_exception_overflow ());
2858
2859                         HaveScale64:
2860                                 pwr = power10[cur_scale];
2861                                 scale += cur_scale;
2862
2863                                 if (IncreaseScale(quo, pwr) != 0)
2864                                         mono_raise_exception (mono_get_exception_overflow ());
2865
2866                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2867                                 IncreaseScale(rem, pwr);
2868                                 tmp = Div96By64(rem, sdlDivisor);
2869                                 if (!Add32To96(quo, tmp)) {
2870                                         if (scale == 0)
2871                                                 mono_raise_exception (mono_get_exception_overflow ());
2872                                         scale--;
2873                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2874                                         break;
2875                                 }
2876
2877                         } // for (;;)
2878                 } else {
2879                         // Have a 96-bit divisor in divisor[].
2880                         //
2881                         // Start by finishing the shift left by cur_scale.
2882                         //
2883                         sdlTmp.u.Lo = divisor[1];
2884                         sdlTmp.u.Hi = divisor[2];
2885                         sdlTmp.int64 <<= cur_scale;
2886                         divisor[0] = sdlDivisor.u.Lo;
2887                         divisor[1] = sdlDivisor.u.Hi;
2888                         divisor[2] = sdlTmp.u.Hi;
2889
2890                         // The remainder (currently 96 bits spread over 4 uint32_ts)
2891                         // will be < divisor.
2892                         //
2893                         quo[2] = 0;
2894                         quo[1] = 0;
2895                         quo[0] = Div128By96(rem, divisor);
2896
2897                         for (;;) {
2898                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2899                                         if (scale < 0) {
2900                                                 cur_scale = min(9, -scale);
2901                                                 goto HaveScale96;
2902                                         }
2903                                         break;
2904                                 }
2905
2906                                 // We need to unscale if and only if we have a non-zero remainder
2907                                 unscale = TRUE;
2908
2909                                 // Remainder is non-zero.  Scale up quotient and remainder by
2910                                 // powers of 10 so we can compute more significant bits.
2911                                 //
2912                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2913                                 if (cur_scale == 0) {
2914                                         // No more scaling to be done, but remainder is non-zero.
2915                                         // Round quotient.
2916                                         //
2917                                         if (rem[2] >= 0x80000000)
2918                                                 goto RoundUp;
2919
2920                                         tmp = rem[0] > 0x80000000;
2921                                         tmp1 = rem[1] > 0x80000000;
2922                                         rem[0] <<= 1;
2923                                         rem[1] = (rem[1] << 1) + tmp;
2924                                         rem[2] = (rem[2] << 1) + tmp1;
2925
2926                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2927                                                 goto RoundUp;
2928                                         break;
2929                                 }
2930
2931                                 if (cur_scale < 0)
2932                                         mono_raise_exception (mono_get_exception_overflow ());
2933
2934                         HaveScale96:
2935                                 pwr = power10[cur_scale];
2936                                 scale += cur_scale;
2937
2938                                 if (IncreaseScale(quo, pwr) != 0)
2939                                         mono_raise_exception (mono_get_exception_overflow ());
2940
2941                                 rem[3] = IncreaseScale(rem, pwr);
2942                                 tmp = Div128By96(rem, divisor);
2943                                 if (!Add32To96(quo, tmp)) {
2944                                         if (scale == 0)
2945                                                 mono_raise_exception (mono_get_exception_overflow ());
2946
2947                                         scale--;
2948                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2949                                         break;
2950                                 }
2951
2952                         } // for (;;)
2953                 }
2954         }
2955
2956         // We need to unscale if and only if we have a non-zero remainder
2957         if (unscale) {
2958                 // Try extracting any extra powers of 10 we may have
2959                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2960                 // If a division by one of these powers returns a zero remainder, then
2961                 // we keep the quotient.  If the remainder is not zero, then we restore
2962                 // the previous value.
2963                 //
2964                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2965                 // we can extract.  We use this as a quick test on whether to try a
2966                 // given power.
2967                 //
2968                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2969                         quo_save[0] = quo[0];
2970                         quo_save[1] = quo[1];
2971                         quo_save[2] = quo[2];
2972
2973                         if (Div96By32(quo_save, 100000000) == 0) {
2974                                 quo[0] = quo_save[0];
2975                                 quo[1] = quo_save[1];
2976                                 quo[2] = quo_save[2];
2977                                 scale -= 8;
2978                         } else
2979                                 break;
2980                 }
2981
2982                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
2983                         quo_save[0] = quo[0];
2984                         quo_save[1] = quo[1];
2985                         quo_save[2] = quo[2];
2986
2987                         if (Div96By32(quo_save, 10000) == 0) {
2988                                 quo[0] = quo_save[0];
2989                                 quo[1] = quo_save[1];
2990                                 quo[2] = quo_save[2];
2991                                 scale -= 4;
2992                         }
2993                 }
2994
2995                 if ((quo[0] & 3) == 0 && scale >= 2) {
2996                         quo_save[0] = quo[0];
2997                         quo_save[1] = quo[1];
2998                         quo_save[2] = quo[2];
2999
3000                         if (Div96By32(quo_save, 100) == 0) {
3001                                 quo[0] = quo_save[0];
3002                                 quo[1] = quo_save[1];
3003                                 quo[2] = quo_save[2];
3004                                 scale -= 2;
3005                         }
3006                 }
3007
3008                 if ((quo[0] & 1) == 0 && scale >= 1) {
3009                         quo_save[0] = quo[0];
3010                         quo_save[1] = quo[1];
3011                         quo_save[2] = quo[2];
3012
3013                         if (Div96By32(quo_save, 10) == 0) {
3014                                 quo[0] = quo_save[0];
3015                                 quo[1] = quo_save[1];
3016                                 quo[2] = quo_save[2];
3017                                 scale -= 1;
3018                         }
3019                 }
3020         }
3021
3022         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3023         DECIMAL_HI32(*left) = quo[2];
3024         DECIMAL_MID32(*left) = quo[1];
3025         DECIMAL_LO32(*left) = quo[0];
3026         DECIMAL_SCALE(*left) = (uint8_t)scale;
3027         left->reserved = 0;
3028
3029 }
3030
3031 #define DECIMAL_PRECISION 29
3032 #define NUMBER_MAXDIGITS 50
3033 typedef struct  {
3034         int32_t precision;
3035         int32_t scale;
3036         int32_t sign;
3037         uint16_t digits[NUMBER_MAXDIGITS + 1];
3038         uint16_t* allDigits;
3039 } CLRNumber;
3040
3041 int
3042 mono_decimal_from_number (void *from, MonoDecimal *target)
3043 {
3044         CLRNumber *number = (CLRNumber *) from;
3045         uint16_t* p = number->digits;
3046         MonoDecimal d;
3047         int e = number->scale;
3048         g_assert(number != NULL);
3049         g_assert(target != NULL);
3050
3051         d.reserved = 0;
3052         DECIMAL_SIGNSCALE(d) = 0;
3053         DECIMAL_HI32(d) = 0;
3054         DECIMAL_LO32(d) = 0;
3055         DECIMAL_MID32(d) = 0;
3056         g_assert(p != NULL);
3057         if (!*p) {
3058                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3059                 // the scale to 0 if the scale was previously positive
3060                 if (e > 0) {
3061                         e = 0;
3062                 }
3063         } else {
3064                 if (e > DECIMAL_PRECISION) return 0;
3065                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3066                         DecMul10(&d);
3067                         if (*p)
3068                                 DecAddInt32(&d, *p++ - '0');
3069                         e--;
3070                 }
3071                 if (*p++ >= '5') {
3072                         gboolean round = TRUE;
3073                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3074                                 // For digits > 5 we will be roundinp up anyway.
3075                                 int count = 20; // Look at the next 20 digits to check to round
3076                                 while (*p == '0' && count != 0) {
3077                                         p++;
3078                                         count--;
3079                                 }
3080                                 if (*p == '\0' || count == 0)
3081                                         round = FALSE;// Do nothing
3082                         }
3083
3084                         if (round) {
3085                                 DecAddInt32(&d, 1);
3086                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3087                                         DECIMAL_HI32(d) = 0x19999999;
3088                                         DECIMAL_MID32(d) = 0x99999999;
3089                                         DECIMAL_LO32(d) = 0x9999999A;
3090                                         e++;
3091                                 }
3092                         }
3093                 }
3094         }
3095         if (e > 0)
3096                 return 0;
3097         if (e <= -DECIMAL_PRECISION) {
3098                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3099                 // This should only happen for actual zeros or very small numbers that round to zero.
3100                 DECIMAL_SIGNSCALE(d) = 0;
3101                 DECIMAL_HI32(d) = 0;
3102                 DECIMAL_LO32(d) = 0;
3103                 DECIMAL_MID32(d) = 0;
3104                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3105         } else {
3106                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3107         }
3108
3109         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3110         *target = d;
3111         return 1;
3112 }
3113
3114
3115 #endif