mono/metadata/decimal-ms.c

   1 //
   2 // Copyright (c) Microsoft. All rights reserved.
   3 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
   4 //
   5 // Copyright 2015 Xamarin Inc
   6 //
   7 // File: decimal.c
   8 //
   9 // Ported from C++ to C and adjusted to Mono runtime
  10 //
  11 // Pending:
  12 //   DoToCurrency (they look like new methods we do not have)
  13 //
  14 #ifndef DISABLE_DECIMAL
  15 #include "config.h"
  16 #include <stdint.h>
  17 #include <glib.h>
  18 #include <mono/utils/mono-compiler.h>
  19 #include <mono/metadata/exception.h>
  20 #include <mono/metadata/object-internals.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <math.h>
  25 #ifdef HAVE_MEMORY_H
  26 #include <memory.h>
  27 #endif
  28 #ifdef _MSC_VER
  29 #include <intrin.h>
  30 #endif
  31 #include "decimal-ms.h"
  32
  33 #define min(a, b) (((a) < (b)) ? (a) : (b))
  34
  35 typedef enum {
  36         MONO_DECIMAL_OK,
  37         MONO_DECIMAL_OVERFLOW,
  38         MONO_DECIMAL_INVALID_ARGUMENT,
  39         MONO_DECIMAL_DIVBYZERO,
  40         MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
  41 } MonoDecimalStatus;
  42
  43 #ifndef FC_GC_POLL
  44 #   define FC_GC_POLL()
  45 #endif
  46
  47 static const uint32_t ten_to_nine    = 1000000000U;
  48 static const uint32_t ten_to_ten_div_4 = 2500000000U;
  49 #define POWER10_MAX     9
  50 #define DECIMAL_NEG ((uint8_t)0x80)
  51 #define DECMAX 28
  52 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
  53 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
  54 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
  55 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
  56 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
  57 #define DECIMAL_HI32(dec)        ((dec).Hi32)
  58 #define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
  59 #define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
  60
  61 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
  62 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
  63     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
  64
  65 #define DEC_SCALE_MAX   28
  66 #define POWER10_MAX     9
  67
  68 #define OVFL_MAX_9_HI   4
  69 #define OVFL_MAX_9_MID  1266874889
  70 #define OVFL_MAX_9_LO   3047500985u
  71
  72 #define OVFL_MAX_5_HI   42949
  73 #define OVFL_MAX_5_MID  2890341191
  74
  75 #define OVFL_MAX_1_HI   429496729
  76
  77 typedef union {
  78         uint64_t int64;
  79         struct {
  80 #if BYTE_ORDER == G_BIG_ENDIAN
  81         uint32_t Hi;
  82         uint32_t Lo;
  83 #else
  84         uint32_t Lo;
  85         uint32_t Hi;
  86 #endif
  87     } u;
  88 } SPLIT64;
  89
  90 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
  91 // Double Bias
  92 #define DBLBIAS 1022
  93
  94 // Structure to access an encoded double floating point
  95 typedef union{
  96     struct {
  97 #if BYTE_ORDER == G_BIG_ENDIAN
  98       unsigned int sign:1;
  99       unsigned int exp:11;
 100       unsigned int mantHi:20;
 101       unsigned int mantLo;
 102 #else // BIGENDIAN
 103       unsigned int mantLo;
 104       unsigned int mantHi:20;
 105       unsigned int exp:11;
 106       unsigned int sign:1;
 107 #endif
 108     } u;
 109     double dbl;
 110 } DoubleStructure;
 111
 112 #if BYTE_ORDER == G_BIG_ENDIAN
 113 #define DEFDS(Lo, Hi, exp, sign) { {sign, exp, Hi, Lo } }
 114 #else
 115 #define DEFDS(Lo, Hi, exp, sign) { {Lo, Hi, exp, sign} }
 116 #endif
 117
 118 const DoubleStructure ds2to64 = DEFDS(0, 0, DBLBIAS + 65, 0);
 119
 120 // Single floating point Bias
 121 #define SNGBIAS 126
 122
 123 // Structure to access an encoded single floating point
 124 typedef struct {
 125 #if BYTE_ORDER == G_BIG_ENDIAN
 126     unsigned int sign:1;
 127     unsigned int exp:8;
 128     unsigned int mant:23;
 129 #else
 130     unsigned int mant:23;
 131     unsigned int exp:8;
 132     unsigned int sign:1;
 133 #endif
 134 } SingleStructure;
 135
 136 //
 137 // Data tables
 138 //
 139
 140 static const uint32_t power10 [POWER10_MAX+1] = {
 141         1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
 142 };
 143
 144
 145 static const double double_power10[] = {
 146         1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
 147         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
 148         1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
 149         1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
 150         1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
 151         1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
 152         1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
 153         1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
 154         1e80 };
 155
 156 const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
 157                                 {100000000000ULL},         // 1E11
 158                                 {1000000000000ULL},        // 1E12
 159                                 {10000000000000ULL},       // 1E13
 160                                 {100000000000000ULL} };    // 1E14
 161
 162 static const uint64_t long_power10[] = {
 163         1,
 164         10ULL,
 165         100ULL,
 166         1000ULL,
 167         10000ULL,
 168         100000ULL,
 169         1000000ULL,
 170         10000000ULL,
 171         100000000ULL,
 172         1000000000ULL,
 173         10000000000ULL,
 174         100000000000ULL,
 175         1000000000000ULL,
 176         10000000000000ULL,
 177         100000000000000ULL,
 178         1000000000000000ULL,
 179         10000000000000000ULL,
 180         100000000000000000ULL,
 181         1000000000000000000ULL,
 182         10000000000000000000ULL};
 183
 184 typedef struct  {
 185         uint32_t Hi, Mid, Lo;
 186 } DECOVFL;
 187
 188 const DECOVFL power_overflow[] = {
 189 // This is a table of the largest values that can be in the upper two
 190 // ULONGs of a 96-bit number that will not overflow when multiplied
 191 // by a given power.  For the upper word, this is a table of
 192 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
 193 // remaining fraction part * 2^32.  2^32 = 4294967296.
 194 //
 195     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
 196     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
 197     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
 198     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
 199     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
 200     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
 201     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
 202     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
 203     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
 204 };
 205
 206
 207 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
 208 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
 209 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
 210
 211 static double
 212 fnDblPower10(int ix)
 213 {
 214     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
 215     g_assert(ix >= 0);
 216     if (ix < maxIx)
 217         return double_power10[ix];
 218     return pow(10.0, ix);
 219 } // double fnDblPower10()
 220
 221
 222 static inline int64_t
 223 DivMod32by32(int32_t num, int32_t den)
 224 {
 225     SPLIT64  sdl;
 226
 227     sdl.u.Lo = num / den;
 228     sdl.u.Hi = num % den;
 229     return sdl.int64;
 230 }
 231
 232 static inline int64_t
 233 DivMod64by32(int64_t num, int32_t den)
 234 {
 235     SPLIT64  sdl;
 236
 237     sdl.u.Lo = Div64by32(num, den);
 238     sdl.u.Hi = Mod64by32(num, den);
 239     return sdl.int64;
 240 }
 241
 242 static uint64_t
 243 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
 244 {
 245         SPLIT64  tmp1;
 246         SPLIT64  tmp2;
 247         SPLIT64  tmp3;
 248
 249         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
 250         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
 251         tmp1.u.Hi += tmp2.u.Lo;
 252         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 253                 tmp2.u.Hi++;
 254         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
 255         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
 256         tmp1.u.Hi += tmp2.u.Lo;
 257         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 258                 tmp2.u.Hi++;
 259         tmp3.int64 += (uint64_t)tmp2.u.Hi;
 260
 261         *hi = tmp3.int64;
 262         return tmp1.int64;
 263 }
 264
 265 /**
 266 * FullDiv64By32:
 267 *
 268 * Entry:
 269 *   pdlNum  - Pointer to 64-bit dividend
 270 *   ulDen   - 32-bit divisor
 271 *
 272 * Purpose:
 273 *   Do full divide, yielding 64-bit result and 32-bit remainder.
 274 *
 275 * Exit:
 276 *   Quotient overwrites dividend.
 277 *   Returns remainder.
 278 *
 279 * Exceptions:
 280 *   None.
 281 */
 282 // Was: FullDiv64By32
 283 static uint32_t
 284 FullDiv64By32 (uint64_t *num, uint32_t den)
 285 {
 286         SPLIT64  tmp;
 287         SPLIT64  res;
 288
 289         tmp.int64 = *num;
 290         res.u.Hi = 0;
 291
 292         if (tmp.u.Hi >= den) {
 293                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
 294                 //
 295                 res.u.Lo = tmp.u.Hi;
 296                 res.int64 = DivMod64by32(res.int64, den);
 297                 tmp.u.Hi = res.u.Hi;
 298                 res.u.Hi = res.u.Lo;
 299         }
 300
 301         tmp.int64 = DivMod64by32(tmp.int64, den);
 302         res.u.Lo = tmp.u.Lo;
 303         *num = res.int64;
 304         return tmp.u.Hi;
 305 }
 306
 307 /***
 308  * SearchScale
 309  *
 310  * Entry:
 311  *   res_hi - Top uint32_t of quotient
 312  *   res_mid - Middle uint32_t of quotient
 313  *   res_lo - Bottom uint32_t of quotient
 314  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 315  *
 316  * Purpose:
 317  *   Determine the max power of 10, <= 9, that the quotient can be scaled
 318  *   up by and still fit in 96 bits.
 319  *
 320  * Exit:
 321  *   Returns power of 10 to scale by, -1 if overflow error.
 322  *
 323  ***********************************************************************/
 324
 325 static int
 326 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
 327 {
 328         int   cur_scale;
 329
 330         // Quick check to stop us from trying to scale any more.
 331         //
 332         if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
 333                 cur_scale = 0;
 334                 goto HaveScale;
 335         }
 336
 337         if (scale > DEC_SCALE_MAX - 9) {
 338                 // We can't scale by 10^9 without exceeding the max scale factor.
 339                 // See if we can scale to the max.  If not, we'll fall into
 340                 // standard search for scale factor.
 341                 //
 342                 cur_scale = DEC_SCALE_MAX - scale;
 343                 if (res_hi < power_overflow[cur_scale - 1].Hi)
 344                         goto HaveScale;
 345
 346                 if (res_hi == power_overflow[cur_scale - 1].Hi) {
 347                 UpperEq:
 348                         if (res_mid > power_overflow[cur_scale - 1].Mid ||
 349                             (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
 350                                 cur_scale--;
 351                         }
 352                         goto HaveScale;
 353                 }
 354         } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
 355                 return 9;
 356
 357         // Search for a power to scale by < 9.  Do a binary search
 358         // on power_overflow[].
 359         //
 360         cur_scale = 5;
 361         if (res_hi < OVFL_MAX_5_HI)
 362                 cur_scale = 7;
 363         else if (res_hi > OVFL_MAX_5_HI)
 364                 cur_scale = 3;
 365         else
 366                 goto UpperEq;
 367
 368         // cur_scale is 3 or 7.
 369         //
 370         if (res_hi < power_overflow[cur_scale - 1].Hi)
 371                 cur_scale++;
 372         else if (res_hi > power_overflow[cur_scale - 1].Hi)
 373                 cur_scale--;
 374         else
 375                 goto UpperEq;
 376
 377         // cur_scale is 2, 4, 6, or 8.
 378         //
 379         // In all cases, we already found we could not use the power one larger.
 380         // So if we can use this power, it is the biggest, and we're done.  If
 381         // we can't use this power, the one below it is correct for all cases
 382         // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
 383         //
 384         if (res_hi > power_overflow[cur_scale - 1].Hi)
 385                 cur_scale--;
 386
 387         if (res_hi == power_overflow[cur_scale - 1].Hi)
 388                 goto UpperEq;
 389
 390 HaveScale:
 391         // cur_scale = largest power of 10 we can scale by without overflow,
 392         // cur_scale < 9.  See if this is enough to make scale factor
 393         // positive if it isn't already.
 394         //
 395         if (cur_scale + scale < 0)
 396                 cur_scale = -1;
 397
 398         return cur_scale;
 399 }
 400
 401
 402 /**
 403 * Div96By32
 404 *
 405 * Entry:
 406 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
 407 *   ulDen   - 32-bit divisor.
 408 *
 409 * Purpose:
 410 *   Do full divide, yielding 96-bit result and 32-bit remainder.
 411 *
 412 * Exit:
 413 *   Quotient overwrites dividend.
 414 *   Returns remainder.
 415 *
 416 * Exceptions:
 417 *   None.
 418 *
 419 */
 420 static uint32_t
 421 Div96By32(uint32_t *num, uint32_t den)
 422 {
 423         SPLIT64  tmp;
 424
 425         tmp.u.Hi = 0;
 426
 427         if (num[2] != 0)
 428                 goto Div3Word;
 429
 430         if (num[1] >= den)
 431                 goto Div2Word;
 432
 433         tmp.u.Hi = num[1];
 434         num[1] = 0;
 435         goto Div1Word;
 436
 437 Div3Word:
 438         tmp.u.Lo = num[2];
 439         tmp.int64 = DivMod64by32(tmp.int64, den);
 440         num[2] = tmp.u.Lo;
 441 Div2Word:
 442         tmp.u.Lo = num[1];
 443         tmp.int64 = DivMod64by32(tmp.int64, den);
 444         num[1] = tmp.u.Lo;
 445 Div1Word:
 446         tmp.u.Lo = num[0];
 447         tmp.int64 = DivMod64by32(tmp.int64, den);
 448         num[0] = tmp.u.Lo;
 449         return tmp.u.Hi;
 450 }
 451
 452 /***
 453  * DecFixInt
 454  *
 455  * Entry:
 456  *   pdecRes - Pointer to Decimal result location
 457  *   operand  - Pointer to Decimal operand
 458  *
 459  * Purpose:
 460  *   Chop the value to integer.  Return remainder so Int() function
 461  *   can round down if non-zero.
 462  *
 463  * Exit:
 464  *   Returns remainder.
 465  *
 466  * Exceptions:
 467  *   None.
 468  *
 469  ***********************************************************************/
 470
 471 static uint32_t
 472 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
 473 {
 474         uint32_t   num[3];
 475         uint32_t   rem;
 476         uint32_t   pwr;
 477         int     scale;
 478
 479         if (operand->u.u.scale > 0) {
 480                 num[0] = operand->v.v.Lo32;
 481                 num[1] = operand->v.v.Mid32;
 482                 num[2] = operand->Hi32;
 483                 scale = operand->u.u.scale;
 484                 result->u.u.sign = operand->u.u.sign;
 485                 rem = 0;
 486
 487                 do {
 488                         if (scale > POWER10_MAX)
 489                                 pwr = ten_to_nine;
 490                         else
 491                                 pwr = power10[scale];
 492
 493                         rem |= Div96By32(num, pwr);
 494                         scale -= 9;
 495                 }while (scale > 0);
 496
 497                 result->v.v.Lo32 = num[0];
 498                 result->v.v.Mid32 = num[1];
 499                 result->Hi32 = num[2];
 500                 result->u.u.scale = 0;
 501
 502                 return rem;
 503         }
 504
 505         COPYDEC(*result, *operand);
 506         // Odd, the Microsoft code does not set result->reserved to zero on this case
 507         return 0;
 508 }
 509
 510 /**
 511  * ScaleResult:
 512  *
 513  * Entry:
 514  *   res - Array of uint32_ts with value, least-significant first.
 515  *   hi_res  - Index of last non-zero value in res.
 516  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 517  *
 518  * Purpose:
 519  *   See if we need to scale the result to fit it in 96 bits.
 520  *   Perform needed scaling.  Adjust scale factor accordingly.
 521  *
 522  * Exit:
 523  *   res updated in place, always 3 uint32_ts.
 524  *   New scale factor returned, -1 if overflow error.
 525  *
 526  */
 527 static int
 528 ScaleResult(uint32_t *res, int hi_res, int scale)
 529 {
 530         int     new_scale;
 531         int     cur;
 532         uint32_t   pwr;
 533         uint32_t   tmp;
 534         uint32_t   sticky;
 535         SPLIT64 sdlTmp;
 536
 537         // See if we need to scale the result.  The combined scale must
 538         // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
 539         //
 540         // Start by figuring a lower bound on the scaling needed to make
 541         // the upper 96 bits zero.  hi_res is the index into res[]
 542         // of the highest non-zero uint32_t.
 543         //
 544         new_scale =   hi_res * 32 - 64 - 1;
 545         if (new_scale > 0) {
 546
 547                 // Find the MSB.
 548                 //
 549                 tmp = res[hi_res];
 550                 if (!(tmp & 0xFFFF0000)) {
 551                         new_scale -= 16;
 552                         tmp <<= 16;
 553                 }
 554                 if (!(tmp & 0xFF000000)) {
 555                         new_scale -= 8;
 556                         tmp <<= 8;
 557                 }
 558                 if (!(tmp & 0xF0000000)) {
 559                         new_scale -= 4;
 560                         tmp <<= 4;
 561                 }
 562                 if (!(tmp & 0xC0000000)) {
 563                         new_scale -= 2;
 564                         tmp <<= 2;
 565                 }
 566                 if (!(tmp & 0x80000000)) {
 567                         new_scale--;
 568                         tmp <<= 1;
 569                 }
 570
 571                 // Multiply bit position by log10(2) to figure it's power of 10.
 572                 // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
 573                 // with a multiply saves a 96-byte lookup table.  The power returned
 574                 // is <= the power of the number, so we must add one power of 10
 575                 // to make it's integer part zero after dividing by 256.
 576                 //
 577                 // Note: the result of this multiplication by an approximation of
 578                 // log10(2) have been exhaustively checked to verify it gives the
 579                 // correct result.  (There were only 95 to check...)
 580                 //
 581                 new_scale = ((new_scale * 77) >> 8) + 1;
 582
 583                 // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
 584                 // This reduces the scale factor of the result.  If it exceeds the
 585                 // current scale of the result, we'll overflow.
 586                 //
 587                 if (new_scale > scale)
 588                         return -1;
 589         }
 590         else
 591                 new_scale = 0;
 592
 593         // Make sure we scale by enough to bring the current scale factor
 594         // into valid range.
 595         //
 596         if (new_scale < scale - DEC_SCALE_MAX)
 597                 new_scale = scale - DEC_SCALE_MAX;
 598
 599         if (new_scale != 0) {
 600                 // Scale by the power of 10 given by new_scale.  Note that this is
 601                 // NOT guaranteed to bring the number within 96 bits -- it could
 602                 // be 1 power of 10 short.
 603                 //
 604                 scale -= new_scale;
 605                 sticky = 0;
 606                 sdlTmp.u.Hi = 0; // initialize remainder
 607
 608                 for (;;) {
 609
 610                         sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
 611
 612                         if (new_scale > POWER10_MAX)
 613                                 pwr = ten_to_nine;
 614                         else
 615                                 pwr = power10[new_scale];
 616
 617                         // Compute first quotient.
 618                         // DivMod64by32 returns quotient in Lo, remainder in Hi.
 619                         //
 620                         sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
 621                         res[hi_res] = sdlTmp.u.Lo;
 622                         cur = hi_res - 1;
 623
 624                         if (cur >= 0) {
 625                                 // If first quotient was 0, update hi_res.
 626                                 //
 627                                 if (sdlTmp.u.Lo == 0)
 628                                         hi_res--;
 629
 630                                 // Compute subsequent quotients.
 631                                 //
 632                                 do {
 633                                         sdlTmp.u.Lo = res[cur];
 634                                         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
 635                                         res[cur] = sdlTmp.u.Lo;
 636                                         cur--;
 637                                 } while (cur >= 0);
 638
 639                         }
 640
 641                         new_scale -= POWER10_MAX;
 642                         if (new_scale > 0)
 643                                 continue; // scale some more
 644
 645                         // If we scaled enough, hi_res would be 2 or less.  If not,
 646                         // divide by 10 more.
 647                         //
 648                         if (hi_res > 2) {
 649                                 new_scale = 1;
 650                                 scale--;
 651                                 continue; // scale by 10
 652                         }
 653
 654                         // Round final result.  See if remainder >= 1/2 of divisor.
 655                         // If remainder == 1/2 divisor, round up if odd or sticky bit set.
 656                         //
 657                         pwr >>= 1;  // power of 10 always even
 658                         if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
 659                                                     ((res[0] & 1) | sticky)) ) {
 660                                 cur = -1;
 661                                 while (++res[++cur] == 0);
 662
 663                                 if (cur > 2) {
 664                                         // The rounding caused us to carry beyond 96 bits.
 665                                         // Scale by 10 more.
 666                                         //
 667                                         hi_res = cur;
 668                                         sticky = 0;  // no sticky bit
 669                                         sdlTmp.u.Hi = 0; // or remainder
 670                                         new_scale = 1;
 671                                         scale--;
 672                                         continue; // scale by 10
 673                                 }
 674                         }
 675
 676                         // We may have scaled it more than we planned.  Make sure the scale
 677                         // factor hasn't gone negative, indicating overflow.
 678                         //
 679                         if (scale < 0)
 680                                 return -1;
 681
 682                         return scale;
 683                 } // for(;;)
 684         }
 685         return scale;
 686 }
 687
 688 // Decimal multiply
 689 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
 690 static MonoDecimalStatus
 691 mono_decimal_multiply_result(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
 692 {
 693         SPLIT64 tmp;
 694         SPLIT64 tmp2;
 695         SPLIT64 tmp3;
 696         int     scale;
 697         int     hi_prod;
 698         uint32_t   pwr;
 699         uint32_t   rem_lo;
 700         uint32_t   rem_hi;
 701         uint32_t   prod[6];
 702
 703         scale = left->u.u.scale + right->u.u.scale;
 704
 705         if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
 706                 // Upper 64 bits are zero.
 707                 //
 708                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 709                 if (scale > DEC_SCALE_MAX)
 710                 {
 711                         // Result scale is too big.  Divide result by power of 10 to reduce it.
 712                         // If the amount to divide by is > 19 the result is guaranteed
 713                         // less than 1/2.  [max value in 64 bits = 1.84E19]
 714                         //
 715                         scale -= DEC_SCALE_MAX;
 716                         if (scale > 19) {
 717                         ReturnZero:
 718                                 DECIMAL_SETZERO(*result);
 719                                 return MONO_DECIMAL_OK;
 720                         }
 721
 722                         if (scale > POWER10_MAX) {
 723                                 // Divide by 1E10 first, to get the power down to a 32-bit quantity.
 724                                 // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
 725                                 // then multiply the next divisor by 4 (which will be a max of 4E9).
 726                                 //
 727                                 rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
 728                                 pwr = power10[scale - 10] << 2;
 729                         } else {
 730                                 pwr = power10[scale];
 731                                 rem_lo = 0;
 732                         }
 733
 734                         // Power to divide by fits in 32 bits.
 735                         //
 736                         rem_hi = FullDiv64By32(&tmp.int64, pwr);
 737
 738                         // Round result.  See if remainder >= 1/2 of divisor.
 739                         // Divisor is a power of 10, so it is always even.
 740                         //
 741                         pwr >>= 1;
 742                         if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
 743                                 tmp.int64++;
 744
 745                         scale = DEC_SCALE_MAX;
 746                 }
 747                 DECIMAL_LO32(*result) = tmp.u.Lo;
 748                 DECIMAL_MID32(*result) = tmp.u.Hi;
 749                 DECIMAL_HI32(*result) = 0;
 750         } else {
 751                 // At least one operand has bits set in the upper 64 bits.
 752                 //
 753                 // Compute and accumulate the 9 partial products into a
 754                 // 192-bit (24-byte) result.
 755                 //
 756                 //                [l-h][l-m][l-l]   left high, middle, low
 757                 //             x  [r-h][r-m][r-l]   right high, middle, low
 758                 // ------------------------------
 759                 //
 760                 //                     [0-h][0-l]   l-l * r-l
 761                 //                [1ah][1al]        l-l * r-m
 762                 //                [1bh][1bl]        l-m * r-l
 763                 //           [2ah][2al]             l-m * r-m
 764                 //           [2bh][2bl]             l-l * r-h
 765                 //           [2ch][2cl]             l-h * r-l
 766                 //      [3ah][3al]                  l-m * r-h
 767                 //      [3bh][3bl]                  l-h * r-m
 768                 // [4-h][4-l]                       l-h * r-h
 769                 // ------------------------------
 770                 // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
 771                 //
 772                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 773                 prod[0] = tmp.u.Lo;
 774
 775                 tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
 776
 777                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
 778                 tmp.int64 += tmp2.int64; // this could generate carry
 779                 prod[1] = tmp.u.Lo;
 780                 if (tmp.int64 < tmp2.int64) // detect carry
 781                         tmp2.u.Hi = 1;
 782                 else
 783                         tmp2.u.Hi = 0;
 784                 tmp2.u.Lo = tmp.u.Hi;
 785
 786                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
 787
 788                 if (left->Hi32 | right->Hi32) {
 789                         // Highest 32 bits is non-zero.  Calculate 5 more partial products.
 790                         //
 791                         tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
 792                         tmp.int64 += tmp2.int64; // this could generate carry
 793                         if (tmp.int64 < tmp2.int64) // detect carry
 794                                 tmp3.u.Hi = 1;
 795                         else
 796                                 tmp3.u.Hi = 0;
 797
 798                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
 799                         tmp.int64 += tmp2.int64; // this could generate carry
 800                         prod[2] = tmp.u.Lo;
 801                         if (tmp.int64 < tmp2.int64) // detect carry
 802                                 tmp3.u.Hi++;
 803                         tmp3.u.Lo = tmp.u.Hi;
 804
 805                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
 806                         tmp.int64 += tmp3.int64; // this could generate carry
 807                         if (tmp.int64 < tmp3.int64) // detect carry
 808                                 tmp3.u.Hi = 1;
 809                         else
 810                                 tmp3.u.Hi = 0;
 811
 812                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
 813                         tmp.int64 += tmp2.int64; // this could generate carry
 814                         prod[3] = tmp.u.Lo;
 815                         if (tmp.int64 < tmp2.int64) // detect carry
 816                                 tmp3.u.Hi++;
 817                         tmp3.u.Lo = tmp.u.Hi;
 818
 819                         tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
 820                         prod[4] = tmp.u.Lo;
 821                         prod[5] = tmp.u.Hi;
 822
 823                         hi_prod = 5;
 824                 }
 825                 else {
 826                         prod[2] = tmp.u.Lo;
 827                         prod[3] = tmp.u.Hi;
 828                         hi_prod = 3;
 829                 }
 830
 831                 // Check for leading zero uint32_ts on the product
 832                 //
 833                 while (prod[hi_prod] == 0) {
 834                         hi_prod--;
 835                         if (hi_prod < 0)
 836                                 goto ReturnZero;
 837                 }
 838
 839                 scale = ScaleResult(prod, hi_prod, scale);
 840                 if (scale == -1)
 841                         return MONO_DECIMAL_OVERFLOW;
 842
 843                 result->v.v.Lo32 = prod[0];
 844                 result->v.v.Mid32 = prod[1];
 845                 result->Hi32 = prod[2];
 846         }
 847
 848         result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
 849         result->u.u.scale = (char)scale;
 850         return MONO_DECIMAL_OK;
 851 }
 852
 853 // Addition and subtraction
 854 static MonoDecimalStatus
 855 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
 856 {
 857         uint32_t     num[6];
 858         uint32_t     pwr;
 859         int       scale;
 860         int       hi_prod;
 861         int       cur;
 862         SPLIT64   tmp;
 863         MonoDecimal decRes;
 864         MonoDecimal decTmp;
 865         MonoDecimal *pdecTmp;
 866
 867         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
 868
 869         if (right->u.u.scale == left->u.u.scale) {
 870                 // Scale factors are equal, no alignment necessary.
 871                 //
 872                 decRes.u.signscale = left->u.signscale;
 873
 874         AlignedAdd:
 875                 if (sign) {
 876                         // Signs differ - subtract
 877                         //
 878                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
 879                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
 880
 881                         // Propagate carry
 882                         //
 883                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
 884                                 decRes.Hi32--;
 885                                 if (decRes.Hi32 >= left->Hi32)
 886                                         goto SignFlip;
 887                         } else if (decRes.Hi32 > left->Hi32) {
 888                                 // Got negative result.  Flip its sign.
 889                                 //
 890                         SignFlip:
 891                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
 892                                 decRes.Hi32 = ~decRes.Hi32;
 893                                 if (DECIMAL_LO64_GET(decRes) == 0)
 894                                         decRes.Hi32++;
 895                                 decRes.u.u.sign ^= DECIMAL_NEG;
 896                         }
 897
 898                 } else {
 899                         // Signs are the same - add
 900                         //
 901                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
 902                         decRes.Hi32 = left->Hi32 + right->Hi32;
 903
 904                         // Propagate carry
 905                         //
 906                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
 907                                 decRes.Hi32++;
 908                                 if (decRes.Hi32 <= left->Hi32)
 909                                         goto AlignedScale;
 910                         } else if (decRes.Hi32 < left->Hi32) {
 911                         AlignedScale:
 912                                 // The addition carried above 96 bits.  Divide the result by 10,
 913                                 // dropping the scale factor.
 914                                 //
 915                                 if (decRes.u.u.scale == 0)
 916                                         return MONO_DECIMAL_OVERFLOW;
 917                                 decRes.u.u.scale--;
 918
 919                                 tmp.u.Lo = decRes.Hi32;
 920                                 tmp.u.Hi = 1;
 921                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 922                                 decRes.Hi32 = tmp.u.Lo;
 923
 924                                 tmp.u.Lo = decRes.v.v.Mid32;
 925                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 926                                 decRes.v.v.Mid32 = tmp.u.Lo;
 927
 928                                 tmp.u.Lo = decRes.v.v.Lo32;
 929                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 930                                 decRes.v.v.Lo32 = tmp.u.Lo;
 931
 932                                 // See if we need to round up.
 933                                 //
 934                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
 935                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
 936                                                 if (DECIMAL_LO64_GET(decRes) == 0)
 937                                                         decRes.Hi32++;
 938                                 }
 939                         }
 940                 }
 941         }
 942         else {
 943                 // Scale factors are not equal.  Assume that a larger scale
 944                 // factor (more decimal places) is likely to mean that number
 945                 // is smaller.  Start by guessing that the right operand has
 946                 // the larger scale factor.  The result will have the larger
 947                 // scale factor.
 948                 //
 949                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
 950                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
 951                 scale = decRes.u.u.scale - left->u.u.scale;
 952
 953                 if (scale < 0) {
 954                         // Guessed scale factor wrong. Swap operands.
 955                         //
 956                         scale = -scale;
 957                         decRes.u.u.scale = left->u.u.scale;
 958                         decRes.u.u.sign ^= sign;
 959                         pdecTmp = right;
 960                         right = left;
 961                         left = pdecTmp;
 962                 }
 963
 964                 // *left will need to be multiplied by 10^scale so
 965                 // it will have the same scale as *right.  We could be
 966                 // extending it to up to 192 bits of precision.
 967                 //
 968                 if (scale <= POWER10_MAX) {
 969                         // Scaling won't make it larger than 4 uint32_ts
 970                         //
 971                         pwr = power10[scale];
 972                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
 973                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
 974                         tmp.int64 += decTmp.v.v.Mid32;
 975                         decTmp.v.v.Mid32 = tmp.u.Lo;
 976                         decTmp.Hi32 = tmp.u.Hi;
 977                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
 978                         tmp.int64 += decTmp.Hi32;
 979                         if (tmp.u.Hi == 0) {
 980                                 // Result fits in 96 bits.  Use standard aligned add.
 981                                 //
 982                                 decTmp.Hi32 = tmp.u.Lo;
 983                                 left = &decTmp;
 984                                 goto AlignedAdd;
 985                         }
 986                         num[0] = decTmp.v.v.Lo32;
 987                         num[1] = decTmp.v.v.Mid32;
 988                         num[2] = tmp.u.Lo;
 989                         num[3] = tmp.u.Hi;
 990                         hi_prod = 3;
 991                 }
 992                 else {
 993                         // Have to scale by a bunch.  Move the number to a buffer
 994                         // where it has room to grow as it's scaled.
 995                         //
 996                         num[0] = left->v.v.Lo32;
 997                         num[1] = left->v.v.Mid32;
 998                         num[2] = left->Hi32;
 999                         hi_prod = 2;
1000
1001                         // Scan for zeros in the upper words.
1002                         //
1003                         if (num[2] == 0) {
1004                                 hi_prod = 1;
1005                                 if (num[1] == 0) {
1006                                         hi_prod = 0;
1007                                         if (num[0] == 0) {
1008                                                 // Left arg is zero, return right.
1009                                                 //
1010                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
1011                                                 decRes.Hi32 = right->Hi32;
1012                                                 decRes.u.u.sign ^= sign;
1013                                                 goto RetDec;
1014                                         }
1015                                 }
1016                         }
1017
1018                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
1019                         // with index of highest non-zero uint32_t.
1020                         //
1021                         for (; scale > 0; scale -= POWER10_MAX) {
1022                                 if (scale > POWER10_MAX)
1023                                         pwr = ten_to_nine;
1024                                 else
1025                                         pwr = power10[scale];
1026
1027                                 tmp.u.Hi = 0;
1028                                 for (cur = 0; cur <= hi_prod; cur++) {
1029                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
1030                                         num[cur] = tmp.u.Lo;
1031                                 }
1032
1033                                 if (tmp.u.Hi != 0)
1034                                         // We're extending the result by another uint32_t.
1035                                         num[++hi_prod] = tmp.u.Hi;
1036                         }
1037                 }
1038
1039                 // Scaling complete, do the add.  Could be subtract if signs differ.
1040                 //
1041                 tmp.u.Lo = num[0];
1042                 tmp.u.Hi = num[1];
1043
1044                 if (sign) {
1045                         // Signs differ, subtract.
1046                         //
1047                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1048                         decRes.Hi32 = num[2] - right->Hi32;
1049
1050                         // Propagate carry
1051                         //
1052                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1053                                 decRes.Hi32--;
1054                                 if (decRes.Hi32 >= num[2])
1055                                         goto LongSub;
1056                         }
1057                         else if (decRes.Hi32 > num[2]) {
1058                         LongSub:
1059                                 // If num has more than 96 bits of precision, then we need to
1060                                 // carry the subtraction into the higher bits.  If it doesn't,
1061                                 // then we subtracted in the wrong order and have to flip the
1062                                 // sign of the result.
1063                                 //
1064                                 if (hi_prod <= 2)
1065                                         goto SignFlip;
1066
1067                                 cur = 3;
1068                                 while(num[cur++]-- == 0);
1069                                 if (num[hi_prod] == 0)
1070                                         hi_prod--;
1071                         }
1072                 }
1073                 else {
1074                         // Signs the same, add.
1075                         //
1076                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1077                         decRes.Hi32 = num[2] + right->Hi32;
1078
1079                         // Propagate carry
1080                         //
1081                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1082                                 decRes.Hi32++;
1083                                 if (decRes.Hi32 <= num[2])
1084                                         goto LongAdd;
1085                         }
1086                         else if (decRes.Hi32 < num[2]) {
1087                         LongAdd:
1088                                 // Had a carry above 96 bits.
1089                                 //
1090                                 cur = 3;
1091                                 do {
1092                                         if (hi_prod < cur) {
1093                                                 num[cur] = 1;
1094                                                 hi_prod = cur;
1095                                                 break;
1096                                         }
1097                                 }while (++num[cur++] == 0);
1098                         }
1099                 }
1100
1101                 if (hi_prod > 2) {
1102                         num[0] = decRes.v.v.Lo32;
1103                         num[1] = decRes.v.v.Mid32;
1104                         num[2] = decRes.Hi32;
1105                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1106                         if (decRes.u.u.scale == (uint8_t) -1)
1107                                 return MONO_DECIMAL_OVERFLOW;
1108
1109                         decRes.v.v.Lo32 = num[0];
1110                         decRes.v.v.Mid32 = num[1];
1111                         decRes.Hi32 = num[2];
1112                 }
1113         }
1114
1115 RetDec:
1116         COPYDEC(*result, decRes);
1117         // Odd, the Microsoft code does not set result->reserved to zero on this case
1118         return MONO_DECIMAL_OK;
1119 }
1120
1121 // Decimal addition
1122 static MonoDecimalStatus G_GNUC_UNUSED
1123 mono_decimal_add(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1124 {
1125     return DecAddSub (left, right, result, 0);
1126 }
1127
1128 // Decimal subtraction
1129 static MonoDecimalStatus G_GNUC_UNUSED
1130 mono_decimal_sub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1131 {
1132     return DecAddSub (left, right, result, DECIMAL_NEG);
1133 }
1134
1135 /**
1136  * IncreaseScale:
1137  *
1138  * Entry:
1139  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1140  *   pwr   - Scale factor to multiply by
1141  *
1142  * Purpose:
1143  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1144  *   the input.  The last 32 bits of the product are the return value.
1145  *
1146  * Exit:
1147  *   Returns highest 32 bits of product.
1148  *
1149  * Exceptions:
1150  *   None.
1151  *
1152  */
1153 static uint32_t
1154 IncreaseScale(uint32_t *num, uint32_t pwr)
1155 {
1156         SPLIT64   sdlTmp;
1157
1158         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1159         num[0] = sdlTmp.u.Lo;
1160         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1161         num[1] = sdlTmp.u.Lo;
1162         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1163         num[2] = sdlTmp.u.Lo;
1164         return sdlTmp.u.Hi;
1165 }
1166
1167 /**
1168  * Div96By64:
1169  *
1170  * Entry:
1171  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1172  *   sdlDen  - 64-bit divisor.
1173  *
1174  * Purpose:
1175  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1176  *   Divisor must be larger than upper 64 bits of dividend.
1177  *
1178  * Exit:
1179  *   Remainder overwrites lower 64-bits of dividend.
1180  *   Returns quotient.
1181  *
1182  * Exceptions:
1183  *   None.
1184  *
1185  */
1186 static uint32_t
1187 Div96By64(uint32_t *num, SPLIT64 den)
1188 {
1189         SPLIT64 quo;
1190         SPLIT64 sdlNum;
1191         SPLIT64 prod;
1192
1193         sdlNum.u.Lo = num[0];
1194
1195         if (num[2] >= den.u.Hi) {
1196                 // Divide would overflow.  Assume a quotient of 2^32, and set
1197                 // up remainder accordingly.  Then jump to loop which reduces
1198                 // the quotient.
1199                 //
1200                 sdlNum.u.Hi = num[1] - den.u.Lo;
1201                 quo.u.Lo = 0;
1202                 goto NegRem;
1203         }
1204
1205         // Hardware divide won't overflow
1206         //
1207         if (num[2] == 0 && num[1] < den.u.Hi)
1208                 // Result is zero.  Entire dividend is remainder.
1209                 //
1210                 return 0;
1211
1212         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1213         //
1214         quo.u.Lo = num[1];
1215         quo.u.Hi = num[2];
1216         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1217         sdlNum.u.Hi = quo.u.Hi; // remainder
1218
1219         // Compute full remainder, rem = dividend - (quo * divisor).
1220         //
1221         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1222         sdlNum.int64 -= prod.int64;
1223
1224         if (sdlNum.int64 > ~prod.int64) {
1225         NegRem:
1226                 // Remainder went negative.  Add divisor back in until it's positive,
1227                 // a max of 2 times.
1228                 //
1229                 do {
1230                         quo.u.Lo--;
1231                         sdlNum.int64 += den.int64;
1232                 }while (sdlNum.int64 >= den.int64);
1233         }
1234
1235         num[0] = sdlNum.u.Lo;
1236         num[1] = sdlNum.u.Hi;
1237         return quo.u.Lo;
1238 }
1239
1240 /***
1241 * Div128By96
1242 *
1243 * Entry:
1244 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1245 *   den - Pointer to 96-bit divisor.
1246 *
1247 * Purpose:
1248 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1249 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1250 *   assured in the initial call because the divisor is normalized
1251 *   and the dividend can't be.  In subsequent calls, the remainder
1252 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1253 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1254 *
1255 * Exit:
1256 *   Remainder overwrites lower 96-bits of dividend.
1257 *   Returns quotient.
1258 *
1259 * Exceptions:
1260 *   None.
1261 *
1262 ***********************************************************************/
1263
1264 static uint32_t
1265 Div128By96(uint32_t *num, uint32_t *den)
1266 {
1267         SPLIT64 sdlQuo;
1268         SPLIT64 sdlNum;
1269         SPLIT64 sdlProd1;
1270         SPLIT64 sdlProd2;
1271
1272         sdlNum.u.Lo = num[0];
1273         sdlNum.u.Hi = num[1];
1274
1275         if (num[3] == 0 && num[2] < den[2]){
1276                 // Result is zero.  Entire dividend is remainder.
1277                 //
1278                 return 0;
1279         }
1280
1281         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1282         //
1283         sdlQuo.u.Lo = num[2];
1284         sdlQuo.u.Hi = num[3];
1285         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1286
1287         // Compute full remainder, rem = dividend - (quo * divisor).
1288         //
1289         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1290         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1291         sdlProd2.int64 += sdlProd1.u.Hi;
1292         sdlProd1.u.Hi = sdlProd2.u.Lo;
1293
1294         sdlNum.int64 -= sdlProd1.int64;
1295         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1296
1297         // Propagate carries
1298         //
1299         if (sdlNum.int64 > ~sdlProd1.int64) {
1300                 num[2]--;
1301                 if (num[2] >= ~sdlProd2.u.Hi)
1302                         goto NegRem;
1303         } else if (num[2] > ~sdlProd2.u.Hi) {
1304         NegRem:
1305                 // Remainder went negative.  Add divisor back in until it's positive,
1306                 // a max of 2 times.
1307                 //
1308                 sdlProd1.u.Lo = den[0];
1309                 sdlProd1.u.Hi = den[1];
1310
1311                 for (;;) {
1312                         sdlQuo.u.Lo--;
1313                         sdlNum.int64 += sdlProd1.int64;
1314                         num[2] += den[2];
1315
1316                         if (sdlNum.int64 < sdlProd1.int64) {
1317                                 // Detected carry. Check for carry out of top
1318                                 // before adding it in.
1319                                 //
1320                                 if (num[2]++ < den[2])
1321                                         break;
1322                         }
1323                         if (num[2] < den[2])
1324                                 break; // detected carry
1325                 }
1326         }
1327
1328         num[0] = sdlNum.u.Lo;
1329         num[1] = sdlNum.u.Hi;
1330         return sdlQuo.u.Lo;
1331 }
1332
1333 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1334 // Returns FALSE if there is an overflow
1335 static gboolean
1336 Add32To96(uint32_t *num, uint32_t value)
1337 {
1338         num[0] += value;
1339         if (num[0] < value) {
1340                 if (++num[1] == 0) {
1341                         if (++num[2] == 0) {
1342                                 return FALSE;
1343                         }
1344                 }
1345         }
1346         return TRUE;
1347 }
1348
1349 static void
1350 OverflowUnscale (uint32_t *quo, gboolean remainder)
1351 {
1352         SPLIT64  sdlTmp;
1353
1354         // We have overflown, so load the high bit with a one.
1355         sdlTmp.u.Hi = 1u;
1356         sdlTmp.u.Lo = quo[2];
1357         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1358         quo[2] = sdlTmp.u.Lo;
1359         sdlTmp.u.Lo = quo[1];
1360         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1361         quo[1] = sdlTmp.u.Lo;
1362         sdlTmp.u.Lo = quo[0];
1363         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1364         quo[0] = sdlTmp.u.Lo;
1365         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1366         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1367                 Add32To96(quo, 1u);
1368         }
1369 }
1370
1371 // mono_decimal_divide - Decimal divide
1372 static MonoDecimalStatus G_GNUC_UNUSED
1373 mono_decimal_divide_result(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1374 {
1375         uint32_t   quo[3];
1376         uint32_t   quoSave[3];
1377         uint32_t   rem[4];
1378         uint32_t   divisor[3];
1379         uint32_t   pwr;
1380         uint32_t   utmp;
1381         uint32_t   utmp1;
1382         SPLIT64 sdlTmp;
1383         SPLIT64 sdlDivisor;
1384         int     scale;
1385         int     cur_scale;
1386
1387         scale = left->u.u.scale - right->u.u.scale;
1388         divisor[0] = right->v.v.Lo32;
1389         divisor[1] = right->v.v.Mid32;
1390         divisor[2] = right->Hi32;
1391
1392         if (divisor[1] == 0 && divisor[2] == 0) {
1393                 // Divisor is only 32 bits.  Easy divide.
1394                 //
1395                 if (divisor[0] == 0)
1396                         return MONO_DECIMAL_DIVBYZERO;
1397
1398                 quo[0] = left->v.v.Lo32;
1399                 quo[1] = left->v.v.Mid32;
1400                 quo[2] = left->Hi32;
1401                 rem[0] = Div96By32(quo, divisor[0]);
1402
1403                 for (;;) {
1404                         if (rem[0] == 0) {
1405                                 if (scale < 0) {
1406                                         cur_scale = min(9, -scale);
1407                                         goto HaveScale;
1408                                 }
1409                                 break;
1410                         }
1411
1412                         // We have computed a quotient based on the natural scale
1413                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1414                         // remainder, so now we should increase the scale if possible to
1415                         // include more quotient bits.
1416                         //
1417                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1418                         // computing more quotient bits as long as the remainder stays
1419                         // non-zero.  If scaling by that much would cause overflow, we'll
1420                         // drop out of the loop and scale by as much as we can.
1421                         //
1422                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1423                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1424                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1425                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
1426                         // is the largest value in quo[1] (when quo[2] == 4) that is
1427                         // assured not to overflow.
1428                         //
1429                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1430                         if (cur_scale == 0) {
1431                                 // No more scaling to be done, but remainder is non-zero.
1432                                 // Round quotient.
1433                                 //
1434                                 utmp = rem[0] << 1;
1435                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1436                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1437                                 RoundUp:
1438                                         if (++quo[0] == 0)
1439                                                 if (++quo[1] == 0)
1440                                                         quo[2]++;
1441                                 }
1442                                 break;
1443                         }
1444
1445                         if (cur_scale == -1)
1446                                 return MONO_DECIMAL_OVERFLOW;
1447
1448                 HaveScale:
1449                         pwr = power10[cur_scale];
1450                         scale += cur_scale;
1451
1452                         if (IncreaseScale(quo, pwr) != 0)
1453                                 return MONO_DECIMAL_OVERFLOW;
1454
1455                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1456                         rem[0] = sdlTmp.u.Hi;
1457
1458                         quo[0] += sdlTmp.u.Lo;
1459                         if (quo[0] < sdlTmp.u.Lo) {
1460                                 if (++quo[1] == 0)
1461                                         quo[2]++;
1462                         }
1463                 } // for (;;)
1464         }
1465         else {
1466                 // Divisor has bits set in the upper 64 bits.
1467                 //
1468                 // Divisor must be fully normalized (shifted so bit 31 of the most
1469                 // significant uint32_t is 1).  Locate the MSB so we know how much to
1470                 // normalize by.  The dividend will be shifted by the same amount so
1471                 // the quotient is not changed.
1472                 //
1473                 if (divisor[2] == 0)
1474                         utmp = divisor[1];
1475                 else
1476                         utmp = divisor[2];
1477
1478                 cur_scale = 0;
1479                 if (!(utmp & 0xFFFF0000)) {
1480                         cur_scale += 16;
1481                         utmp <<= 16;
1482                 }
1483                 if (!(utmp & 0xFF000000)) {
1484                         cur_scale += 8;
1485                         utmp <<= 8;
1486                 }
1487                 if (!(utmp & 0xF0000000)) {
1488                         cur_scale += 4;
1489                         utmp <<= 4;
1490                 }
1491                 if (!(utmp & 0xC0000000)) {
1492                         cur_scale += 2;
1493                         utmp <<= 2;
1494                 }
1495                 if (!(utmp & 0x80000000)) {
1496                         cur_scale++;
1497                         utmp <<= 1;
1498                 }
1499
1500                 // Shift both dividend and divisor left by cur_scale.
1501                 //
1502                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1503                 rem[0] = sdlTmp.u.Lo;
1504                 rem[1] = sdlTmp.u.Hi;
1505                 sdlTmp.u.Lo = left->v.v.Mid32;
1506                 sdlTmp.u.Hi = left->Hi32;
1507                 sdlTmp.int64 <<= cur_scale;
1508                 rem[2] = sdlTmp.u.Hi;
1509                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1510
1511                 sdlDivisor.u.Lo = divisor[0];
1512                 sdlDivisor.u.Hi = divisor[1];
1513                 sdlDivisor.int64 <<= cur_scale;
1514
1515                 if (divisor[2] == 0) {
1516                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1517                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1518                         //
1519                         sdlTmp.u.Lo = rem[2];
1520                         sdlTmp.u.Hi = rem[3];
1521
1522                         quo[2] = 0;
1523                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1524                         quo[0] = Div96By64(rem, sdlDivisor);
1525
1526                         for (;;) {
1527                                 if ((rem[0] | rem[1]) == 0) {
1528                                         if (scale < 0) {
1529                                                 cur_scale = min(9, -scale);
1530                                                 goto HaveScale64;
1531                                         }
1532                                         break;
1533                                 }
1534
1535                                 // Remainder is non-zero.  Scale up quotient and remainder by
1536                                 // powers of 10 so we can compute more significant bits.
1537                                 //
1538                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1539                                 if (cur_scale == 0) {
1540                                         // No more scaling to be done, but remainder is non-zero.
1541                                         // Round quotient.
1542                                         //
1543                                         sdlTmp.u.Lo = rem[0];
1544                                         sdlTmp.u.Hi = rem[1];
1545                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1546                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1547                                                 goto RoundUp;
1548                                         break;
1549                                 }
1550
1551                                 if (cur_scale == -1)
1552                                         return MONO_DECIMAL_OVERFLOW;
1553
1554                         HaveScale64:
1555                                 pwr = power10[cur_scale];
1556                                 scale += cur_scale;
1557
1558                                 if (IncreaseScale(quo, pwr) != 0)
1559                                         return MONO_DECIMAL_OVERFLOW;
1560
1561                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1562                                 IncreaseScale(rem, pwr);
1563                                 utmp = Div96By64(rem, sdlDivisor);
1564                                 quo[0] += utmp;
1565                                 if (quo[0] < utmp)
1566                                         if (++quo[1] == 0)
1567                                                 quo[2]++;
1568
1569                         } // for (;;)
1570                 }
1571                 else {
1572                         // Have a 96-bit divisor in divisor[].
1573                         //
1574                         // Start by finishing the shift left by cur_scale.
1575                         //
1576                         sdlTmp.u.Lo = divisor[1];
1577                         sdlTmp.u.Hi = divisor[2];
1578                         sdlTmp.int64 <<= cur_scale;
1579                         divisor[0] = sdlDivisor.u.Lo;
1580                         divisor[1] = sdlDivisor.u.Hi;
1581                         divisor[2] = sdlTmp.u.Hi;
1582
1583                         // The remainder (currently 96 bits spread over 4 uint32_ts)
1584                         // will be < divisor.
1585                         //
1586                         quo[2] = 0;
1587                         quo[1] = 0;
1588                         quo[0] = Div128By96(rem, divisor);
1589
1590                         for (;;) {
1591                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1592                                         if (scale < 0) {
1593                                                 cur_scale = min(9, -scale);
1594                                                 goto HaveScale96;
1595                                         }
1596                                         break;
1597                                 }
1598
1599                                 // Remainder is non-zero.  Scale up quotient and remainder by
1600                                 // powers of 10 so we can compute more significant bits.
1601                                 //
1602                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1603                                 if (cur_scale == 0) {
1604                                         // No more scaling to be done, but remainder is non-zero.
1605                                         // Round quotient.
1606                                         //
1607                                         if (rem[2] >= 0x80000000)
1608                                                 goto RoundUp;
1609
1610                                         utmp = rem[0] > 0x80000000;
1611                                         utmp1 = rem[1] > 0x80000000;
1612                                         rem[0] <<= 1;
1613                                         rem[1] = (rem[1] << 1) + utmp;
1614                                         rem[2] = (rem[2] << 1) + utmp1;
1615
1616                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1617                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1618                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1619                                               (quo[0] & 1))))
1620                                                 goto RoundUp;
1621                                         break;
1622                                 }
1623
1624                                 if (cur_scale == -1)
1625                                         return MONO_DECIMAL_OVERFLOW;
1626
1627                         HaveScale96:
1628                                 pwr = power10[cur_scale];
1629                                 scale += cur_scale;
1630
1631                                 if (IncreaseScale(quo, pwr) != 0)
1632                                         return MONO_DECIMAL_OVERFLOW;
1633
1634                                 rem[3] = IncreaseScale(rem, pwr);
1635                                 utmp = Div128By96(rem, divisor);
1636                                 quo[0] += utmp;
1637                                 if (quo[0] < utmp)
1638                                         if (++quo[1] == 0)
1639                                                 quo[2]++;
1640
1641                         } // for (;;)
1642                 }
1643         }
1644
1645         // No more remainder.  Try extracting any extra powers of 10 we may have
1646         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1647         // If a division by one of these powers returns a zero remainder, then
1648         // we keep the quotient.  If the remainder is not zero, then we restore
1649         // the previous value.
1650         //
1651         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1652         // we can extract.  We use this as a quick test on whether to try a
1653         // given power.
1654         //
1655         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1656                 quoSave[0] = quo[0];
1657                 quoSave[1] = quo[1];
1658                 quoSave[2] = quo[2];
1659
1660                 if (Div96By32(quoSave, 100000000) == 0) {
1661                         quo[0] = quoSave[0];
1662                         quo[1] = quoSave[1];
1663                         quo[2] = quoSave[2];
1664                         scale -= 8;
1665                 }
1666                 else
1667                         break;
1668         }
1669
1670         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1671                 quoSave[0] = quo[0];
1672                 quoSave[1] = quo[1];
1673                 quoSave[2] = quo[2];
1674
1675                 if (Div96By32(quoSave, 10000) == 0) {
1676                         quo[0] = quoSave[0];
1677                         quo[1] = quoSave[1];
1678                         quo[2] = quoSave[2];
1679                         scale -= 4;
1680                 }
1681         }
1682
1683         if ((quo[0] & 3) == 0 && scale >= 2) {
1684                 quoSave[0] = quo[0];
1685                 quoSave[1] = quo[1];
1686                 quoSave[2] = quo[2];
1687
1688                 if (Div96By32(quoSave, 100) == 0) {
1689                         quo[0] = quoSave[0];
1690                         quo[1] = quoSave[1];
1691                         quo[2] = quoSave[2];
1692                         scale -= 2;
1693                 }
1694         }
1695
1696         if ((quo[0] & 1) == 0 && scale >= 1) {
1697                 quoSave[0] = quo[0];
1698                 quoSave[1] = quo[1];
1699                 quoSave[2] = quo[2];
1700
1701                 if (Div96By32(quoSave, 10) == 0) {
1702                         quo[0] = quoSave[0];
1703                         quo[1] = quoSave[1];
1704                         quo[2] = quoSave[2];
1705                         scale -= 1;
1706                 }
1707         }
1708
1709         result->Hi32 = quo[2];
1710         result->v.v.Mid32 = quo[1];
1711         result->v.v.Lo32 = quo[0];
1712         result->u.u.scale = scale;
1713         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1714         return MONO_DECIMAL_OK;
1715 }
1716
1717 // mono_decimal_absolute - Decimal Absolute Value
1718 static void G_GNUC_UNUSED
1719 mono_decimal_absolute (MonoDecimal *pdecOprd, MonoDecimal *result)
1720 {
1721         COPYDEC(*result, *pdecOprd);
1722         result->u.u.sign &= ~DECIMAL_NEG;
1723         // Microsoft does not set reserved here
1724 }
1725
1726 // mono_decimal_fix - Decimal Fix (chop to integer)
1727 static void
1728 mono_decimal_fix (MonoDecimal *pdecOprd, MonoDecimal *result)
1729 {
1730         DecFixInt(result, pdecOprd);
1731 }
1732
1733 // mono_decimal_round_to_int - Decimal Int (round down to integer)
1734 static void
1735 mono_decimal_round_to_int (MonoDecimal *pdecOprd, MonoDecimal *result)
1736 {
1737         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1738                 // We have chopped off a non-zero amount from a negative value.  Since
1739                 // we round toward -infinity, we must increase the integer result by
1740                 // 1 to make it more negative.  This will never overflow because
1741                 // in order to have a remainder, we must have had a non-zero scale factor.
1742                 // Our scale factor is back to zero now.
1743                 //
1744                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1745                 if (DECIMAL_LO64_GET(*result) == 0)
1746                         result->Hi32++;
1747         }
1748 }
1749
1750 // mono_decimal_negate - Decimal Negate
1751 static void G_GNUC_UNUSED
1752 mono_decimal_negate (MonoDecimal *pdecOprd, MonoDecimal *result)
1753 {
1754         COPYDEC(*result, *pdecOprd);
1755         // Microsoft does not set result->reserved to zero on this case.
1756         result->u.u.sign ^= DECIMAL_NEG;
1757 }
1758
1759 //
1760 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1761 //
1762 static MonoDecimalStatus
1763 mono_decimal_round_result(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1764 {
1765         uint32_t num[3];
1766         uint32_t rem;
1767         uint32_t sticky;
1768         uint32_t pwr;
1769         int scale;
1770
1771         if (cDecimals < 0)
1772                 return MONO_DECIMAL_INVALID_ARGUMENT;
1773
1774         scale = input->u.u.scale - cDecimals;
1775         if (scale > 0) {
1776                 num[0] = input->v.v.Lo32;
1777                 num[1] = input->v.v.Mid32;
1778                 num[2] = input->Hi32;
1779                 result->u.u.sign = input->u.u.sign;
1780                 rem = sticky = 0;
1781
1782                 do {
1783                         sticky |= rem;
1784                         if (scale > POWER10_MAX)
1785                                 pwr = ten_to_nine;
1786                         else
1787                                 pwr = power10[scale];
1788
1789                         rem = Div96By32(num, pwr);
1790                         scale -= 9;
1791                 }while (scale > 0);
1792
1793                 // Now round.  rem has last remainder, sticky has sticky bits.
1794                 // To do IEEE rounding, we add LSB of result to sticky bits so
1795                 // either causes round up if remainder * 2 == last divisor.
1796                 //
1797                 sticky |= num[0] & 1;
1798                 rem = (rem << 1) + (sticky != 0);
1799                 if (pwr < rem &&
1800                     ++num[0] == 0 &&
1801                     ++num[1] == 0
1802                         )
1803                         ++num[2];
1804
1805                 result->v.v.Lo32 = num[0];
1806                 result->v.v.Mid32 = num[1];
1807                 result->Hi32 = num[2];
1808                 result->u.u.scale = cDecimals;
1809                 return MONO_DECIMAL_OK;
1810         }
1811
1812         COPYDEC(*result, *input);
1813         // Odd, the Microsoft source does not set the result->reserved to zero here.
1814         return MONO_DECIMAL_OK;
1815 }
1816
1817 //
1818 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1819 static MonoDecimalStatus
1820 mono_decimal_from_float (float input, MonoDecimal* result)
1821 {
1822         int         exp;    // number of bits to left of binary point
1823         int         power;
1824         uint32_t       mant;
1825         double      dbl;
1826         SPLIT64     sdlLo;
1827         SPLIT64     sdlHi;
1828         int         lmax, cur;  // temps used during scale reduction
1829
1830         // The most we can scale by is 10^28, which is just slightly more
1831         // than 2^93.  So a float with an exponent of -94 could just
1832         // barely reach 0.5, but smaller exponents will always round to zero.
1833         //
1834         if ((exp = ((SingleStructure *)&input)->exp - SNGBIAS) < -94 ) {
1835                 DECIMAL_SETZERO(*result);
1836                 return MONO_DECIMAL_OK;
1837         }
1838
1839         if (exp > 96)
1840                 return MONO_DECIMAL_OVERFLOW;
1841
1842         // Round the input to a 7-digit integer.  The R4 format has
1843         // only 7 digits of precision, and we want to keep garbage digits
1844         // out of the Decimal were making.
1845         //
1846         // Calculate max power of 10 input value could have by multiplying
1847         // the exponent by log10(2).  Using scaled integer multiplcation,
1848         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1849         //
1850         dbl = fabs(input);
1851         power = 6 - ((exp * 19728) >> 16);
1852
1853         if (power >= 0) {
1854                 // We have less than 7 digits, scale input up.
1855                 //
1856                 if (power > DECMAX)
1857                         power = DECMAX;
1858
1859                 dbl = dbl * double_power10[power];
1860         } else {
1861                 if (power != -1 || dbl >= 1E7)
1862                         dbl = dbl / fnDblPower10(-power);
1863                 else
1864                         power = 0; // didn't scale it
1865         }
1866
1867         g_assert (dbl < 1E7);
1868         if (dbl < 1E6 && power < DECMAX) {
1869                 dbl *= 10;
1870                 power++;
1871                 g_assert(dbl >= 1E6);
1872         }
1873
1874         // Round to integer
1875         //
1876         mant = (int32_t)dbl;
1877         dbl -= (double)mant;  // difference between input & integer
1878         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1879                 mant++;
1880
1881         if (mant == 0) {
1882                 DECIMAL_SETZERO(*result);
1883                 return MONO_DECIMAL_OK;
1884         }
1885
1886         if (power < 0) {
1887                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1888                 //
1889                 power = -power;
1890                 if (power < 10) {
1891                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1892
1893                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1894                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1895                         DECIMAL_HI32(*result) = 0;
1896                 } else {
1897                         // Have a big power of 10.
1898                         //
1899                         if (power > 18) {
1900                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1901                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1902
1903                                 if (sdlHi.u.Hi != 0)
1904                                         return MONO_DECIMAL_OVERFLOW;
1905                         }
1906                         else {
1907                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1908                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1909                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1910                                 sdlHi.int64 += sdlLo.u.Hi;
1911                                 sdlLo.u.Hi = sdlHi.u.Lo;
1912                                 sdlHi.u.Lo = sdlHi.u.Hi;
1913                         }
1914                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1915                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1916                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1917                 }
1918                 DECIMAL_SCALE(*result) = 0;
1919         } else {
1920                 // Factor out powers of 10 to reduce the scale, if possible.
1921                 // The maximum number we could factor out would be 6.  This
1922                 // comes from the fact we have a 7-digit number, and the
1923                 // MSD must be non-zero -- but the lower 6 digits could be
1924                 // zero.  Note also the scale factor is never negative, so
1925                 // we can't scale by any more than the power we used to
1926                 // get the integer.
1927                 //
1928                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1929                 //
1930                 lmax = min(power, 6);
1931
1932                 // lmax is the largest power of 10 to try, lmax <= 6.
1933                 // We'll try powers 4, 2, and 1 unless they're too big.
1934                 //
1935                 for (cur = 4; cur > 0; cur >>= 1)
1936                 {
1937                         if (cur > lmax)
1938                                 continue;
1939
1940                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1941
1942                         if (sdlLo.u.Hi == 0) {
1943                                 mant = sdlLo.u.Lo;
1944                                 power -= cur;
1945                                 lmax -= cur;
1946                         }
1947                 }
1948                 DECIMAL_LO32(*result) = mant;
1949                 DECIMAL_MID32(*result) = 0;
1950                 DECIMAL_HI32(*result) = 0;
1951                 DECIMAL_SCALE(*result) = power;
1952         }
1953
1954         DECIMAL_SIGN(*result) = (char)((SingleStructure *)&input)->sign << 7;
1955         return MONO_DECIMAL_OK;
1956 }
1957
1958 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1959 static MonoDecimalStatus
1960 mono_decimal_from_double (double input, MonoDecimal *result)
1961 {
1962         int         exp;    // number of bits to left of binary point
1963         int         power;  // power-of-10 scale factor
1964         SPLIT64     sdlMant;
1965         SPLIT64     sdlLo;
1966         double      dbl;
1967         int         lmax, cur;  // temps used during scale reduction
1968         uint32_t       pwr_cur;
1969         uint32_t       quo;
1970
1971
1972         // The most we can scale by is 10^28, which is just slightly more
1973         // than 2^93.  So a float with an exponent of -94 could just
1974         // barely reach 0.5, but smaller exponents will always round to zero.
1975         //
1976         if ((exp = ((DoubleStructure *)&input)->u.exp - DBLBIAS) < -94) {
1977                 DECIMAL_SETZERO(*result);
1978                 return MONO_DECIMAL_OK;
1979         }
1980
1981         if (exp > 96)
1982                 return MONO_DECIMAL_OVERFLOW;
1983
1984         // Round the input to a 15-digit integer.  The R8 format has
1985         // only 15 digits of precision, and we want to keep garbage digits
1986         // out of the Decimal were making.
1987         //
1988         // Calculate max power of 10 input value could have by multiplying
1989         // the exponent by log10(2).  Using scaled integer multiplcation,
1990         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1991         //
1992         dbl = fabs(input);
1993         power = 14 - ((exp * 19728) >> 16);
1994
1995         if (power >= 0) {
1996                 // We have less than 15 digits, scale input up.
1997                 //
1998                 if (power > DECMAX)
1999                         power = DECMAX;
2000
2001                 dbl = dbl * double_power10[power];
2002         } else {
2003                 if (power != -1 || dbl >= 1E15)
2004                         dbl = dbl / fnDblPower10(-power);
2005                 else
2006                         power = 0; // didn't scale it
2007         }
2008
2009         g_assert (dbl < 1E15);
2010         if (dbl < 1E14 && power < DECMAX) {
2011                 dbl *= 10;
2012                 power++;
2013                 g_assert(dbl >= 1E14);
2014         }
2015
2016         // Round to int64
2017         //
2018         sdlMant.int64 = (int64_t)dbl;
2019         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
2020         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
2021                 sdlMant.int64++;
2022
2023         if (sdlMant.int64 == 0) {
2024                 DECIMAL_SETZERO(*result);
2025                 return MONO_DECIMAL_OK;
2026         }
2027
2028         if (power < 0) {
2029                 // Add -power factors of 10, -power <= (29 - 15) = 14.
2030                 //
2031                 power = -power;
2032                 if (power < 10) {
2033                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2034                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2035                         sdlMant.int64 += sdlLo.u.Hi;
2036                         sdlLo.u.Hi = sdlMant.u.Lo;
2037                         sdlMant.u.Lo = sdlMant.u.Hi;
2038                 }
2039                 else {
2040                         // Have a big power of 10.
2041                         //
2042                         g_assert(power <= 14);
2043                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2044
2045                         if (sdlMant.u.Hi != 0)
2046                                 return MONO_DECIMAL_OVERFLOW;
2047                 }
2048                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2049                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2050                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2051                 DECIMAL_SCALE(*result) = 0;
2052         }
2053         else {
2054                 // Factor out powers of 10 to reduce the scale, if possible.
2055                 // The maximum number we could factor out would be 14.  This
2056                 // comes from the fact we have a 15-digit number, and the
2057                 // MSD must be non-zero -- but the lower 14 digits could be
2058                 // zero.  Note also the scale factor is never negative, so
2059                 // we can't scale by any more than the power we used to
2060                 // get the integer.
2061                 //
2062                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2063                 //
2064                 lmax = min(power, 14);
2065
2066                 // lmax is the largest power of 10 to try, lmax <= 14.
2067                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2068                 //
2069                 for (cur = 8; cur > 0; cur >>= 1)
2070                 {
2071                         if (cur > lmax)
2072                                 continue;
2073
2074                         pwr_cur = (uint32_t)long_power10[cur];
2075
2076                         if (sdlMant.u.Hi >= pwr_cur) {
2077                                 // Overflow if we try to divide in one step.
2078                                 //
2079                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2080                                 quo = sdlLo.u.Lo;
2081                                 sdlLo.u.Lo = sdlMant.u.Lo;
2082                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2083                         }
2084                         else {
2085                                 quo = 0;
2086                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2087                         }
2088
2089                         if (sdlLo.u.Hi == 0) {
2090                                 sdlMant.u.Hi = quo;
2091                                 sdlMant.u.Lo = sdlLo.u.Lo;
2092                                 power -= cur;
2093                                 lmax -= cur;
2094                         }
2095                 }
2096
2097                 DECIMAL_HI32(*result) = 0;
2098                 DECIMAL_SCALE(*result) = power;
2099                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2100                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2101         }
2102
2103         DECIMAL_SIGN(*result) = (char)((DoubleStructure *)&input)->u.sign << 7;
2104         return MONO_DECIMAL_OK;
2105 }
2106
2107 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2108 static MonoDecimalStatus
2109 mono_decimal_to_double_result(MonoDecimal *input, double *result)
2110 {
2111         SPLIT64  tmp;
2112         double   dbl;
2113
2114         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2115                 return MONO_DECIMAL_INVALID_ARGUMENT;
2116
2117         tmp.u.Lo = DECIMAL_LO32(*input);
2118         tmp.u.Hi = DECIMAL_MID32(*input);
2119
2120         if ((int32_t)DECIMAL_MID32(*input) < 0)
2121                 dbl = (ds2to64.dbl + (double)(int64_t)tmp.int64 +
2122                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2123         else
2124                 dbl = ((double)(int64_t)tmp.int64 +
2125                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input));
2126
2127         if (DECIMAL_SIGN(*input))
2128                 dbl = -dbl;
2129
2130         *result = dbl;
2131         return MONO_DECIMAL_OK;
2132 }
2133
2134 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2135 static MonoDecimalStatus
2136 mono_decimal_to_float_result(MonoDecimal *input, float *result)
2137 {
2138         double   dbl;
2139
2140         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2141                 return MONO_DECIMAL_INVALID_ARGUMENT;
2142
2143         // Can't overflow; no errors possible.
2144         //
2145         mono_decimal_to_double_result(input, &dbl);
2146         *result = (float)dbl;
2147         return MONO_DECIMAL_OK;
2148 }
2149
2150 static void
2151 DecShiftLeft(MonoDecimal* value)
2152 {
2153         unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2154     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2155     g_assert(value != NULL);
2156
2157     DECIMAL_LO32(*value) <<= 1;
2158     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2159     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2160 }
2161
2162 static int
2163 D32AddCarry(uint32_t* value, uint32_t i)
2164 {
2165     uint32_t v = *value;
2166     uint32_t sum = v + i;
2167     *value = sum;
2168     return sum < v || sum < i? 1: 0;
2169 }
2170
2171 static void
2172 DecAdd(MonoDecimal *value, MonoDecimal* d)
2173 {
2174         g_assert(value != NULL && d != NULL);
2175
2176         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2177                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2178                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2179                 }
2180         }
2181         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2182                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2183         }
2184         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2185 }
2186
2187 static void
2188 DecMul10(MonoDecimal* value)
2189 {
2190         MonoDecimal d = *value;
2191         g_assert (value != NULL);
2192
2193         DecShiftLeft(value);
2194         DecShiftLeft(value);
2195         DecAdd(value, &d);
2196         DecShiftLeft(value);
2197 }
2198
2199 static void
2200 DecAddInt32(MonoDecimal* value, unsigned int i)
2201 {
2202         g_assert(value != NULL);
2203
2204         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2205                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2206                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2207                 }
2208         }
2209 }
2210
2211 MonoDecimalCompareResult
2212 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2213 {
2214         uint32_t   left_sign;
2215         uint32_t   right_sign;
2216         MonoDecimal result;
2217
2218         // First check signs and whether either are zero.  If both are
2219         // non-zero and of the same sign, just use subtraction to compare.
2220         //
2221         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2222         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2223         if (left_sign != 0)
2224                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2225
2226         if (right_sign != 0)
2227                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2228
2229         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2230         // operand is +, 0, or -.
2231         //
2232         if (left_sign == right_sign) {
2233                 if (left_sign == 0)    // both are zero
2234                         return MONO_DECIMAL_CMP_EQ; // return equal
2235
2236                 DecAddSub(left, right, &result, DECIMAL_NEG);
2237                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2238                         return MONO_DECIMAL_CMP_EQ;
2239                 if (result.u.u.sign & DECIMAL_NEG)
2240                         return MONO_DECIMAL_CMP_LT;
2241                 return MONO_DECIMAL_CMP_GT;
2242         }
2243
2244         //
2245         // Signs are different.  Use signed byte comparison
2246         //
2247         if ((signed char)left_sign > (signed char)right_sign)
2248                 return MONO_DECIMAL_CMP_GT;
2249         return MONO_DECIMAL_CMP_LT;
2250 }
2251
2252 void
2253 mono_decimal_init_single (MonoDecimal *_this, float value)
2254 {
2255         if (mono_decimal_from_float (value, _this) == MONO_DECIMAL_OVERFLOW) {
2256                 mono_set_pending_exception (mono_get_exception_overflow ());
2257                 return;
2258         }
2259         _this->reserved = 0;
2260 }
2261
2262 void
2263 mono_decimal_init_double (MonoDecimal *_this, double value)
2264 {
2265         if (mono_decimal_from_double (value, _this) == MONO_DECIMAL_OVERFLOW) {
2266                 mono_set_pending_exception (mono_get_exception_overflow ());
2267                 return;
2268         }
2269         _this->reserved = 0;
2270 }
2271
2272 void
2273 mono_decimal_floor (MonoDecimal *d)
2274 {
2275         MonoDecimal decRes;
2276
2277         mono_decimal_round_to_int(d, &decRes);
2278
2279         // copy decRes into d
2280         COPYDEC(*d, decRes);
2281         d->reserved = 0;
2282         FC_GC_POLL ();
2283 }
2284
2285 int32_t
2286 mono_decimal_get_hash_code (MonoDecimal *d)
2287 {
2288         double dbl;
2289
2290         if (mono_decimal_to_double_result(d, &dbl) != MONO_DECIMAL_OK)
2291                 return 0;
2292
2293         if (dbl == 0.0) {
2294                 // Ensure 0 and -0 have the same hash code
2295                 return 0;
2296         }
2297         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2298         //
2299         // For example these two numerically equal decimals with different internal representations produce
2300         // slightly different results when converted to double:
2301         //
2302         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2303         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2304         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2305         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2306         //
2307         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2308
2309 }
2310
2311 void
2312 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2313 {
2314         MonoDecimal decRes;
2315
2316         MonoDecimalStatus status = mono_decimal_multiply_result(d1, d2, &decRes);
2317         if (status != MONO_DECIMAL_OK) {
2318                 mono_set_pending_exception (mono_get_exception_overflow ());
2319                 return;
2320         }
2321
2322         COPYDEC(*d1, decRes);
2323         d1->reserved = 0;
2324
2325         FC_GC_POLL ();
2326 }
2327
2328 void
2329 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2330 {
2331         MonoDecimal decRes;
2332
2333         // GC is only triggered for throwing, no need to protect result
2334         if (decimals < 0 || decimals > 28) {
2335                 mono_set_pending_exception (mono_get_exception_argument_out_of_range ("d"));
2336                 return;
2337         }
2338
2339         mono_decimal_round_result(d, decimals, &decRes);
2340
2341         // copy decRes into d
2342         COPYDEC(*d, decRes);
2343         d->reserved = 0;
2344
2345         FC_GC_POLL();
2346 }
2347
2348 void
2349 mono_decimal_tocurrency (MonoDecimal *decimal)
2350 {
2351         // TODO
2352 }
2353
2354 double
2355 mono_decimal_to_double (MonoDecimal d)
2356 {
2357         double result = 0.0;
2358         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2359         mono_decimal_to_double_result(&d, &result);
2360         return result;
2361 }
2362
2363 int32_t
2364 mono_decimal_to_int32 (MonoDecimal d)
2365 {
2366         MonoDecimal result;
2367
2368         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2369         mono_decimal_round_result(&d, 0, &result);
2370
2371         if (DECIMAL_SCALE(result) != 0) {
2372                 d = result;
2373                 mono_decimal_fix (&d, &result);
2374         }
2375
2376         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2377                 int32_t i = DECIMAL_LO32(result);
2378                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2379                         if (i >= 0)
2380                                 return i;
2381                 } else {
2382                         i = -i;
2383                         if (i <= 0)
2384                                 return i;
2385                 }
2386         }
2387
2388         mono_set_pending_exception (mono_get_exception_overflow ());
2389         return 0;
2390 }
2391
2392 float
2393 mono_decimal_to_float (MonoDecimal d)
2394 {
2395         float result = 0.0f;
2396         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2397         mono_decimal_to_float_result(&d, &result);
2398         return result;
2399 }
2400
2401 void
2402 mono_decimal_truncate (MonoDecimal *d)
2403 {
2404         MonoDecimal decRes;
2405
2406         mono_decimal_fix(d, &decRes);
2407
2408         // copy decRes into d
2409         COPYDEC(*d, decRes);
2410         d->reserved = 0;
2411         FC_GC_POLL();
2412 }
2413
2414 void
2415 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2416 {
2417         MonoDecimal result, decTmp;
2418         MonoDecimal *pdecTmp, *leftOriginal;
2419         uint32_t    num[6], pwr;
2420         int         scale, hi_prod, cur;
2421         SPLIT64     sdlTmp;
2422
2423         g_assert(sign == 0 || sign == DECIMAL_NEG);
2424
2425         leftOriginal = left;
2426
2427         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2428
2429         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2430                 // Scale factors are equal, no alignment necessary.
2431                 //
2432                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2433
2434         AlignedAdd:
2435                 if (sign) {
2436                         // Signs differ - subtract
2437                         //
2438                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2439                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2440
2441                         // Propagate carry
2442                         //
2443                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2444                                 DECIMAL_HI32(result)--;
2445                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2446                                         goto SignFlip;
2447                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2448                                 // Got negative result.  Flip its sign.
2449                                 //
2450                         SignFlip:
2451                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2452                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2453                                 if (DECIMAL_LO64_GET(result) == 0)
2454                                         DECIMAL_HI32(result)++;
2455                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2456                         }
2457
2458                 } else {
2459                         // Signs are the same - add
2460                         //
2461                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2462                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2463
2464                         // Propagate carry
2465                         //
2466                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2467                                 DECIMAL_HI32(result)++;
2468                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2469                                         goto AlignedScale;
2470                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2471                         AlignedScale:
2472                                 // The addition carried above 96 bits.  Divide the result by 10,
2473                                 // dropping the scale factor.
2474                                 //
2475                                 if (DECIMAL_SCALE(result) == 0) {
2476                                         mono_set_pending_exception (mono_get_exception_overflow ());
2477                                         return;
2478                                 }
2479                                 DECIMAL_SCALE(result)--;
2480
2481                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2482                                 sdlTmp.u.Hi = 1;
2483                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2484                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2485
2486                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2487                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2488                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2489
2490                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2491                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2492                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2493
2494                                 // See if we need to round up.
2495                                 //
2496                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2497                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2498                                         if (DECIMAL_LO64_GET(result) == 0)
2499                                                 DECIMAL_HI32(result)++;
2500                                 }
2501                         }
2502                 }
2503         } else {
2504                 // Scale factors are not equal.  Assume that a larger scale
2505                 // factor (more decimal places) is likely to mean that number
2506                 // is smaller.  Start by guessing that the right operand has
2507                 // the larger scale factor.  The result will have the larger
2508                 // scale factor.
2509                 //
2510                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2511                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2512                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2513
2514                 if (scale < 0) {
2515                         // Guessed scale factor wrong. Swap operands.
2516                         //
2517                         scale = -scale;
2518                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2519                         DECIMAL_SIGN(result) ^= sign;
2520                         pdecTmp = right;
2521                         right = left;
2522                         left = pdecTmp;
2523                 }
2524
2525                 // *left will need to be multiplied by 10^scale so
2526                 // it will have the same scale as *right.  We could be
2527                 // extending it to up to 192 bits of precision.
2528                 //
2529                 if (scale <= POWER10_MAX) {
2530                         // Scaling won't make it larger than 4 uint32_ts
2531                         //
2532                         pwr = power10[scale];
2533                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2534                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2535                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2536                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2537                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2538                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2539                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2540                         if (sdlTmp.u.Hi == 0) {
2541                                 // Result fits in 96 bits.  Use standard aligned add.
2542                                 //
2543                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2544                                 left = &decTmp;
2545                                 goto AlignedAdd;
2546                         }
2547                         num[0] = DECIMAL_LO32(decTmp);
2548                         num[1] = DECIMAL_MID32(decTmp);
2549                         num[2] = sdlTmp.u.Lo;
2550                         num[3] = sdlTmp.u.Hi;
2551                         hi_prod = 3;
2552                 } else {
2553                         // Have to scale by a bunch.  Move the number to a buffer
2554                         // where it has room to grow as it's scaled.
2555                         //
2556                         num[0] = DECIMAL_LO32(*left);
2557                         num[1] = DECIMAL_MID32(*left);
2558                         num[2] = DECIMAL_HI32(*left);
2559                         hi_prod = 2;
2560
2561                         // Scan for zeros in the upper words.
2562                         //
2563                         if (num[2] == 0) {
2564                                 hi_prod = 1;
2565                                 if (num[1] == 0) {
2566                                         hi_prod = 0;
2567                                         if (num[0] == 0) {
2568                                                 // Left arg is zero, return right.
2569                                                 //
2570                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2571                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2572                                                 DECIMAL_SIGN(result) ^= sign;
2573                                                 goto RetDec;
2574                                         }
2575                                 }
2576                         }
2577
2578                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2579                         // with index of highest non-zero uint32_t.
2580                         //
2581                         for (; scale > 0; scale -= POWER10_MAX) {
2582                                 if (scale > POWER10_MAX)
2583                                         pwr = ten_to_nine;
2584                                 else
2585                                         pwr = power10[scale];
2586
2587                                 sdlTmp.u.Hi = 0;
2588                                 for (cur = 0; cur <= hi_prod; cur++) {
2589                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2590                                         num[cur] = sdlTmp.u.Lo;
2591                                 }
2592
2593                                 if (sdlTmp.u.Hi != 0)
2594                                         // We're extending the result by another uint32_t.
2595                                         num[++hi_prod] = sdlTmp.u.Hi;
2596                         }
2597                 }
2598
2599                 // Scaling complete, do the add.  Could be subtract if signs differ.
2600                 //
2601                 sdlTmp.u.Lo = num[0];
2602                 sdlTmp.u.Hi = num[1];
2603
2604                 if (sign) {
2605                         // Signs differ, subtract.
2606                         //
2607                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2608                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2609
2610                         // Propagate carry
2611                         //
2612                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2613                                 DECIMAL_HI32(result)--;
2614                                 if (DECIMAL_HI32(result) >= num[2])
2615                                         goto LongSub;
2616                         } else if (DECIMAL_HI32(result) > num[2]) {
2617                         LongSub:
2618                                 // If num has more than 96 bits of precision, then we need to
2619                                 // carry the subtraction into the higher bits.  If it doesn't,
2620                                 // then we subtracted in the wrong order and have to flip the
2621                                 // sign of the result.
2622                                 //
2623                                 if (hi_prod <= 2)
2624                                         goto SignFlip;
2625
2626                                 cur = 3;
2627                                 while(num[cur++]-- == 0);
2628                                 if (num[hi_prod] == 0)
2629                                         hi_prod--;
2630                         }
2631                 } else {
2632                         // Signs the same, add.
2633                         //
2634                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2635                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2636
2637                         // Propagate carry
2638                         //
2639                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2640                                 DECIMAL_HI32(result)++;
2641                                 if (DECIMAL_HI32(result) <= num[2])
2642                                         goto LongAdd;
2643                         } else if (DECIMAL_HI32(result) < num[2]) {
2644                         LongAdd:
2645                                 // Had a carry above 96 bits.
2646                                 //
2647                                 cur = 3;
2648                                 do {
2649                                         if (hi_prod < cur) {
2650                                                 num[cur] = 1;
2651                                                 hi_prod = cur;
2652                                                 break;
2653                                         }
2654                                 }while (++num[cur++] == 0);
2655                         }
2656                 }
2657
2658                 if (hi_prod > 2) {
2659                         num[0] = DECIMAL_LO32(result);
2660                         num[1] = DECIMAL_MID32(result);
2661                         num[2] = DECIMAL_HI32(result);
2662                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2663                         if (DECIMAL_SCALE(result) == (uint8_t)-1) {
2664                                 mono_set_pending_exception (mono_get_exception_overflow ());
2665                                 return;
2666                         }
2667
2668                         DECIMAL_LO32(result) = num[0];
2669                         DECIMAL_MID32(result) = num[1];
2670                         DECIMAL_HI32(result) = num[2];
2671                 }
2672         }
2673
2674 RetDec:
2675         left = leftOriginal;
2676         COPYDEC(*left, result);
2677         left->reserved = 0;
2678 }
2679
2680 void
2681 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2682 {
2683         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2684         uint32_t pwr, tmp, tmp1;
2685         SPLIT64  sdlTmp, sdlDivisor;
2686         int      scale, cur_scale;
2687         gboolean unscale;
2688
2689         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2690         unscale = FALSE;
2691         divisor[0] = DECIMAL_LO32(*right);
2692         divisor[1] = DECIMAL_MID32(*right);
2693         divisor[2] = DECIMAL_HI32(*right);
2694
2695         if (divisor[1] == 0 && divisor[2] == 0) {
2696                 // Divisor is only 32 bits.  Easy divide.
2697                 //
2698                 if (divisor[0] == 0) {
2699                         mono_set_pending_exception (mono_get_exception_divide_by_zero ());
2700                         return;
2701                 }
2702
2703                 quo[0] = DECIMAL_LO32(*left);
2704                 quo[1] = DECIMAL_MID32(*left);
2705                 quo[2] = DECIMAL_HI32(*left);
2706                 rem[0] = Div96By32(quo, divisor[0]);
2707
2708                 for (;;) {
2709                         if (rem[0] == 0) {
2710                                 if (scale < 0) {
2711                                         cur_scale = min(9, -scale);
2712                                         goto HaveScale;
2713                                 }
2714                                 break;
2715                         }
2716                         // We need to unscale if and only if we have a non-zero remainder
2717                         unscale = TRUE;
2718
2719                         // We have computed a quotient based on the natural scale
2720                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
2721                         // remainder, so now we should increase the scale if possible to
2722                         // include more quotient bits.
2723                         //
2724                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
2725                         // computing more quotient bits as long as the remainder stays
2726                         // non-zero.  If scaling by that much would cause overflow, we'll
2727                         // drop out of the loop and scale by as much as we can.
2728                         //
2729                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2730                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2731                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2732                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
2733                         // is the largest value in quo[1] (when quo[2] == 4) that is
2734                         // assured not to overflow.
2735                         //
2736                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2737                         if (cur_scale == 0) {
2738                                 // No more scaling to be done, but remainder is non-zero.
2739                                 // Round quotient.
2740                                 //
2741                                 tmp = rem[0] << 1;
2742                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2743                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2744                                 RoundUp:
2745                                         if (!Add32To96(quo, 1)) {
2746                                                 if (scale == 0) {
2747                                                         mono_set_pending_exception (mono_get_exception_overflow ());
2748                                                         return;
2749                                                 }
2750                                                 scale--;
2751                                                 OverflowUnscale(quo, TRUE);
2752                                                 break;
2753                                         }
2754                                 }
2755                                 break;
2756                         }
2757
2758                         if (cur_scale < 0) {
2759                                 mono_set_pending_exception (mono_get_exception_overflow ());
2760                                 return;
2761                         }
2762
2763                 HaveScale:
2764                         pwr = power10[cur_scale];
2765                         scale += cur_scale;
2766
2767                         if (IncreaseScale(quo, pwr) != 0) {
2768                                 mono_set_pending_exception (mono_get_exception_overflow ());
2769                                 return;
2770                         }
2771
2772                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2773                         rem[0] = sdlTmp.u.Hi;
2774
2775                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2776                                 if (scale == 0) {
2777                                         mono_set_pending_exception (mono_get_exception_overflow ());
2778                                         return;
2779                                 }
2780                                 scale--;
2781                                 OverflowUnscale(quo, (rem[0] != 0));
2782                                 break;
2783                         }
2784                 } // for (;;)
2785         } else {
2786                 // Divisor has bits set in the upper 64 bits.
2787                 //
2788                 // Divisor must be fully normalized (shifted so bit 31 of the most
2789                 // significant uint32_t is 1).  Locate the MSB so we know how much to
2790                 // normalize by.  The dividend will be shifted by the same amount so
2791                 // the quotient is not changed.
2792                 //
2793                 if (divisor[2] == 0)
2794                         tmp = divisor[1];
2795                 else
2796                         tmp = divisor[2];
2797
2798                 cur_scale = 0;
2799                 if (!(tmp & 0xFFFF0000)) {
2800                         cur_scale += 16;
2801                         tmp <<= 16;
2802                 }
2803                 if (!(tmp & 0xFF000000)) {
2804                         cur_scale += 8;
2805                         tmp <<= 8;
2806                 }
2807                 if (!(tmp & 0xF0000000)) {
2808                         cur_scale += 4;
2809                         tmp <<= 4;
2810                 }
2811                 if (!(tmp & 0xC0000000)) {
2812                         cur_scale += 2;
2813                         tmp <<= 2;
2814                 }
2815                 if (!(tmp & 0x80000000)) {
2816                         cur_scale++;
2817                         tmp <<= 1;
2818                 }
2819
2820                 // Shift both dividend and divisor left by cur_scale.
2821                 //
2822                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2823                 rem[0] = sdlTmp.u.Lo;
2824                 rem[1] = sdlTmp.u.Hi;
2825                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2826                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2827                 sdlTmp.int64 <<= cur_scale;
2828                 rem[2] = sdlTmp.u.Hi;
2829                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2830
2831                 sdlDivisor.u.Lo = divisor[0];
2832                 sdlDivisor.u.Hi = divisor[1];
2833                 sdlDivisor.int64 <<= cur_scale;
2834
2835                 if (divisor[2] == 0) {
2836                         // Have a 64-bit divisor in sdlDivisor.  The remainder
2837                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2838                         //
2839                         sdlTmp.u.Lo = rem[2];
2840                         sdlTmp.u.Hi = rem[3];
2841
2842                         quo[2] = 0;
2843                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2844                         quo[0] = Div96By64(rem, sdlDivisor);
2845
2846                         for (;;) {
2847                                 if ((rem[0] | rem[1]) == 0) {
2848                                         if (scale < 0) {
2849                                                 cur_scale = min(9, -scale);
2850                                                 goto HaveScale64;
2851                                         }
2852                                         break;
2853                                 }
2854
2855                                 // We need to unscale if and only if we have a non-zero remainder
2856                                 unscale = TRUE;
2857
2858                                 // Remainder is non-zero.  Scale up quotient and remainder by
2859                                 // powers of 10 so we can compute more significant bits.
2860                                 //
2861                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2862                                 if (cur_scale == 0) {
2863                                         // No more scaling to be done, but remainder is non-zero.
2864                                         // Round quotient.
2865                                         //
2866                                         sdlTmp.u.Lo = rem[0];
2867                                         sdlTmp.u.Hi = rem[1];
2868                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2869                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2870                                                 goto RoundUp;
2871                                         break;
2872                                 }
2873
2874                                 if (cur_scale < 0) {
2875                                         mono_set_pending_exception (mono_get_exception_overflow ());
2876                                         return;
2877                                 }
2878
2879                         HaveScale64:
2880                                 pwr = power10[cur_scale];
2881                                 scale += cur_scale;
2882
2883                                 if (IncreaseScale(quo, pwr) != 0) {
2884                                         mono_set_pending_exception (mono_get_exception_overflow ());
2885                                         return;
2886                                 }
2887
2888                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2889                                 IncreaseScale(rem, pwr);
2890                                 tmp = Div96By64(rem, sdlDivisor);
2891                                 if (!Add32To96(quo, tmp)) {
2892                                         if (scale == 0) {
2893                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2894                                                 return;
2895                                         }
2896                                         scale--;
2897                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2898                                         break;
2899                                 }
2900
2901                         } // for (;;)
2902                 } else {
2903                         // Have a 96-bit divisor in divisor[].
2904                         //
2905                         // Start by finishing the shift left by cur_scale.
2906                         //
2907                         sdlTmp.u.Lo = divisor[1];
2908                         sdlTmp.u.Hi = divisor[2];
2909                         sdlTmp.int64 <<= cur_scale;
2910                         divisor[0] = sdlDivisor.u.Lo;
2911                         divisor[1] = sdlDivisor.u.Hi;
2912                         divisor[2] = sdlTmp.u.Hi;
2913
2914                         // The remainder (currently 96 bits spread over 4 uint32_ts)
2915                         // will be < divisor.
2916                         //
2917                         quo[2] = 0;
2918                         quo[1] = 0;
2919                         quo[0] = Div128By96(rem, divisor);
2920
2921                         for (;;) {
2922                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2923                                         if (scale < 0) {
2924                                                 cur_scale = min(9, -scale);
2925                                                 goto HaveScale96;
2926                                         }
2927                                         break;
2928                                 }
2929
2930                                 // We need to unscale if and only if we have a non-zero remainder
2931                                 unscale = TRUE;
2932
2933                                 // Remainder is non-zero.  Scale up quotient and remainder by
2934                                 // powers of 10 so we can compute more significant bits.
2935                                 //
2936                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2937                                 if (cur_scale == 0) {
2938                                         // No more scaling to be done, but remainder is non-zero.
2939                                         // Round quotient.
2940                                         //
2941                                         if (rem[2] >= 0x80000000)
2942                                                 goto RoundUp;
2943
2944                                         tmp = rem[0] > 0x80000000;
2945                                         tmp1 = rem[1] > 0x80000000;
2946                                         rem[0] <<= 1;
2947                                         rem[1] = (rem[1] << 1) + tmp;
2948                                         rem[2] = (rem[2] << 1) + tmp1;
2949
2950                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2951                                                 goto RoundUp;
2952                                         break;
2953                                 }
2954
2955                                 if (cur_scale < 0) {
2956                                         mono_set_pending_exception (mono_get_exception_overflow ());
2957                                         return;
2958                                 }
2959
2960                         HaveScale96:
2961                                 pwr = power10[cur_scale];
2962                                 scale += cur_scale;
2963
2964                                 if (IncreaseScale(quo, pwr) != 0) {
2965                                         mono_set_pending_exception (mono_get_exception_overflow ());
2966                                         return;
2967                                 }
2968
2969                                 rem[3] = IncreaseScale(rem, pwr);
2970                                 tmp = Div128By96(rem, divisor);
2971                                 if (!Add32To96(quo, tmp)) {
2972                                         if (scale == 0) {
2973                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2974                                                 return;
2975                                         }
2976
2977                                         scale--;
2978                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2979                                         break;
2980                                 }
2981
2982                         } // for (;;)
2983                 }
2984         }
2985
2986         // We need to unscale if and only if we have a non-zero remainder
2987         if (unscale) {
2988                 // Try extracting any extra powers of 10 we may have
2989                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2990                 // If a division by one of these powers returns a zero remainder, then
2991                 // we keep the quotient.  If the remainder is not zero, then we restore
2992                 // the previous value.
2993                 //
2994                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2995                 // we can extract.  We use this as a quick test on whether to try a
2996                 // given power.
2997                 //
2998                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2999                         quo_save[0] = quo[0];
3000                         quo_save[1] = quo[1];
3001                         quo_save[2] = quo[2];
3002
3003                         if (Div96By32(quo_save, 100000000) == 0) {
3004                                 quo[0] = quo_save[0];
3005                                 quo[1] = quo_save[1];
3006                                 quo[2] = quo_save[2];
3007                                 scale -= 8;
3008                         } else
3009                                 break;
3010                 }
3011
3012                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
3013                         quo_save[0] = quo[0];
3014                         quo_save[1] = quo[1];
3015                         quo_save[2] = quo[2];
3016
3017                         if (Div96By32(quo_save, 10000) == 0) {
3018                                 quo[0] = quo_save[0];
3019                                 quo[1] = quo_save[1];
3020                                 quo[2] = quo_save[2];
3021                                 scale -= 4;
3022                         }
3023                 }
3024
3025                 if ((quo[0] & 3) == 0 && scale >= 2) {
3026                         quo_save[0] = quo[0];
3027                         quo_save[1] = quo[1];
3028                         quo_save[2] = quo[2];
3029
3030                         if (Div96By32(quo_save, 100) == 0) {
3031                                 quo[0] = quo_save[0];
3032                                 quo[1] = quo_save[1];
3033                                 quo[2] = quo_save[2];
3034                                 scale -= 2;
3035                         }
3036                 }
3037
3038                 if ((quo[0] & 1) == 0 && scale >= 1) {
3039                         quo_save[0] = quo[0];
3040                         quo_save[1] = quo[1];
3041                         quo_save[2] = quo[2];
3042
3043                         if (Div96By32(quo_save, 10) == 0) {
3044                                 quo[0] = quo_save[0];
3045                                 quo[1] = quo_save[1];
3046                                 quo[2] = quo_save[2];
3047                                 scale -= 1;
3048                         }
3049                 }
3050         }
3051
3052         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3053         DECIMAL_HI32(*left) = quo[2];
3054         DECIMAL_MID32(*left) = quo[1];
3055         DECIMAL_LO32(*left) = quo[0];
3056         DECIMAL_SCALE(*left) = (uint8_t)scale;
3057         left->reserved = 0;
3058
3059 }
3060
3061 #define DECIMAL_PRECISION 29
3062 #define NUMBER_MAXDIGITS 50
3063 typedef struct  {
3064         int32_t precision;
3065         int32_t scale;
3066         int32_t sign;
3067         uint16_t digits[NUMBER_MAXDIGITS + 1];
3068         uint16_t* allDigits;
3069 } CLRNumber;
3070
3071 int
3072 mono_decimal_from_number (void *from, MonoDecimal *target)
3073 {
3074         CLRNumber *number = (CLRNumber *) from;
3075         uint16_t* p = number->digits;
3076         MonoDecimal d;
3077         int e = number->scale;
3078         g_assert(number != NULL);
3079         g_assert(target != NULL);
3080
3081         d.reserved = 0;
3082         DECIMAL_SIGNSCALE(d) = 0;
3083         DECIMAL_HI32(d) = 0;
3084         DECIMAL_LO32(d) = 0;
3085         DECIMAL_MID32(d) = 0;
3086         g_assert(p != NULL);
3087         if (!*p) {
3088                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3089                 // the scale to 0 if the scale was previously positive
3090                 if (e > 0) {
3091                         e = 0;
3092                 }
3093         } else {
3094                 if (e > DECIMAL_PRECISION) return 0;
3095                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3096                         DecMul10(&d);
3097                         if (*p)
3098                                 DecAddInt32(&d, *p++ - '0');
3099                         e--;
3100                 }
3101                 if (*p++ >= '5') {
3102                         gboolean round = TRUE;
3103                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3104                                 // For digits > 5 we will be roundinp up anyway.
3105                                 int count = 20; // Look at the next 20 digits to check to round
3106                                 while (*p == '0' && count != 0) {
3107                                         p++;
3108                                         count--;
3109                                 }
3110                                 if (*p == '\0' || count == 0)
3111                                         round = FALSE;// Do nothing
3112                         }
3113
3114                         if (round) {
3115                                 DecAddInt32(&d, 1);
3116                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3117                                         DECIMAL_HI32(d) = 0x19999999;
3118                                         DECIMAL_MID32(d) = 0x99999999;
3119                                         DECIMAL_LO32(d) = 0x9999999A;
3120                                         e++;
3121                                 }
3122                         }
3123                 }
3124         }
3125         if (e > 0)
3126                 return 0;
3127         if (e <= -DECIMAL_PRECISION) {
3128                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3129                 // This should only happen for actual zeros or very small numbers that round to zero.
3130                 DECIMAL_SIGNSCALE(d) = 0;
3131                 DECIMAL_HI32(d) = 0;
3132                 DECIMAL_LO32(d) = 0;
3133                 DECIMAL_MID32(d) = 0;
3134                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3135         } else {
3136                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3137         }
3138
3139         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3140         *target = d;
3141         return 1;
3142 }
3143
3144
3145 #endif