mono/metadata/decimal-ms.c

   1 //
   2 // Copyright (c) Microsoft. All rights reserved.
   3 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
   4 //
   5 // Copyright 2015 Xamarin Inc
   6 //
   7 // File: decimal.c
   8 //
   9 // Ported from C++ to C and adjusted to Mono runtime
  10 //
  11 // Pending:
  12 //   DoToCurrency (they look like new methods we do not have)
  13 //
  14 #ifndef DISABLE_DECIMAL
  15 #include "config.h"
  16 #include <stdint.h>
  17 #include <glib.h>
  18 #include <mono/utils/mono-compiler.h>
  19 #include <mono/metadata/exception.h>
  20 #include <mono/metadata/object-internals.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <math.h>
  25 #ifdef HAVE_MEMORY_H
  26 #include <memory.h>
  27 #endif
  28 #ifdef _MSC_VER
  29 #include <intrin.h>
  30 #endif
  31 #include "decimal-ms.h"
  32
  33 #define min(a, b) (((a) < (b)) ? (a) : (b))
  34
  35 typedef enum {
  36         MONO_DECIMAL_OK,
  37         MONO_DECIMAL_OVERFLOW,
  38         MONO_DECIMAL_INVALID_ARGUMENT,
  39         MONO_DECIMAL_DIVBYZERO,
  40         MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
  41 } MonoDecimalStatus;
  42
  43 #ifndef FC_GC_POLL
  44 #   define FC_GC_POLL()
  45 #endif
  46
  47 static const uint32_t ten_to_nine    = 1000000000U;
  48 static const uint32_t ten_to_ten_div_4 = 2500000000U;
  49 #define POWER10_MAX     9
  50 #define DECIMAL_NEG ((uint8_t)0x80)
  51 #define DECMAX 28
  52 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
  53 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
  54 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
  55 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
  56 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
  57 #define DECIMAL_HI32(dec)        ((dec).Hi32)
  58 #define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
  59 #define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
  60
  61 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
  62 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
  63     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
  64
  65 #define DEC_SCALE_MAX   28
  66 #define POWER10_MAX     9
  67
  68 #define OVFL_MAX_9_HI   4
  69 #define OVFL_MAX_9_MID  1266874889
  70 #define OVFL_MAX_9_LO   3047500985u
  71
  72 #define OVFL_MAX_5_HI   42949
  73 #define OVFL_MAX_5_MID  2890341191
  74
  75 #define OVFL_MAX_1_HI   429496729
  76
  77 typedef union {
  78         uint64_t int64;
  79         struct {
  80 #if BYTE_ORDER == G_BIG_ENDIAN
  81         uint32_t Hi;
  82         uint32_t Lo;
  83 #else
  84         uint32_t Lo;
  85         uint32_t Hi;
  86 #endif
  87     } u;
  88 } SPLIT64;
  89
  90 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
  91 // Double Bias
  92 #define DBLBIAS 1022
  93
  94 // Structure to access an encoded double floating point
  95 typedef union{
  96     struct {
  97 #if BYTE_ORDER == G_BIG_ENDIAN
  98       unsigned int sign:1;
  99       unsigned int exp:11;
 100       unsigned int mantHi:20;
 101       unsigned int mantLo;
 102 #else // BIGENDIAN
 103       unsigned int mantLo;
 104       unsigned int mantHi:20;
 105       unsigned int exp:11;
 106       unsigned int sign:1;
 107 #endif
 108     } u;
 109     double dbl;
 110 } DoubleStructure;
 111
 112 #if BYTE_ORDER == G_BIG_ENDIAN
 113 #define DEFDS(Lo, Hi, exp, sign) { {sign, exp, Hi, Lo } }
 114 #else
 115 #define DEFDS(Lo, Hi, exp, sign) { {Lo, Hi, exp, sign} }
 116 #endif
 117
 118 const DoubleStructure ds2to64 = DEFDS(0, 0, DBLBIAS + 65, 0);
 119
 120 // Single floating point Bias
 121 #define SNGBIAS 126
 122
 123 // Structure to access an encoded single floating point
 124 typedef struct {
 125 #if BYTE_ORDER == G_BIG_ENDIAN
 126     unsigned int sign:1;
 127     unsigned int exp:8;
 128     unsigned int mant:23;
 129 #else
 130     unsigned int mant:23;
 131     unsigned int exp:8;
 132     unsigned int sign:1;
 133 #endif
 134 } SingleStructure;
 135
 136 //
 137 // Data tables
 138 //
 139
 140 static const uint32_t power10 [POWER10_MAX+1] = {
 141         1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
 142 };
 143
 144
 145 static const double double_power10[] = {
 146         1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
 147         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
 148         1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
 149         1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
 150         1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
 151         1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
 152         1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
 153         1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
 154         1e80 };
 155
 156 const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
 157                                 {100000000000ULL},         // 1E11
 158                                 {1000000000000ULL},        // 1E12
 159                                 {10000000000000ULL},       // 1E13
 160                                 {100000000000000ULL} };    // 1E14
 161
 162 static const uint64_t long_power10[] = {
 163         1,
 164         10ULL,
 165         100ULL,
 166         1000ULL,
 167         10000ULL,
 168         100000ULL,
 169         1000000ULL,
 170         10000000ULL,
 171         100000000ULL,
 172         1000000000ULL,
 173         10000000000ULL,
 174         100000000000ULL,
 175         1000000000000ULL,
 176         10000000000000ULL,
 177         100000000000000ULL,
 178         1000000000000000ULL,
 179         10000000000000000ULL,
 180         100000000000000000ULL,
 181         1000000000000000000ULL,
 182         10000000000000000000ULL};
 183
 184 typedef struct  {
 185         uint32_t Hi, Mid, Lo;
 186 } DECOVFL;
 187
 188 const DECOVFL power_overflow[] = {
 189 // This is a table of the largest values that can be in the upper two
 190 // ULONGs of a 96-bit number that will not overflow when multiplied
 191 // by a given power.  For the upper word, this is a table of
 192 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
 193 // remaining fraction part * 2^32.  2^32 = 4294967296.
 194 //
 195     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
 196     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
 197     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
 198     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
 199     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
 200     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
 201     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
 202     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
 203     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
 204 };
 205
 206
 207 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
 208 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
 209 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
 210
 211 static double
 212 fnDblPower10(int ix)
 213 {
 214     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
 215     g_assert(ix >= 0);
 216     if (ix < maxIx)
 217         return double_power10[ix];
 218     return pow(10.0, ix);
 219 } // double fnDblPower10()
 220
 221
 222 static inline int64_t
 223 DivMod32by32(int32_t num, int32_t den)
 224 {
 225     SPLIT64  sdl;
 226
 227     sdl.u.Lo = num / den;
 228     sdl.u.Hi = num % den;
 229     return sdl.int64;
 230 }
 231
 232 static inline int64_t
 233 DivMod64by32(int64_t num, int32_t den)
 234 {
 235     SPLIT64  sdl;
 236
 237     sdl.u.Lo = Div64by32(num, den);
 238     sdl.u.Hi = Mod64by32(num, den);
 239     return sdl.int64;
 240 }
 241
 242 static uint64_t
 243 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
 244 {
 245         SPLIT64  tmp1;
 246         SPLIT64  tmp2;
 247         SPLIT64  tmp3;
 248
 249         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
 250         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
 251         tmp1.u.Hi += tmp2.u.Lo;
 252         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 253                 tmp2.u.Hi++;
 254         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
 255         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
 256         tmp1.u.Hi += tmp2.u.Lo;
 257         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 258                 tmp2.u.Hi++;
 259         tmp3.int64 += (uint64_t)tmp2.u.Hi;
 260
 261         *hi = tmp3.int64;
 262         return tmp1.int64;
 263 }
 264
 265 /**
 266 * FullDiv64By32:
 267 *
 268 * Entry:
 269 *   pdlNum  - Pointer to 64-bit dividend
 270 *   ulDen   - 32-bit divisor
 271 *
 272 * Purpose:
 273 *   Do full divide, yielding 64-bit result and 32-bit remainder.
 274 *
 275 * Exit:
 276 *   Quotient overwrites dividend.
 277 *   Returns remainder.
 278 *
 279 * Exceptions:
 280 *   None.
 281 */
 282 // Was: FullDiv64By32
 283 static uint32_t
 284 FullDiv64By32 (uint64_t *num, uint32_t den)
 285 {
 286         SPLIT64  tmp;
 287         SPLIT64  res;
 288
 289         tmp.int64 = *num;
 290         res.u.Hi = 0;
 291
 292         if (tmp.u.Hi >= den) {
 293                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
 294                 //
 295                 res.u.Lo = tmp.u.Hi;
 296                 res.int64 = DivMod64by32(res.int64, den);
 297                 tmp.u.Hi = res.u.Hi;
 298                 res.u.Hi = res.u.Lo;
 299         }
 300
 301         tmp.int64 = DivMod64by32(tmp.int64, den);
 302         res.u.Lo = tmp.u.Lo;
 303         *num = res.int64;
 304         return tmp.u.Hi;
 305 }
 306
 307 /***
 308  * SearchScale
 309  *
 310  * Entry:
 311  *   res_hi - Top uint32_t of quotient
 312  *   res_mid - Middle uint32_t of quotient
 313  *   res_lo - Bottom uint32_t of quotient
 314  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 315  *
 316  * Purpose:
 317  *   Determine the max power of 10, <= 9, that the quotient can be scaled
 318  *   up by and still fit in 96 bits.
 319  *
 320  * Exit:
 321  *   Returns power of 10 to scale by, -1 if overflow error.
 322  *
 323  ***********************************************************************/
 324
 325 static int
 326 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
 327 {
 328         int   cur_scale;
 329
 330         // Quick check to stop us from trying to scale any more.
 331         //
 332         if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
 333                 cur_scale = 0;
 334                 goto HaveScale;
 335         }
 336
 337         if (scale > DEC_SCALE_MAX - 9) {
 338                 // We can't scale by 10^9 without exceeding the max scale factor.
 339                 // See if we can scale to the max.  If not, we'll fall into
 340                 // standard search for scale factor.
 341                 //
 342                 cur_scale = DEC_SCALE_MAX - scale;
 343                 if (res_hi < power_overflow[cur_scale - 1].Hi)
 344                         goto HaveScale;
 345
 346                 if (res_hi == power_overflow[cur_scale - 1].Hi) {
 347                 UpperEq:
 348                         if (res_mid > power_overflow[cur_scale - 1].Mid ||
 349                             (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
 350                                 cur_scale--;
 351                         }
 352                         goto HaveScale;
 353                 }
 354         } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
 355                 return 9;
 356
 357         // Search for a power to scale by < 9.  Do a binary search
 358         // on power_overflow[].
 359         //
 360         cur_scale = 5;
 361         if (res_hi < OVFL_MAX_5_HI)
 362                 cur_scale = 7;
 363         else if (res_hi > OVFL_MAX_5_HI)
 364                 cur_scale = 3;
 365         else
 366                 goto UpperEq;
 367
 368         // cur_scale is 3 or 7.
 369         //
 370         if (res_hi < power_overflow[cur_scale - 1].Hi)
 371                 cur_scale++;
 372         else if (res_hi > power_overflow[cur_scale - 1].Hi)
 373                 cur_scale--;
 374         else
 375                 goto UpperEq;
 376
 377         // cur_scale is 2, 4, 6, or 8.
 378         //
 379         // In all cases, we already found we could not use the power one larger.
 380         // So if we can use this power, it is the biggest, and we're done.  If
 381         // we can't use this power, the one below it is correct for all cases
 382         // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
 383         //
 384         if (res_hi > power_overflow[cur_scale - 1].Hi)
 385                 cur_scale--;
 386
 387         if (res_hi == power_overflow[cur_scale - 1].Hi)
 388                 goto UpperEq;
 389
 390 HaveScale:
 391         // cur_scale = largest power of 10 we can scale by without overflow,
 392         // cur_scale < 9.  See if this is enough to make scale factor
 393         // positive if it isn't already.
 394         //
 395         if (cur_scale + scale < 0)
 396                 cur_scale = -1;
 397
 398         return cur_scale;
 399 }
 400
 401
 402 /**
 403 * Div96By32
 404 *
 405 * Entry:
 406 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
 407 *   ulDen   - 32-bit divisor.
 408 *
 409 * Purpose:
 410 *   Do full divide, yielding 96-bit result and 32-bit remainder.
 411 *
 412 * Exit:
 413 *   Quotient overwrites dividend.
 414 *   Returns remainder.
 415 *
 416 * Exceptions:
 417 *   None.
 418 *
 419 */
 420 static uint32_t
 421 Div96By32(uint32_t *num, uint32_t den)
 422 {
 423         SPLIT64  tmp;
 424
 425         tmp.u.Hi = 0;
 426
 427         if (num[2] != 0)
 428                 goto Div3Word;
 429
 430         if (num[1] >= den)
 431                 goto Div2Word;
 432
 433         tmp.u.Hi = num[1];
 434         num[1] = 0;
 435         goto Div1Word;
 436
 437 Div3Word:
 438         tmp.u.Lo = num[2];
 439         tmp.int64 = DivMod64by32(tmp.int64, den);
 440         num[2] = tmp.u.Lo;
 441 Div2Word:
 442         tmp.u.Lo = num[1];
 443         tmp.int64 = DivMod64by32(tmp.int64, den);
 444         num[1] = tmp.u.Lo;
 445 Div1Word:
 446         tmp.u.Lo = num[0];
 447         tmp.int64 = DivMod64by32(tmp.int64, den);
 448         num[0] = tmp.u.Lo;
 449         return tmp.u.Hi;
 450 }
 451
 452 /***
 453  * DecFixInt
 454  *
 455  * Entry:
 456  *   pdecRes - Pointer to Decimal result location
 457  *   operand  - Pointer to Decimal operand
 458  *
 459  * Purpose:
 460  *   Chop the value to integer.  Return remainder so Int() function
 461  *   can round down if non-zero.
 462  *
 463  * Exit:
 464  *   Returns remainder.
 465  *
 466  * Exceptions:
 467  *   None.
 468  *
 469  ***********************************************************************/
 470
 471 static uint32_t
 472 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
 473 {
 474         uint32_t   num[3];
 475         uint32_t   rem;
 476         uint32_t   pwr;
 477         int     scale;
 478
 479         if (operand->u.u.scale > 0) {
 480                 num[0] = operand->v.v.Lo32;
 481                 num[1] = operand->v.v.Mid32;
 482                 num[2] = operand->Hi32;
 483                 scale = operand->u.u.scale;
 484                 result->u.u.sign = operand->u.u.sign;
 485                 rem = 0;
 486
 487                 do {
 488                         if (scale > POWER10_MAX)
 489                                 pwr = ten_to_nine;
 490                         else
 491                                 pwr = power10[scale];
 492
 493                         rem |= Div96By32(num, pwr);
 494                         scale -= 9;
 495                 }while (scale > 0);
 496
 497                 result->v.v.Lo32 = num[0];
 498                 result->v.v.Mid32 = num[1];
 499                 result->Hi32 = num[2];
 500                 result->u.u.scale = 0;
 501
 502                 return rem;
 503         }
 504
 505         COPYDEC(*result, *operand);
 506         // Odd, the Microsoft code does not set result->reserved to zero on this case
 507         return 0;
 508 }
 509
 510 /**
 511  * ScaleResult:
 512  *
 513  * Entry:
 514  *   res - Array of uint32_ts with value, least-significant first.
 515  *   hi_res  - Index of last non-zero value in res.
 516  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 517  *
 518  * Purpose:
 519  *   See if we need to scale the result to fit it in 96 bits.
 520  *   Perform needed scaling.  Adjust scale factor accordingly.
 521  *
 522  * Exit:
 523  *   res updated in place, always 3 uint32_ts.
 524  *   New scale factor returned, -1 if overflow error.
 525  *
 526  */
 527 static int
 528 ScaleResult(uint32_t *res, int hi_res, int scale)
 529 {
 530         int     new_scale;
 531         int     cur;
 532         uint32_t   pwr;
 533         uint32_t   tmp;
 534         uint32_t   sticky;
 535         SPLIT64 sdlTmp;
 536
 537         // See if we need to scale the result.  The combined scale must
 538         // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
 539         //
 540         // Start by figuring a lower bound on the scaling needed to make
 541         // the upper 96 bits zero.  hi_res is the index into res[]
 542         // of the highest non-zero uint32_t.
 543         //
 544         new_scale =   hi_res * 32 - 64 - 1;
 545         if (new_scale > 0) {
 546
 547                 // Find the MSB.
 548                 //
 549                 tmp = res[hi_res];
 550                 if (!(tmp & 0xFFFF0000)) {
 551                         new_scale -= 16;
 552                         tmp <<= 16;
 553                 }
 554                 if (!(tmp & 0xFF000000)) {
 555                         new_scale -= 8;
 556                         tmp <<= 8;
 557                 }
 558                 if (!(tmp & 0xF0000000)) {
 559                         new_scale -= 4;
 560                         tmp <<= 4;
 561                 }
 562                 if (!(tmp & 0xC0000000)) {
 563                         new_scale -= 2;
 564                         tmp <<= 2;
 565                 }
 566                 if (!(tmp & 0x80000000)) {
 567                         new_scale--;
 568                         tmp <<= 1;
 569                 }
 570
 571                 // Multiply bit position by log10(2) to figure it's power of 10.
 572                 // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
 573                 // with a multiply saves a 96-byte lookup table.  The power returned
 574                 // is <= the power of the number, so we must add one power of 10
 575                 // to make it's integer part zero after dividing by 256.
 576                 //
 577                 // Note: the result of this multiplication by an approximation of
 578                 // log10(2) have been exhaustively checked to verify it gives the
 579                 // correct result.  (There were only 95 to check...)
 580                 //
 581                 new_scale = ((new_scale * 77) >> 8) + 1;
 582
 583                 // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
 584                 // This reduces the scale factor of the result.  If it exceeds the
 585                 // current scale of the result, we'll overflow.
 586                 //
 587                 if (new_scale > scale)
 588                         return -1;
 589         }
 590         else
 591                 new_scale = 0;
 592
 593         // Make sure we scale by enough to bring the current scale factor
 594         // into valid range.
 595         //
 596         if (new_scale < scale - DEC_SCALE_MAX)
 597                 new_scale = scale - DEC_SCALE_MAX;
 598
 599         if (new_scale != 0) {
 600                 // Scale by the power of 10 given by new_scale.  Note that this is
 601                 // NOT guaranteed to bring the number within 96 bits -- it could
 602                 // be 1 power of 10 short.
 603                 //
 604                 scale -= new_scale;
 605                 sticky = 0;
 606                 sdlTmp.u.Hi = 0; // initialize remainder
 607
 608                 for (;;) {
 609
 610                         sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
 611
 612                         if (new_scale > POWER10_MAX)
 613                                 pwr = ten_to_nine;
 614                         else
 615                                 pwr = power10[new_scale];
 616
 617                         // Compute first quotient.
 618                         // DivMod64by32 returns quotient in Lo, remainder in Hi.
 619                         //
 620                         sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
 621                         res[hi_res] = sdlTmp.u.Lo;
 622                         cur = hi_res - 1;
 623
 624                         if (cur >= 0) {
 625                                 // If first quotient was 0, update hi_res.
 626                                 //
 627                                 if (sdlTmp.u.Lo == 0)
 628                                         hi_res--;
 629
 630                                 // Compute subsequent quotients.
 631                                 //
 632                                 do {
 633                                         sdlTmp.u.Lo = res[cur];
 634                                         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
 635                                         res[cur] = sdlTmp.u.Lo;
 636                                         cur--;
 637                                 } while (cur >= 0);
 638
 639                         }
 640
 641                         new_scale -= POWER10_MAX;
 642                         if (new_scale > 0)
 643                                 continue; // scale some more
 644
 645                         // If we scaled enough, hi_res would be 2 or less.  If not,
 646                         // divide by 10 more.
 647                         //
 648                         if (hi_res > 2) {
 649                                 new_scale = 1;
 650                                 scale--;
 651                                 continue; // scale by 10
 652                         }
 653
 654                         // Round final result.  See if remainder >= 1/2 of divisor.
 655                         // If remainder == 1/2 divisor, round up if odd or sticky bit set.
 656                         //
 657                         pwr >>= 1;  // power of 10 always even
 658                         if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
 659                                                     ((res[0] & 1) | sticky)) ) {
 660                                 cur = -1;
 661                                 while (++res[++cur] == 0);
 662
 663                                 if (cur > 2) {
 664                                         // The rounding caused us to carry beyond 96 bits.
 665                                         // Scale by 10 more.
 666                                         //
 667                                         hi_res = cur;
 668                                         sticky = 0;  // no sticky bit
 669                                         sdlTmp.u.Hi = 0; // or remainder
 670                                         new_scale = 1;
 671                                         scale--;
 672                                         continue; // scale by 10
 673                                 }
 674                         }
 675
 676                         // We may have scaled it more than we planned.  Make sure the scale
 677                         // factor hasn't gone negative, indicating overflow.
 678                         //
 679                         if (scale < 0)
 680                                 return -1;
 681
 682                         return scale;
 683                 } // for(;;)
 684         }
 685         return scale;
 686 }
 687
 688 // Decimal multiply
 689 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
 690 static MonoDecimalStatus
 691 MONO_VarDecMul(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
 692 {
 693         SPLIT64 tmp;
 694         SPLIT64 tmp2;
 695         SPLIT64 tmp3;
 696         int     scale;
 697         int     hi_prod;
 698         uint32_t   pwr;
 699         uint32_t   rem_lo;
 700         uint32_t   rem_hi;
 701         uint32_t   prod[6];
 702
 703         scale = left->u.u.scale + right->u.u.scale;
 704
 705         if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
 706                 // Upper 64 bits are zero.
 707                 //
 708                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 709                 if (scale > DEC_SCALE_MAX)
 710                 {
 711                         // Result scale is too big.  Divide result by power of 10 to reduce it.
 712                         // If the amount to divide by is > 19 the result is guaranteed
 713                         // less than 1/2.  [max value in 64 bits = 1.84E19]
 714                         //
 715                         scale -= DEC_SCALE_MAX;
 716                         if (scale > 19) {
 717                         ReturnZero:
 718                                 DECIMAL_SETZERO(*result);
 719                                 return MONO_DECIMAL_OK;
 720                         }
 721
 722                         if (scale > POWER10_MAX) {
 723                                 // Divide by 1E10 first, to get the power down to a 32-bit quantity.
 724                                 // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
 725                                 // then multiply the next divisor by 4 (which will be a max of 4E9).
 726                                 //
 727                                 rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
 728                                 pwr = power10[scale - 10] << 2;
 729                         } else {
 730                                 pwr = power10[scale];
 731                                 rem_lo = 0;
 732                         }
 733
 734                         // Power to divide by fits in 32 bits.
 735                         //
 736                         rem_hi = FullDiv64By32(&tmp.int64, pwr);
 737
 738                         // Round result.  See if remainder >= 1/2 of divisor.
 739                         // Divisor is a power of 10, so it is always even.
 740                         //
 741                         pwr >>= 1;
 742                         if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
 743                                 tmp.int64++;
 744
 745                         scale = DEC_SCALE_MAX;
 746                 }
 747                 DECIMAL_LO32(*result) = tmp.u.Lo;
 748                 DECIMAL_MID32(*result) = tmp.u.Hi;
 749                 DECIMAL_HI32(*result) = 0;
 750         } else {
 751                 // At least one operand has bits set in the upper 64 bits.
 752                 //
 753                 // Compute and accumulate the 9 partial products into a
 754                 // 192-bit (24-byte) result.
 755                 //
 756                 //                [l-h][l-m][l-l]   left high, middle, low
 757                 //             x  [r-h][r-m][r-l]   right high, middle, low
 758                 // ------------------------------
 759                 //
 760                 //                     [0-h][0-l]   l-l * r-l
 761                 //                [1ah][1al]        l-l * r-m
 762                 //                [1bh][1bl]        l-m * r-l
 763                 //           [2ah][2al]             l-m * r-m
 764                 //           [2bh][2bl]             l-l * r-h
 765                 //           [2ch][2cl]             l-h * r-l
 766                 //      [3ah][3al]                  l-m * r-h
 767                 //      [3bh][3bl]                  l-h * r-m
 768                 // [4-h][4-l]                       l-h * r-h
 769                 // ------------------------------
 770                 // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
 771                 //
 772                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 773                 prod[0] = tmp.u.Lo;
 774
 775                 tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
 776
 777                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
 778                 tmp.int64 += tmp2.int64; // this could generate carry
 779                 prod[1] = tmp.u.Lo;
 780                 if (tmp.int64 < tmp2.int64) // detect carry
 781                         tmp2.u.Hi = 1;
 782                 else
 783                         tmp2.u.Hi = 0;
 784                 tmp2.u.Lo = tmp.u.Hi;
 785
 786                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
 787
 788                 if (left->Hi32 | right->Hi32) {
 789                         // Highest 32 bits is non-zero.  Calculate 5 more partial products.
 790                         //
 791                         tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
 792                         tmp.int64 += tmp2.int64; // this could generate carry
 793                         if (tmp.int64 < tmp2.int64) // detect carry
 794                                 tmp3.u.Hi = 1;
 795                         else
 796                                 tmp3.u.Hi = 0;
 797
 798                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
 799                         tmp.int64 += tmp2.int64; // this could generate carry
 800                         prod[2] = tmp.u.Lo;
 801                         if (tmp.int64 < tmp2.int64) // detect carry
 802                                 tmp3.u.Hi++;
 803                         tmp3.u.Lo = tmp.u.Hi;
 804
 805                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
 806                         tmp.int64 += tmp3.int64; // this could generate carry
 807                         if (tmp.int64 < tmp3.int64) // detect carry
 808                                 tmp3.u.Hi = 1;
 809                         else
 810                                 tmp3.u.Hi = 0;
 811
 812                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
 813                         tmp.int64 += tmp2.int64; // this could generate carry
 814                         prod[3] = tmp.u.Lo;
 815                         if (tmp.int64 < tmp2.int64) // detect carry
 816                                 tmp3.u.Hi++;
 817                         tmp3.u.Lo = tmp.u.Hi;
 818
 819                         tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
 820                         prod[4] = tmp.u.Lo;
 821                         prod[5] = tmp.u.Hi;
 822
 823                         hi_prod = 5;
 824                 }
 825                 else {
 826                         prod[2] = tmp.u.Lo;
 827                         prod[3] = tmp.u.Hi;
 828                         hi_prod = 3;
 829                 }
 830
 831                 // Check for leading zero uint32_ts on the product
 832                 //
 833                 while (prod[hi_prod] == 0) {
 834                         hi_prod--;
 835                         if (hi_prod < 0)
 836                                 goto ReturnZero;
 837                 }
 838
 839                 scale = ScaleResult(prod, hi_prod, scale);
 840                 if (scale == -1)
 841                         return MONO_DECIMAL_OVERFLOW;
 842
 843                 result->v.v.Lo32 = prod[0];
 844                 result->v.v.Mid32 = prod[1];
 845                 result->Hi32 = prod[2];
 846         }
 847
 848         result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
 849         result->u.u.scale = (char)scale;
 850         return MONO_DECIMAL_OK;
 851 }
 852
 853 // Addition and subtraction
 854 static MonoDecimalStatus
 855 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
 856 {
 857         uint32_t     num[6];
 858         uint32_t     pwr;
 859         int       scale;
 860         int       hi_prod;
 861         int       cur;
 862         SPLIT64   tmp;
 863         MonoDecimal decRes;
 864         MonoDecimal decTmp;
 865         MonoDecimal *pdecTmp;
 866
 867         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
 868
 869         if (right->u.u.scale == left->u.u.scale) {
 870                 // Scale factors are equal, no alignment necessary.
 871                 //
 872                 decRes.u.signscale = left->u.signscale;
 873
 874         AlignedAdd:
 875                 if (sign) {
 876                         // Signs differ - subtract
 877                         //
 878                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
 879                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
 880
 881                         // Propagate carry
 882                         //
 883                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
 884                                 decRes.Hi32--;
 885                                 if (decRes.Hi32 >= left->Hi32)
 886                                         goto SignFlip;
 887                         } else if (decRes.Hi32 > left->Hi32) {
 888                                 // Got negative result.  Flip its sign.
 889                                 //
 890                         SignFlip:
 891                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
 892                                 decRes.Hi32 = ~decRes.Hi32;
 893                                 if (DECIMAL_LO64_GET(decRes) == 0)
 894                                         decRes.Hi32++;
 895                                 decRes.u.u.sign ^= DECIMAL_NEG;
 896                         }
 897
 898                 } else {
 899                         // Signs are the same - add
 900                         //
 901                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
 902                         decRes.Hi32 = left->Hi32 + right->Hi32;
 903
 904                         // Propagate carry
 905                         //
 906                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
 907                                 decRes.Hi32++;
 908                                 if (decRes.Hi32 <= left->Hi32)
 909                                         goto AlignedScale;
 910                         } else if (decRes.Hi32 < left->Hi32) {
 911                         AlignedScale:
 912                                 // The addition carried above 96 bits.  Divide the result by 10,
 913                                 // dropping the scale factor.
 914                                 //
 915                                 if (decRes.u.u.scale == 0)
 916                                         return MONO_DECIMAL_OVERFLOW;
 917                                 decRes.u.u.scale--;
 918
 919                                 tmp.u.Lo = decRes.Hi32;
 920                                 tmp.u.Hi = 1;
 921                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 922                                 decRes.Hi32 = tmp.u.Lo;
 923
 924                                 tmp.u.Lo = decRes.v.v.Mid32;
 925                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 926                                 decRes.v.v.Mid32 = tmp.u.Lo;
 927
 928                                 tmp.u.Lo = decRes.v.v.Lo32;
 929                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 930                                 decRes.v.v.Lo32 = tmp.u.Lo;
 931
 932                                 // See if we need to round up.
 933                                 //
 934                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
 935                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
 936                                                 if (DECIMAL_LO64_GET(decRes) == 0)
 937                                                         decRes.Hi32++;
 938                                 }
 939                         }
 940                 }
 941         }
 942         else {
 943                 // Scale factors are not equal.  Assume that a larger scale
 944                 // factor (more decimal places) is likely to mean that number
 945                 // is smaller.  Start by guessing that the right operand has
 946                 // the larger scale factor.  The result will have the larger
 947                 // scale factor.
 948                 //
 949                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
 950                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
 951                 scale = decRes.u.u.scale - left->u.u.scale;
 952
 953                 if (scale < 0) {
 954                         // Guessed scale factor wrong. Swap operands.
 955                         //
 956                         scale = -scale;
 957                         decRes.u.u.scale = left->u.u.scale;
 958                         decRes.u.u.sign ^= sign;
 959                         pdecTmp = right;
 960                         right = left;
 961                         left = pdecTmp;
 962                 }
 963
 964                 // *left will need to be multiplied by 10^scale so
 965                 // it will have the same scale as *right.  We could be
 966                 // extending it to up to 192 bits of precision.
 967                 //
 968                 if (scale <= POWER10_MAX) {
 969                         // Scaling won't make it larger than 4 uint32_ts
 970                         //
 971                         pwr = power10[scale];
 972                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
 973                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
 974                         tmp.int64 += decTmp.v.v.Mid32;
 975                         decTmp.v.v.Mid32 = tmp.u.Lo;
 976                         decTmp.Hi32 = tmp.u.Hi;
 977                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
 978                         tmp.int64 += decTmp.Hi32;
 979                         if (tmp.u.Hi == 0) {
 980                                 // Result fits in 96 bits.  Use standard aligned add.
 981                                 //
 982                                 decTmp.Hi32 = tmp.u.Lo;
 983                                 left = &decTmp;
 984                                 goto AlignedAdd;
 985                         }
 986                         num[0] = decTmp.v.v.Lo32;
 987                         num[1] = decTmp.v.v.Mid32;
 988                         num[2] = tmp.u.Lo;
 989                         num[3] = tmp.u.Hi;
 990                         hi_prod = 3;
 991                 }
 992                 else {
 993                         // Have to scale by a bunch.  Move the number to a buffer
 994                         // where it has room to grow as it's scaled.
 995                         //
 996                         num[0] = left->v.v.Lo32;
 997                         num[1] = left->v.v.Mid32;
 998                         num[2] = left->Hi32;
 999                         hi_prod = 2;
1000
1001                         // Scan for zeros in the upper words.
1002                         //
1003                         if (num[2] == 0) {
1004                                 hi_prod = 1;
1005                                 if (num[1] == 0) {
1006                                         hi_prod = 0;
1007                                         if (num[0] == 0) {
1008                                                 // Left arg is zero, return right.
1009                                                 //
1010                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
1011                                                 decRes.Hi32 = right->Hi32;
1012                                                 decRes.u.u.sign ^= sign;
1013                                                 goto RetDec;
1014                                         }
1015                                 }
1016                         }
1017
1018                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
1019                         // with index of highest non-zero uint32_t.
1020                         //
1021                         for (; scale > 0; scale -= POWER10_MAX) {
1022                                 if (scale > POWER10_MAX)
1023                                         pwr = ten_to_nine;
1024                                 else
1025                                         pwr = power10[scale];
1026
1027                                 tmp.u.Hi = 0;
1028                                 for (cur = 0; cur <= hi_prod; cur++) {
1029                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
1030                                         num[cur] = tmp.u.Lo;
1031                                 }
1032
1033                                 if (tmp.u.Hi != 0)
1034                                         // We're extending the result by another uint32_t.
1035                                         num[++hi_prod] = tmp.u.Hi;
1036                         }
1037                 }
1038
1039                 // Scaling complete, do the add.  Could be subtract if signs differ.
1040                 //
1041                 tmp.u.Lo = num[0];
1042                 tmp.u.Hi = num[1];
1043
1044                 if (sign) {
1045                         // Signs differ, subtract.
1046                         //
1047                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1048                         decRes.Hi32 = num[2] - right->Hi32;
1049
1050                         // Propagate carry
1051                         //
1052                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1053                                 decRes.Hi32--;
1054                                 if (decRes.Hi32 >= num[2])
1055                                         goto LongSub;
1056                         }
1057                         else if (decRes.Hi32 > num[2]) {
1058                         LongSub:
1059                                 // If num has more than 96 bits of precision, then we need to
1060                                 // carry the subtraction into the higher bits.  If it doesn't,
1061                                 // then we subtracted in the wrong order and have to flip the
1062                                 // sign of the result.
1063                                 //
1064                                 if (hi_prod <= 2)
1065                                         goto SignFlip;
1066
1067                                 cur = 3;
1068                                 while(num[cur++]-- == 0);
1069                                 if (num[hi_prod] == 0)
1070                                         hi_prod--;
1071                         }
1072                 }
1073                 else {
1074                         // Signs the same, add.
1075                         //
1076                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1077                         decRes.Hi32 = num[2] + right->Hi32;
1078
1079                         // Propagate carry
1080                         //
1081                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1082                                 decRes.Hi32++;
1083                                 if (decRes.Hi32 <= num[2])
1084                                         goto LongAdd;
1085                         }
1086                         else if (decRes.Hi32 < num[2]) {
1087                         LongAdd:
1088                                 // Had a carry above 96 bits.
1089                                 //
1090                                 cur = 3;
1091                                 do {
1092                                         if (hi_prod < cur) {
1093                                                 num[cur] = 1;
1094                                                 hi_prod = cur;
1095                                                 break;
1096                                         }
1097                                 }while (++num[cur++] == 0);
1098                         }
1099                 }
1100
1101                 if (hi_prod > 2) {
1102                         num[0] = decRes.v.v.Lo32;
1103                         num[1] = decRes.v.v.Mid32;
1104                         num[2] = decRes.Hi32;
1105                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1106                         if (decRes.u.u.scale == (uint8_t) -1)
1107                                 return MONO_DECIMAL_OVERFLOW;
1108
1109                         decRes.v.v.Lo32 = num[0];
1110                         decRes.v.v.Mid32 = num[1];
1111                         decRes.Hi32 = num[2];
1112                 }
1113         }
1114
1115 RetDec:
1116         COPYDEC(*result, decRes);
1117         // Odd, the Microsoft code does not set result->reserved to zero on this case
1118         return MONO_DECIMAL_OK;
1119 }
1120
1121 // Decimal addition
1122 static MonoDecimalStatus
1123 MONO_VarDecAdd(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1124 {
1125     return DecAddSub (left, right, result, 0);
1126 }
1127
1128 // Decimal subtraction
1129 static MonoDecimalStatus
1130 MONO_VarDecSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1131 {
1132     return DecAddSub (left, right, result, DECIMAL_NEG);
1133 }
1134
1135 /**
1136  * IncreaseScale:
1137  *
1138  * Entry:
1139  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1140  *   pwr   - Scale factor to multiply by
1141  *
1142  * Purpose:
1143  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1144  *   the input.  The last 32 bits of the product are the return value.
1145  *
1146  * Exit:
1147  *   Returns highest 32 bits of product.
1148  *
1149  * Exceptions:
1150  *   None.
1151  *
1152  */
1153 static uint32_t
1154 IncreaseScale(uint32_t *num, uint32_t pwr)
1155 {
1156         SPLIT64   sdlTmp;
1157
1158         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1159         num[0] = sdlTmp.u.Lo;
1160         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1161         num[1] = sdlTmp.u.Lo;
1162         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1163         num[2] = sdlTmp.u.Lo;
1164         return sdlTmp.u.Hi;
1165 }
1166
1167 /**
1168  * Div96By64:
1169  *
1170  * Entry:
1171  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1172  *   sdlDen  - 64-bit divisor.
1173  *
1174  * Purpose:
1175  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1176  *   Divisor must be larger than upper 64 bits of dividend.
1177  *
1178  * Exit:
1179  *   Remainder overwrites lower 64-bits of dividend.
1180  *   Returns quotient.
1181  *
1182  * Exceptions:
1183  *   None.
1184  *
1185  */
1186 static uint32_t
1187 Div96By64(uint32_t *num, SPLIT64 den)
1188 {
1189         SPLIT64 quo;
1190         SPLIT64 sdlNum;
1191         SPLIT64 prod;
1192
1193         sdlNum.u.Lo = num[0];
1194
1195         if (num[2] >= den.u.Hi) {
1196                 // Divide would overflow.  Assume a quotient of 2^32, and set
1197                 // up remainder accordingly.  Then jump to loop which reduces
1198                 // the quotient.
1199                 //
1200                 sdlNum.u.Hi = num[1] - den.u.Lo;
1201                 quo.u.Lo = 0;
1202                 goto NegRem;
1203         }
1204
1205         // Hardware divide won't overflow
1206         //
1207         if (num[2] == 0 && num[1] < den.u.Hi)
1208                 // Result is zero.  Entire dividend is remainder.
1209                 //
1210                 return 0;
1211
1212         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1213         //
1214         quo.u.Lo = num[1];
1215         quo.u.Hi = num[2];
1216         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1217         sdlNum.u.Hi = quo.u.Hi; // remainder
1218
1219         // Compute full remainder, rem = dividend - (quo * divisor).
1220         //
1221         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1222         sdlNum.int64 -= prod.int64;
1223
1224         if (sdlNum.int64 > ~prod.int64) {
1225         NegRem:
1226                 // Remainder went negative.  Add divisor back in until it's positive,
1227                 // a max of 2 times.
1228                 //
1229                 do {
1230                         quo.u.Lo--;
1231                         sdlNum.int64 += den.int64;
1232                 }while (sdlNum.int64 >= den.int64);
1233         }
1234
1235         num[0] = sdlNum.u.Lo;
1236         num[1] = sdlNum.u.Hi;
1237         return quo.u.Lo;
1238 }
1239
1240 /***
1241 * Div128By96
1242 *
1243 * Entry:
1244 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1245 *   den - Pointer to 96-bit divisor.
1246 *
1247 * Purpose:
1248 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1249 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1250 *   assured in the initial call because the divisor is normalized
1251 *   and the dividend can't be.  In subsequent calls, the remainder
1252 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1253 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1254 *
1255 * Exit:
1256 *   Remainder overwrites lower 96-bits of dividend.
1257 *   Returns quotient.
1258 *
1259 * Exceptions:
1260 *   None.
1261 *
1262 ***********************************************************************/
1263
1264 static uint32_t
1265 Div128By96(uint32_t *num, uint32_t *den)
1266 {
1267         SPLIT64 sdlQuo;
1268         SPLIT64 sdlNum;
1269         SPLIT64 sdlProd1;
1270         SPLIT64 sdlProd2;
1271
1272         sdlNum.u.Lo = num[0];
1273         sdlNum.u.Hi = num[1];
1274
1275         if (num[3] == 0 && num[2] < den[2]){
1276                 // Result is zero.  Entire dividend is remainder.
1277                 //
1278                 return 0;
1279         }
1280
1281         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1282         //
1283         sdlQuo.u.Lo = num[2];
1284         sdlQuo.u.Hi = num[3];
1285         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1286
1287         // Compute full remainder, rem = dividend - (quo * divisor).
1288         //
1289         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1290         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1291         sdlProd2.int64 += sdlProd1.u.Hi;
1292         sdlProd1.u.Hi = sdlProd2.u.Lo;
1293
1294         sdlNum.int64 -= sdlProd1.int64;
1295         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1296
1297         // Propagate carries
1298         //
1299         if (sdlNum.int64 > ~sdlProd1.int64) {
1300                 num[2]--;
1301                 if (num[2] >= ~sdlProd2.u.Hi)
1302                         goto NegRem;
1303         } else if (num[2] > ~sdlProd2.u.Hi) {
1304         NegRem:
1305                 // Remainder went negative.  Add divisor back in until it's positive,
1306                 // a max of 2 times.
1307                 //
1308                 sdlProd1.u.Lo = den[0];
1309                 sdlProd1.u.Hi = den[1];
1310
1311                 for (;;) {
1312                         sdlQuo.u.Lo--;
1313                         sdlNum.int64 += sdlProd1.int64;
1314                         num[2] += den[2];
1315
1316                         if (sdlNum.int64 < sdlProd1.int64) {
1317                                 // Detected carry. Check for carry out of top
1318                                 // before adding it in.
1319                                 //
1320                                 if (num[2]++ < den[2])
1321                                         break;
1322                         }
1323                         if (num[2] < den[2])
1324                                 break; // detected carry
1325                 }
1326         }
1327
1328         num[0] = sdlNum.u.Lo;
1329         num[1] = sdlNum.u.Hi;
1330         return sdlQuo.u.Lo;
1331 }
1332
1333 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1334 // Returns FALSE if there is an overflow
1335 static gboolean
1336 Add32To96(uint32_t *num, uint32_t value)
1337 {
1338         num[0] += value;
1339         if (num[0] < value) {
1340                 if (++num[1] == 0) {
1341                         if (++num[2] == 0) {
1342                                 return FALSE;
1343                         }
1344                 }
1345         }
1346         return TRUE;
1347 }
1348
1349 static void
1350 OverflowUnscale (uint32_t *quo, gboolean remainder)
1351 {
1352         SPLIT64  sdlTmp;
1353
1354         // We have overflown, so load the high bit with a one.
1355         sdlTmp.u.Hi = 1u;
1356         sdlTmp.u.Lo = quo[2];
1357         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1358         quo[2] = sdlTmp.u.Lo;
1359         sdlTmp.u.Lo = quo[1];
1360         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1361         quo[1] = sdlTmp.u.Lo;
1362         sdlTmp.u.Lo = quo[0];
1363         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1364         quo[0] = sdlTmp.u.Lo;
1365         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1366         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1367                 Add32To96(quo, 1u);
1368         }
1369 }
1370
1371 // MONO_VarDecDiv - Decimal divide
1372 static MonoDecimalStatus
1373 MONO_VarDecDiv(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1374 {
1375         uint32_t   quo[3];
1376         uint32_t   quoSave[3];
1377         uint32_t   rem[4];
1378         uint32_t   divisor[3];
1379         uint32_t   pwr;
1380         uint32_t   utmp;
1381         uint32_t   utmp1;
1382         SPLIT64 sdlTmp;
1383         SPLIT64 sdlDivisor;
1384         int     scale;
1385         int     cur_scale;
1386
1387         scale = left->u.u.scale - right->u.u.scale;
1388         divisor[0] = right->v.v.Lo32;
1389         divisor[1] = right->v.v.Mid32;
1390         divisor[2] = right->Hi32;
1391
1392         if (divisor[1] == 0 && divisor[2] == 0) {
1393                 // Divisor is only 32 bits.  Easy divide.
1394                 //
1395                 if (divisor[0] == 0)
1396                         return MONO_DECIMAL_DIVBYZERO;
1397
1398                 quo[0] = left->v.v.Lo32;
1399                 quo[1] = left->v.v.Mid32;
1400                 quo[2] = left->Hi32;
1401                 rem[0] = Div96By32(quo, divisor[0]);
1402
1403                 for (;;) {
1404                         if (rem[0] == 0) {
1405                                 if (scale < 0) {
1406                                         cur_scale = min(9, -scale);
1407                                         goto HaveScale;
1408                                 }
1409                                 break;
1410                         }
1411
1412                         // We have computed a quotient based on the natural scale
1413                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1414                         // remainder, so now we should increase the scale if possible to
1415                         // include more quotient bits.
1416                         //
1417                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1418                         // computing more quotient bits as long as the remainder stays
1419                         // non-zero.  If scaling by that much would cause overflow, we'll
1420                         // drop out of the loop and scale by as much as we can.
1421                         //
1422                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1423                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1424                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1425                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
1426                         // is the largest value in quo[1] (when quo[2] == 4) that is
1427                         // assured not to overflow.
1428                         //
1429                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1430                         if (cur_scale == 0) {
1431                                 // No more scaling to be done, but remainder is non-zero.
1432                                 // Round quotient.
1433                                 //
1434                                 utmp = rem[0] << 1;
1435                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1436                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1437                                 RoundUp:
1438                                         if (++quo[0] == 0)
1439                                                 if (++quo[1] == 0)
1440                                                         quo[2]++;
1441                                 }
1442                                 break;
1443                         }
1444
1445                         if (cur_scale == -1)
1446                                 return MONO_DECIMAL_OVERFLOW;
1447
1448                 HaveScale:
1449                         pwr = power10[cur_scale];
1450                         scale += cur_scale;
1451
1452                         if (IncreaseScale(quo, pwr) != 0)
1453                                 return MONO_DECIMAL_OVERFLOW;
1454
1455                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1456                         rem[0] = sdlTmp.u.Hi;
1457
1458                         quo[0] += sdlTmp.u.Lo;
1459                         if (quo[0] < sdlTmp.u.Lo) {
1460                                 if (++quo[1] == 0)
1461                                         quo[2]++;
1462                         }
1463                 } // for (;;)
1464         }
1465         else {
1466                 // Divisor has bits set in the upper 64 bits.
1467                 //
1468                 // Divisor must be fully normalized (shifted so bit 31 of the most
1469                 // significant uint32_t is 1).  Locate the MSB so we know how much to
1470                 // normalize by.  The dividend will be shifted by the same amount so
1471                 // the quotient is not changed.
1472                 //
1473                 if (divisor[2] == 0)
1474                         utmp = divisor[1];
1475                 else
1476                         utmp = divisor[2];
1477
1478                 cur_scale = 0;
1479                 if (!(utmp & 0xFFFF0000)) {
1480                         cur_scale += 16;
1481                         utmp <<= 16;
1482                 }
1483                 if (!(utmp & 0xFF000000)) {
1484                         cur_scale += 8;
1485                         utmp <<= 8;
1486                 }
1487                 if (!(utmp & 0xF0000000)) {
1488                         cur_scale += 4;
1489                         utmp <<= 4;
1490                 }
1491                 if (!(utmp & 0xC0000000)) {
1492                         cur_scale += 2;
1493                         utmp <<= 2;
1494                 }
1495                 if (!(utmp & 0x80000000)) {
1496                         cur_scale++;
1497                         utmp <<= 1;
1498                 }
1499
1500                 // Shift both dividend and divisor left by cur_scale.
1501                 //
1502                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1503                 rem[0] = sdlTmp.u.Lo;
1504                 rem[1] = sdlTmp.u.Hi;
1505                 sdlTmp.u.Lo = left->v.v.Mid32;
1506                 sdlTmp.u.Hi = left->Hi32;
1507                 sdlTmp.int64 <<= cur_scale;
1508                 rem[2] = sdlTmp.u.Hi;
1509                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1510
1511                 sdlDivisor.u.Lo = divisor[0];
1512                 sdlDivisor.u.Hi = divisor[1];
1513                 sdlDivisor.int64 <<= cur_scale;
1514
1515                 if (divisor[2] == 0) {
1516                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1517                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1518                         //
1519                         sdlTmp.u.Lo = rem[2];
1520                         sdlTmp.u.Hi = rem[3];
1521
1522                         quo[2] = 0;
1523                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1524                         quo[0] = Div96By64(rem, sdlDivisor);
1525
1526                         for (;;) {
1527                                 if ((rem[0] | rem[1]) == 0) {
1528                                         if (scale < 0) {
1529                                                 cur_scale = min(9, -scale);
1530                                                 goto HaveScale64;
1531                                         }
1532                                         break;
1533                                 }
1534
1535                                 // Remainder is non-zero.  Scale up quotient and remainder by
1536                                 // powers of 10 so we can compute more significant bits.
1537                                 //
1538                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1539                                 if (cur_scale == 0) {
1540                                         // No more scaling to be done, but remainder is non-zero.
1541                                         // Round quotient.
1542                                         //
1543                                         sdlTmp.u.Lo = rem[0];
1544                                         sdlTmp.u.Hi = rem[1];
1545                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1546                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1547                                                 goto RoundUp;
1548                                         break;
1549                                 }
1550
1551                                 if (cur_scale == -1)
1552                                         return MONO_DECIMAL_OVERFLOW;
1553
1554                         HaveScale64:
1555                                 pwr = power10[cur_scale];
1556                                 scale += cur_scale;
1557
1558                                 if (IncreaseScale(quo, pwr) != 0)
1559                                         return MONO_DECIMAL_OVERFLOW;
1560
1561                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1562                                 IncreaseScale(rem, pwr);
1563                                 utmp = Div96By64(rem, sdlDivisor);
1564                                 quo[0] += utmp;
1565                                 if (quo[0] < utmp)
1566                                         if (++quo[1] == 0)
1567                                                 quo[2]++;
1568
1569                         } // for (;;)
1570                 }
1571                 else {
1572                         // Have a 96-bit divisor in divisor[].
1573                         //
1574                         // Start by finishing the shift left by cur_scale.
1575                         //
1576                         sdlTmp.u.Lo = divisor[1];
1577                         sdlTmp.u.Hi = divisor[2];
1578                         sdlTmp.int64 <<= cur_scale;
1579                         divisor[0] = sdlDivisor.u.Lo;
1580                         divisor[1] = sdlDivisor.u.Hi;
1581                         divisor[2] = sdlTmp.u.Hi;
1582
1583                         // The remainder (currently 96 bits spread over 4 uint32_ts)
1584                         // will be < divisor.
1585                         //
1586                         quo[2] = 0;
1587                         quo[1] = 0;
1588                         quo[0] = Div128By96(rem, divisor);
1589
1590                         for (;;) {
1591                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1592                                         if (scale < 0) {
1593                                                 cur_scale = min(9, -scale);
1594                                                 goto HaveScale96;
1595                                         }
1596                                         break;
1597                                 }
1598
1599                                 // Remainder is non-zero.  Scale up quotient and remainder by
1600                                 // powers of 10 so we can compute more significant bits.
1601                                 //
1602                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1603                                 if (cur_scale == 0) {
1604                                         // No more scaling to be done, but remainder is non-zero.
1605                                         // Round quotient.
1606                                         //
1607                                         if (rem[2] >= 0x80000000)
1608                                                 goto RoundUp;
1609
1610                                         utmp = rem[0] > 0x80000000;
1611                                         utmp1 = rem[1] > 0x80000000;
1612                                         rem[0] <<= 1;
1613                                         rem[1] = (rem[1] << 1) + utmp;
1614                                         rem[2] = (rem[2] << 1) + utmp1;
1615
1616                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1617                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1618                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1619                                               (quo[0] & 1))))
1620                                                 goto RoundUp;
1621                                         break;
1622                                 }
1623
1624                                 if (cur_scale == -1)
1625                                         return MONO_DECIMAL_OVERFLOW;
1626
1627                         HaveScale96:
1628                                 pwr = power10[cur_scale];
1629                                 scale += cur_scale;
1630
1631                                 if (IncreaseScale(quo, pwr) != 0)
1632                                         return MONO_DECIMAL_OVERFLOW;
1633
1634                                 rem[3] = IncreaseScale(rem, pwr);
1635                                 utmp = Div128By96(rem, divisor);
1636                                 quo[0] += utmp;
1637                                 if (quo[0] < utmp)
1638                                         if (++quo[1] == 0)
1639                                                 quo[2]++;
1640
1641                         } // for (;;)
1642                 }
1643         }
1644
1645         // No more remainder.  Try extracting any extra powers of 10 we may have
1646         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1647         // If a division by one of these powers returns a zero remainder, then
1648         // we keep the quotient.  If the remainder is not zero, then we restore
1649         // the previous value.
1650         //
1651         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1652         // we can extract.  We use this as a quick test on whether to try a
1653         // given power.
1654         //
1655         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1656                 quoSave[0] = quo[0];
1657                 quoSave[1] = quo[1];
1658                 quoSave[2] = quo[2];
1659
1660                 if (Div96By32(quoSave, 100000000) == 0) {
1661                         quo[0] = quoSave[0];
1662                         quo[1] = quoSave[1];
1663                         quo[2] = quoSave[2];
1664                         scale -= 8;
1665                 }
1666                 else
1667                         break;
1668         }
1669
1670         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1671                 quoSave[0] = quo[0];
1672                 quoSave[1] = quo[1];
1673                 quoSave[2] = quo[2];
1674
1675                 if (Div96By32(quoSave, 10000) == 0) {
1676                         quo[0] = quoSave[0];
1677                         quo[1] = quoSave[1];
1678                         quo[2] = quoSave[2];
1679                         scale -= 4;
1680                 }
1681         }
1682
1683         if ((quo[0] & 3) == 0 && scale >= 2) {
1684                 quoSave[0] = quo[0];
1685                 quoSave[1] = quo[1];
1686                 quoSave[2] = quo[2];
1687
1688                 if (Div96By32(quoSave, 100) == 0) {
1689                         quo[0] = quoSave[0];
1690                         quo[1] = quoSave[1];
1691                         quo[2] = quoSave[2];
1692                         scale -= 2;
1693                 }
1694         }
1695
1696         if ((quo[0] & 1) == 0 && scale >= 1) {
1697                 quoSave[0] = quo[0];
1698                 quoSave[1] = quo[1];
1699                 quoSave[2] = quo[2];
1700
1701                 if (Div96By32(quoSave, 10) == 0) {
1702                         quo[0] = quoSave[0];
1703                         quo[1] = quoSave[1];
1704                         quo[2] = quoSave[2];
1705                         scale -= 1;
1706                 }
1707         }
1708
1709         result->Hi32 = quo[2];
1710         result->v.v.Mid32 = quo[1];
1711         result->v.v.Lo32 = quo[0];
1712         result->u.u.scale = scale;
1713         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1714         return MONO_DECIMAL_OK;
1715 }
1716
1717 // MONO_VarDecAbs - Decimal Absolute Value
1718 static void
1719 MONO_VarDecAbs (MonoDecimal *pdecOprd, MonoDecimal *result)
1720 {
1721         COPYDEC(*result, *pdecOprd);
1722         result->u.u.sign &= ~DECIMAL_NEG;
1723         // Microsoft does not set reserved here
1724 }
1725
1726 // MONO_VarDecFix - Decimal Fix (chop to integer)
1727 static void
1728 MONO_VarDecFix (MonoDecimal *pdecOprd, MonoDecimal *result)
1729 {
1730         DecFixInt(result, pdecOprd);
1731 }
1732
1733
1734 // MONO_VarDecInt - Decimal Int (round down to integer)
1735 static void
1736 MONO_VarDecInt (MonoDecimal *pdecOprd, MonoDecimal *result)
1737 {
1738         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1739                 // We have chopped off a non-zero amount from a negative value.  Since
1740                 // we round toward -infinity, we must increase the integer result by
1741                 // 1 to make it more negative.  This will never overflow because
1742                 // in order to have a remainder, we must have had a non-zero scale factor.
1743                 // Our scale factor is back to zero now.
1744                 //
1745                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1746                 if (DECIMAL_LO64_GET(*result) == 0)
1747                         result->Hi32++;
1748         }
1749 }
1750
1751
1752 // MONO_VarDecNeg - Decimal Negate
1753 static void
1754 MONO_VarDecNeg (MonoDecimal *pdecOprd, MonoDecimal *result)
1755 {
1756         COPYDEC(*result, *pdecOprd);
1757         // Microsoft does not set result->reserved to zero on this case.
1758         result->u.u.sign ^= DECIMAL_NEG;
1759 }
1760
1761 //
1762 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1763 //
1764 static MonoDecimalStatus
1765 MONO_VarDecRound(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1766 {
1767         uint32_t num[3];
1768         uint32_t rem;
1769         uint32_t sticky;
1770         uint32_t pwr;
1771         int scale;
1772
1773         if (cDecimals < 0)
1774                 return MONO_DECIMAL_INVALID_ARGUMENT;
1775
1776         scale = input->u.u.scale - cDecimals;
1777         if (scale > 0) {
1778                 num[0] = input->v.v.Lo32;
1779                 num[1] = input->v.v.Mid32;
1780                 num[2] = input->Hi32;
1781                 result->u.u.sign = input->u.u.sign;
1782                 rem = sticky = 0;
1783
1784                 do {
1785                         sticky |= rem;
1786                         if (scale > POWER10_MAX)
1787                                 pwr = ten_to_nine;
1788                         else
1789                                 pwr = power10[scale];
1790
1791                         rem = Div96By32(num, pwr);
1792                         scale -= 9;
1793                 }while (scale > 0);
1794
1795                 // Now round.  rem has last remainder, sticky has sticky bits.
1796                 // To do IEEE rounding, we add LSB of result to sticky bits so
1797                 // either causes round up if remainder * 2 == last divisor.
1798                 //
1799                 sticky |= num[0] & 1;
1800                 rem = (rem << 1) + (sticky != 0);
1801                 if (pwr < rem &&
1802                     ++num[0] == 0 &&
1803                     ++num[1] == 0
1804                         )
1805                         ++num[2];
1806
1807                 result->v.v.Lo32 = num[0];
1808                 result->v.v.Mid32 = num[1];
1809                 result->Hi32 = num[2];
1810                 result->u.u.scale = cDecimals;
1811                 return MONO_DECIMAL_OK;
1812         }
1813
1814         COPYDEC(*result, *input);
1815         // Odd, the Microsoft source does not set the result->reserved to zero here.
1816         return MONO_DECIMAL_OK;
1817 }
1818
1819 //
1820 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1821 static MonoDecimalStatus
1822 MONO_VarDecFromR4 (float input, MonoDecimal* result)
1823 {
1824         int         exp;    // number of bits to left of binary point
1825         int         power;
1826         uint32_t       mant;
1827         double      dbl;
1828         SPLIT64     sdlLo;
1829         SPLIT64     sdlHi;
1830         int         lmax, cur;  // temps used during scale reduction
1831
1832         // The most we can scale by is 10^28, which is just slightly more
1833         // than 2^93.  So a float with an exponent of -94 could just
1834         // barely reach 0.5, but smaller exponents will always round to zero.
1835         //
1836         if ((exp = ((SingleStructure *)&input)->exp - SNGBIAS) < -94 ) {
1837                 DECIMAL_SETZERO(*result);
1838                 return MONO_DECIMAL_OK;
1839         }
1840
1841         if (exp > 96)
1842                 return MONO_DECIMAL_OVERFLOW;
1843
1844         // Round the input to a 7-digit integer.  The R4 format has
1845         // only 7 digits of precision, and we want to keep garbage digits
1846         // out of the Decimal were making.
1847         //
1848         // Calculate max power of 10 input value could have by multiplying
1849         // the exponent by log10(2).  Using scaled integer multiplcation,
1850         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1851         //
1852         dbl = fabs(input);
1853         power = 6 - ((exp * 19728) >> 16);
1854
1855         if (power >= 0) {
1856                 // We have less than 7 digits, scale input up.
1857                 //
1858                 if (power > DECMAX)
1859                         power = DECMAX;
1860
1861                 dbl = dbl * double_power10[power];
1862         } else {
1863                 if (power != -1 || dbl >= 1E7)
1864                         dbl = dbl / fnDblPower10(-power);
1865                 else
1866                         power = 0; // didn't scale it
1867         }
1868
1869         g_assert (dbl < 1E7);
1870         if (dbl < 1E6 && power < DECMAX) {
1871                 dbl *= 10;
1872                 power++;
1873                 g_assert(dbl >= 1E6);
1874         }
1875
1876         // Round to integer
1877         //
1878         mant = (int32_t)dbl;
1879         dbl -= (double)mant;  // difference between input & integer
1880         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1881                 mant++;
1882
1883         if (mant == 0) {
1884                 DECIMAL_SETZERO(*result);
1885                 return MONO_DECIMAL_OK;
1886         }
1887
1888         if (power < 0) {
1889                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1890                 //
1891                 power = -power;
1892                 if (power < 10) {
1893                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1894
1895                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1896                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1897                         DECIMAL_HI32(*result) = 0;
1898                 } else {
1899                         // Have a big power of 10.
1900                         //
1901                         if (power > 18) {
1902                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1903                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1904
1905                                 if (sdlHi.u.Hi != 0)
1906                                         return MONO_DECIMAL_OVERFLOW;
1907                         }
1908                         else {
1909                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1910                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1911                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1912                                 sdlHi.int64 += sdlLo.u.Hi;
1913                                 sdlLo.u.Hi = sdlHi.u.Lo;
1914                                 sdlHi.u.Lo = sdlHi.u.Hi;
1915                         }
1916                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1917                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1918                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1919                 }
1920                 DECIMAL_SCALE(*result) = 0;
1921         } else {
1922                 // Factor out powers of 10 to reduce the scale, if possible.
1923                 // The maximum number we could factor out would be 6.  This
1924                 // comes from the fact we have a 7-digit number, and the
1925                 // MSD must be non-zero -- but the lower 6 digits could be
1926                 // zero.  Note also the scale factor is never negative, so
1927                 // we can't scale by any more than the power we used to
1928                 // get the integer.
1929                 //
1930                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1931                 //
1932                 lmax = min(power, 6);
1933
1934                 // lmax is the largest power of 10 to try, lmax <= 6.
1935                 // We'll try powers 4, 2, and 1 unless they're too big.
1936                 //
1937                 for (cur = 4; cur > 0; cur >>= 1)
1938                 {
1939                         if (cur > lmax)
1940                                 continue;
1941
1942                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1943
1944                         if (sdlLo.u.Hi == 0) {
1945                                 mant = sdlLo.u.Lo;
1946                                 power -= cur;
1947                                 lmax -= cur;
1948                         }
1949                 }
1950                 DECIMAL_LO32(*result) = mant;
1951                 DECIMAL_MID32(*result) = 0;
1952                 DECIMAL_HI32(*result) = 0;
1953                 DECIMAL_SCALE(*result) = power;
1954         }
1955
1956         DECIMAL_SIGN(*result) = (char)((SingleStructure *)&input)->sign << 7;
1957         return MONO_DECIMAL_OK;
1958 }
1959
1960 //
1961 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1962 static MonoDecimalStatus
1963 MONO_VarDecFromR8 (double input, MonoDecimal *result)
1964 {
1965         int         exp;    // number of bits to left of binary point
1966         int         power;  // power-of-10 scale factor
1967         SPLIT64     sdlMant;
1968         SPLIT64     sdlLo;
1969         double      dbl;
1970         int         lmax, cur;  // temps used during scale reduction
1971         uint32_t       pwr_cur;
1972         uint32_t       quo;
1973
1974
1975         // The most we can scale by is 10^28, which is just slightly more
1976         // than 2^93.  So a float with an exponent of -94 could just
1977         // barely reach 0.5, but smaller exponents will always round to zero.
1978         //
1979         if ((exp = ((DoubleStructure *)&input)->u.exp - DBLBIAS) < -94) {
1980                 DECIMAL_SETZERO(*result);
1981                 return MONO_DECIMAL_OK;
1982         }
1983
1984         if (exp > 96)
1985                 return MONO_DECIMAL_OVERFLOW;
1986
1987         // Round the input to a 15-digit integer.  The R8 format has
1988         // only 15 digits of precision, and we want to keep garbage digits
1989         // out of the Decimal were making.
1990         //
1991         // Calculate max power of 10 input value could have by multiplying
1992         // the exponent by log10(2).  Using scaled integer multiplcation,
1993         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1994         //
1995         dbl = fabs(input);
1996         power = 14 - ((exp * 19728) >> 16);
1997
1998         if (power >= 0) {
1999                 // We have less than 15 digits, scale input up.
2000                 //
2001                 if (power > DECMAX)
2002                         power = DECMAX;
2003
2004                 dbl = dbl * double_power10[power];
2005         } else {
2006                 if (power != -1 || dbl >= 1E15)
2007                         dbl = dbl / fnDblPower10(-power);
2008                 else
2009                         power = 0; // didn't scale it
2010         }
2011
2012         g_assert (dbl < 1E15);
2013         if (dbl < 1E14 && power < DECMAX) {
2014                 dbl *= 10;
2015                 power++;
2016                 g_assert(dbl >= 1E14);
2017         }
2018
2019         // Round to int64
2020         //
2021         sdlMant.int64 = (int64_t)dbl;
2022         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
2023         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
2024                 sdlMant.int64++;
2025
2026         if (sdlMant.int64 == 0) {
2027                 DECIMAL_SETZERO(*result);
2028                 return MONO_DECIMAL_OK;
2029         }
2030
2031         if (power < 0) {
2032                 // Add -power factors of 10, -power <= (29 - 15) = 14.
2033                 //
2034                 power = -power;
2035                 if (power < 10) {
2036                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2037                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2038                         sdlMant.int64 += sdlLo.u.Hi;
2039                         sdlLo.u.Hi = sdlMant.u.Lo;
2040                         sdlMant.u.Lo = sdlMant.u.Hi;
2041                 }
2042                 else {
2043                         // Have a big power of 10.
2044                         //
2045                         g_assert(power <= 14);
2046                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2047
2048                         if (sdlMant.u.Hi != 0)
2049                                 return MONO_DECIMAL_OVERFLOW;
2050                 }
2051                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2052                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2053                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2054                 DECIMAL_SCALE(*result) = 0;
2055         }
2056         else {
2057                 // Factor out powers of 10 to reduce the scale, if possible.
2058                 // The maximum number we could factor out would be 14.  This
2059                 // comes from the fact we have a 15-digit number, and the
2060                 // MSD must be non-zero -- but the lower 14 digits could be
2061                 // zero.  Note also the scale factor is never negative, so
2062                 // we can't scale by any more than the power we used to
2063                 // get the integer.
2064                 //
2065                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2066                 //
2067                 lmax = min(power, 14);
2068
2069                 // lmax is the largest power of 10 to try, lmax <= 14.
2070                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2071                 //
2072                 for (cur = 8; cur > 0; cur >>= 1)
2073                 {
2074                         if (cur > lmax)
2075                                 continue;
2076
2077                         pwr_cur = (uint32_t)long_power10[cur];
2078
2079                         if (sdlMant.u.Hi >= pwr_cur) {
2080                                 // Overflow if we try to divide in one step.
2081                                 //
2082                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2083                                 quo = sdlLo.u.Lo;
2084                                 sdlLo.u.Lo = sdlMant.u.Lo;
2085                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2086                         }
2087                         else {
2088                                 quo = 0;
2089                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2090                         }
2091
2092                         if (sdlLo.u.Hi == 0) {
2093                                 sdlMant.u.Hi = quo;
2094                                 sdlMant.u.Lo = sdlLo.u.Lo;
2095                                 power -= cur;
2096                                 lmax -= cur;
2097                         }
2098                 }
2099
2100                 DECIMAL_HI32(*result) = 0;
2101                 DECIMAL_SCALE(*result) = power;
2102                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2103                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2104         }
2105
2106         DECIMAL_SIGN(*result) = (char)((DoubleStructure *)&input)->u.sign << 7;
2107         return MONO_DECIMAL_OK;
2108 }
2109
2110 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2111 static MonoDecimalStatus
2112 MONO_VarR8FromDec(MonoDecimal *input, double *result)
2113 {
2114         SPLIT64  tmp;
2115         double   dbl;
2116
2117         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2118                 return MONO_DECIMAL_INVALID_ARGUMENT;
2119
2120         tmp.u.Lo = DECIMAL_LO32(*input);
2121         tmp.u.Hi = DECIMAL_MID32(*input);
2122
2123         if ((int32_t)DECIMAL_MID32(*input) < 0)
2124                 dbl = (ds2to64.dbl + (double)(int64_t)tmp.int64 +
2125                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2126         else
2127                 dbl = ((double)(int64_t)tmp.int64 +
2128                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input));
2129
2130         if (DECIMAL_SIGN(*input))
2131                 dbl = -dbl;
2132
2133         *result = dbl;
2134         return MONO_DECIMAL_OK;
2135 }
2136
2137 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2138 static MonoDecimalStatus
2139 MONO_VarR4FromDec(MonoDecimal *input, float *result)
2140 {
2141         double   dbl;
2142
2143         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2144                 return MONO_DECIMAL_INVALID_ARGUMENT;
2145
2146         // Can't overflow; no errors possible.
2147         //
2148         MONO_VarR8FromDec(input, &dbl);
2149         *result = (float)dbl;
2150         return MONO_DECIMAL_OK;
2151 }
2152
2153 static void
2154 DecShiftLeft(MonoDecimal* value)
2155 {
2156         unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2157     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2158     g_assert(value != NULL);
2159
2160     DECIMAL_LO32(*value) <<= 1;
2161     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2162     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2163 }
2164
2165 static int
2166 D32AddCarry(uint32_t* value, uint32_t i)
2167 {
2168     uint32_t v = *value;
2169     uint32_t sum = v + i;
2170     *value = sum;
2171     return sum < v || sum < i? 1: 0;
2172 }
2173
2174 static void
2175 DecAdd(MonoDecimal *value, MonoDecimal* d)
2176 {
2177         g_assert(value != NULL && d != NULL);
2178
2179         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2180                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2181                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2182                 }
2183         }
2184         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2185                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2186         }
2187         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2188 }
2189
2190 static void
2191 DecMul10(MonoDecimal* value)
2192 {
2193         MonoDecimal d = *value;
2194         g_assert (value != NULL);
2195
2196         DecShiftLeft(value);
2197         DecShiftLeft(value);
2198         DecAdd(value, &d);
2199         DecShiftLeft(value);
2200 }
2201
2202 static void
2203 DecAddInt32(MonoDecimal* value, unsigned int i)
2204 {
2205         g_assert(value != NULL);
2206
2207         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2208                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2209                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2210                 }
2211         }
2212 }
2213
2214 MonoDecimalCompareResult
2215 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2216 {
2217         uint32_t   left_sign;
2218         uint32_t   right_sign;
2219         MonoDecimal result;
2220
2221         // First check signs and whether either are zero.  If both are
2222         // non-zero and of the same sign, just use subtraction to compare.
2223         //
2224         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2225         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2226         if (left_sign != 0)
2227                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2228
2229         if (right_sign != 0)
2230                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2231
2232         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2233         // operand is +, 0, or -.
2234         //
2235         if (left_sign == right_sign) {
2236                 if (left_sign == 0)    // both are zero
2237                         return MONO_DECIMAL_CMP_EQ; // return equal
2238
2239                 DecAddSub(left, right, &result, DECIMAL_NEG);
2240                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2241                         return MONO_DECIMAL_CMP_EQ;
2242                 if (result.u.u.sign & DECIMAL_NEG)
2243                         return MONO_DECIMAL_CMP_LT;
2244                 return MONO_DECIMAL_CMP_GT;
2245         }
2246
2247         //
2248         // Signs are different.  Used signed byte compares
2249         //
2250         if ((char)left_sign > (char)right_sign)
2251                 return MONO_DECIMAL_CMP_GT;
2252         return MONO_DECIMAL_CMP_LT;
2253 }
2254
2255 void
2256 mono_decimal_init_single (MonoDecimal *_this, float value)
2257 {
2258         if (MONO_VarDecFromR4 (value, _this) == MONO_DECIMAL_OVERFLOW) {
2259                 mono_set_pending_exception (mono_get_exception_overflow ());
2260                 return;
2261         }
2262         _this->reserved = 0;
2263 }
2264
2265 void
2266 mono_decimal_init_double (MonoDecimal *_this, double value)
2267 {
2268         if (MONO_VarDecFromR8 (value, _this) == MONO_DECIMAL_OVERFLOW) {
2269                 mono_set_pending_exception (mono_get_exception_overflow ());
2270                 return;
2271         }
2272         _this->reserved = 0;
2273 }
2274
2275 void
2276 mono_decimal_floor (MonoDecimal *d)
2277 {
2278         MonoDecimal decRes;
2279
2280         MONO_VarDecInt(d, &decRes);
2281
2282         // copy decRes into d
2283         COPYDEC(*d, decRes);
2284         d->reserved = 0;
2285         FC_GC_POLL ();
2286 }
2287
2288 int32_t
2289 mono_decimal_get_hash_code (MonoDecimal *d)
2290 {
2291         double dbl;
2292
2293         if (MONO_VarR8FromDec(d, &dbl) != MONO_DECIMAL_OK)
2294                 return 0;
2295
2296         if (dbl == 0.0) {
2297                 // Ensure 0 and -0 have the same hash code
2298                 return 0;
2299         }
2300         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2301         //
2302         // For example these two numerically equal decimals with different internal representations produce
2303         // slightly different results when converted to double:
2304         //
2305         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2306         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2307         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2308         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2309         //
2310         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2311
2312 }
2313
2314 void
2315 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2316 {
2317         MonoDecimal decRes;
2318
2319         MonoDecimalStatus status = MONO_VarDecMul(d1, d2, &decRes);
2320         if (status != MONO_DECIMAL_OK) {
2321                 mono_set_pending_exception (mono_get_exception_overflow ());
2322                 return;
2323         }
2324
2325         COPYDEC(*d1, decRes);
2326         d1->reserved = 0;
2327
2328         FC_GC_POLL ();
2329 }
2330
2331 void
2332 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2333 {
2334         MonoDecimal decRes;
2335
2336         // GC is only triggered for throwing, no need to protect result
2337         if (decimals < 0 || decimals > 28) {
2338                 mono_set_pending_exception (mono_get_exception_argument_out_of_range ("d"));
2339                 return;
2340         }
2341
2342         MONO_VarDecRound(d, decimals, &decRes);
2343
2344         // copy decRes into d
2345         COPYDEC(*d, decRes);
2346         d->reserved = 0;
2347
2348         FC_GC_POLL();
2349 }
2350
2351 void
2352 mono_decimal_tocurrency (MonoDecimal *decimal)
2353 {
2354         // TODO
2355 }
2356
2357 double
2358 mono_decimal_to_double (MonoDecimal d)
2359 {
2360         double result = 0.0;
2361         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2362         MONO_VarR8FromDec(&d, &result);
2363         return result;
2364 }
2365
2366 int32_t
2367 mono_decimal_to_int32 (MonoDecimal d)
2368 {
2369         MonoDecimal result;
2370
2371         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2372         MONO_VarDecRound(&d, 0, &result);
2373
2374         if (DECIMAL_SCALE(result) != 0) {
2375                 d = result;
2376                 MONO_VarDecFix (&d, &result);
2377         }
2378
2379         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2380                 int32_t i = DECIMAL_LO32(result);
2381                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2382                         if (i >= 0)
2383                                 return i;
2384                 } else {
2385                         i = -i;
2386                         if (i <= 0)
2387                                 return i;
2388                 }
2389         }
2390
2391         mono_set_pending_exception (mono_get_exception_overflow ());
2392         return 0;
2393 }
2394
2395 float
2396 mono_decimal_to_float (MonoDecimal d)
2397 {
2398         float result = 0.0f;
2399         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2400         MONO_VarR4FromDec(&d, &result);
2401         return result;
2402 }
2403
2404 void
2405 mono_decimal_truncate (MonoDecimal *d)
2406 {
2407         MonoDecimal decRes;
2408
2409         MONO_VarDecFix(d, &decRes);
2410
2411         // copy decRes into d
2412         COPYDEC(*d, decRes);
2413         d->reserved = 0;
2414         FC_GC_POLL();
2415 }
2416
2417 void
2418 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2419 {
2420         MonoDecimal result, decTmp;
2421         MonoDecimal *pdecTmp, *leftOriginal;
2422         uint32_t    num[6], pwr;
2423         int         scale, hi_prod, cur;
2424         SPLIT64     sdlTmp;
2425
2426         g_assert(sign == 0 || sign == DECIMAL_NEG);
2427
2428         leftOriginal = left;
2429
2430         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2431
2432         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2433                 // Scale factors are equal, no alignment necessary.
2434                 //
2435                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2436
2437         AlignedAdd:
2438                 if (sign) {
2439                         // Signs differ - subtract
2440                         //
2441                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2442                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2443
2444                         // Propagate carry
2445                         //
2446                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2447                                 DECIMAL_HI32(result)--;
2448                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2449                                         goto SignFlip;
2450                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2451                                 // Got negative result.  Flip its sign.
2452                                 //
2453                         SignFlip:
2454                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2455                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2456                                 if (DECIMAL_LO64_GET(result) == 0)
2457                                         DECIMAL_HI32(result)++;
2458                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2459                         }
2460
2461                 } else {
2462                         // Signs are the same - add
2463                         //
2464                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2465                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2466
2467                         // Propagate carry
2468                         //
2469                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2470                                 DECIMAL_HI32(result)++;
2471                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2472                                         goto AlignedScale;
2473                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2474                         AlignedScale:
2475                                 // The addition carried above 96 bits.  Divide the result by 10,
2476                                 // dropping the scale factor.
2477                                 //
2478                                 if (DECIMAL_SCALE(result) == 0) {
2479                                         mono_set_pending_exception (mono_get_exception_overflow ());
2480                                         return;
2481                                 }
2482                                 DECIMAL_SCALE(result)--;
2483
2484                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2485                                 sdlTmp.u.Hi = 1;
2486                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2487                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2488
2489                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2490                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2491                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2492
2493                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2494                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2495                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2496
2497                                 // See if we need to round up.
2498                                 //
2499                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2500                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2501                                         if (DECIMAL_LO64_GET(result) == 0)
2502                                                 DECIMAL_HI32(result)++;
2503                                 }
2504                         }
2505                 }
2506         } else {
2507                 // Scale factors are not equal.  Assume that a larger scale
2508                 // factor (more decimal places) is likely to mean that number
2509                 // is smaller.  Start by guessing that the right operand has
2510                 // the larger scale factor.  The result will have the larger
2511                 // scale factor.
2512                 //
2513                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2514                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2515                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2516
2517                 if (scale < 0) {
2518                         // Guessed scale factor wrong. Swap operands.
2519                         //
2520                         scale = -scale;
2521                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2522                         DECIMAL_SIGN(result) ^= sign;
2523                         pdecTmp = right;
2524                         right = left;
2525                         left = pdecTmp;
2526                 }
2527
2528                 // *left will need to be multiplied by 10^scale so
2529                 // it will have the same scale as *right.  We could be
2530                 // extending it to up to 192 bits of precision.
2531                 //
2532                 if (scale <= POWER10_MAX) {
2533                         // Scaling won't make it larger than 4 uint32_ts
2534                         //
2535                         pwr = power10[scale];
2536                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2537                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2538                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2539                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2540                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2541                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2542                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2543                         if (sdlTmp.u.Hi == 0) {
2544                                 // Result fits in 96 bits.  Use standard aligned add.
2545                                 //
2546                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2547                                 left = &decTmp;
2548                                 goto AlignedAdd;
2549                         }
2550                         num[0] = DECIMAL_LO32(decTmp);
2551                         num[1] = DECIMAL_MID32(decTmp);
2552                         num[2] = sdlTmp.u.Lo;
2553                         num[3] = sdlTmp.u.Hi;
2554                         hi_prod = 3;
2555                 } else {
2556                         // Have to scale by a bunch.  Move the number to a buffer
2557                         // where it has room to grow as it's scaled.
2558                         //
2559                         num[0] = DECIMAL_LO32(*left);
2560                         num[1] = DECIMAL_MID32(*left);
2561                         num[2] = DECIMAL_HI32(*left);
2562                         hi_prod = 2;
2563
2564                         // Scan for zeros in the upper words.
2565                         //
2566                         if (num[2] == 0) {
2567                                 hi_prod = 1;
2568                                 if (num[1] == 0) {
2569                                         hi_prod = 0;
2570                                         if (num[0] == 0) {
2571                                                 // Left arg is zero, return right.
2572                                                 //
2573                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2574                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2575                                                 DECIMAL_SIGN(result) ^= sign;
2576                                                 goto RetDec;
2577                                         }
2578                                 }
2579                         }
2580
2581                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2582                         // with index of highest non-zero uint32_t.
2583                         //
2584                         for (; scale > 0; scale -= POWER10_MAX) {
2585                                 if (scale > POWER10_MAX)
2586                                         pwr = ten_to_nine;
2587                                 else
2588                                         pwr = power10[scale];
2589
2590                                 sdlTmp.u.Hi = 0;
2591                                 for (cur = 0; cur <= hi_prod; cur++) {
2592                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2593                                         num[cur] = sdlTmp.u.Lo;
2594                                 }
2595
2596                                 if (sdlTmp.u.Hi != 0)
2597                                         // We're extending the result by another uint32_t.
2598                                         num[++hi_prod] = sdlTmp.u.Hi;
2599                         }
2600                 }
2601
2602                 // Scaling complete, do the add.  Could be subtract if signs differ.
2603                 //
2604                 sdlTmp.u.Lo = num[0];
2605                 sdlTmp.u.Hi = num[1];
2606
2607                 if (sign) {
2608                         // Signs differ, subtract.
2609                         //
2610                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2611                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2612
2613                         // Propagate carry
2614                         //
2615                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2616                                 DECIMAL_HI32(result)--;
2617                                 if (DECIMAL_HI32(result) >= num[2])
2618                                         goto LongSub;
2619                         } else if (DECIMAL_HI32(result) > num[2]) {
2620                         LongSub:
2621                                 // If num has more than 96 bits of precision, then we need to
2622                                 // carry the subtraction into the higher bits.  If it doesn't,
2623                                 // then we subtracted in the wrong order and have to flip the
2624                                 // sign of the result.
2625                                 //
2626                                 if (hi_prod <= 2)
2627                                         goto SignFlip;
2628
2629                                 cur = 3;
2630                                 while(num[cur++]-- == 0);
2631                                 if (num[hi_prod] == 0)
2632                                         hi_prod--;
2633                         }
2634                 } else {
2635                         // Signs the same, add.
2636                         //
2637                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2638                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2639
2640                         // Propagate carry
2641                         //
2642                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2643                                 DECIMAL_HI32(result)++;
2644                                 if (DECIMAL_HI32(result) <= num[2])
2645                                         goto LongAdd;
2646                         } else if (DECIMAL_HI32(result) < num[2]) {
2647                         LongAdd:
2648                                 // Had a carry above 96 bits.
2649                                 //
2650                                 cur = 3;
2651                                 do {
2652                                         if (hi_prod < cur) {
2653                                                 num[cur] = 1;
2654                                                 hi_prod = cur;
2655                                                 break;
2656                                         }
2657                                 }while (++num[cur++] == 0);
2658                         }
2659                 }
2660
2661                 if (hi_prod > 2) {
2662                         num[0] = DECIMAL_LO32(result);
2663                         num[1] = DECIMAL_MID32(result);
2664                         num[2] = DECIMAL_HI32(result);
2665                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2666                         if (DECIMAL_SCALE(result) == (uint8_t)-1) {
2667                                 mono_set_pending_exception (mono_get_exception_overflow ());
2668                                 return;
2669                         }
2670
2671                         DECIMAL_LO32(result) = num[0];
2672                         DECIMAL_MID32(result) = num[1];
2673                         DECIMAL_HI32(result) = num[2];
2674                 }
2675         }
2676
2677 RetDec:
2678         left = leftOriginal;
2679         COPYDEC(*left, result);
2680         left->reserved = 0;
2681 }
2682
2683 void
2684 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2685 {
2686         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2687         uint32_t pwr, tmp, tmp1;
2688         SPLIT64  sdlTmp, sdlDivisor;
2689         int      scale, cur_scale;
2690         gboolean unscale;
2691
2692         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2693         unscale = FALSE;
2694         divisor[0] = DECIMAL_LO32(*right);
2695         divisor[1] = DECIMAL_MID32(*right);
2696         divisor[2] = DECIMAL_HI32(*right);
2697
2698         if (divisor[1] == 0 && divisor[2] == 0) {
2699                 // Divisor is only 32 bits.  Easy divide.
2700                 //
2701                 if (divisor[0] == 0) {
2702                         mono_set_pending_exception (mono_get_exception_divide_by_zero ());
2703                         return;
2704                 }
2705
2706                 quo[0] = DECIMAL_LO32(*left);
2707                 quo[1] = DECIMAL_MID32(*left);
2708                 quo[2] = DECIMAL_HI32(*left);
2709                 rem[0] = Div96By32(quo, divisor[0]);
2710
2711                 for (;;) {
2712                         if (rem[0] == 0) {
2713                                 if (scale < 0) {
2714                                         cur_scale = min(9, -scale);
2715                                         goto HaveScale;
2716                                 }
2717                                 break;
2718                         }
2719                         // We need to unscale if and only if we have a non-zero remainder
2720                         unscale = TRUE;
2721
2722                         // We have computed a quotient based on the natural scale
2723                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
2724                         // remainder, so now we should increase the scale if possible to
2725                         // include more quotient bits.
2726                         //
2727                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
2728                         // computing more quotient bits as long as the remainder stays
2729                         // non-zero.  If scaling by that much would cause overflow, we'll
2730                         // drop out of the loop and scale by as much as we can.
2731                         //
2732                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2733                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2734                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2735                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
2736                         // is the largest value in quo[1] (when quo[2] == 4) that is
2737                         // assured not to overflow.
2738                         //
2739                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2740                         if (cur_scale == 0) {
2741                                 // No more scaling to be done, but remainder is non-zero.
2742                                 // Round quotient.
2743                                 //
2744                                 tmp = rem[0] << 1;
2745                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2746                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2747                                 RoundUp:
2748                                         if (!Add32To96(quo, 1)) {
2749                                                 if (scale == 0) {
2750                                                         mono_set_pending_exception (mono_get_exception_overflow ());
2751                                                         return;
2752                                                 }
2753                                                 scale--;
2754                                                 OverflowUnscale(quo, TRUE);
2755                                                 break;
2756                                         }
2757                                 }
2758                                 break;
2759                         }
2760
2761                         if (cur_scale < 0) {
2762                                 mono_set_pending_exception (mono_get_exception_overflow ());
2763                                 return;
2764                         }
2765
2766                 HaveScale:
2767                         pwr = power10[cur_scale];
2768                         scale += cur_scale;
2769
2770                         if (IncreaseScale(quo, pwr) != 0) {
2771                                 mono_set_pending_exception (mono_get_exception_overflow ());
2772                                 return;
2773                         }
2774
2775                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2776                         rem[0] = sdlTmp.u.Hi;
2777
2778                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2779                                 if (scale == 0) {
2780                                         mono_set_pending_exception (mono_get_exception_overflow ());
2781                                         return;
2782                                 }
2783                                 scale--;
2784                                 OverflowUnscale(quo, (rem[0] != 0));
2785                                 break;
2786                         }
2787                 } // for (;;)
2788         } else {
2789                 // Divisor has bits set in the upper 64 bits.
2790                 //
2791                 // Divisor must be fully normalized (shifted so bit 31 of the most
2792                 // significant uint32_t is 1).  Locate the MSB so we know how much to
2793                 // normalize by.  The dividend will be shifted by the same amount so
2794                 // the quotient is not changed.
2795                 //
2796                 if (divisor[2] == 0)
2797                         tmp = divisor[1];
2798                 else
2799                         tmp = divisor[2];
2800
2801                 cur_scale = 0;
2802                 if (!(tmp & 0xFFFF0000)) {
2803                         cur_scale += 16;
2804                         tmp <<= 16;
2805                 }
2806                 if (!(tmp & 0xFF000000)) {
2807                         cur_scale += 8;
2808                         tmp <<= 8;
2809                 }
2810                 if (!(tmp & 0xF0000000)) {
2811                         cur_scale += 4;
2812                         tmp <<= 4;
2813                 }
2814                 if (!(tmp & 0xC0000000)) {
2815                         cur_scale += 2;
2816                         tmp <<= 2;
2817                 }
2818                 if (!(tmp & 0x80000000)) {
2819                         cur_scale++;
2820                         tmp <<= 1;
2821                 }
2822
2823                 // Shift both dividend and divisor left by cur_scale.
2824                 //
2825                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2826                 rem[0] = sdlTmp.u.Lo;
2827                 rem[1] = sdlTmp.u.Hi;
2828                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2829                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2830                 sdlTmp.int64 <<= cur_scale;
2831                 rem[2] = sdlTmp.u.Hi;
2832                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2833
2834                 sdlDivisor.u.Lo = divisor[0];
2835                 sdlDivisor.u.Hi = divisor[1];
2836                 sdlDivisor.int64 <<= cur_scale;
2837
2838                 if (divisor[2] == 0) {
2839                         // Have a 64-bit divisor in sdlDivisor.  The remainder
2840                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2841                         //
2842                         sdlTmp.u.Lo = rem[2];
2843                         sdlTmp.u.Hi = rem[3];
2844
2845                         quo[2] = 0;
2846                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2847                         quo[0] = Div96By64(rem, sdlDivisor);
2848
2849                         for (;;) {
2850                                 if ((rem[0] | rem[1]) == 0) {
2851                                         if (scale < 0) {
2852                                                 cur_scale = min(9, -scale);
2853                                                 goto HaveScale64;
2854                                         }
2855                                         break;
2856                                 }
2857
2858                                 // We need to unscale if and only if we have a non-zero remainder
2859                                 unscale = TRUE;
2860
2861                                 // Remainder is non-zero.  Scale up quotient and remainder by
2862                                 // powers of 10 so we can compute more significant bits.
2863                                 //
2864                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2865                                 if (cur_scale == 0) {
2866                                         // No more scaling to be done, but remainder is non-zero.
2867                                         // Round quotient.
2868                                         //
2869                                         sdlTmp.u.Lo = rem[0];
2870                                         sdlTmp.u.Hi = rem[1];
2871                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2872                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2873                                                 goto RoundUp;
2874                                         break;
2875                                 }
2876
2877                                 if (cur_scale < 0) {
2878                                         mono_set_pending_exception (mono_get_exception_overflow ());
2879                                         return;
2880                                 }
2881
2882                         HaveScale64:
2883                                 pwr = power10[cur_scale];
2884                                 scale += cur_scale;
2885
2886                                 if (IncreaseScale(quo, pwr) != 0) {
2887                                         mono_set_pending_exception (mono_get_exception_overflow ());
2888                                         return;
2889                                 }
2890
2891                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2892                                 IncreaseScale(rem, pwr);
2893                                 tmp = Div96By64(rem, sdlDivisor);
2894                                 if (!Add32To96(quo, tmp)) {
2895                                         if (scale == 0) {
2896                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2897                                                 return;
2898                                         }
2899                                         scale--;
2900                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2901                                         break;
2902                                 }
2903
2904                         } // for (;;)
2905                 } else {
2906                         // Have a 96-bit divisor in divisor[].
2907                         //
2908                         // Start by finishing the shift left by cur_scale.
2909                         //
2910                         sdlTmp.u.Lo = divisor[1];
2911                         sdlTmp.u.Hi = divisor[2];
2912                         sdlTmp.int64 <<= cur_scale;
2913                         divisor[0] = sdlDivisor.u.Lo;
2914                         divisor[1] = sdlDivisor.u.Hi;
2915                         divisor[2] = sdlTmp.u.Hi;
2916
2917                         // The remainder (currently 96 bits spread over 4 uint32_ts)
2918                         // will be < divisor.
2919                         //
2920                         quo[2] = 0;
2921                         quo[1] = 0;
2922                         quo[0] = Div128By96(rem, divisor);
2923
2924                         for (;;) {
2925                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2926                                         if (scale < 0) {
2927                                                 cur_scale = min(9, -scale);
2928                                                 goto HaveScale96;
2929                                         }
2930                                         break;
2931                                 }
2932
2933                                 // We need to unscale if and only if we have a non-zero remainder
2934                                 unscale = TRUE;
2935
2936                                 // Remainder is non-zero.  Scale up quotient and remainder by
2937                                 // powers of 10 so we can compute more significant bits.
2938                                 //
2939                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2940                                 if (cur_scale == 0) {
2941                                         // No more scaling to be done, but remainder is non-zero.
2942                                         // Round quotient.
2943                                         //
2944                                         if (rem[2] >= 0x80000000)
2945                                                 goto RoundUp;
2946
2947                                         tmp = rem[0] > 0x80000000;
2948                                         tmp1 = rem[1] > 0x80000000;
2949                                         rem[0] <<= 1;
2950                                         rem[1] = (rem[1] << 1) + tmp;
2951                                         rem[2] = (rem[2] << 1) + tmp1;
2952
2953                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2954                                                 goto RoundUp;
2955                                         break;
2956                                 }
2957
2958                                 if (cur_scale < 0) {
2959                                         mono_set_pending_exception (mono_get_exception_overflow ());
2960                                         return;
2961                                 }
2962
2963                         HaveScale96:
2964                                 pwr = power10[cur_scale];
2965                                 scale += cur_scale;
2966
2967                                 if (IncreaseScale(quo, pwr) != 0) {
2968                                         mono_set_pending_exception (mono_get_exception_overflow ());
2969                                         return;
2970                                 }
2971
2972                                 rem[3] = IncreaseScale(rem, pwr);
2973                                 tmp = Div128By96(rem, divisor);
2974                                 if (!Add32To96(quo, tmp)) {
2975                                         if (scale == 0) {
2976                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2977                                                 return;
2978                                         }
2979
2980                                         scale--;
2981                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2982                                         break;
2983                                 }
2984
2985                         } // for (;;)
2986                 }
2987         }
2988
2989         // We need to unscale if and only if we have a non-zero remainder
2990         if (unscale) {
2991                 // Try extracting any extra powers of 10 we may have
2992                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2993                 // If a division by one of these powers returns a zero remainder, then
2994                 // we keep the quotient.  If the remainder is not zero, then we restore
2995                 // the previous value.
2996                 //
2997                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2998                 // we can extract.  We use this as a quick test on whether to try a
2999                 // given power.
3000                 //
3001                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
3002                         quo_save[0] = quo[0];
3003                         quo_save[1] = quo[1];
3004                         quo_save[2] = quo[2];
3005
3006                         if (Div96By32(quo_save, 100000000) == 0) {
3007                                 quo[0] = quo_save[0];
3008                                 quo[1] = quo_save[1];
3009                                 quo[2] = quo_save[2];
3010                                 scale -= 8;
3011                         } else
3012                                 break;
3013                 }
3014
3015                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
3016                         quo_save[0] = quo[0];
3017                         quo_save[1] = quo[1];
3018                         quo_save[2] = quo[2];
3019
3020                         if (Div96By32(quo_save, 10000) == 0) {
3021                                 quo[0] = quo_save[0];
3022                                 quo[1] = quo_save[1];
3023                                 quo[2] = quo_save[2];
3024                                 scale -= 4;
3025                         }
3026                 }
3027
3028                 if ((quo[0] & 3) == 0 && scale >= 2) {
3029                         quo_save[0] = quo[0];
3030                         quo_save[1] = quo[1];
3031                         quo_save[2] = quo[2];
3032
3033                         if (Div96By32(quo_save, 100) == 0) {
3034                                 quo[0] = quo_save[0];
3035                                 quo[1] = quo_save[1];
3036                                 quo[2] = quo_save[2];
3037                                 scale -= 2;
3038                         }
3039                 }
3040
3041                 if ((quo[0] & 1) == 0 && scale >= 1) {
3042                         quo_save[0] = quo[0];
3043                         quo_save[1] = quo[1];
3044                         quo_save[2] = quo[2];
3045
3046                         if (Div96By32(quo_save, 10) == 0) {
3047                                 quo[0] = quo_save[0];
3048                                 quo[1] = quo_save[1];
3049                                 quo[2] = quo_save[2];
3050                                 scale -= 1;
3051                         }
3052                 }
3053         }
3054
3055         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3056         DECIMAL_HI32(*left) = quo[2];
3057         DECIMAL_MID32(*left) = quo[1];
3058         DECIMAL_LO32(*left) = quo[0];
3059         DECIMAL_SCALE(*left) = (uint8_t)scale;
3060         left->reserved = 0;
3061
3062 }
3063
3064 #define DECIMAL_PRECISION 29
3065 #define NUMBER_MAXDIGITS 50
3066 typedef struct  {
3067         int32_t precision;
3068         int32_t scale;
3069         int32_t sign;
3070         uint16_t digits[NUMBER_MAXDIGITS + 1];
3071         uint16_t* allDigits;
3072 } CLRNumber;
3073
3074 int
3075 mono_decimal_from_number (void *from, MonoDecimal *target)
3076 {
3077         CLRNumber *number = (CLRNumber *) from;
3078         uint16_t* p = number->digits;
3079         MonoDecimal d;
3080         int e = number->scale;
3081         g_assert(number != NULL);
3082         g_assert(target != NULL);
3083
3084         d.reserved = 0;
3085         DECIMAL_SIGNSCALE(d) = 0;
3086         DECIMAL_HI32(d) = 0;
3087         DECIMAL_LO32(d) = 0;
3088         DECIMAL_MID32(d) = 0;
3089         g_assert(p != NULL);
3090         if (!*p) {
3091                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3092                 // the scale to 0 if the scale was previously positive
3093                 if (e > 0) {
3094                         e = 0;
3095                 }
3096         } else {
3097                 if (e > DECIMAL_PRECISION) return 0;
3098                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3099                         DecMul10(&d);
3100                         if (*p)
3101                                 DecAddInt32(&d, *p++ - '0');
3102                         e--;
3103                 }
3104                 if (*p++ >= '5') {
3105                         gboolean round = TRUE;
3106                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3107                                 // For digits > 5 we will be roundinp up anyway.
3108                                 int count = 20; // Look at the next 20 digits to check to round
3109                                 while (*p == '0' && count != 0) {
3110                                         p++;
3111                                         count--;
3112                                 }
3113                                 if (*p == '\0' || count == 0)
3114                                         round = FALSE;// Do nothing
3115                         }
3116
3117                         if (round) {
3118                                 DecAddInt32(&d, 1);
3119                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3120                                         DECIMAL_HI32(d) = 0x19999999;
3121                                         DECIMAL_MID32(d) = 0x99999999;
3122                                         DECIMAL_LO32(d) = 0x9999999A;
3123                                         e++;
3124                                 }
3125                         }
3126                 }
3127         }
3128         if (e > 0)
3129                 return 0;
3130         if (e <= -DECIMAL_PRECISION) {
3131                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3132                 // This should only happen for actual zeros or very small numbers that round to zero.
3133                 DECIMAL_SIGNSCALE(d) = 0;
3134                 DECIMAL_HI32(d) = 0;
3135                 DECIMAL_LO32(d) = 0;
3136                 DECIMAL_MID32(d) = 0;
3137                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3138         } else {
3139                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3140         }
3141
3142         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3143         *target = d;
3144         return 1;
3145 }
3146
3147
3148 #endif