mono/metadata/decimal-ms.c

   1 //
   2 // Copyright (c) Microsoft. All rights reserved.
   3 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
   4 //
   5 // Copyright 2015 Xamarin Inc
   6 //
   7 // File: decimal.c
   8 //
   9 // Ported from C++ to C and adjusted to Mono runtime
  10 //
  11 // Pending:
  12 //   DoToCurrency (they look like new methods we do not have)
  13 //
  14 #ifndef DISABLE_DECIMAL
  15 #include "config.h"
  16 #include <stdint.h>
  17 #include <glib.h>
  18 #include <mono/utils/mono-compiler.h>
  19 #include <mono/metadata/exception.h>
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <math.h>
  24 #ifdef HAVE_MEMORY_H
  25 #include <memory.h>
  26 #endif
  27 #ifdef _MSC_VER
  28 #include <intrin.h>
  29 #endif
  30 #include "decimal-ms.h"
  31
// Smaller of a and b.  This is a function-like macro: whichever argument is
// selected is evaluated twice, so do not pass expressions with side effects.
#define min(a, b) (((a) < (b)) ? (a) : (b))
  33
// Status codes used by the decimal routines in this file.
// MONO_DECIMAL_OK is the first enumerator and therefore has the value 0.
typedef enum {
        MONO_DECIMAL_OK,
        MONO_DECIMAL_OVERFLOW,
        MONO_DECIMAL_INVALID_ARGUMENT,
        MONO_DECIMAL_DIVBYZERO,
        MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
} MonoDecimalStatus;
  41
// Fallback: if the build has not already provided FC_GC_POLL, define it as a
// macro that expands to nothing.
#ifndef FC_GC_POLL
#   define FC_GC_POLL()
#endif
  45
  46 static const uint32_t ten_to_nine    = 1000000000U;
  47 static const uint32_t ten_to_ten_div_4 = 2500000000U;
  48 #define POWER10_MAX     9
  49 #define DECIMAL_NEG ((uint8_t)0x80)
  50 #define DECMAX 28
  51 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
  52 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
  53 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
  54 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
  55 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
  56 #define DECIMAL_HI32(dec)        ((dec).Hi32)
  57 #define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
  58 #define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
  59
  60 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
  61 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
  62     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
  63
  64 #define DEC_SCALE_MAX   28
  65 #define POWER10_MAX     9
  66
  67 #define OVFL_MAX_9_HI   4
  68 #define OVFL_MAX_9_MID  1266874889
  69 #define OVFL_MAX_9_LO   3047500985u
  70
  71 #define OVFL_MAX_5_HI   42949
  72 #define OVFL_MAX_5_MID  2890341191
  73
  74 #define OVFL_MAX_1_HI   429496729
  75
// A 64-bit unsigned value that can also be accessed as two 32-bit halves.
// The order of the Hi/Lo members is chosen from BYTE_ORDER so that u.Lo
// overlays the least-significant 32 bits of int64 and u.Hi the
// most-significant 32 bits.
typedef union {
        uint64_t int64;
        struct {
#if BYTE_ORDER == G_BIG_ENDIAN
        uint32_t Hi;
        uint32_t Lo;
#else
        uint32_t Lo;
        uint32_t Hi;
#endif
    } u;
} SPLIT64;
  88
// 10^18 as a SPLIT64 (initializes the int64 member).
static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
// Double Bias
// NOTE(review): the IEEE 754 binary64 exponent bias is 1023; the value 1022
// presumably treats the mantissa as a fraction in [0.5, 1) -- confirm against
// the code that uses it.
#define DBLBIAS 1022

// Structure to access an encoded double floating point
// The bit-fields split the 64 bits into sign (1), exponent (11) and mantissa
// (20 high bits + 32 low bits); the member order depends on BYTE_ORDER.
typedef union{
    struct {
#if BYTE_ORDER == G_BIG_ENDIAN
      unsigned int sign:1;
      unsigned int exp:11;
      unsigned int mantHi:20;
      unsigned int mantLo;
#else // little endian
      unsigned int mantLo;
      unsigned int mantHi:20;
      unsigned int exp:11;
      unsigned int sign:1;
#endif
    } u;
    double dbl;
} DoubleStructure;

// Builds a DoubleStructure initializer from its fields, listing them in the
// member order that matches the byte order selected above.
#if BYTE_ORDER == G_BIG_ENDIAN
#define DEFDS(Lo, Hi, exp, sign) { {sign, exp, Hi, Lo } }
#else
#define DEFDS(Lo, Hi, exp, sign) { {Lo, Hi, exp, sign} }
#endif

// Zero mantissa, positive sign, exponent field DBLBIAS + 65 = 1087, which
// encodes 2^64 as an IEEE 754 binary64 value.
const DoubleStructure ds2to64 = DEFDS(0, 0, DBLBIAS + 65, 0);
 118
 119 // Single floating point Bias
// NOTE(review): the IEEE 754 binary32 exponent bias is 127; 126 presumably
// follows the same mantissa-as-fraction convention as DBLBIAS -- confirm.
#define SNGBIAS 126

// Structure to access an encoded single floating point
// Bit-fields: sign (1), exponent (8), mantissa (23); the member order depends
// on BYTE_ORDER.
typedef struct {
#if BYTE_ORDER == G_BIG_ENDIAN
    unsigned int sign:1;
    unsigned int exp:8;
    unsigned int mant:23;
#else
    unsigned int mant:23;
    unsigned int exp:8;
    unsigned int sign:1;
#endif
} SingleStructure;
 134
 135 //
 136 // Data tables
 137 //
 138
// power10[n] == 10^n for 0 <= n <= POWER10_MAX (all values fit in 32 bits).
static const uint32_t power10 [POWER10_MAX+1] = {
        1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
};
 142
 143
// double_power10[n] == 10^n as a double, for 0 <= n <= 80 (81 entries).
static const double double_power10[] = {
        1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
        1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
        1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
        1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
        1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
        1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
        1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
        1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
        1e80 };
 154
// Powers of ten that no longer fit in 32 bits, stored as SPLIT64:
// sdl_power10[i] == 10^(10 + i) for 0 <= i <= 4.
const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
                                {100000000000ULL},         // 1E11
                                {1000000000000ULL},        // 1E12
                                {10000000000000ULL},       // 1E13
                                {100000000000000ULL} };    // 1E14
 160
// long_power10[n] == 10^n as a 64-bit unsigned value, for 0 <= n <= 19
// (20 entries).
static const uint64_t long_power10[] = {
        1,
        10ULL,
        100ULL,
        1000ULL,
        10000ULL,
        100000ULL,
        1000000ULL,
        10000000ULL,
        100000000ULL,
        1000000000ULL,
        10000000000ULL,
        100000000000ULL,
        1000000000000ULL,
        10000000000000ULL,
        100000000000000ULL,
        1000000000000000ULL,
        10000000000000000ULL,
        100000000000000000ULL,
        1000000000000000000ULL,
        10000000000000000000ULL};
 182
// A 96-bit threshold split into three 32-bit words, most-significant first.
typedef struct  {
        uint32_t Hi, Mid, Lo;
} DECOVFL;

// power_overflow[n - 1] is the threshold for scaling by 10^n, 1 <= n <= 9.
// SearchScale() treats a 96-bit value as safe to multiply by 10^n when it is
// less than or equal to that row (compared word by word from Hi down to Lo).
const DECOVFL power_overflow[] = {
// This is a table of the largest values that can be in the three
// uint32_t words of a 96-bit number that will not overflow when multiplied
// by a given power.  For the upper word, this is a table of
// 2^32 / 10^n for 1 <= n <= 9.  For the lower words, this is the
// remaining fraction part * 2^32 (applied once more for the lowest word).
// 2^32 = 4294967296.
//
    { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
    { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
    { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
    { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
    { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
    { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
    { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
    { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
    { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
};
 204
 205
// Unsigned 32x32 -> 64-bit multiply; both arguments are truncated to uint32_t.
#define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
// Unsigned 64-by-32 division; the quotient is truncated to 32 bits, so the
// result is exact only when the true quotient fits in a uint32_t.
#define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
// Unsigned 64-by-32 remainder.
#define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
 209
 210 static double
 211 fnDblPower10(int ix)
 212 {
 213     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
 214     g_assert(ix >= 0);
 215     if (ix < maxIx)
 216         return double_power10[ix];
 217     return pow(10.0, ix);
 218 } // double fnDblPower10()
 219
 220
 221 static inline int64_t
 222 DivMod32by32(int32_t num, int32_t den)
 223 {
 224     SPLIT64  sdl;
 225
 226     sdl.u.Lo = num / den;
 227     sdl.u.Hi = num % den;
 228     return sdl.int64;
 229 }
 230
 231 static inline int64_t
 232 DivMod64by32(int64_t num, int32_t den)
 233 {
 234     SPLIT64  sdl;
 235
 236     sdl.u.Lo = Div64by32(num, den);
 237     sdl.u.Hi = Mod64by32(num, den);
 238     return sdl.int64;
 239 }
 240
 241 static uint64_t
 242 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
 243 {
 244         SPLIT64  tmp1;
 245         SPLIT64  tmp2;
 246         SPLIT64  tmp3;
 247
 248         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
 249         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
 250         tmp1.u.Hi += tmp2.u.Lo;
 251         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 252                 tmp2.u.Hi++;
 253         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
 254         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
 255         tmp1.u.Hi += tmp2.u.Lo;
 256         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 257                 tmp2.u.Hi++;
 258         tmp3.int64 += (uint64_t)tmp2.u.Hi;
 259
 260         *hi = tmp3.int64;
 261         return tmp1.int64;
 262 }
 263
 264 /**
 265 * FullDiv64By32:
 266 *
 267 * Entry:
 268 *   pdlNum  - Pointer to 64-bit dividend
 269 *   ulDen   - 32-bit divisor
 270 *
 271 * Purpose:
 272 *   Do full divide, yielding 64-bit result and 32-bit remainder.
 273 *
 274 * Exit:
 275 *   Quotient overwrites dividend.
 276 *   Returns remainder.
 277 *
 278 * Exceptions:
 279 *   None.
 280 */
 281 // Was: FullDiv64By32
 282 static uint32_t
 283 FullDiv64By32 (uint64_t *num, uint32_t den)
 284 {
 285         SPLIT64  tmp;
 286         SPLIT64  res;
 287
 288         tmp.int64 = *num;
 289         res.u.Hi = 0;
 290
 291         if (tmp.u.Hi >= den) {
 292                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
 293                 //
 294                 res.u.Lo = tmp.u.Hi;
 295                 res.int64 = DivMod64by32(res.int64, den);
 296                 tmp.u.Hi = res.u.Hi;
 297                 res.u.Hi = res.u.Lo;
 298         }
 299
 300         tmp.int64 = DivMod64by32(tmp.int64, den);
 301         res.u.Lo = tmp.u.Lo;
 302         *num = res.int64;
 303         return tmp.u.Hi;
 304 }
 305
/***
 * SearchScale
 *
 * Entry:
 *   res_hi - Top uint32_t of quotient
 *   res_mid - Middle uint32_t of quotient
 *   res_lo - Bottom uint32_t of quotient
 *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 *
 * Purpose:
 *   Determine the max power of 10, <= 9, that the quotient can be scaled
 *   up by and still fit in 96 bits.  The thresholds come from
 *   power_overflow[], where entry [n - 1] is the limit for 10^n.
 *
 * Exit:
 *   Returns power of 10 to scale by, -1 if overflow error.
 *   The "return 9" path returns directly and does not go through the
 *   negative-scale check at HaveScale.
 *
 ***********************************************************************/

static int
SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
{
        int   cur_scale;

        // Quick check to stop us from trying to scale any more.
        // Either the value is too large to take even one factor of 10, or
        // the scale is already at its maximum.
        //
        if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
                cur_scale = 0;
                goto HaveScale;
        }

        if (scale > DEC_SCALE_MAX - 9) {
                // We can't scale by 10^9 without exceeding the max scale factor.
                // See if we can scale to the max.  If not, we'll fall into
                // standard search for scale factor.
                //
                cur_scale = DEC_SCALE_MAX - scale;
                if (res_hi < power_overflow[cur_scale - 1].Hi)
                        goto HaveScale;

                if (res_hi == power_overflow[cur_scale - 1].Hi) {
                // Shared tie-break: the high word equals the threshold for
                // cur_scale, so the middle and low words decide whether
                // cur_scale fits or must drop by one.
                UpperEq:
                        if (res_mid > power_overflow[cur_scale - 1].Mid ||
                            (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
                                cur_scale--;
                        }
                        goto HaveScale;
                }
        } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
                return 9;

        // Search for a power to scale by < 9.  Do a binary search
        // on power_overflow[].
        //
        cur_scale = 5;
        if (res_hi < OVFL_MAX_5_HI)
                cur_scale = 7;
        else if (res_hi > OVFL_MAX_5_HI)
                cur_scale = 3;
        else
                goto UpperEq;

        // cur_scale is 3 or 7.
        //
        if (res_hi < power_overflow[cur_scale - 1].Hi)
                cur_scale++;
        else if (res_hi > power_overflow[cur_scale - 1].Hi)
                cur_scale--;
        else
                goto UpperEq;

        // cur_scale is 2, 4, 6, or 8.
        //
        // In all cases, we already found we could not use the power one larger.
        // So if we can use this power, it is the biggest, and we're done.  If
        // we can't use this power, the one below it is correct for all cases
        // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
        //
        if (res_hi > power_overflow[cur_scale - 1].Hi)
                cur_scale--;

        if (res_hi == power_overflow[cur_scale - 1].Hi)
                goto UpperEq;

HaveScale:
        // cur_scale = largest power of 10 we can scale by without overflow,
        // cur_scale < 9.  See if this is enough to make scale factor
        // positive if it isn't already.
        //
        if (cur_scale + scale < 0)
                cur_scale = -1;

        return cur_scale;
}
 399
 400
 401 /**
 402 * Div96By32
 403 *
 404 * Entry:
 405 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
 406 *   ulDen   - 32-bit divisor.
 407 *
 408 * Purpose:
 409 *   Do full divide, yielding 96-bit result and 32-bit remainder.
 410 *
 411 * Exit:
 412 *   Quotient overwrites dividend.
 413 *   Returns remainder.
 414 *
 415 * Exceptions:
 416 *   None.
 417 *
 418 */
 419 static uint32_t
 420 Div96By32(uint32_t *num, uint32_t den)
 421 {
 422         SPLIT64  tmp;
 423
 424         tmp.u.Hi = 0;
 425
 426         if (num[2] != 0)
 427                 goto Div3Word;
 428
 429         if (num[1] >= den)
 430                 goto Div2Word;
 431
 432         tmp.u.Hi = num[1];
 433         num[1] = 0;
 434         goto Div1Word;
 435
 436 Div3Word:
 437         tmp.u.Lo = num[2];
 438         tmp.int64 = DivMod64by32(tmp.int64, den);
 439         num[2] = tmp.u.Lo;
 440 Div2Word:
 441         tmp.u.Lo = num[1];
 442         tmp.int64 = DivMod64by32(tmp.int64, den);
 443         num[1] = tmp.u.Lo;
 444 Div1Word:
 445         tmp.u.Lo = num[0];
 446         tmp.int64 = DivMod64by32(tmp.int64, den);
 447         num[0] = tmp.u.Lo;
 448         return tmp.u.Hi;
 449 }
 450
 451 /***
 452  * DecFixInt
 453  *
 454  * Entry:
 455  *   pdecRes - Pointer to Decimal result location
 456  *   operand  - Pointer to Decimal operand
 457  *
 458  * Purpose:
 459  *   Chop the value to integer.  Return remainder so Int() function
 460  *   can round down if non-zero.
 461  *
 462  * Exit:
 463  *   Returns remainder.
 464  *
 465  * Exceptions:
 466  *   None.
 467  *
 468  ***********************************************************************/
 469
 470 static uint32_t
 471 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
 472 {
 473         uint32_t   num[3];
 474         uint32_t   rem;
 475         uint32_t   pwr;
 476         int     scale;
 477
 478         if (operand->u.u.scale > 0) {
 479                 num[0] = operand->v.v.Lo32;
 480                 num[1] = operand->v.v.Mid32;
 481                 num[2] = operand->Hi32;
 482                 scale = operand->u.u.scale;
 483                 result->u.u.sign = operand->u.u.sign;
 484                 rem = 0;
 485
 486                 do {
 487                         if (scale > POWER10_MAX)
 488                                 pwr = ten_to_nine;
 489                         else
 490                                 pwr = power10[scale];
 491
 492                         rem |= Div96By32(num, pwr);
 493                         scale -= 9;
 494                 }while (scale > 0);
 495
 496                 result->v.v.Lo32 = num[0];
 497                 result->v.v.Mid32 = num[1];
 498                 result->Hi32 = num[2];
 499                 result->u.u.scale = 0;
 500
 501                 return rem;
 502         }
 503
 504         COPYDEC(*result, *operand);
 505         // Odd, the Microsoft code does not set result->reserved to zero on this case
 506         return 0;
 507 }
 508
/**
 * ScaleResult:
 *
 * Entry:
 *   res - Array of uint32_ts with value, least-significant first.
 *   hi_res  - Index of last non-zero value in res.
 *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 *
 * Purpose:
 *   See if we need to scale the result to fit it in 96 bits.
 *   Perform needed scaling.  Adjust scale factor accordingly.
 *   Scaling divides res by powers of 10 (at most 10^9 per pass) and rounds
 *   the final quotient to nearest, ties to even, using the remainders of
 *   earlier passes as a sticky bit.
 *
 * Exit:
 *   res updated in place, always 3 uint32_ts.
 *   New scale factor returned, -1 if overflow error.
 *
 */
static int
ScaleResult(uint32_t *res, int hi_res, int scale)
{
        int     new_scale;
        int     cur;
        uint32_t   pwr;
        uint32_t   tmp;
        uint32_t   sticky;
        SPLIT64 sdlTmp;

        // See if we need to scale the result.  The combined scale must
        // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
        //
        // Start by figuring a lower bound on the scaling needed to make
        // the upper 96 bits zero.  hi_res is the index into res[]
        // of the highest non-zero uint32_t.
        //
        // This is only positive when hi_res >= 3, i.e. when the value has
        // bits above the low 96.
        new_scale =   hi_res * 32 - 64 - 1;
        if (new_scale > 0) {

                // Find the MSB.
                // Each step subtracts the leading zero bits found in
                // res[hi_res]; afterwards new_scale is the index of the
                // highest set bit of the whole value minus 96.
                //
                tmp = res[hi_res];
                if (!(tmp & 0xFFFF0000)) {
                        new_scale -= 16;
                        tmp <<= 16;
                }
                if (!(tmp & 0xFF000000)) {
                        new_scale -= 8;
                        tmp <<= 8;
                }
                if (!(tmp & 0xF0000000)) {
                        new_scale -= 4;
                        tmp <<= 4;
                }
                if (!(tmp & 0xC0000000)) {
                        new_scale -= 2;
                        tmp <<= 2;
                }
                if (!(tmp & 0x80000000)) {
                        new_scale--;
                        tmp <<= 1;
                }

                // Multiply bit position by log10(2) to figure its power of 10.
                // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
                // with a multiply saves a 96-byte lookup table.  The power returned
                // is <= the power of the number, so we must add one power of 10
                // to make its integer part zero after dividing by 256.
                //
                // Note: the result of this multiplication by an approximation of
                // log10(2) has been exhaustively checked to verify it gives the
                // correct result.  (There were only 95 to check...)
                //
                new_scale = ((new_scale * 77) >> 8) + 1;

                // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
                // This reduces the scale factor of the result.  If it exceeds the
                // current scale of the result, we'll overflow.
                //
                if (new_scale > scale)
                        return -1;
        }
        else
                new_scale = 0;

        // Make sure we scale by enough to bring the current scale factor
        // into valid range.
        //
        if (new_scale < scale - DEC_SCALE_MAX)
                new_scale = scale - DEC_SCALE_MAX;

        if (new_scale != 0) {
                // Scale by the power of 10 given by new_scale.  Note that this is
                // NOT guaranteed to bring the number within 96 bits -- it could
                // be 1 power of 10 short.
                //
                scale -= new_scale;
                sticky = 0;
                sdlTmp.u.Hi = 0; // initialize remainder

                // Each pass divides res[0..hi_res] by up to 10^9.  The loop
                // is re-entered with "continue" while more scaling is needed
                // and left by one of the returns at the bottom.
                for (;;) {

                        sticky |= sdlTmp.u.Hi; // record remainder as sticky bit

                        if (new_scale > POWER10_MAX)
                                pwr = ten_to_nine;
                        else
                                pwr = power10[new_scale];

                        // Compute first quotient.
                        // DivMod64by32 returns quotient in Lo, remainder in Hi.
                        //
                        sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
                        res[hi_res] = sdlTmp.u.Lo;
                        cur = hi_res - 1;

                        if (cur >= 0) {
                                // If first quotient was 0, update hi_res.
                                //
                                if (sdlTmp.u.Lo == 0)
                                        hi_res--;

                                // Compute subsequent quotients.
                                // The remainder left in sdlTmp.u.Hi becomes the
                                // high half of the next 64-bit dividend.
                                //
                                do {
                                        sdlTmp.u.Lo = res[cur];
                                        sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
                                        res[cur] = sdlTmp.u.Lo;
                                        cur--;
                                } while (cur >= 0);

                        }

                        new_scale -= POWER10_MAX;
                        if (new_scale > 0)
                                continue; // scale some more

                        // If we scaled enough, hi_res would be 2 or less.  If not,
                        // divide by 10 more.
                        //
                        if (hi_res > 2) {
                                new_scale = 1;
                                scale--;
                                continue; // scale by 10
                        }

                        // Round final result.  See if remainder >= 1/2 of divisor.
                        // If remainder == 1/2 divisor, round up if odd or sticky bit set.
                        //
                        pwr >>= 1;  // power of 10 always even
                        if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
                                                    ((res[0] & 1) | sticky)) ) {
                                // Add one to the value, propagating the carry
                                // upward while words wrap around to zero.
                                cur = -1;
                                while (++res[++cur] == 0);

                                if (cur > 2) {
                                        // The rounding caused us to carry beyond 96 bits.
                                        // Scale by 10 more.
                                        //
                                        hi_res = cur;
                                        sticky = 0;  // no sticky bit
                                        sdlTmp.u.Hi = 0; // or remainder
                                        new_scale = 1;
                                        scale--;
                                        continue; // scale by 10
                                }
                        }

                        // We may have scaled it more than we planned.  Make sure the scale
                        // factor hasn't gone negative, indicating overflow.
                        //
                        if (scale < 0)
                                return -1;

                        return scale;
                } // for(;;)
        }
        return scale;
}
 686
// Decimal multiply
//
// Computes *result = *left * *right.  The result scale starts as the sum of
// the operand scales and is reduced (with rounding) when it exceeds
// DEC_SCALE_MAX or when the product does not fit in 96 bits.  The result sign
// is the XOR of the operand signs, except on the ReturnZero path, which
// clears the value and the sign/scale field.
//
// Two paths:
//   - both operands fit in 32 bits: a single 32x32->64 multiply;
//   - otherwise: a 128- or 192-bit product in prod[], reduced by ScaleResult().
//
// Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
static MonoDecimalStatus
VarDecMul(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
{
        SPLIT64 tmp;
        SPLIT64 tmp2;
        SPLIT64 tmp3;
        int     scale;
        int     hi_prod;
        uint32_t   pwr;
        uint32_t   rem_lo;
        uint32_t   rem_hi;
        uint32_t   prod[6];

        scale = left->u.u.scale + right->u.u.scale;

        if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
                // Upper 64 bits are zero.
                //
                tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
                if (scale > DEC_SCALE_MAX)
                {
                        // Result scale is too big.  Divide result by power of 10 to reduce it.
                        // If the amount to divide by is > 19 the result is guaranteed
                        // less than 1/2.  [max value in 64 bits = 1.84E19]
                        //
                        scale -= DEC_SCALE_MAX;
                        if (scale > 19) {
                        // Also reached by goto from the wide path when every
                        // word of the product is zero.
                        ReturnZero:
                                DECIMAL_SETZERO(*result);
                                return MONO_DECIMAL_OK;
                        }

                        if (scale > POWER10_MAX) {
                                // Divide by 1E10 first, to get the power down to a 32-bit quantity.
                                // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
                                // then multiply the next divisor by 4 (which will be a max of 4E9).
                                //
                                rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
                                pwr = power10[scale - 10] << 2;
                        } else {
                                pwr = power10[scale];
                                rem_lo = 0;
                        }

                        // Power to divide by fits in 32 bits.
                        //
                        rem_hi = FullDiv64By32(&tmp.int64, pwr);

                        // Round result.  See if remainder >= 1/2 of divisor.
                        // Divisor is a power of 10, so it is always even.
                        // On an exact tie, round up only if the first division
                        // left a remainder (rem_lo) or the quotient is odd.
                        //
                        pwr >>= 1;
                        if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
                                tmp.int64++;

                        scale = DEC_SCALE_MAX;
                }
                DECIMAL_LO32(*result) = tmp.u.Lo;
                DECIMAL_MID32(*result) = tmp.u.Hi;
                DECIMAL_HI32(*result) = 0;
        } else {
                // At least one operand has bits set in the upper 64 bits.
                //
                // Compute and accumulate the 9 partial products into a
                // 192-bit (24-byte) result.
                //
                //                [l-h][l-m][l-l]   left high, middle, low
                //             x  [r-h][r-m][r-l]   right high, middle, low
                // ------------------------------
                //
                //                     [0-h][0-l]   l-l * r-l
                //                [1ah][1al]        l-l * r-m
                //                [1bh][1bl]        l-m * r-l
                //           [2ah][2al]             l-m * r-m
                //           [2bh][2bl]             l-l * r-h
                //           [2ch][2cl]             l-h * r-l
                //      [3ah][3al]                  l-m * r-h
                //      [3bh][3bl]                  l-h * r-m
                // [4-h][4-l]                       l-h * r-h
                // ------------------------------
                // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
                //
                tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
                prod[0] = tmp.u.Lo;

                tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;

                tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
                tmp.int64 += tmp2.int64; // this could generate carry
                prod[1] = tmp.u.Lo;
                // tmp2 is rebuilt as the value carried into the next column:
                // Lo = high word of the sum, Hi = carry out of the 64-bit add.
                if (tmp.int64 < tmp2.int64) // detect carry
                        tmp2.u.Hi = 1;
                else
                        tmp2.u.Hi = 0;
                tmp2.u.Lo = tmp.u.Hi;

                tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;

                if (left->Hi32 | right->Hi32) {
                        // Highest 32 bits is non-zero.  Calculate 5 more partial products.
                        // tmp3 plays the same carry-forward role as tmp2 above.
                        //
                        tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
                        tmp.int64 += tmp2.int64; // this could generate carry
                        if (tmp.int64 < tmp2.int64) // detect carry
                                tmp3.u.Hi = 1;
                        else
                                tmp3.u.Hi = 0;

                        tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
                        tmp.int64 += tmp2.int64; // this could generate carry
                        prod[2] = tmp.u.Lo;
                        if (tmp.int64 < tmp2.int64) // detect carry
                                tmp3.u.Hi++;
                        tmp3.u.Lo = tmp.u.Hi;

                        tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
                        tmp.int64 += tmp3.int64; // this could generate carry
                        if (tmp.int64 < tmp3.int64) // detect carry
                                tmp3.u.Hi = 1;
                        else
                                tmp3.u.Hi = 0;

                        tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
                        tmp.int64 += tmp2.int64; // this could generate carry
                        prod[3] = tmp.u.Lo;
                        if (tmp.int64 < tmp2.int64) // detect carry
                                tmp3.u.Hi++;
                        tmp3.u.Lo = tmp.u.Hi;

                        tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
                        prod[4] = tmp.u.Lo;
                        prod[5] = tmp.u.Hi;

                        hi_prod = 5;
                }
                else {
                        // Both high words are zero: the product is at most
                        // 128 bits, so only prod[0..3] are written.
                        prod[2] = tmp.u.Lo;
                        prod[3] = tmp.u.Hi;
                        hi_prod = 3;
                }

                // Check for leading zero uint32_ts on the product
                //
                while (prod[hi_prod] == 0) {
                        hi_prod--;
                        if (hi_prod < 0)
                                goto ReturnZero;
                }

                // Reduce the product to 96 bits and a valid scale; -1 means
                // it cannot be represented.
                scale = ScaleResult(prod, hi_prod, scale);
                if (scale == -1)
                        return MONO_DECIMAL_OVERFLOW;

                result->v.v.Lo32 = prod[0];
                result->v.v.Mid32 = prod[1];
                result->Hi32 = prod[2];
        }

        result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
        result->u.u.scale = (char)scale;
        return MONO_DECIMAL_OK;
}
 851
 852 // Addition and subtraction
     //
     // Shared worker for VarDecAdd and VarDecSub in this file: VarDecAdd passes
     // sign == 0 and VarDecSub passes sign == DECIMAL_NEG.
     //
     //   left, right - operands.  They are only read; the local pointers may be
     //                 swapped, or left redirected to a local temporary.
     //   result      - written once, through COPYDEC, on the success path.  It is
     //                 not touched on the overflow returns.
     //   sign        - 0 to add, DECIMAL_NEG to subtract.
     //
     // Returns MONO_DECIMAL_OK, or MONO_DECIMAL_OVERFLOW when an aligned add
     // carries out of 96 bits at scale 0 or when ScaleResult reports failure.
 853 static MonoDecimalStatus
 854 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
 855 {
 856         uint32_t     num[6];
 857         uint32_t     pwr;
 858         int       scale;
 859         int       hi_prod;
 860         int       cur;
 861         SPLIT64   tmp;
 862         MonoDecimal decRes;
 863         MonoDecimal decTmp;
 864         MonoDecimal *pdecTmp;
 865
             // Fold the operands' sign difference into the add/subtract flag.  From
             // here on a non-zero sign means the magnitudes are subtracted and a
             // zero sign means they are added.
 866         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
 867
 868         if (right->u.u.scale == left->u.u.scale) {
 869                 // Scale factors are equal, no alignment necessary.
 870                 //
 871                 decRes.u.signscale = left->u.signscale;
 872
             // AlignedAdd is also entered by goto from the unequal-scale path below,
             // once left has been rescaled into decTmp and still fits in 96 bits.
 873         AlignedAdd:
 874                 if (sign) {
 875                         // Signs differ - subtract
 876                         //
 877                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
 878                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
 879
 880                         // Propagate borrow out of the low 64 bits
 881                         //
 882                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
 883                                 decRes.Hi32--;
 884                                 if (decRes.Hi32 >= left->Hi32)
 885                                         goto SignFlip;
 886                         } else if (decRes.Hi32 > left->Hi32) {
 887                                 // Got negative result.  Flip its sign.
 888                                 //
 889                         SignFlip:
                                     // Two's-complement negate the 96-bit value (negate the low
                                     // 64 bits, invert the high word, carry if the low part is 0).
                                     // Also reached by goto from LongSub below.
 890                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
 891                                 decRes.Hi32 = ~decRes.Hi32;
 892                                 if (DECIMAL_LO64_GET(decRes) == 0)
 893                                         decRes.Hi32++;
 894                                 decRes.u.u.sign ^= DECIMAL_NEG;
 895                         }
 896
 897                 } else {
 898                         // Signs are the same - add
 899                         //
 900                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
 901                         decRes.Hi32 = left->Hi32 + right->Hi32;
 902
 903                         // Propagate carry
 904                         //
 905                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
 906                                 decRes.Hi32++;
 907                                 if (decRes.Hi32 <= left->Hi32)
 908                                         goto AlignedScale;
 909                         } else if (decRes.Hi32 < left->Hi32) {
 910                         AlignedScale:
 911                                 // The addition carried above 96 bits.  Divide the result by 10,
 912                                 // dropping the scale factor.
 913                                 //
 914                                 if (decRes.u.u.scale == 0)
 915                                         return MONO_DECIMAL_OVERFLOW;
 916                                 decRes.u.u.scale--;
 917
 918                                 tmp.u.Lo = decRes.Hi32;
 919                                 tmp.u.Hi = 1; // the bit carried out of the 96-bit sum
 920                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 921                                 decRes.Hi32 = tmp.u.Lo;
 922
 923                                 tmp.u.Lo = decRes.v.v.Mid32;
 924                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 925                                 decRes.v.v.Mid32 = tmp.u.Lo;
 926
 927                                 tmp.u.Lo = decRes.v.v.Lo32;
 928                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 929                                 decRes.v.v.Lo32 = tmp.u.Lo;
 930
 931                                 // See if we need to round up.  tmp.u.Hi is the dropped digit;
                                     // an exact 5 rounds up only when the quotient is odd.
 932                                 //
 933                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
 934                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1) // macro expands to a braced block; no ';' needed
 935                                                 if (DECIMAL_LO64_GET(decRes) == 0) // low 64 bits wrapped: carry into Hi32
 936                                                         decRes.Hi32++;
 937                                 }
 938                         }
 939                 }
 940         }
 941         else {
 942                 // Scale factors are not equal.  Assume that a larger scale
 943                 // factor (more decimal places) is likely to mean that number
 944                 // is smaller.  Start by guessing that the right operand has
 945                 // the larger scale factor.  The result will have the larger
 946                 // scale factor.
 947                 //
 948                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
 949                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
 950                 scale = decRes.u.u.scale - left->u.u.scale;
 951
 952                 if (scale < 0) {
 953                         // Guessed scale factor wrong. Swap operands.
 954                         //
 955                         scale = -scale;
 956                         decRes.u.u.scale = left->u.u.scale;
 957                         decRes.u.u.sign ^= sign;
 958                         pdecTmp = right;
 959                         right = left;
 960                         left = pdecTmp;
 961                 }
 962
 963                 // *left will need to be multiplied by 10^scale so
 964                 // it will have the same scale as *right.  We could be
 965                 // extending it to up to 192 bits of precision.
 966                 //
 967                 if (scale <= POWER10_MAX) {
 968                         // Scaling won't make it larger than 4 uint32_ts
 969                         //
 970                         pwr = power10[scale];
 971                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
 972                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
 973                         tmp.int64 += decTmp.v.v.Mid32;
 974                         decTmp.v.v.Mid32 = tmp.u.Lo;
 975                         decTmp.Hi32 = tmp.u.Hi;
 976                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
 977                         tmp.int64 += decTmp.Hi32;
 978                         if (tmp.u.Hi == 0) {
 979                                 // Result fits in 96 bits.  Use standard aligned add.
 980                                 //
 981                                 decTmp.Hi32 = tmp.u.Lo;
 982                                 left = &decTmp;
 983                                 goto AlignedAdd;
 984                         }
 985                         num[0] = decTmp.v.v.Lo32;
 986                         num[1] = decTmp.v.v.Mid32;
 987                         num[2] = tmp.u.Lo;
 988                         num[3] = tmp.u.Hi;
 989                         hi_prod = 3;
 990                 }
 991                 else {
 992                         // Have to scale by a bunch.  Move the number to a buffer
 993                         // where it has room to grow as it's scaled.
 994                         //
 995                         num[0] = left->v.v.Lo32;
 996                         num[1] = left->v.v.Mid32;
 997                         num[2] = left->Hi32;
 998                         hi_prod = 2;
 999
1000                         // Scan for zeros in the upper words.
1001                         //
1002                         if (num[2] == 0) {
1003                                 hi_prod = 1;
1004                                 if (num[1] == 0) {
1005                                         hi_prod = 0;
1006                                         if (num[0] == 0) {
1007                                                 // Left arg is zero, return right.
1008                                                 //
1009                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
1010                                                 decRes.Hi32 = right->Hi32;
1011                                                 decRes.u.u.sign ^= sign;
1012                                                 goto RetDec;
1013                                         }
1014                                 }
1015                         }
1016
1017                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
1018                         // with index of highest non-zero uint32_t.
1019                         //
1020                         for (; scale > 0; scale -= POWER10_MAX) {
1021                                 if (scale > POWER10_MAX)
1022                                         pwr = ten_to_nine;
1023                                 else
1024                                         pwr = power10[scale];
1025
1026                                 tmp.u.Hi = 0;
1027                                 for (cur = 0; cur <= hi_prod; cur++) {
1028                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
1029                                         num[cur] = tmp.u.Lo;
1030                                 }
1031
1032                                 if (tmp.u.Hi != 0)
1033                                         // We're extending the result by another uint32_t.
1034                                         num[++hi_prod] = tmp.u.Hi;
1035                         }
1036                 }
1037
1038                 // Scaling complete, do the add.  Could be subtract if signs differ.
1039                 //
1040                 tmp.u.Lo = num[0];
1041                 tmp.u.Hi = num[1];
1042
1043                 if (sign) {
1044                         // Signs differ, subtract.
1045                         //
1046                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1047                         decRes.Hi32 = num[2] - right->Hi32;
1048
1049                         // Propagate borrow out of the low 64 bits
1050                         //
1051                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1052                                 decRes.Hi32--;
1053                                 if (decRes.Hi32 >= num[2])
1054                                         goto LongSub;
1055                         }
1056                         else if (decRes.Hi32 > num[2]) {
1057                         LongSub:
1058                                 // If num has more than 96 bits of precision, then we need to
1059                                 // carry the subtraction into the higher bits.  If it doesn't,
1060                                 // then we subtracted in the wrong order and have to flip the
1061                                 // sign of the result.
1062                                 //
1063                                 if (hi_prod <= 2)
1064                                         goto SignFlip;
1065
1066                                 cur = 3;
1067                                 while(num[cur++]-- == 0); // borrow through zero words; stops after the first non-zero one
1068                                 if (num[hi_prod] == 0)
1069                                         hi_prod--;
1070                         }
1071                 }
1072                 else {
1073                         // Signs the same, add.
1074                         //
1075                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1076                         decRes.Hi32 = num[2] + right->Hi32;
1077
1078                         // Propagate carry
1079                         //
1080                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1081                                 decRes.Hi32++;
1082                                 if (decRes.Hi32 <= num[2])
1083                                         goto LongAdd;
1084                         }
1085                         else if (decRes.Hi32 < num[2]) {
1086                         LongAdd:
1087                                 // Had a carry above 96 bits.  Ripple it through num[3] and up;
                                     // if it runs past the current top word, append a new word of 1.
1088                                 //
1089                                 cur = 3;
1090                                 do {
1091                                         if (hi_prod < cur) {
1092                                                 num[cur] = 1;
1093                                                 hi_prod = cur;
1094                                                 break;
1095                                         }
1096                                 }while (++num[cur++] == 0);
1097                         }
1098                 }
1099
1100                 if (hi_prod > 2) {
                             // The result is wider than 96 bits.  Copy the low three words back
                             // into num and hand the whole buffer to ScaleResult.
1101                         num[0] = decRes.v.v.Lo32;
1102                         num[1] = decRes.v.v.Mid32;
1103                         num[2] = decRes.Hi32;
1104                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1105                         if (decRes.u.u.scale == (uint8_t) -1) // NOTE(review): presumably scale is an 8-bit field, so a -1 from ScaleResult is seen here after truncation - confirm against decimal-ms.h
1106                                 return MONO_DECIMAL_OVERFLOW;
1107
1108                         decRes.v.v.Lo32 = num[0];
1109                         decRes.v.v.Mid32 = num[1];
1110                         decRes.Hi32 = num[2];
1111                 }
1112         }
1113
1114 RetDec:
1115         COPYDEC(*result, decRes);
1116         // Odd, the Microsoft code does not set result->reserved to zero on this case
1117         return MONO_DECIMAL_OK;
1118 }
1119
1120 // Decimal addition
1121 static MonoDecimalStatus
1122 VarDecAdd(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1123 {
1124     return DecAddSub (left, right, result, 0);
1125 }
1126
1127 // Decimal subtraction
1128 static MonoDecimalStatus
1129 VarDecSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1130 {
1131     return DecAddSub (left, right, result, DECIMAL_NEG);
1132 }
1133
/**
 * IncreaseScale:
 *
 * Entry:
 *   num - 96-bit unsigned value held in three uint32_t words, least
 *         significant word first.  Updated in place.
 *   pwr - 32-bit multiplier.
 *
 * Purpose:
 *   Multiply the 96-bit value by pwr one word at a time, feeding the
 *   upper half of each 64-bit partial product into the next word.
 *
 * Exit:
 *   num holds the low 96 bits of the product.
 *   Returns the highest 32 bits of the product.
 *
 * Exceptions:
 *   None.
 *
 */
static uint32_t
IncreaseScale(uint32_t *num, uint32_t pwr)
{
        uint64_t carry = 0;
        int word;

        for (word = 0; word < 3; word++) {
                // (2^32-1)^2 + (2^32-1) < 2^64, so this sum cannot wrap.
                uint64_t partial = (uint64_t)num[word] * pwr + carry;

                num[word] = (uint32_t)partial;
                carry = partial >> 32;
        }

        return (uint32_t)carry;
}
1165
1166 /**
1167  * Div96By64:
1168  *
1169  * Entry:
1170  *   num - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1171  *   den - 64-bit divisor.
1172  *
1173  * Purpose:
1174  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1175  *   Divisor must be larger than upper 64 bits of dividend.
1176  *
1177  * Exit:
1178  *   Remainder overwrites lower 64-bits of dividend (num[0], num[1]);
      *   num[2] is never written.  When the quotient is found to be zero
      *   up front, num is left untouched.
1179  *   Returns quotient.
1180  *
1181  * Exceptions:
1182  *   None.
1183  *
1184  */
1185 static uint32_t
1186 Div96By64(uint32_t *num, SPLIT64 den)
1187 {
1188         SPLIT64 quo;
1189         SPLIT64 sdlNum;
1190         SPLIT64 prod;
1191
1192         sdlNum.u.Lo = num[0];
1193
1194         if (num[2] >= den.u.Hi) {
1195                 // Divide would overflow.  Assume a quotient of 2^32, and set
1196                 // up remainder accordingly.  Then jump to loop which reduces
1197                 // the quotient.
1198                 //
1199                 sdlNum.u.Hi = num[1] - den.u.Lo;
1200                 quo.u.Lo = 0; // 2^32 truncated to 32 bits; the loop at NegRem decrements it at least once
1201                 goto NegRem;
1202         }
1203
1204         // Hardware divide won't overflow
1205         //
1206         if (num[2] == 0 && num[1] < den.u.Hi)
1207                 // Result is zero.  Entire dividend is remainder.
1208                 //
1209                 return 0;
1210
1211         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1212         //
1213         quo.u.Lo = num[1];
1214         quo.u.Hi = num[2];
1215         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1216         sdlNum.u.Hi = quo.u.Hi; // remainder
1217
1218         // Compute full remainder, rem = dividend - (quo * divisor).
1219         //
1220         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1221         sdlNum.int64 -= prod.int64;
1222
             // Unsigned wrap test: after r = a - b (mod 2^64), r > ~b holds
             // exactly when a < b, i.e. when the subtraction borrowed.
1223         if (sdlNum.int64 > ~prod.int64) {
1224         NegRem:
1225                 // Remainder went negative.  Add divisor back in until it's positive,
1226                 // a max of 2 times.
1227                 //
1228                 do {
1229                         quo.u.Lo--;
1230                         sdlNum.int64 += den.int64;
1231                 }while (sdlNum.int64 >= den.int64); // a sum below den means the add wrapped past 2^64, which ends the loop
1232         }
1233
1234         num[0] = sdlNum.u.Lo;
1235         num[1] = sdlNum.u.Hi;
1236         return quo.u.Lo;
1237 }
1238
1239 /***
1240 * Div128By96
1241 *
1242 * Entry:
1243 *   num - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1244 *   den - Pointer to 96-bit divisor.
1245 *
1246 * Purpose:
1247 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1248 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1249 *   assured in the initial call because the divisor is normalized
1250 *   and the dividend can't be.  In subsequent calls, the remainder
1251 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1252 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1253 *
1254 * Exit:
1255 *   Remainder overwrites lower 96-bits of dividend (num[0..2]); num[3]
     *   is never written.  When the quotient is found to be zero up front,
     *   num is left untouched.
1256 *   Returns quotient.
1257 *
1258 * Exceptions:
1259 *   None.
1260 *
1261 ***********************************************************************/
1262
1263 static uint32_t
1264 Div128By96(uint32_t *num, uint32_t *den)
1265 {
1266         SPLIT64 sdlQuo;
1267         SPLIT64 sdlNum;
1268         SPLIT64 sdlProd1;
1269         SPLIT64 sdlProd2;
1270
1271         sdlNum.u.Lo = num[0];
1272         sdlNum.u.Hi = num[1];
1273
1274         if (num[3] == 0 && num[2] < den[2]){
1275                 // Result is zero.  Entire dividend is remainder.
1276                 //
1277                 return 0;
1278         }
1279
1280         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1281         //
1282         sdlQuo.u.Lo = num[2];
1283         sdlQuo.u.Hi = num[3];
1284         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1285
1286         // Compute full remainder, rem = dividend - (quo * divisor).
             // After the next four statements sdlProd1 holds the low 64 bits of
             // quo * (den[1]:den[0]) and sdlProd2.u.Hi holds its top 32 bits.
1287         //
1288         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1289         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1290         sdlProd2.int64 += sdlProd1.u.Hi;
1291         sdlProd1.u.Hi = sdlProd2.u.Lo;
1292
1293         sdlNum.int64 -= sdlProd1.int64;
1294         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1295
1296         // Propagate carries
             // (Unsigned wrap test: after r = a - b, r > ~b holds exactly when
             // the subtraction borrowed.)
1297         //
1298         if (sdlNum.int64 > ~sdlProd1.int64) {
1299                 num[2]--;
1300                 if (num[2] >= ~sdlProd2.u.Hi)
1301                         goto NegRem;
1302         } else if (num[2] > ~sdlProd2.u.Hi) {
1303         NegRem:
1304                 // Remainder went negative.  Add divisor back in until it's positive,
1305                 // a max of 2 times.
1306                 //
1307                 sdlProd1.u.Lo = den[0];
1308                 sdlProd1.u.Hi = den[1];
1309
1310                 for (;;) {
1311                         sdlQuo.u.Lo--;
1312                         sdlNum.int64 += sdlProd1.int64;
1313                         num[2] += den[2];
1314
1315                         if (sdlNum.int64 < sdlProd1.int64) {
1316                                 // Detected carry. Check for carry out of top
1317                                 // before adding it in.
1318                                 //
1319                                 if (num[2]++ < den[2])
1320                                         break;
1321                         }
1322                         if (num[2] < den[2])
1323                                 break; // detected carry
1324                 }
1325         }
1326
1327         num[0] = sdlNum.u.Lo;
1328         num[1] = sdlNum.u.Hi;
1329         return sdlQuo.u.Lo;
1330 }
1331
1332 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1333 // Returns FALSE if there is an overflow
1334 static gboolean
1335 Add32To96(uint32_t *num, uint32_t value)
1336 {
1337         num[0] += value;
1338         if (num[0] < value) {
1339                 if (++num[1] == 0) {
1340                         if (++num[2] == 0) {
1341                                 return FALSE;
1342                         }
1343                 }
1344         }
1345         return TRUE;
1346 }
1347
1348 static void
1349 OverflowUnscale (uint32_t *quo, gboolean remainder)
1350 {
1351         SPLIT64  sdlTmp;
1352
1353         // We have overflown, so load the high bit with a one.
1354         sdlTmp.u.Hi = 1u;
1355         sdlTmp.u.Lo = quo[2];
1356         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1357         quo[2] = sdlTmp.u.Lo;
1358         sdlTmp.u.Lo = quo[1];
1359         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1360         quo[1] = sdlTmp.u.Lo;
1361         sdlTmp.u.Lo = quo[0];
1362         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1363         quo[0] = sdlTmp.u.Lo;
1364         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1365         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1366                 Add32To96(quo, 1u);
1367         }
1368 }
1369
1370 // VarDecDiv - Decimal divide
1371 static MonoDecimalStatus
1372 VarDecDiv(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1373 {
1374         uint32_t   quo[3];
1375         uint32_t   quoSave[3];
1376         uint32_t   rem[4];
1377         uint32_t   divisor[3];
1378         uint32_t   pwr;
1379         uint32_t   utmp;
1380         uint32_t   utmp1;
1381         SPLIT64 sdlTmp;
1382         SPLIT64 sdlDivisor;
1383         int     scale;
1384         int     cur_scale;
1385
1386         scale = left->u.u.scale - right->u.u.scale;
1387         divisor[0] = right->v.v.Lo32;
1388         divisor[1] = right->v.v.Mid32;
1389         divisor[2] = right->Hi32;
1390
1391         if (divisor[1] == 0 && divisor[2] == 0) {
1392                 // Divisor is only 32 bits.  Easy divide.
1393                 //
1394                 if (divisor[0] == 0)
1395                         return MONO_DECIMAL_DIVBYZERO;
1396
1397                 quo[0] = left->v.v.Lo32;
1398                 quo[1] = left->v.v.Mid32;
1399                 quo[2] = left->Hi32;
1400                 rem[0] = Div96By32(quo, divisor[0]);
1401
1402                 for (;;) {
1403                         if (rem[0] == 0) {
1404                                 if (scale < 0) {
1405                                         cur_scale = min(9, -scale);
1406                                         goto HaveScale;
1407                                 }
1408                                 break;
1409                         }
1410
1411                         // We have computed a quotient based on the natural scale
1412                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1413                         // remainder, so now we should increase the scale if possible to
1414                         // include more quotient bits.
1415                         //
1416                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1417                         // computing more quotient bits as long as the remainder stays
1418                         // non-zero.  If scaling by that much would cause overflow, we'll
1419                         // drop out of the loop and scale by as much as we can.
1420                         //
1421                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1422                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1423                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1424                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
1425                         // is the largest value in quo[1] (when quo[2] == 4) that is
1426                         // assured not to overflow.
1427                         //
1428                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1429                         if (cur_scale == 0) {
1430                                 // No more scaling to be done, but remainder is non-zero.
1431                                 // Round quotient.
1432                                 //
1433                                 utmp = rem[0] << 1;
1434                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1435                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1436                                 RoundUp:
1437                                         if (++quo[0] == 0)
1438                                                 if (++quo[1] == 0)
1439                                                         quo[2]++;
1440                                 }
1441                                 break;
1442                         }
1443
1444                         if (cur_scale == -1)
1445                                 return MONO_DECIMAL_OVERFLOW;
1446
1447                 HaveScale:
1448                         pwr = power10[cur_scale];
1449                         scale += cur_scale;
1450
1451                         if (IncreaseScale(quo, pwr) != 0)
1452                                 return MONO_DECIMAL_OVERFLOW;
1453
1454                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1455                         rem[0] = sdlTmp.u.Hi;
1456
1457                         quo[0] += sdlTmp.u.Lo;
1458                         if (quo[0] < sdlTmp.u.Lo) {
1459                                 if (++quo[1] == 0)
1460                                         quo[2]++;
1461                         }
1462                 } // for (;;)
1463         }
1464         else {
1465                 // Divisor has bits set in the upper 64 bits.
1466                 //
1467                 // Divisor must be fully normalized (shifted so bit 31 of the most
1468                 // significant uint32_t is 1).  Locate the MSB so we know how much to
1469                 // normalize by.  The dividend will be shifted by the same amount so
1470                 // the quotient is not changed.
1471                 //
1472                 if (divisor[2] == 0)
1473                         utmp = divisor[1];
1474                 else
1475                         utmp = divisor[2];
1476
1477                 cur_scale = 0;
1478                 if (!(utmp & 0xFFFF0000)) {
1479                         cur_scale += 16;
1480                         utmp <<= 16;
1481                 }
1482                 if (!(utmp & 0xFF000000)) {
1483                         cur_scale += 8;
1484                         utmp <<= 8;
1485                 }
1486                 if (!(utmp & 0xF0000000)) {
1487                         cur_scale += 4;
1488                         utmp <<= 4;
1489                 }
1490                 if (!(utmp & 0xC0000000)) {
1491                         cur_scale += 2;
1492                         utmp <<= 2;
1493                 }
1494                 if (!(utmp & 0x80000000)) {
1495                         cur_scale++;
1496                         utmp <<= 1;
1497                 }
1498
1499                 // Shift both dividend and divisor left by cur_scale.
1500                 //
1501                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1502                 rem[0] = sdlTmp.u.Lo;
1503                 rem[1] = sdlTmp.u.Hi;
1504                 sdlTmp.u.Lo = left->v.v.Mid32;
1505                 sdlTmp.u.Hi = left->Hi32;
1506                 sdlTmp.int64 <<= cur_scale;
1507                 rem[2] = sdlTmp.u.Hi;
1508                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1509
1510                 sdlDivisor.u.Lo = divisor[0];
1511                 sdlDivisor.u.Hi = divisor[1];
1512                 sdlDivisor.int64 <<= cur_scale;
1513
1514                 if (divisor[2] == 0) {
1515                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1516                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1517                         //
1518                         sdlTmp.u.Lo = rem[2];
1519                         sdlTmp.u.Hi = rem[3];
1520
1521                         quo[2] = 0;
1522                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1523                         quo[0] = Div96By64(rem, sdlDivisor);
1524
1525                         for (;;) {
1526                                 if ((rem[0] | rem[1]) == 0) {
1527                                         if (scale < 0) {
1528                                                 cur_scale = min(9, -scale);
1529                                                 goto HaveScale64;
1530                                         }
1531                                         break;
1532                                 }
1533
1534                                 // Remainder is non-zero.  Scale up quotient and remainder by
1535                                 // powers of 10 so we can compute more significant bits.
1536                                 //
1537                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1538                                 if (cur_scale == 0) {
1539                                         // No more scaling to be done, but remainder is non-zero.
1540                                         // Round quotient.
1541                                         //
1542                                         sdlTmp.u.Lo = rem[0];
1543                                         sdlTmp.u.Hi = rem[1];
1544                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1545                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1546                                                 goto RoundUp;
1547                                         break;
1548                                 }
1549
1550                                 if (cur_scale == -1)
1551                                         return MONO_DECIMAL_OVERFLOW;
1552
1553                         HaveScale64:
1554                                 pwr = power10[cur_scale];
1555                                 scale += cur_scale;
1556
1557                                 if (IncreaseScale(quo, pwr) != 0)
1558                                         return MONO_DECIMAL_OVERFLOW;
1559
1560                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1561                                 IncreaseScale(rem, pwr);
1562                                 utmp = Div96By64(rem, sdlDivisor);
1563                                 quo[0] += utmp;
1564                                 if (quo[0] < utmp)
1565                                         if (++quo[1] == 0)
1566                                                 quo[2]++;
1567
1568                         } // for (;;)
1569                 }
1570                 else {
1571                         // Have a 96-bit divisor in divisor[].
1572                         //
1573                         // Start by finishing the shift left by cur_scale.
1574                         //
1575                         sdlTmp.u.Lo = divisor[1];
1576                         sdlTmp.u.Hi = divisor[2];
1577                         sdlTmp.int64 <<= cur_scale;
1578                         divisor[0] = sdlDivisor.u.Lo;
1579                         divisor[1] = sdlDivisor.u.Hi;
1580                         divisor[2] = sdlTmp.u.Hi;
1581
1582                         // The remainder (currently 96 bits spread over 4 uint32_ts)
1583                         // will be < divisor.
1584                         //
1585                         quo[2] = 0;
1586                         quo[1] = 0;
1587                         quo[0] = Div128By96(rem, divisor);
1588
1589                         for (;;) {
1590                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1591                                         if (scale < 0) {
1592                                                 cur_scale = min(9, -scale);
1593                                                 goto HaveScale96;
1594                                         }
1595                                         break;
1596                                 }
1597
1598                                 // Remainder is non-zero.  Scale up quotient and remainder by
1599                                 // powers of 10 so we can compute more significant bits.
1600                                 //
1601                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1602                                 if (cur_scale == 0) {
1603                                         // No more scaling to be done, but remainder is non-zero.
1604                                         // Round quotient.
1605                                         //
1606                                         if (rem[2] >= 0x80000000)
1607                                                 goto RoundUp;
1608
1609                                         utmp = rem[0] > 0x80000000;
1610                                         utmp1 = rem[1] > 0x80000000;
1611                                         rem[0] <<= 1;
1612                                         rem[1] = (rem[1] << 1) + utmp;
1613                                         rem[2] = (rem[2] << 1) + utmp1;
1614
1615                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1616                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1617                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1618                                               (quo[0] & 1))))
1619                                                 goto RoundUp;
1620                                         break;
1621                                 }
1622
1623                                 if (cur_scale == -1)
1624                                         return MONO_DECIMAL_OVERFLOW;
1625
1626                         HaveScale96:
1627                                 pwr = power10[cur_scale];
1628                                 scale += cur_scale;
1629
1630                                 if (IncreaseScale(quo, pwr) != 0)
1631                                         return MONO_DECIMAL_OVERFLOW;
1632
1633                                 rem[3] = IncreaseScale(rem, pwr);
1634                                 utmp = Div128By96(rem, divisor);
1635                                 quo[0] += utmp;
1636                                 if (quo[0] < utmp)
1637                                         if (++quo[1] == 0)
1638                                                 quo[2]++;
1639
1640                         } // for (;;)
1641                 }
1642         }
1643
1644         // No more remainder.  Try extracting any extra powers of 10 we may have
1645         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1646         // If a division by one of these powers returns a zero remainder, then
1647         // we keep the quotient.  If the remainder is not zero, then we restore
1648         // the previous value.
1649         //
1650         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1651         // we can extract.  We use this as a quick test on whether to try a
1652         // given power.
1653         //
1654         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1655                 quoSave[0] = quo[0];
1656                 quoSave[1] = quo[1];
1657                 quoSave[2] = quo[2];
1658
1659                 if (Div96By32(quoSave, 100000000) == 0) {
1660                         quo[0] = quoSave[0];
1661                         quo[1] = quoSave[1];
1662                         quo[2] = quoSave[2];
1663                         scale -= 8;
1664                 }
1665                 else
1666                         break;
1667         }
1668
1669         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1670                 quoSave[0] = quo[0];
1671                 quoSave[1] = quo[1];
1672                 quoSave[2] = quo[2];
1673
1674                 if (Div96By32(quoSave, 10000) == 0) {
1675                         quo[0] = quoSave[0];
1676                         quo[1] = quoSave[1];
1677                         quo[2] = quoSave[2];
1678                         scale -= 4;
1679                 }
1680         }
1681
1682         if ((quo[0] & 3) == 0 && scale >= 2) {
1683                 quoSave[0] = quo[0];
1684                 quoSave[1] = quo[1];
1685                 quoSave[2] = quo[2];
1686
1687                 if (Div96By32(quoSave, 100) == 0) {
1688                         quo[0] = quoSave[0];
1689                         quo[1] = quoSave[1];
1690                         quo[2] = quoSave[2];
1691                         scale -= 2;
1692                 }
1693         }
1694
1695         if ((quo[0] & 1) == 0 && scale >= 1) {
1696                 quoSave[0] = quo[0];
1697                 quoSave[1] = quo[1];
1698                 quoSave[2] = quo[2];
1699
1700                 if (Div96By32(quoSave, 10) == 0) {
1701                         quo[0] = quoSave[0];
1702                         quo[1] = quoSave[1];
1703                         quo[2] = quoSave[2];
1704                         scale -= 1;
1705                 }
1706         }
1707
1708         result->Hi32 = quo[2];
1709         result->v.v.Mid32 = quo[1];
1710         result->v.v.Lo32 = quo[0];
1711         result->u.u.scale = scale;
1712         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1713         return MONO_DECIMAL_OK;
1714 }
1715
1716 // VarDecAbs - Decimal Absolute Value
1717 static void
1718 VarDecAbs (MonoDecimal *pdecOprd, MonoDecimal *result)
1719 {
1720         COPYDEC(*result, *pdecOprd);
1721         result->u.u.sign &= ~DECIMAL_NEG;
1722         // Microsoft does not set reserved here
1723 }
1724
1725 // VarDecFix - Decimal Fix (chop to integer)
1726 static void
1727 VarDecFix (MonoDecimal *pdecOprd, MonoDecimal *result)
1728 {
1729         DecFixInt(result, pdecOprd);
1730 }
1731
1732
1733 // VarDecInt - Decimal Int (round down to integer)
1734 static void
1735 VarDecInt (MonoDecimal *pdecOprd, MonoDecimal *result)
1736 {
1737         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1738                 // We have chopped off a non-zero amount from a negative value.  Since
1739                 // we round toward -infinity, we must increase the integer result by
1740                 // 1 to make it more negative.  This will never overflow because
1741                 // in order to have a remainder, we must have had a non-zero scale factor.
1742                 // Our scale factor is back to zero now.
1743                 //
1744                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1745                 if (DECIMAL_LO64_GET(*result) == 0)
1746                         result->Hi32++;
1747         }
1748 }
1749
1750
1751 // VarDecNeg - Decimal Negate
1752 static void
1753 VarDecNeg (MonoDecimal *pdecOprd, MonoDecimal *result)
1754 {
1755         COPYDEC(*result, *pdecOprd);
1756         // Microsoft does not set result->reserved to zero on this case.
1757         result->u.u.sign ^= DECIMAL_NEG;
1758 }
1759
1760 //
1761 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1762 //
1763 static MonoDecimalStatus
1764 VarDecRound(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1765 {
1766         uint32_t num[3];
1767         uint32_t rem;
1768         uint32_t sticky;
1769         uint32_t pwr;
1770         int scale;
1771
1772         if (cDecimals < 0)
1773                 return MONO_DECIMAL_INVALID_ARGUMENT;
1774
1775         scale = input->u.u.scale - cDecimals;
1776         if (scale > 0) {
1777                 num[0] = input->v.v.Lo32;
1778                 num[1] = input->v.v.Mid32;
1779                 num[2] = input->Hi32;
1780                 result->u.u.sign = input->u.u.sign;
1781                 rem = sticky = 0;
1782
1783                 do {
1784                         sticky |= rem;
1785                         if (scale > POWER10_MAX)
1786                                 pwr = ten_to_nine;
1787                         else
1788                                 pwr = power10[scale];
1789
1790                         rem = Div96By32(num, pwr);
1791                         scale -= 9;
1792                 }while (scale > 0);
1793
1794                 // Now round.  rem has last remainder, sticky has sticky bits.
1795                 // To do IEEE rounding, we add LSB of result to sticky bits so
1796                 // either causes round up if remainder * 2 == last divisor.
1797                 //
1798                 sticky |= num[0] & 1;
1799                 rem = (rem << 1) + (sticky != 0);
1800                 if (pwr < rem &&
1801                     ++num[0] == 0 &&
1802                     ++num[1] == 0
1803                         )
1804                         ++num[2];
1805
1806                 result->v.v.Lo32 = num[0];
1807                 result->v.v.Mid32 = num[1];
1808                 result->Hi32 = num[2];
1809                 result->u.u.scale = cDecimals;
1810                 return MONO_DECIMAL_OK;
1811         }
1812
1813         COPYDEC(*result, *input);
1814         // Odd, the Microsoft source does not set the result->reserved to zero here.
1815         return MONO_DECIMAL_OK;
1816 }
1817
//
// VarDecFromR4 - convert a single-precision float to a MonoDecimal.
//
// The magnitude is scaled by a power of ten so that it becomes a 7-digit
// integer, rounded half-to-even, and then either multiplied back up
// (large inputs, result scale 0) or stored with a positive scale after
// trailing decimal zeros have been factored out.
//
// input:  value to convert.
// result: receives the mantissa, scale and sign.  The 'reserved' field is
//         not written by this function.
//
// Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
static MonoDecimalStatus
VarDecFromR4 (float input, MonoDecimal* result)
{
        int         exp;    // number of bits to left of binary point
        int         power;
        uint32_t       mant;
        double      dbl;
        SPLIT64     sdlLo;
        SPLIT64     sdlHi;
        int         lmax, cur;  // temps used during scale reduction

        // The most we can scale by is 10^28, which is just slightly more
        // than 2^93.  So a float with an exponent of -94 could just
        // barely reach 0.5, but smaller exponents will always round to zero.
        //
        // NOTE(review): SingleStructure and SNGBIAS are defined elsewhere in
        // this file; presumably they expose the IEEE exponent/sign fields.
        if ((exp = ((SingleStructure *)&input)->exp - SNGBIAS) < -94 ) {
                DECIMAL_SETZERO(*result);
                return MONO_DECIMAL_OK;
        }

        // Too large for the 96-bit mantissa.
        if (exp > 96)
                return MONO_DECIMAL_OVERFLOW;

        // Round the input to a 7-digit integer.  The R4 format has
        // only 7 digits of precision, and we want to keep garbage digits
        // out of the Decimal we're making.
        //
        // Calculate max power of 10 input value could have by multiplying
        // the exponent by log10(2).  Using scaled integer multiplication,
        // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
        //
        dbl = fabs(input);
        power = 6 - ((exp * 19728) >> 16);

        if (power >= 0) {
                // We have less than 7 digits, scale input up.
                //
                // The scale of a decimal cannot exceed DECMAX, so clamp.
                if (power > DECMAX)
                        power = DECMAX;

                dbl = dbl * double_power10[power];
        } else {
                // More than 7 digits: scale down, unless the estimate was
                // off by one and the value already fits below 1E7.
                if (power != -1 || dbl >= 1E7)
                        dbl = dbl / fnDblPower10(-power);
                else
                        power = 0; // didn't scale it
        }

        g_assert (dbl < 1E7);
        // The power estimate can come out one digit short; gain the extra
        // digit when the scale still has room for it.
        if (dbl < 1E6 && power < DECMAX) {
                dbl *= 10;
                power++;
                g_assert(dbl >= 1E6);
        }

        // Round to integer
        //
        // Truncate, then round half-to-even using the leftover fraction.
        mant = (int32_t)dbl;
        dbl -= (double)mant;  // difference between input & integer
        if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
                mant++;

        // Everything rounded away: the result is zero (sign and scale are
        // cleared by DECIMAL_SETZERO).
        if (mant == 0) {
                DECIMAL_SETZERO(*result);
                return MONO_DECIMAL_OK;
        }

        if (power < 0) {
                // Add -power factors of 10, -power <= (29 - 7) = 22.
                //
                power = -power;
                if (power < 10) {
                        // 32 x 32 -> 64-bit product fits in Lo32/Mid32.
                        sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);

                        DECIMAL_LO32(*result) = sdlLo.u.Lo;
                        DECIMAL_MID32(*result) = sdlLo.u.Hi;
                        DECIMAL_HI32(*result) = 0;
                } else {
                        // Have a big power of 10.
                        //
                        if (power > 18) {
                                // mant * 10^(power-18) * 10^18; the second
                                // multiply yields a 128-bit product whose
                                // upper half lands in sdlHi.
                                sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
                                sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);

                                // Anything beyond 96 bits does not fit.
                                if (sdlHi.u.Hi != 0)
                                        return MONO_DECIMAL_OVERFLOW;
                        }
                        else {
                                // mant * 10^(power-9) * 10^9, done as two
                                // 32 x 32 partial products combined into
                                // a 96-bit value (Lo, Mid in sdlLo; Hi in
                                // sdlHi.u.Lo).
                                sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
                                sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
                                sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
                                sdlHi.int64 += sdlLo.u.Hi;
                                sdlLo.u.Hi = sdlHi.u.Lo;
                                sdlHi.u.Lo = sdlHi.u.Hi;
                        }
                        DECIMAL_LO32(*result) = sdlLo.u.Lo;
                        DECIMAL_MID32(*result) = sdlLo.u.Hi;
                        DECIMAL_HI32(*result) = sdlHi.u.Lo;
                }
                DECIMAL_SCALE(*result) = 0;
        } else {
                // Factor out powers of 10 to reduce the scale, if possible.
                // The maximum number we could factor out would be 6.  This
                // comes from the fact we have a 7-digit number, and the
                // MSD must be non-zero -- but the lower 6 digits could be
                // zero.  Note also the scale factor is never negative, so
                // we can't scale by any more than the power we used to
                // get the integer.
                //
                // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
                //
                lmax = min(power, 6);

                // lmax is the largest power of 10 to try, lmax <= 6.
                // We'll try powers 4, 2, and 1 unless they're too big.
                //
                for (cur = 4; cur > 0; cur >>= 1)
                {
                        if (cur > lmax)
                                continue;

                        sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);

                        // Keep the quotient only when the division was exact.
                        if (sdlLo.u.Hi == 0) {
                                mant = sdlLo.u.Lo;
                                power -= cur;
                                lmax -= cur;
                        }
                }
                DECIMAL_LO32(*result) = mant;
                DECIMAL_MID32(*result) = 0;
                DECIMAL_HI32(*result) = 0;
                DECIMAL_SCALE(*result) = power;
        }

        // Move the float's sign flag into bit 7 of the sign byte (DECIMAL_NEG).
        DECIMAL_SIGN(*result) = (char)((SingleStructure *)&input)->sign << 7;
        return MONO_DECIMAL_OK;
}
1958
//
// VarDecFromR8 - convert a double-precision float to a MonoDecimal.
//
// The magnitude is scaled by a power of ten so that it becomes a 15-digit
// integer, rounded half-to-even, and then either multiplied back up
// (large inputs, result scale 0) or stored with a positive scale after
// trailing decimal zeros have been factored out.
//
// input:  value to convert.
// result: receives the mantissa, scale and sign.  The 'reserved' field is
//         not written by this function.
//
// Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
static MonoDecimalStatus
VarDecFromR8 (double input, MonoDecimal *result)
{
        int         exp;    // number of bits to left of binary point
        int         power;  // power-of-10 scale factor
        SPLIT64     sdlMant;
        SPLIT64     sdlLo;
        double      dbl;
        int         lmax, cur;  // temps used during scale reduction
        uint32_t       pwr_cur;
        uint32_t       quo;


        // The most we can scale by is 10^28, which is just slightly more
        // than 2^93.  So a float with an exponent of -94 could just
        // barely reach 0.5, but smaller exponents will always round to zero.
        //
        // NOTE(review): DoubleStructure and DBLBIAS are defined elsewhere in
        // this file; presumably they expose the IEEE exponent/sign fields.
        if ((exp = ((DoubleStructure *)&input)->u.exp - DBLBIAS) < -94) {
                DECIMAL_SETZERO(*result);
                return MONO_DECIMAL_OK;
        }

        // Too large for the 96-bit mantissa.
        if (exp > 96)
                return MONO_DECIMAL_OVERFLOW;

        // Round the input to a 15-digit integer.  The R8 format has
        // only 15 digits of precision, and we want to keep garbage digits
        // out of the Decimal we're making.
        //
        // Calculate max power of 10 input value could have by multiplying
        // the exponent by log10(2).  Using scaled integer multiplication,
        // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
        //
        dbl = fabs(input);
        power = 14 - ((exp * 19728) >> 16);

        if (power >= 0) {
                // We have less than 15 digits, scale input up.
                //
                // The scale of a decimal cannot exceed DECMAX, so clamp.
                if (power > DECMAX)
                        power = DECMAX;

                dbl = dbl * double_power10[power];
        } else {
                // More than 15 digits: scale down, unless the estimate was
                // off by one and the value already fits below 1E15.
                if (power != -1 || dbl >= 1E15)
                        dbl = dbl / fnDblPower10(-power);
                else
                        power = 0; // didn't scale it
        }

        g_assert (dbl < 1E15);
        // The power estimate can come out one digit short; gain the extra
        // digit when the scale still has room for it.
        if (dbl < 1E14 && power < DECMAX) {
                dbl *= 10;
                power++;
                g_assert(dbl >= 1E14);
        }

        // Round to int64
        //
        // Truncate, then round half-to-even using the leftover fraction.
        sdlMant.int64 = (int64_t)dbl;
        dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
        if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
                sdlMant.int64++;

        // Everything rounded away: the result is zero (sign and scale are
        // cleared by DECIMAL_SETZERO).
        if (sdlMant.int64 == 0) {
                DECIMAL_SETZERO(*result);
                return MONO_DECIMAL_OK;
        }

        if (power < 0) {
                // Add -power factors of 10, -power <= (29 - 15) = 14.
                //
                power = -power;
                if (power < 10) {
                        // 64 x 32 multiply done as two 32 x 32 partial
                        // products; the 96-bit result ends up as Lo, Mid in
                        // sdlLo and Hi in sdlMant.u.Lo.
                        sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
                        sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
                        sdlMant.int64 += sdlLo.u.Hi;
                        sdlLo.u.Hi = sdlMant.u.Lo;
                        sdlMant.u.Lo = sdlMant.u.Hi;
                }
                else {
                        // Have a big power of 10.
                        //
                        g_assert(power <= 14);
                        // 128-bit product: low half returned, high half
                        // written back into sdlMant.
                        sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);

                        // Anything beyond 96 bits does not fit.
                        if (sdlMant.u.Hi != 0)
                                return MONO_DECIMAL_OVERFLOW;
                }
                DECIMAL_LO32(*result) = sdlLo.u.Lo;
                DECIMAL_MID32(*result) = sdlLo.u.Hi;
                DECIMAL_HI32(*result) = sdlMant.u.Lo;
                DECIMAL_SCALE(*result) = 0;
        }
        else {
                // Factor out powers of 10 to reduce the scale, if possible.
                // The maximum number we could factor out would be 14.  This
                // comes from the fact we have a 15-digit number, and the
                // MSD must be non-zero -- but the lower 14 digits could be
                // zero.  Note also the scale factor is never negative, so
                // we can't scale by any more than the power we used to
                // get the integer.
                //
                // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
                //
                lmax = min(power, 14);

                // lmax is the largest power of 10 to try, lmax <= 14.
                // We'll try powers 8, 4, 2, and 1 unless they're too big.
                //
                for (cur = 8; cur > 0; cur >>= 1)
                {
                        if (cur > lmax)
                                continue;

                        pwr_cur = (uint32_t)long_power10[cur];

                        if (sdlMant.u.Hi >= pwr_cur) {
                                // Overflow if we try to divide in one step.
                                //
                                // Long division: divide the high word first,
                                // then carry its remainder (left in
                                // sdlLo.u.Hi) into the low-word division.
                                sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
                                quo = sdlLo.u.Lo;
                                sdlLo.u.Lo = sdlMant.u.Lo;
                                sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
                        }
                        else {
                                quo = 0;
                                sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
                        }

                        // Keep the quotient only when the division was exact.
                        if (sdlLo.u.Hi == 0) {
                                sdlMant.u.Hi = quo;
                                sdlMant.u.Lo = sdlLo.u.Lo;
                                power -= cur;
                                lmax -= cur;
                        }
                }

                DECIMAL_HI32(*result) = 0;
                DECIMAL_SCALE(*result) = power;
                DECIMAL_LO32(*result) = sdlMant.u.Lo;
                DECIMAL_MID32(*result) = sdlMant.u.Hi;
        }

        // Move the double's sign flag into bit 7 of the sign byte (DECIMAL_NEG).
        DECIMAL_SIGN(*result) = (char)((DoubleStructure *)&input)->u.sign << 7;
        return MONO_DECIMAL_OK;
}
2108
2109 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2110 static MonoDecimalStatus
2111 VarR8FromDec(MonoDecimal *input, double *result)
2112 {
2113         SPLIT64  tmp;
2114         double   dbl;
2115
2116         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2117                 return MONO_DECIMAL_INVALID_ARGUMENT;
2118
2119         tmp.u.Lo = DECIMAL_LO32(*input);
2120         tmp.u.Hi = DECIMAL_MID32(*input);
2121
2122         if ((int32_t)DECIMAL_MID32(*input) < 0)
2123                 dbl = (ds2to64.dbl + (double)(int64_t)tmp.int64 +
2124                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2125         else
2126                 dbl = ((double)(int64_t)tmp.int64 +
2127                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input));
2128
2129         if (DECIMAL_SIGN(*input))
2130                 dbl = -dbl;
2131
2132         *result = dbl;
2133         return MONO_DECIMAL_OK;
2134 }
2135
2136 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2137 static MonoDecimalStatus
2138 VarR4FromDec(MonoDecimal *input, float *result)
2139 {
2140         double   dbl;
2141
2142         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2143                 return MONO_DECIMAL_INVALID_ARGUMENT;
2144
2145         // Can't overflow; no errors possible.
2146         //
2147         VarR8FromDec(input, &dbl);
2148         *result = (float)dbl;
2149         return MONO_DECIMAL_OK;
2150 }
2151
2152 static void
2153 DecShiftLeft(MonoDecimal* value)
2154 {
2155     g_assert(value != NULL);
2156
2157     unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2158     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2159     DECIMAL_LO32(*value) <<= 1;
2160     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2161     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2162 }
2163
// D32AddCarry - add i to *value with wrap-around.
//
// Stores the 32-bit sum back into *value and returns 1 when the addition
// overflowed 32 bits (carry out), 0 otherwise.
static int
D32AddCarry(uint32_t* value, uint32_t i)
{
    const uint32_t before = *value;
    const uint32_t after = before + i;

    *value = after;

    // An unsigned sum that wrapped is smaller than its operands.
    if (after < before || after < i)
        return 1;
    return 0;
}
2172
2173 static void
2174 DecAdd(MonoDecimal *value, MonoDecimal* d)
2175 {
2176         g_assert(value != NULL && d != NULL);
2177
2178         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2179                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2180                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2181                 }
2182         }
2183         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2184                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2185         }
2186         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2187 }
2188
2189 static void
2190 DecMul10(MonoDecimal* value)
2191 {
2192         g_assert (value != NULL);
2193
2194         MonoDecimal d = *value;
2195         DecShiftLeft(value);
2196         DecShiftLeft(value);
2197         DecAdd(value, &d);
2198         DecShiftLeft(value);
2199 }
2200
2201 static void
2202 DecAddInt32(MonoDecimal* value, unsigned int i)
2203 {
2204         g_assert(value != NULL);
2205
2206         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2207                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2208                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2209                 }
2210         }
2211 }
2212
2213 MonoDecimalCompareResult
2214 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2215 {
2216         uint32_t   left_sign;
2217         uint32_t   right_sign;
2218
2219         // First check signs and whether either are zero.  If both are
2220         // non-zero and of the same sign, just use subtraction to compare.
2221         //
2222         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2223         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2224         if (left_sign != 0)
2225                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2226
2227         if (right_sign != 0)
2228                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2229
2230         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2231         // operand is +, 0, or -.
2232         //
2233         if (left_sign == right_sign) {
2234                 if (left_sign == 0)    // both are zero
2235                         return MONO_DECIMAL_CMP_EQ; // return equal
2236
2237                 MonoDecimal result;
2238
2239                 DecAddSub(left, right, &result, DECIMAL_NEG);
2240                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2241                         return MONO_DECIMAL_CMP_EQ;
2242                 if (result.u.u.sign & DECIMAL_NEG)
2243                         return MONO_DECIMAL_CMP_LT;
2244                 return MONO_DECIMAL_CMP_GT;
2245         }
2246
2247         //
2248         // Signs are different.  Used signed byte compares
2249         //
2250         if ((char)left_sign > (char)right_sign)
2251                 return MONO_DECIMAL_CMP_GT;
2252         return MONO_DECIMAL_CMP_LT;
2253 }
2254
2255 void
2256 mono_decimal_init_single (MonoDecimal *_this, float value)
2257 {
2258         if (VarDecFromR4 (value, _this) == MONO_DECIMAL_OVERFLOW)
2259                 mono_raise_exception (mono_get_exception_overflow ());
2260         _this->reserved = 0;
2261 }
2262
2263 void
2264 mono_decimal_init_double (MonoDecimal *_this, double value)
2265 {
2266         if (VarDecFromR8 (value, _this) == MONO_DECIMAL_OVERFLOW)
2267                 mono_raise_exception (mono_get_exception_overflow ());
2268         _this->reserved = 0;
2269 }
2270
2271 void
2272 mono_decimal_floor (MonoDecimal *d)
2273 {
2274         MonoDecimal decRes;
2275
2276         VarDecInt(d, &decRes);
2277
2278         // copy decRes into d
2279         COPYDEC(*d, decRes);
2280         d->reserved = 0;
2281         FC_GC_POLL ();
2282 }
2283
2284 int32_t
2285 mono_decimal_get_hash_code (MonoDecimal *d)
2286 {
2287         double dbl;
2288
2289         if (VarR8FromDec(d, &dbl) != MONO_DECIMAL_OK)
2290                 return 0;
2291
2292         if (dbl == 0.0) {
2293                 // Ensure 0 and -0 have the same hash code
2294                 return 0;
2295         }
2296         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2297         //
2298         // For example these two numerically equal decimals with different internal representations produce
2299         // slightly different results when converted to double:
2300         //
2301         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2302         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2303         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2304         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2305         //
2306         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2307
2308 }
2309
2310 void
2311 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2312 {
2313         MonoDecimal decRes;
2314
2315         MonoDecimalStatus status = VarDecMul(d1, d2, &decRes);
2316         if (status != MONO_DECIMAL_OK)
2317                 mono_raise_exception (mono_get_exception_overflow ());
2318
2319         COPYDEC(*d1, decRes);
2320         d1->reserved = 0;
2321
2322         FC_GC_POLL ();
2323 }
2324
2325 void
2326 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2327 {
2328         MonoDecimal decRes;
2329
2330         // GC is only triggered for throwing, no need to protect result
2331         if (decimals < 0 || decimals > 28)
2332                 mono_raise_exception (mono_get_exception_argument_out_of_range ("d"));
2333
2334         VarDecRound(d, decimals, &decRes);
2335
2336         // copy decRes into d
2337         COPYDEC(*d, decRes);
2338         d->reserved = 0;
2339
2340         FC_GC_POLL();
2341 }
2342
2343 void
2344 mono_decimal_tocurrency (MonoDecimal *decimal)
2345 {
2346         // TODO
2347 }
2348
2349 double
2350 mono_decimal_to_double (MonoDecimal d)
2351 {
2352         double result = 0.0;
2353         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2354         VarR8FromDec(&d, &result);
2355         return result;
2356 }
2357
2358 int32_t
2359 mono_decimal_to_int32 (MonoDecimal d)
2360 {
2361         MonoDecimal result;
2362
2363         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2364         VarDecRound(&d, 0, &result);
2365
2366         if (DECIMAL_SCALE(result) != 0) {
2367                 d = result;
2368                 VarDecFix (&d, &result);
2369         }
2370
2371         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2372                 int32_t i = DECIMAL_LO32(result);
2373                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2374                         if (i >= 0)
2375                                 return i;
2376                 } else {
2377                         i = -i;
2378                         if (i <= 0)
2379                                 return i;
2380                 }
2381         }
2382
2383         mono_raise_exception (mono_get_exception_overflow ());
2384         // Not reachable
2385         return 0;
2386 }
2387
2388 float
2389 mono_decimal_to_float (MonoDecimal d)
2390 {
2391         float result = 0.0f;
2392         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2393         VarR4FromDec(&d, &result);
2394         return result;
2395 }
2396
2397 void
2398 mono_decimal_truncate (MonoDecimal *d)
2399 {
2400         MonoDecimal decRes;
2401
2402         VarDecFix(d, &decRes);
2403
2404         // copy decRes into d
2405         COPYDEC(*d, decRes);
2406         d->reserved = 0;
2407         FC_GC_POLL();
2408 }
2409
//
// mono_decimal_addsub:
//   @left: first operand; overwritten with the result.
//   @right: second operand; only read.
//   @sign: 0 to add, DECIMAL_NEG to subtract (anything else trips the assert).
//
// Computes *left + *right or *left - *right and stores the result in *left.
// Operands with different scales are aligned by multiplying the one with the
// smaller scale by a power of ten, using up to six 32-bit words of scratch
// space (num[]), before the magnitudes are added or subtracted.
//
// Raises an overflow exception when a same-scale addition carries out of
// 96 bits while the scale is already 0, or when ScaleResult reports failure.
//
void
mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
{
        MonoDecimal result, decTmp;
        MonoDecimal *pdecTmp, *leftOriginal;
        uint32_t    num[6], pwr;
        int         scale, hi_prod, cur;
        SPLIT64     sdlTmp;

        g_assert(sign == 0 || sign == DECIMAL_NEG);

        // left may be swapped with right or redirected to decTmp below;
        // remember where the result has to be stored.
        leftOriginal = left;

        // Fold the operand signs into the requested operation.  From here on a
        // non-zero sign means the magnitudes must be subtracted, zero means added.
        sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;

        if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
                // Scale factors are equal, no alignment necessary.
                //
                DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);

        // Also entered by goto from the unequal-scale path below, once the
        // rescaled operand is known to fit in 96 bits (left then points at decTmp).
        AlignedAdd:
                if (sign) {
                        // Signs differ - subtract
                        //
                        DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
                        DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);

                        // Propagate the borrow: the low 64 bits wrapped around if the
                        // difference came out larger than the minuend.
                        //
                        if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
                                DECIMAL_HI32(result)--;
                                if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
                                        goto SignFlip;
                        } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
                                // Got negative result.  Flip its sign.
                                //
                        // Also entered by goto from LongSub in the unequal-scale path.
                        // Negates the 96-bit value: negate the low 64 bits, complement
                        // the high word, and carry into it when the low part is zero.
                        SignFlip:
                                DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
                                DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
                                if (DECIMAL_LO64_GET(result) == 0)
                                        DECIMAL_HI32(result)++;
                                DECIMAL_SIGN(result) ^= DECIMAL_NEG;
                        }

                } else {
                        // Signs are the same - add
                        //
                        DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
                        DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);

                        // Propagate carry
                        //
                        if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
                                DECIMAL_HI32(result)++;
                                if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
                                        goto AlignedScale;
                        } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
                        AlignedScale:
                                // The addition carried above 96 bits.  Divide the result by 10,
                                // dropping the scale factor.
                                //
                                if (DECIMAL_SCALE(result) == 0)
                                        mono_raise_exception (mono_get_exception_overflow ());
                                DECIMAL_SCALE(result)--;

                                // Long division by 10, most significant word first.  The
                                // initial high half of 1 stands for the bit carried out of
                                // the 96-bit sum.  NOTE(review): DivMod64by32 is used here as
                                // leaving the quotient in u.Lo and the remainder in u.Hi, so
                                // the remainder feeds the next word - confirm against its
                                // definition.
                                sdlTmp.u.Lo = DECIMAL_HI32(result);
                                sdlTmp.u.Hi = 1;
                                sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
                                DECIMAL_HI32(result) = sdlTmp.u.Lo;

                                sdlTmp.u.Lo = DECIMAL_MID32(result);
                                sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
                                DECIMAL_MID32(result) = sdlTmp.u.Lo;

                                sdlTmp.u.Lo = DECIMAL_LO32(result);
                                sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
                                DECIMAL_LO32(result) = sdlTmp.u.Lo;

                                // See if we need to round up: the dropped digit is above 5,
                                // or exactly 5 with an odd low word (ties go to even).
                                //
                                if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
                                        DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
                                        if (DECIMAL_LO64_GET(result) == 0)
                                                DECIMAL_HI32(result)++;
                                }
                        }
                }
        } else {
                // Scale factors are not equal.  Assume that a larger scale
                // factor (more decimal places) is likely to mean that number
                // is smaller.  Start by guessing that the right operand has
                // the larger scale factor.  The result will have the larger
                // scale factor.
                //
                DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
                DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
                scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);

                if (scale < 0) {
                        // Guessed scale factor wrong. Swap operands.
                        //
                        scale = -scale;
                        DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
                        DECIMAL_SIGN(result) ^= sign;
                        pdecTmp = right;
                        right = left;
                        left = pdecTmp;
                }

                // *left will need to be multiplied by 10^scale so
                // it will have the same scale as *right.  We could be
                // extending it to up to 192 bits of precision.
                //
                if (scale <= POWER10_MAX) {
                        // Scaling won't make it larger than 4 uint32_ts
                        //
                        // Multiply word by word, carrying the upper half of each
                        // 64-bit product into the next word.
                        pwr = power10[scale];
                        DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
                        sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
                        sdlTmp.int64 += DECIMAL_MID32(decTmp);
                        DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
                        DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
                        sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
                        sdlTmp.int64 += DECIMAL_HI32(decTmp);
                        if (sdlTmp.u.Hi == 0) {
                                // Result fits in 96 bits.  Use standard aligned add.
                                //
                                DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
                                left = &decTmp;
                                goto AlignedAdd;
                        }
                        // The product needs a fourth word; continue with the wide path.
                        num[0] = DECIMAL_LO32(decTmp);
                        num[1] = DECIMAL_MID32(decTmp);
                        num[2] = sdlTmp.u.Lo;
                        num[3] = sdlTmp.u.Hi;
                        hi_prod = 3;
                } else {
                        // Have to scale by a bunch.  Move the number to a buffer
                        // where it has room to grow as it's scaled.
                        //
                        num[0] = DECIMAL_LO32(*left);
                        num[1] = DECIMAL_MID32(*left);
                        num[2] = DECIMAL_HI32(*left);
                        hi_prod = 2;

                        // Scan for zeros in the upper words.
                        //
                        if (num[2] == 0) {
                                hi_prod = 1;
                                if (num[1] == 0) {
                                        hi_prod = 0;
                                        if (num[0] == 0) {
                                                // Left arg is zero, return right.
                                                //
                                                DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
                                                DECIMAL_HI32(result) = DECIMAL_HI32(*right);
                                                DECIMAL_SIGN(result) ^= sign;
                                                goto RetDec;
                                        }
                                }
                        }

                        // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
                        // with index of highest non-zero uint32_t.
                        //
                        for (; scale > 0; scale -= POWER10_MAX) {
                                if (scale > POWER10_MAX)
                                        pwr = ten_to_nine;
                                else
                                        pwr = power10[scale];

                                // sdlTmp.u.Hi carries the overflow of each word's product
                                // into the next word.
                                sdlTmp.u.Hi = 0;
                                for (cur = 0; cur <= hi_prod; cur++) {
                                        sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
                                        num[cur] = sdlTmp.u.Lo;
                                }

                                if (sdlTmp.u.Hi != 0)
                                        // We're extending the result by another uint32_t.
                                        num[++hi_prod] = sdlTmp.u.Hi;
                        }
                }

                // Scaling complete, do the add.  Could be subtract if signs differ.
                // Only the low 96 bits of num take part directly; carries and
                // borrows are then pushed into num[3] and above.
                //
                sdlTmp.u.Lo = num[0];
                sdlTmp.u.Hi = num[1];

                if (sign) {
                        // Signs differ, subtract.
                        //
                        DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
                        DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);

                        // Propagate the borrow
                        //
                        if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
                                DECIMAL_HI32(result)--;
                                if (DECIMAL_HI32(result) >= num[2])
                                        goto LongSub;
                        } else if (DECIMAL_HI32(result) > num[2]) {
                        LongSub:
                                // If num has more than 96 bits of precision, then we need to
                                // carry the subtraction into the higher bits.  If it doesn't,
                                // then we subtracted in the wrong order and have to flip the
                                // sign of the result.
                                //
                                if (hi_prod <= 2)
                                        goto SignFlip;

                                // Borrow from the words above 96 bits: decrement upward,
                                // continuing while the word just decremented was zero.
                                cur = 3;
                                while(num[cur++]-- == 0);
                                if (num[hi_prod] == 0)
                                        hi_prod--;
                        }
                } else {
                        // Signs the same, add.
                        //
                        DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
                        DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);

                        // Propagate carry
                        //
                        if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
                                DECIMAL_HI32(result)++;
                                if (DECIMAL_HI32(result) <= num[2])
                                        goto LongAdd;
                        } else if (DECIMAL_HI32(result) < num[2]) {
                        LongAdd:
                                // Had a carry above 96 bits.
                                //
                                // Increment upward from num[3]; if the carry runs past the
                                // current top word, start a new word holding 1.
                                cur = 3;
                                do {
                                        if (hi_prod < cur) {
                                                num[cur] = 1;
                                                hi_prod = cur;
                                                break;
                                        }
                                }while (++num[cur++] == 0);
                        }
                }

                if (hi_prod > 2) {
                        // The value still occupies more than 96 bits.  Put the low three
                        // words back into num and let ScaleResult bring it down.
                        // NOTE(review): ScaleResult presumably reduces num to 96 bits and
                        // returns the new scale, or -1 on overflow - confirm against its
                        // definition.
                        num[0] = DECIMAL_LO32(result);
                        num[1] = DECIMAL_MID32(result);
                        num[2] = DECIMAL_HI32(result);
                        DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
                        if (DECIMAL_SCALE(result) == (uint8_t)-1)
                                mono_raise_exception (mono_get_exception_overflow ());

                        DECIMAL_LO32(result) = num[0];
                        DECIMAL_MID32(result) = num[1];
                        DECIMAL_HI32(result) = num[2];
                }
        }

RetDec:
        // Store the result in the caller's left operand, whatever left points at now.
        left = leftOriginal;
        COPYDEC(*left, result);
        left->reserved = 0;
}
2671
2672 void
2673 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2674 {
2675         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2676         uint32_t pwr, tmp, tmp1;
2677         SPLIT64  sdlTmp, sdlDivisor;
2678         int      scale, cur_scale;
2679         gboolean unscale;
2680
2681         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2682         unscale = FALSE;
2683         divisor[0] = DECIMAL_LO32(*right);
2684         divisor[1] = DECIMAL_MID32(*right);
2685         divisor[2] = DECIMAL_HI32(*right);
2686
2687         if (divisor[1] == 0 && divisor[2] == 0) {
2688                 // Divisor is only 32 bits.  Easy divide.
2689                 //
2690                 if (divisor[0] == 0)
2691                         mono_raise_exception (mono_get_exception_divide_by_zero ());
2692
2693                 quo[0] = DECIMAL_LO32(*left);
2694                 quo[1] = DECIMAL_MID32(*left);
2695                 quo[2] = DECIMAL_HI32(*left);
2696                 rem[0] = Div96By32(quo, divisor[0]);
2697
2698                 for (;;) {
2699                         if (rem[0] == 0) {
2700                                 if (scale < 0) {
2701                                         cur_scale = min(9, -scale);
2702                                         goto HaveScale;
2703                                 }
2704                                 break;
2705                         }
2706                         // We need to unscale if and only if we have a non-zero remainder
2707                         unscale = TRUE;
2708
2709                         // We have computed a quotient based on the natural scale
2710                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
2711                         // remainder, so now we should increase the scale if possible to
2712                         // include more quotient bits.
2713                         //
2714                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
2715                         // computing more quotient bits as long as the remainder stays
2716                         // non-zero.  If scaling by that much would cause overflow, we'll
2717                         // drop out of the loop and scale by as much as we can.
2718                         //
2719                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2720                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2721                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2722                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
2723                         // is the largest value in quo[1] (when quo[2] == 4) that is
2724                         // assured not to overflow.
2725                         //
2726                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2727                         if (cur_scale == 0) {
2728                                 // No more scaling to be done, but remainder is non-zero.
2729                                 // Round quotient.
2730                                 //
2731                                 tmp = rem[0] << 1;
2732                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2733                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2734                                 RoundUp:
2735                                         if (!Add32To96(quo, 1)) {
2736                                                 if (scale == 0)
2737                                                         mono_raise_exception (mono_get_exception_overflow ());
2738                                                 scale--;
2739                                                 OverflowUnscale(quo, TRUE);
2740                                                 break;
2741                                         }
2742                                 }
2743                                 break;
2744                         }
2745
2746                         if (cur_scale < 0)
2747                                 mono_raise_exception (mono_get_exception_overflow ());
2748
2749                 HaveScale:
2750                         pwr = power10[cur_scale];
2751                         scale += cur_scale;
2752
2753                         if (IncreaseScale(quo, pwr) != 0)
2754                                 mono_raise_exception (mono_get_exception_overflow ());
2755
2756
2757                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2758                         rem[0] = sdlTmp.u.Hi;
2759
2760                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2761                                 if (scale == 0)
2762                                         mono_raise_exception (mono_get_exception_overflow ());
2763                                 scale--;
2764                                 OverflowUnscale(quo, (rem[0] != 0));
2765                                 break;
2766                         }
2767                 } // for (;;)
2768         } else {
2769                 // Divisor has bits set in the upper 64 bits.
2770                 //
2771                 // Divisor must be fully normalized (shifted so bit 31 of the most
2772                 // significant uint32_t is 1).  Locate the MSB so we know how much to
2773                 // normalize by.  The dividend will be shifted by the same amount so
2774                 // the quotient is not changed.
2775                 //
2776                 if (divisor[2] == 0)
2777                         tmp = divisor[1];
2778                 else
2779                         tmp = divisor[2];
2780
2781                 cur_scale = 0;
2782                 if (!(tmp & 0xFFFF0000)) {
2783                         cur_scale += 16;
2784                         tmp <<= 16;
2785                 }
2786                 if (!(tmp & 0xFF000000)) {
2787                         cur_scale += 8;
2788                         tmp <<= 8;
2789                 }
2790                 if (!(tmp & 0xF0000000)) {
2791                         cur_scale += 4;
2792                         tmp <<= 4;
2793                 }
2794                 if (!(tmp & 0xC0000000)) {
2795                         cur_scale += 2;
2796                         tmp <<= 2;
2797                 }
2798                 if (!(tmp & 0x80000000)) {
2799                         cur_scale++;
2800                         tmp <<= 1;
2801                 }
2802
2803                 // Shift both dividend and divisor left by cur_scale.
2804                 //
2805                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2806                 rem[0] = sdlTmp.u.Lo;
2807                 rem[1] = sdlTmp.u.Hi;
2808                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2809                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2810                 sdlTmp.int64 <<= cur_scale;
2811                 rem[2] = sdlTmp.u.Hi;
2812                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2813
2814                 sdlDivisor.u.Lo = divisor[0];
2815                 sdlDivisor.u.Hi = divisor[1];
2816                 sdlDivisor.int64 <<= cur_scale;
2817
2818                 if (divisor[2] == 0) {
2819                         // Have a 64-bit divisor in sdlDivisor.  The remainder
2820                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2821                         //
2822                         sdlTmp.u.Lo = rem[2];
2823                         sdlTmp.u.Hi = rem[3];
2824
2825                         quo[2] = 0;
2826                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2827                         quo[0] = Div96By64(rem, sdlDivisor);
2828
2829                         for (;;) {
2830                                 if ((rem[0] | rem[1]) == 0) {
2831                                         if (scale < 0) {
2832                                                 cur_scale = min(9, -scale);
2833                                                 goto HaveScale64;
2834                                         }
2835                                         break;
2836                                 }
2837
2838                                 // We need to unscale if and only if we have a non-zero remainder
2839                                 unscale = TRUE;
2840
2841                                 // Remainder is non-zero.  Scale up quotient and remainder by
2842                                 // powers of 10 so we can compute more significant bits.
2843                                 //
2844                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2845                                 if (cur_scale == 0) {
2846                                         // No more scaling to be done, but remainder is non-zero.
2847                                         // Round quotient.
2848                                         //
2849                                         sdlTmp.u.Lo = rem[0];
2850                                         sdlTmp.u.Hi = rem[1];
2851                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2852                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2853                                                 goto RoundUp;
2854                                         break;
2855                                 }
2856
2857                                 if (cur_scale < 0)
2858                                         mono_raise_exception (mono_get_exception_overflow ());
2859
2860                         HaveScale64:
2861                                 pwr = power10[cur_scale];
2862                                 scale += cur_scale;
2863
2864                                 if (IncreaseScale(quo, pwr) != 0)
2865                                         mono_raise_exception (mono_get_exception_overflow ());
2866
2867                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2868                                 IncreaseScale(rem, pwr);
2869                                 tmp = Div96By64(rem, sdlDivisor);
2870                                 if (!Add32To96(quo, tmp)) {
2871                                         if (scale == 0)
2872                                                 mono_raise_exception (mono_get_exception_overflow ());
2873                                         scale--;
2874                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2875                                         break;
2876                                 }
2877
2878                         } // for (;;)
2879                 } else {
2880                         // Have a 96-bit divisor in divisor[].
2881                         //
2882                         // Start by finishing the shift left by cur_scale.
2883                         //
2884                         sdlTmp.u.Lo = divisor[1];
2885                         sdlTmp.u.Hi = divisor[2];
2886                         sdlTmp.int64 <<= cur_scale;
2887                         divisor[0] = sdlDivisor.u.Lo;
2888                         divisor[1] = sdlDivisor.u.Hi;
2889                         divisor[2] = sdlTmp.u.Hi;
2890
2891                         // The remainder (currently 96 bits spread over 4 uint32_ts)
2892                         // will be < divisor.
2893                         //
2894                         quo[2] = 0;
2895                         quo[1] = 0;
2896                         quo[0] = Div128By96(rem, divisor);
2897
2898                         for (;;) {
2899                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2900                                         if (scale < 0) {
2901                                                 cur_scale = min(9, -scale);
2902                                                 goto HaveScale96;
2903                                         }
2904                                         break;
2905                                 }
2906
2907                                 // We need to unscale if and only if we have a non-zero remainder
2908                                 unscale = TRUE;
2909
2910                                 // Remainder is non-zero.  Scale up quotient and remainder by
2911                                 // powers of 10 so we can compute more significant bits.
2912                                 //
2913                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2914                                 if (cur_scale == 0) {
2915                                         // No more scaling to be done, but remainder is non-zero.
2916                                         // Round quotient.
2917                                         //
2918                                         if (rem[2] >= 0x80000000)
2919                                                 goto RoundUp;
2920
2921                                         tmp = rem[0] > 0x80000000;
2922                                         tmp1 = rem[1] > 0x80000000;
2923                                         rem[0] <<= 1;
2924                                         rem[1] = (rem[1] << 1) + tmp;
2925                                         rem[2] = (rem[2] << 1) + tmp1;
2926
2927                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2928                                                 goto RoundUp;
2929                                         break;
2930                                 }
2931
2932                                 if (cur_scale < 0)
2933                                         mono_raise_exception (mono_get_exception_overflow ());
2934
2935                         HaveScale96:
2936                                 pwr = power10[cur_scale];
2937                                 scale += cur_scale;
2938
2939                                 if (IncreaseScale(quo, pwr) != 0)
2940                                         mono_raise_exception (mono_get_exception_overflow ());
2941
2942                                 rem[3] = IncreaseScale(rem, pwr);
2943                                 tmp = Div128By96(rem, divisor);
2944                                 if (!Add32To96(quo, tmp)) {
2945                                         if (scale == 0)
2946                                                 mono_raise_exception (mono_get_exception_overflow ());
2947
2948                                         scale--;
2949                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2950                                         break;
2951                                 }
2952
2953                         } // for (;;)
2954                 }
2955         }
2956
2957         // We need to unscale if and only if we have a non-zero remainder
2958         if (unscale) {
2959                 // Try extracting any extra powers of 10 we may have
2960                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2961                 // If a division by one of these powers returns a zero remainder, then
2962                 // we keep the quotient.  If the remainder is not zero, then we restore
2963                 // the previous value.
2964                 //
2965                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2966                 // we can extract.  We use this as a quick test on whether to try a
2967                 // given power.
2968                 //
2969                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2970                         quo_save[0] = quo[0];
2971                         quo_save[1] = quo[1];
2972                         quo_save[2] = quo[2];
2973
2974                         if (Div96By32(quo_save, 100000000) == 0) {
2975                                 quo[0] = quo_save[0];
2976                                 quo[1] = quo_save[1];
2977                                 quo[2] = quo_save[2];
2978                                 scale -= 8;
2979                         } else
2980                                 break;
2981                 }
2982
2983                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
2984                         quo_save[0] = quo[0];
2985                         quo_save[1] = quo[1];
2986                         quo_save[2] = quo[2];
2987
2988                         if (Div96By32(quo_save, 10000) == 0) {
2989                                 quo[0] = quo_save[0];
2990                                 quo[1] = quo_save[1];
2991                                 quo[2] = quo_save[2];
2992                                 scale -= 4;
2993                         }
2994                 }
2995
2996                 if ((quo[0] & 3) == 0 && scale >= 2) {
2997                         quo_save[0] = quo[0];
2998                         quo_save[1] = quo[1];
2999                         quo_save[2] = quo[2];
3000
3001                         if (Div96By32(quo_save, 100) == 0) {
3002                                 quo[0] = quo_save[0];
3003                                 quo[1] = quo_save[1];
3004                                 quo[2] = quo_save[2];
3005                                 scale -= 2;
3006                         }
3007                 }
3008
3009                 if ((quo[0] & 1) == 0 && scale >= 1) {
3010                         quo_save[0] = quo[0];
3011                         quo_save[1] = quo[1];
3012                         quo_save[2] = quo[2];
3013
3014                         if (Div96By32(quo_save, 10) == 0) {
3015                                 quo[0] = quo_save[0];
3016                                 quo[1] = quo_save[1];
3017                                 quo[2] = quo_save[2];
3018                                 scale -= 1;
3019                         }
3020                 }
3021         }
3022
3023         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3024         DECIMAL_HI32(*left) = quo[2];
3025         DECIMAL_MID32(*left) = quo[1];
3026         DECIMAL_LO32(*left) = quo[0];
3027         DECIMAL_SCALE(*left) = (uint8_t)scale;
3028         left->reserved = 0;
3029
3030 }
3031
// Upper bound that mono_decimal_from_number places on the incoming scale; one
// less than this (28) is the largest scale it ever stores in a decimal.
#define DECIMAL_PRECISION 29

// Number of digit slots in CLRNumber.digits, not counting the extra slot
// reserved for the terminating zero.
#define NUMBER_MAXDIGITS 50

// Digit buffer handed to mono_decimal_from_number through its void* argument.
// NOTE(review): presumably this mirrors a layout defined by the caller;
// confirm before reordering or resizing any field.
typedef struct {
        // Not read by the code in this file section.
        // NOTE(review): presumably the count of significant digits - confirm.
        int32_t precision;

        // Number of leading entries of `digits` that lie before the decimal
        // point; may be zero or negative.
        int32_t scale;

        // Non-zero selects a negative result.
        int32_t sign;

        // Zero-terminated sequence of 16-bit characters '0'..'9'.
        uint16_t digits[NUMBER_MAXDIGITS + 1];

        // Not read by the code in this file section.
        uint16_t *allDigits;
} CLRNumber;
3041
3042 int
3043 mono_decimal_from_number (void *from, MonoDecimal *target)
3044 {
3045         CLRNumber *number = (CLRNumber *) from;
3046         g_assert(number != NULL);
3047         g_assert(target != NULL);
3048
3049         MonoDecimal d;
3050         d.reserved = 0;
3051         DECIMAL_SIGNSCALE(d) = 0;
3052         DECIMAL_HI32(d) = 0;
3053         DECIMAL_LO32(d) = 0;
3054         DECIMAL_MID32(d) = 0;
3055         uint16_t* p = number->digits;
3056         g_assert(p != NULL);
3057         int e = number->scale;
3058         if (!*p) {
3059                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3060                 // the scale to 0 if the scale was previously positive
3061                 if (e > 0) {
3062                         e = 0;
3063                 }
3064         } else {
3065                 if (e > DECIMAL_PRECISION) return 0;
3066                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3067                         DecMul10(&d);
3068                         if (*p)
3069                                 DecAddInt32(&d, *p++ - '0');
3070                         e--;
3071                 }
3072                 if (*p++ >= '5') {
3073                         gboolean round = TRUE;
3074                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3075                                 // For digits > 5 we will be roundinp up anyway.
3076                                 int count = 20; // Look at the next 20 digits to check to round
3077                                 while (*p == '0' && count != 0) {
3078                                         p++;
3079                                         count--;
3080                                 }
3081                                 if (*p == '\0' || count == 0)
3082                                         round = FALSE;// Do nothing
3083                         }
3084
3085                         if (round) {
3086                                 DecAddInt32(&d, 1);
3087                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3088                                         DECIMAL_HI32(d) = 0x19999999;
3089                                         DECIMAL_MID32(d) = 0x99999999;
3090                                         DECIMAL_LO32(d) = 0x9999999A;
3091                                         e++;
3092                                 }
3093                         }
3094                 }
3095         }
3096         if (e > 0)
3097                 return 0;
3098         if (e <= -DECIMAL_PRECISION) {
3099                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3100                 // This should only happen for actual zeros or very small numbers that round to zero.
3101                 DECIMAL_SIGNSCALE(d) = 0;
3102                 DECIMAL_HI32(d) = 0;
3103                 DECIMAL_LO32(d) = 0;
3104                 DECIMAL_MID32(d) = 0;
3105                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3106         } else {
3107                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3108         }
3109
3110         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3111         *target = d;
3112         return 1;
3113 }
3114
3115
3116 #endif