mono/metadata/decimal-ms.c

   1 //
   2 // Copyright (c) Microsoft. All rights reserved.
   3 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
   4 //
   5 // Copyright 2015 Xamarin Inc
   6 //
   7 // File: decimal.c
   8 //
   9 // Ported from C++ to C and adjusted to Mono runtime
  10 //
  11 // Pending:
  12 //   DoToCurrency (they look like new methods we do not have)
  13 //
  14 #ifndef DISABLE_DECIMAL
  15 #include "config.h"
  16 #include <stdint.h>
  17 #include <glib.h>
  18 #include <mono/utils/mono-compiler.h>
  19 #include <mono/metadata/exception.h>
  20 #include <mono/metadata/object-internals.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <math.h>
  25 #ifdef HAVE_MEMORY_H
  26 #include <memory.h>
  27 #endif
  28 #ifdef _MSC_VER
  29 #include <intrin.h>
  30 #endif
  31 #include "decimal-ms.h"
  32
  33 #define min(a, b) (((a) < (b)) ? (a) : (b))
  34
  35 typedef enum {
  36         MONO_DECIMAL_OK,
  37         MONO_DECIMAL_OVERFLOW,
  38         MONO_DECIMAL_INVALID_ARGUMENT,
  39         MONO_DECIMAL_DIVBYZERO,
  40         MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
  41 } MonoDecimalStatus;
  42
  43 #ifndef FC_GC_POLL
  44 #   define FC_GC_POLL()
  45 #endif
  46
  47 static const uint32_t ten_to_nine    = 1000000000U;
  48 static const uint32_t ten_to_ten_div_4 = 2500000000U;
  49 #define POWER10_MAX     9
  50 #define DECIMAL_NEG ((uint8_t)0x80)
  51 #define DECMAX 28
  52 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
  53 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
  54 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
  55 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
  56 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
  57 #define DECIMAL_HI32(dec)        ((dec).Hi32)
  58 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
  59 # define DECIMAL_LO64_GET(dec)   (((uint64_t)((dec).v.v.Mid32) << 32) | (dec).v.v.Lo32)
  60 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.v.Lo32 = (value); (dec).v.v.Mid32 = ((value) >> 32); }
  61 #else
  62 # define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
  63 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
  64 #endif
  65
  66 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
  67 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
  68     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
  69
  70 #define DEC_SCALE_MAX   28
  71 #define POWER10_MAX     9
  72
  73 #define OVFL_MAX_9_HI   4
  74 #define OVFL_MAX_9_MID  1266874889
  75 #define OVFL_MAX_9_LO   3047500985u
  76
  77 #define OVFL_MAX_5_HI   42949
  78 #define OVFL_MAX_5_MID  2890341191
  79
  80 #define OVFL_MAX_1_HI   429496729
  81
  82 typedef union {
  83         uint64_t int64;
  84         struct {
  85 #if BYTE_ORDER == G_BIG_ENDIAN
  86         uint32_t Hi;
  87         uint32_t Lo;
  88 #else
  89         uint32_t Lo;
  90         uint32_t Hi;
  91 #endif
  92     } u;
  93 } SPLIT64;
  94
  95 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
  96 // Double Bias
  97 #define DBLBIAS 1022
  98
  99 // Structure to access an encoded double floating point
 100 typedef union{
 101     struct {
 102 #if BYTE_ORDER == G_BIG_ENDIAN
 103       unsigned int sign:1;
 104       unsigned int exp:11;
 105       unsigned int mantHi:20;
 106       unsigned int mantLo;
 107 #else // BIGENDIAN
 108       unsigned int mantLo;
 109       unsigned int mantHi:20;
 110       unsigned int exp:11;
 111       unsigned int sign:1;
 112 #endif
 113     } u;
 114     double dbl;
 115 } DoubleStructure;
 116
 117 #if BYTE_ORDER == G_BIG_ENDIAN
 118 #define DEFDS(Lo, Hi, exp, sign) { {sign, exp, Hi, Lo } }
 119 #else
 120 #define DEFDS(Lo, Hi, exp, sign) { {Lo, Hi, exp, sign} }
 121 #endif
 122
 123 const DoubleStructure ds2to64 = DEFDS(0, 0, DBLBIAS + 65, 0);
 124
 125 // Single floating point Bias
 126 #define SNGBIAS 126
 127
 128 // Structure to access an encoded single floating point
 129 typedef struct {
 130 #if BYTE_ORDER == G_BIG_ENDIAN
 131     unsigned int sign:1;
 132     unsigned int exp:8;
 133     unsigned int mant:23;
 134 #else
 135     unsigned int mant:23;
 136     unsigned int exp:8;
 137     unsigned int sign:1;
 138 #endif
 139 } SingleStructure;
 140
 141 //
 142 // Data tables
 143 //
 144
 145 static const uint32_t power10 [POWER10_MAX+1] = {
 146         1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
 147 };
 148
 149
 150 static const double double_power10[] = {
 151         1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
 152         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
 153         1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
 154         1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
 155         1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
 156         1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
 157         1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
 158         1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
 159         1e80 };
 160
 161 const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
 162                                 {100000000000ULL},         // 1E11
 163                                 {1000000000000ULL},        // 1E12
 164                                 {10000000000000ULL},       // 1E13
 165                                 {100000000000000ULL} };    // 1E14
 166
 167 static const uint64_t long_power10[] = {
 168         1,
 169         10ULL,
 170         100ULL,
 171         1000ULL,
 172         10000ULL,
 173         100000ULL,
 174         1000000ULL,
 175         10000000ULL,
 176         100000000ULL,
 177         1000000000ULL,
 178         10000000000ULL,
 179         100000000000ULL,
 180         1000000000000ULL,
 181         10000000000000ULL,
 182         100000000000000ULL,
 183         1000000000000000ULL,
 184         10000000000000000ULL,
 185         100000000000000000ULL,
 186         1000000000000000000ULL,
 187         10000000000000000000ULL};
 188
 189 typedef struct  {
 190         uint32_t Hi, Mid, Lo;
 191 } DECOVFL;
 192
 193 const DECOVFL power_overflow[] = {
 194 // This is a table of the largest values that can be in the upper two
 195 // ULONGs of a 96-bit number that will not overflow when multiplied
 196 // by a given power.  For the upper word, this is a table of
 197 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
 198 // remaining fraction part * 2^32.  2^32 = 4294967296.
 199 //
 200     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
 201     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
 202     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
 203     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
 204     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
 205     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
 206     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
 207     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
 208     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
 209 };
 210
 211
 212 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
 213 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
 214 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
 215
 216 static double
 217 fnDblPower10(int ix)
 218 {
 219     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
 220     g_assert(ix >= 0);
 221     if (ix < maxIx)
 222         return double_power10[ix];
 223     return pow(10.0, ix);
 224 } // double fnDblPower10()
 225
 226
 227 static inline int64_t
 228 DivMod32by32(int32_t num, int32_t den)
 229 {
 230     SPLIT64  sdl;
 231
 232     sdl.u.Lo = num / den;
 233     sdl.u.Hi = num % den;
 234     return sdl.int64;
 235 }
 236
 237 static inline int64_t
 238 DivMod64by32(int64_t num, int32_t den)
 239 {
 240     SPLIT64  sdl;
 241
 242     sdl.u.Lo = Div64by32(num, den);
 243     sdl.u.Hi = Mod64by32(num, den);
 244     return sdl.int64;
 245 }
 246
 247 static uint64_t
 248 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
 249 {
 250         SPLIT64  tmp1;
 251         SPLIT64  tmp2;
 252         SPLIT64  tmp3;
 253
 254         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
 255         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
 256         tmp1.u.Hi += tmp2.u.Lo;
 257         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 258                 tmp2.u.Hi++;
 259         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
 260         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
 261         tmp1.u.Hi += tmp2.u.Lo;
 262         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 263                 tmp2.u.Hi++;
 264         tmp3.int64 += (uint64_t)tmp2.u.Hi;
 265
 266         *hi = tmp3.int64;
 267         return tmp1.int64;
 268 }
 269
 270 /**
 271 * FullDiv64By32:
 272 *
 273 * Entry:
 274 *   pdlNum  - Pointer to 64-bit dividend
 275 *   ulDen   - 32-bit divisor
 276 *
 277 * Purpose:
 278 *   Do full divide, yielding 64-bit result and 32-bit remainder.
 279 *
 280 * Exit:
 281 *   Quotient overwrites dividend.
 282 *   Returns remainder.
 283 *
 284 * Exceptions:
 285 *   None.
 286 */
 287 // Was: FullDiv64By32
 288 static uint32_t
 289 FullDiv64By32 (uint64_t *num, uint32_t den)
 290 {
 291         SPLIT64  tmp;
 292         SPLIT64  res;
 293
 294         tmp.int64 = *num;
 295         res.u.Hi = 0;
 296
 297         if (tmp.u.Hi >= den) {
 298                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
 299                 //
 300                 res.u.Lo = tmp.u.Hi;
 301                 res.int64 = DivMod64by32(res.int64, den);
 302                 tmp.u.Hi = res.u.Hi;
 303                 res.u.Hi = res.u.Lo;
 304         }
 305
 306         tmp.int64 = DivMod64by32(tmp.int64, den);
 307         res.u.Lo = tmp.u.Lo;
 308         *num = res.int64;
 309         return tmp.u.Hi;
 310 }
 311
 312 /***
 313  * SearchScale
 314  *
 315  * Entry:
 316  *   res_hi - Top uint32_t of quotient
 317  *   res_mid - Middle uint32_t of quotient
 318  *   res_lo - Bottom uint32_t of quotient
 319  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 320  *
 321  * Purpose:
 322  *   Determine the max power of 10, <= 9, that the quotient can be scaled
 323  *   up by and still fit in 96 bits.
 324  *
 325  * Exit:
 326  *   Returns power of 10 to scale by, -1 if overflow error.
 327  *
 328  ***********************************************************************/
 329
 330 static int
 331 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
 332 {
 333         int   cur_scale;
 334
 335         // Quick check to stop us from trying to scale any more.
 336         //
 337         if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
 338                 cur_scale = 0;
 339                 goto HaveScale;
 340         }
 341
 342         if (scale > DEC_SCALE_MAX - 9) {
 343                 // We can't scale by 10^9 without exceeding the max scale factor.
 344                 // See if we can scale to the max.  If not, we'll fall into
 345                 // standard search for scale factor.
 346                 //
 347                 cur_scale = DEC_SCALE_MAX - scale;
 348                 if (res_hi < power_overflow[cur_scale - 1].Hi)
 349                         goto HaveScale;
 350
 351                 if (res_hi == power_overflow[cur_scale - 1].Hi) {
 352                 UpperEq:
 353                         if (res_mid > power_overflow[cur_scale - 1].Mid ||
 354                             (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
 355                                 cur_scale--;
 356                         }
 357                         goto HaveScale;
 358                 }
 359         } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
 360                 return 9;
 361
 362         // Search for a power to scale by < 9.  Do a binary search
 363         // on power_overflow[].
 364         //
 365         cur_scale = 5;
 366         if (res_hi < OVFL_MAX_5_HI)
 367                 cur_scale = 7;
 368         else if (res_hi > OVFL_MAX_5_HI)
 369                 cur_scale = 3;
 370         else
 371                 goto UpperEq;
 372
 373         // cur_scale is 3 or 7.
 374         //
 375         if (res_hi < power_overflow[cur_scale - 1].Hi)
 376                 cur_scale++;
 377         else if (res_hi > power_overflow[cur_scale - 1].Hi)
 378                 cur_scale--;
 379         else
 380                 goto UpperEq;
 381
 382         // cur_scale is 2, 4, 6, or 8.
 383         //
 384         // In all cases, we already found we could not use the power one larger.
 385         // So if we can use this power, it is the biggest, and we're done.  If
 386         // we can't use this power, the one below it is correct for all cases
 387         // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
 388         //
 389         if (res_hi > power_overflow[cur_scale - 1].Hi)
 390                 cur_scale--;
 391
 392         if (res_hi == power_overflow[cur_scale - 1].Hi)
 393                 goto UpperEq;
 394
 395 HaveScale:
 396         // cur_scale = largest power of 10 we can scale by without overflow,
 397         // cur_scale < 9.  See if this is enough to make scale factor
 398         // positive if it isn't already.
 399         //
 400         if (cur_scale + scale < 0)
 401                 cur_scale = -1;
 402
 403         return cur_scale;
 404 }
 405
 406
 407 /**
 408 * Div96By32
 409 *
 410 * Entry:
 411 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
 412 *   ulDen   - 32-bit divisor.
 413 *
 414 * Purpose:
 415 *   Do full divide, yielding 96-bit result and 32-bit remainder.
 416 *
 417 * Exit:
 418 *   Quotient overwrites dividend.
 419 *   Returns remainder.
 420 *
 421 * Exceptions:
 422 *   None.
 423 *
 424 */
 425 static uint32_t
 426 Div96By32(uint32_t *num, uint32_t den)
 427 {
 428         SPLIT64  tmp;
 429
 430         tmp.u.Hi = 0;
 431
 432         if (num[2] != 0)
 433                 goto Div3Word;
 434
 435         if (num[1] >= den)
 436                 goto Div2Word;
 437
 438         tmp.u.Hi = num[1];
 439         num[1] = 0;
 440         goto Div1Word;
 441
 442 Div3Word:
 443         tmp.u.Lo = num[2];
 444         tmp.int64 = DivMod64by32(tmp.int64, den);
 445         num[2] = tmp.u.Lo;
 446 Div2Word:
 447         tmp.u.Lo = num[1];
 448         tmp.int64 = DivMod64by32(tmp.int64, den);
 449         num[1] = tmp.u.Lo;
 450 Div1Word:
 451         tmp.u.Lo = num[0];
 452         tmp.int64 = DivMod64by32(tmp.int64, den);
 453         num[0] = tmp.u.Lo;
 454         return tmp.u.Hi;
 455 }
 456
 457 /***
 458  * DecFixInt
 459  *
 460  * Entry:
 461  *   pdecRes - Pointer to Decimal result location
 462  *   operand  - Pointer to Decimal operand
 463  *
 464  * Purpose:
 465  *   Chop the value to integer.  Return remainder so Int() function
 466  *   can round down if non-zero.
 467  *
 468  * Exit:
 469  *   Returns remainder.
 470  *
 471  * Exceptions:
 472  *   None.
 473  *
 474  ***********************************************************************/
 475
 476 static uint32_t
 477 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
 478 {
 479         uint32_t   num[3];
 480         uint32_t   rem;
 481         uint32_t   pwr;
 482         int     scale;
 483
 484         if (operand->u.u.scale > 0) {
 485                 num[0] = operand->v.v.Lo32;
 486                 num[1] = operand->v.v.Mid32;
 487                 num[2] = operand->Hi32;
 488                 scale = operand->u.u.scale;
 489                 result->u.u.sign = operand->u.u.sign;
 490                 rem = 0;
 491
 492                 do {
 493                         if (scale > POWER10_MAX)
 494                                 pwr = ten_to_nine;
 495                         else
 496                                 pwr = power10[scale];
 497
 498                         rem |= Div96By32(num, pwr);
 499                         scale -= 9;
 500                 }while (scale > 0);
 501
 502                 result->v.v.Lo32 = num[0];
 503                 result->v.v.Mid32 = num[1];
 504                 result->Hi32 = num[2];
 505                 result->u.u.scale = 0;
 506
 507                 return rem;
 508         }
 509
 510         COPYDEC(*result, *operand);
 511         // Odd, the Microsoft code does not set result->reserved to zero on this case
 512         return 0;
 513 }
 514
 515 /**
 516  * ScaleResult:
 517  *
 518  * Entry:
 519  *   res - Array of uint32_ts with value, least-significant first.
 520  *   hi_res  - Index of last non-zero value in res.
 521  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 522  *
 523  * Purpose:
 524  *   See if we need to scale the result to fit it in 96 bits.
 525  *   Perform needed scaling.  Adjust scale factor accordingly.
 526  *
 527  * Exit:
 528  *   res updated in place, always 3 uint32_ts.
 529  *   New scale factor returned, -1 if overflow error.
 530  *
 531  */
 532 static int
 533 ScaleResult(uint32_t *res, int hi_res, int scale)
 534 {
 535         int     new_scale;
 536         int     cur;
 537         uint32_t   pwr;
 538         uint32_t   tmp;
 539         uint32_t   sticky;
 540         SPLIT64 sdlTmp;
 541
 542         // See if we need to scale the result.  The combined scale must
 543         // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
 544         //
 545         // Start by figuring a lower bound on the scaling needed to make
 546         // the upper 96 bits zero.  hi_res is the index into res[]
 547         // of the highest non-zero uint32_t.
 548         //
 549         new_scale =   hi_res * 32 - 64 - 1;
 550         if (new_scale > 0) {
 551
 552                 // Find the MSB.
 553                 //
 554                 tmp = res[hi_res];
 555                 if (!(tmp & 0xFFFF0000)) {
 556                         new_scale -= 16;
 557                         tmp <<= 16;
 558                 }
 559                 if (!(tmp & 0xFF000000)) {
 560                         new_scale -= 8;
 561                         tmp <<= 8;
 562                 }
 563                 if (!(tmp & 0xF0000000)) {
 564                         new_scale -= 4;
 565                         tmp <<= 4;
 566                 }
 567                 if (!(tmp & 0xC0000000)) {
 568                         new_scale -= 2;
 569                         tmp <<= 2;
 570                 }
 571                 if (!(tmp & 0x80000000)) {
 572                         new_scale--;
 573                         tmp <<= 1;
 574                 }
 575
 576                 // Multiply bit position by log10(2) to figure it's power of 10.
 577                 // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
 578                 // with a multiply saves a 96-byte lookup table.  The power returned
 579                 // is <= the power of the number, so we must add one power of 10
 580                 // to make it's integer part zero after dividing by 256.
 581                 //
 582                 // Note: the result of this multiplication by an approximation of
 583                 // log10(2) have been exhaustively checked to verify it gives the
 584                 // correct result.  (There were only 95 to check...)
 585                 //
 586                 new_scale = ((new_scale * 77) >> 8) + 1;
 587
 588                 // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
 589                 // This reduces the scale factor of the result.  If it exceeds the
 590                 // current scale of the result, we'll overflow.
 591                 //
 592                 if (new_scale > scale)
 593                         return -1;
 594         }
 595         else
 596                 new_scale = 0;
 597
 598         // Make sure we scale by enough to bring the current scale factor
 599         // into valid range.
 600         //
 601         if (new_scale < scale - DEC_SCALE_MAX)
 602                 new_scale = scale - DEC_SCALE_MAX;
 603
 604         if (new_scale != 0) {
 605                 // Scale by the power of 10 given by new_scale.  Note that this is
 606                 // NOT guaranteed to bring the number within 96 bits -- it could
 607                 // be 1 power of 10 short.
 608                 //
 609                 scale -= new_scale;
 610                 sticky = 0;
 611                 sdlTmp.u.Hi = 0; // initialize remainder
 612
 613                 for (;;) {
 614
 615                         sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
 616
 617                         if (new_scale > POWER10_MAX)
 618                                 pwr = ten_to_nine;
 619                         else
 620                                 pwr = power10[new_scale];
 621
 622                         // Compute first quotient.
 623                         // DivMod64by32 returns quotient in Lo, remainder in Hi.
 624                         //
 625                         sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
 626                         res[hi_res] = sdlTmp.u.Lo;
 627                         cur = hi_res - 1;
 628
 629                         if (cur >= 0) {
 630                                 // If first quotient was 0, update hi_res.
 631                                 //
 632                                 if (sdlTmp.u.Lo == 0)
 633                                         hi_res--;
 634
 635                                 // Compute subsequent quotients.
 636                                 //
 637                                 do {
 638                                         sdlTmp.u.Lo = res[cur];
 639                                         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
 640                                         res[cur] = sdlTmp.u.Lo;
 641                                         cur--;
 642                                 } while (cur >= 0);
 643
 644                         }
 645
 646                         new_scale -= POWER10_MAX;
 647                         if (new_scale > 0)
 648                                 continue; // scale some more
 649
 650                         // If we scaled enough, hi_res would be 2 or less.  If not,
 651                         // divide by 10 more.
 652                         //
 653                         if (hi_res > 2) {
 654                                 new_scale = 1;
 655                                 scale--;
 656                                 continue; // scale by 10
 657                         }
 658
 659                         // Round final result.  See if remainder >= 1/2 of divisor.
 660                         // If remainder == 1/2 divisor, round up if odd or sticky bit set.
 661                         //
 662                         pwr >>= 1;  // power of 10 always even
 663                         if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
 664                                                     ((res[0] & 1) | sticky)) ) {
 665                                 cur = -1;
 666                                 while (++res[++cur] == 0);
 667
 668                                 if (cur > 2) {
 669                                         // The rounding caused us to carry beyond 96 bits.
 670                                         // Scale by 10 more.
 671                                         //
 672                                         hi_res = cur;
 673                                         sticky = 0;  // no sticky bit
 674                                         sdlTmp.u.Hi = 0; // or remainder
 675                                         new_scale = 1;
 676                                         scale--;
 677                                         continue; // scale by 10
 678                                 }
 679                         }
 680
 681                         // We may have scaled it more than we planned.  Make sure the scale
 682                         // factor hasn't gone negative, indicating overflow.
 683                         //
 684                         if (scale < 0)
 685                                 return -1;
 686
 687                         return scale;
 688                 } // for(;;)
 689         }
 690         return scale;
 691 }
 692
 693 // Decimal multiply
 694 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
 695 static MonoDecimalStatus
 696 mono_decimal_multiply_result(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
 697 {
 698         SPLIT64 tmp;
 699         SPLIT64 tmp2;
 700         SPLIT64 tmp3;
 701         int     scale;
 702         int     hi_prod;
 703         uint32_t   pwr;
 704         uint32_t   rem_lo;
 705         uint32_t   rem_hi;
 706         uint32_t   prod[6];
 707
 708         scale = left->u.u.scale + right->u.u.scale;
 709
 710         if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
 711                 // Upper 64 bits are zero.
 712                 //
 713                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 714                 if (scale > DEC_SCALE_MAX)
 715                 {
 716                         // Result scale is too big.  Divide result by power of 10 to reduce it.
 717                         // If the amount to divide by is > 19 the result is guaranteed
 718                         // less than 1/2.  [max value in 64 bits = 1.84E19]
 719                         //
 720                         scale -= DEC_SCALE_MAX;
 721                         if (scale > 19) {
 722                         ReturnZero:
 723                                 DECIMAL_SETZERO(*result);
 724                                 return MONO_DECIMAL_OK;
 725                         }
 726
 727                         if (scale > POWER10_MAX) {
 728                                 // Divide by 1E10 first, to get the power down to a 32-bit quantity.
 729                                 // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
 730                                 // then multiply the next divisor by 4 (which will be a max of 4E9).
 731                                 //
 732                                 rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
 733                                 pwr = power10[scale - 10] << 2;
 734                         } else {
 735                                 pwr = power10[scale];
 736                                 rem_lo = 0;
 737                         }
 738
 739                         // Power to divide by fits in 32 bits.
 740                         //
 741                         rem_hi = FullDiv64By32(&tmp.int64, pwr);
 742
 743                         // Round result.  See if remainder >= 1/2 of divisor.
 744                         // Divisor is a power of 10, so it is always even.
 745                         //
 746                         pwr >>= 1;
 747                         if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
 748                                 tmp.int64++;
 749
 750                         scale = DEC_SCALE_MAX;
 751                 }
 752                 DECIMAL_LO32(*result) = tmp.u.Lo;
 753                 DECIMAL_MID32(*result) = tmp.u.Hi;
 754                 DECIMAL_HI32(*result) = 0;
 755         } else {
 756                 // At least one operand has bits set in the upper 64 bits.
 757                 //
 758                 // Compute and accumulate the 9 partial products into a
 759                 // 192-bit (24-byte) result.
 760                 //
 761                 //                [l-h][l-m][l-l]   left high, middle, low
 762                 //             x  [r-h][r-m][r-l]   right high, middle, low
 763                 // ------------------------------
 764                 //
 765                 //                     [0-h][0-l]   l-l * r-l
 766                 //                [1ah][1al]        l-l * r-m
 767                 //                [1bh][1bl]        l-m * r-l
 768                 //           [2ah][2al]             l-m * r-m
 769                 //           [2bh][2bl]             l-l * r-h
 770                 //           [2ch][2cl]             l-h * r-l
 771                 //      [3ah][3al]                  l-m * r-h
 772                 //      [3bh][3bl]                  l-h * r-m
 773                 // [4-h][4-l]                       l-h * r-h
 774                 // ------------------------------
 775                 // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
 776                 //
 777                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 778                 prod[0] = tmp.u.Lo;
 779
 780                 tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
 781
 782                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
 783                 tmp.int64 += tmp2.int64; // this could generate carry
 784                 prod[1] = tmp.u.Lo;
 785                 if (tmp.int64 < tmp2.int64) // detect carry
 786                         tmp2.u.Hi = 1;
 787                 else
 788                         tmp2.u.Hi = 0;
 789                 tmp2.u.Lo = tmp.u.Hi;
 790
 791                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
 792
 793                 if (left->Hi32 | right->Hi32) {
 794                         // Highest 32 bits is non-zero.  Calculate 5 more partial products.
 795                         //
 796                         tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
 797                         tmp.int64 += tmp2.int64; // this could generate carry
 798                         if (tmp.int64 < tmp2.int64) // detect carry
 799                                 tmp3.u.Hi = 1;
 800                         else
 801                                 tmp3.u.Hi = 0;
 802
 803                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
 804                         tmp.int64 += tmp2.int64; // this could generate carry
 805                         prod[2] = tmp.u.Lo;
 806                         if (tmp.int64 < tmp2.int64) // detect carry
 807                                 tmp3.u.Hi++;
 808                         tmp3.u.Lo = tmp.u.Hi;
 809
 810                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
 811                         tmp.int64 += tmp3.int64; // this could generate carry
 812                         if (tmp.int64 < tmp3.int64) // detect carry
 813                                 tmp3.u.Hi = 1;
 814                         else
 815                                 tmp3.u.Hi = 0;
 816
 817                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
 818                         tmp.int64 += tmp2.int64; // this could generate carry
 819                         prod[3] = tmp.u.Lo;
 820                         if (tmp.int64 < tmp2.int64) // detect carry
 821                                 tmp3.u.Hi++;
 822                         tmp3.u.Lo = tmp.u.Hi;
 823
 824                         tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
 825                         prod[4] = tmp.u.Lo;
 826                         prod[5] = tmp.u.Hi;
 827
 828                         hi_prod = 5;
 829                 }
 830                 else {
 831                         prod[2] = tmp.u.Lo;
 832                         prod[3] = tmp.u.Hi;
 833                         hi_prod = 3;
 834                 }
 835
 836                 // Check for leading zero uint32_ts on the product
 837                 //
 838                 while (prod[hi_prod] == 0) {
 839                         hi_prod--;
 840                         if (hi_prod < 0)
 841                                 goto ReturnZero;
 842                 }
 843
 844                 scale = ScaleResult(prod, hi_prod, scale);
 845                 if (scale == -1)
 846                         return MONO_DECIMAL_OVERFLOW;
 847
 848                 result->v.v.Lo32 = prod[0];
 849                 result->v.v.Mid32 = prod[1];
 850                 result->Hi32 = prod[2];
 851         }
 852
 853         result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
 854         result->u.u.scale = (char)scale;
 855         return MONO_DECIMAL_OK;
 856 }
 857
 858 // Addition and subtraction
 859 static MonoDecimalStatus
 860 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
 861 {
 862         uint32_t     num[6];
 863         uint32_t     pwr;
 864         int       scale;
 865         int       hi_prod;
 866         int       cur;
 867         SPLIT64   tmp;
 868         MonoDecimal decRes;
 869         MonoDecimal decTmp;
 870         MonoDecimal *pdecTmp;
 871
 872         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
 873
 874         if (right->u.u.scale == left->u.u.scale) {
 875                 // Scale factors are equal, no alignment necessary.
 876                 //
 877                 decRes.u.signscale = left->u.signscale;
 878
 879         AlignedAdd:
 880                 if (sign) {
 881                         // Signs differ - subtract
 882                         //
 883                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
 884                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
 885
 886                         // Propagate carry
 887                         //
 888                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
 889                                 decRes.Hi32--;
 890                                 if (decRes.Hi32 >= left->Hi32)
 891                                         goto SignFlip;
 892                         } else if (decRes.Hi32 > left->Hi32) {
 893                                 // Got negative result.  Flip its sign.
 894                                 //
 895                         SignFlip:
 896                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
 897                                 decRes.Hi32 = ~decRes.Hi32;
 898                                 if (DECIMAL_LO64_GET(decRes) == 0)
 899                                         decRes.Hi32++;
 900                                 decRes.u.u.sign ^= DECIMAL_NEG;
 901                         }
 902
 903                 } else {
 904                         // Signs are the same - add
 905                         //
 906                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
 907                         decRes.Hi32 = left->Hi32 + right->Hi32;
 908
 909                         // Propagate carry
 910                         //
 911                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
 912                                 decRes.Hi32++;
 913                                 if (decRes.Hi32 <= left->Hi32)
 914                                         goto AlignedScale;
 915                         } else if (decRes.Hi32 < left->Hi32) {
 916                         AlignedScale:
 917                                 // The addition carried above 96 bits.  Divide the result by 10,
 918                                 // dropping the scale factor.
 919                                 //
 920                                 if (decRes.u.u.scale == 0)
 921                                         return MONO_DECIMAL_OVERFLOW;
 922                                 decRes.u.u.scale--;
 923
 924                                 tmp.u.Lo = decRes.Hi32;
 925                                 tmp.u.Hi = 1;
 926                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 927                                 decRes.Hi32 = tmp.u.Lo;
 928
 929                                 tmp.u.Lo = decRes.v.v.Mid32;
 930                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 931                                 decRes.v.v.Mid32 = tmp.u.Lo;
 932
 933                                 tmp.u.Lo = decRes.v.v.Lo32;
 934                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 935                                 decRes.v.v.Lo32 = tmp.u.Lo;
 936
 937                                 // See if we need to round up.
 938                                 //
 939                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
 940                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
 941                                                 if (DECIMAL_LO64_GET(decRes) == 0)
 942                                                         decRes.Hi32++;
 943                                 }
 944                         }
 945                 }
 946         }
 947         else {
 948                 // Scale factors are not equal.  Assume that a larger scale
 949                 // factor (more decimal places) is likely to mean that number
 950                 // is smaller.  Start by guessing that the right operand has
 951                 // the larger scale factor.  The result will have the larger
 952                 // scale factor.
 953                 //
 954                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
 955                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
 956                 scale = decRes.u.u.scale - left->u.u.scale;
 957
 958                 if (scale < 0) {
 959                         // Guessed scale factor wrong. Swap operands.
 960                         //
 961                         scale = -scale;
 962                         decRes.u.u.scale = left->u.u.scale;
 963                         decRes.u.u.sign ^= sign;
 964                         pdecTmp = right;
 965                         right = left;
 966                         left = pdecTmp;
 967                 }
 968
 969                 // *left will need to be multiplied by 10^scale so
 970                 // it will have the same scale as *right.  We could be
 971                 // extending it to up to 192 bits of precision.
 972                 //
 973                 if (scale <= POWER10_MAX) {
 974                         // Scaling won't make it larger than 4 uint32_ts
 975                         //
 976                         pwr = power10[scale];
 977                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
 978                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
 979                         tmp.int64 += decTmp.v.v.Mid32;
 980                         decTmp.v.v.Mid32 = tmp.u.Lo;
 981                         decTmp.Hi32 = tmp.u.Hi;
 982                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
 983                         tmp.int64 += decTmp.Hi32;
 984                         if (tmp.u.Hi == 0) {
 985                                 // Result fits in 96 bits.  Use standard aligned add.
 986                                 //
 987                                 decTmp.Hi32 = tmp.u.Lo;
 988                                 left = &decTmp;
 989                                 goto AlignedAdd;
 990                         }
 991                         num[0] = decTmp.v.v.Lo32;
 992                         num[1] = decTmp.v.v.Mid32;
 993                         num[2] = tmp.u.Lo;
 994                         num[3] = tmp.u.Hi;
 995                         hi_prod = 3;
 996                 }
 997                 else {
 998                         // Have to scale by a bunch.  Move the number to a buffer
 999                         // where it has room to grow as it's scaled.
1000                         //
1001                         num[0] = left->v.v.Lo32;
1002                         num[1] = left->v.v.Mid32;
1003                         num[2] = left->Hi32;
1004                         hi_prod = 2;
1005
1006                         // Scan for zeros in the upper words.
1007                         //
1008                         if (num[2] == 0) {
1009                                 hi_prod = 1;
1010                                 if (num[1] == 0) {
1011                                         hi_prod = 0;
1012                                         if (num[0] == 0) {
1013                                                 // Left arg is zero, return right.
1014                                                 //
1015                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
1016                                                 decRes.Hi32 = right->Hi32;
1017                                                 decRes.u.u.sign ^= sign;
1018                                                 goto RetDec;
1019                                         }
1020                                 }
1021                         }
1022
1023                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
1024                         // with index of highest non-zero uint32_t.
1025                         //
1026                         for (; scale > 0; scale -= POWER10_MAX) {
1027                                 if (scale > POWER10_MAX)
1028                                         pwr = ten_to_nine;
1029                                 else
1030                                         pwr = power10[scale];
1031
1032                                 tmp.u.Hi = 0;
1033                                 for (cur = 0; cur <= hi_prod; cur++) {
1034                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
1035                                         num[cur] = tmp.u.Lo;
1036                                 }
1037
1038                                 if (tmp.u.Hi != 0)
1039                                         // We're extending the result by another uint32_t.
1040                                         num[++hi_prod] = tmp.u.Hi;
1041                         }
1042                 }
1043
1044                 // Scaling complete, do the add.  Could be subtract if signs differ.
1045                 //
1046                 tmp.u.Lo = num[0];
1047                 tmp.u.Hi = num[1];
1048
1049                 if (sign) {
1050                         // Signs differ, subtract.
1051                         //
1052                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1053                         decRes.Hi32 = num[2] - right->Hi32;
1054
1055                         // Propagate carry
1056                         //
1057                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1058                                 decRes.Hi32--;
1059                                 if (decRes.Hi32 >= num[2])
1060                                         goto LongSub;
1061                         }
1062                         else if (decRes.Hi32 > num[2]) {
1063                         LongSub:
1064                                 // If num has more than 96 bits of precision, then we need to
1065                                 // carry the subtraction into the higher bits.  If it doesn't,
1066                                 // then we subtracted in the wrong order and have to flip the
1067                                 // sign of the result.
1068                                 //
1069                                 if (hi_prod <= 2)
1070                                         goto SignFlip;
1071
1072                                 cur = 3;
1073                                 while(num[cur++]-- == 0);
1074                                 if (num[hi_prod] == 0)
1075                                         hi_prod--;
1076                         }
1077                 }
1078                 else {
1079                         // Signs the same, add.
1080                         //
1081                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1082                         decRes.Hi32 = num[2] + right->Hi32;
1083
1084                         // Propagate carry
1085                         //
1086                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1087                                 decRes.Hi32++;
1088                                 if (decRes.Hi32 <= num[2])
1089                                         goto LongAdd;
1090                         }
1091                         else if (decRes.Hi32 < num[2]) {
1092                         LongAdd:
1093                                 // Had a carry above 96 bits.
1094                                 //
1095                                 cur = 3;
1096                                 do {
1097                                         if (hi_prod < cur) {
1098                                                 num[cur] = 1;
1099                                                 hi_prod = cur;
1100                                                 break;
1101                                         }
1102                                 }while (++num[cur++] == 0);
1103                         }
1104                 }
1105
1106                 if (hi_prod > 2) {
1107                         num[0] = decRes.v.v.Lo32;
1108                         num[1] = decRes.v.v.Mid32;
1109                         num[2] = decRes.Hi32;
1110                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1111                         if (decRes.u.u.scale == (uint8_t) -1)
1112                                 return MONO_DECIMAL_OVERFLOW;
1113
1114                         decRes.v.v.Lo32 = num[0];
1115                         decRes.v.v.Mid32 = num[1];
1116                         decRes.Hi32 = num[2];
1117                 }
1118         }
1119
1120 RetDec:
1121         COPYDEC(*result, decRes);
1122         // Odd, the Microsoft code does not set result->reserved to zero on this case
1123         return MONO_DECIMAL_OK;
1124 }
1125
1126 // Decimal addition
1127 static MonoDecimalStatus G_GNUC_UNUSED
1128 mono_decimal_add(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1129 {
1130     return DecAddSub (left, right, result, 0);
1131 }
1132
1133 // Decimal subtraction
1134 static MonoDecimalStatus G_GNUC_UNUSED
1135 mono_decimal_sub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1136 {
1137     return DecAddSub (left, right, result, DECIMAL_NEG);
1138 }
1139
1140 /**
1141  * IncreaseScale:
1142  *
1143  * Entry:
1144  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1145  *   pwr   - Scale factor to multiply by
1146  *
1147  * Purpose:
1148  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1149  *   the input.  The last 32 bits of the product are the return value.
1150  *
1151  * Exit:
1152  *   Returns highest 32 bits of product.
1153  *
1154  * Exceptions:
1155  *   None.
1156  *
1157  */
1158 static uint32_t
1159 IncreaseScale(uint32_t *num, uint32_t pwr)
1160 {
1161         SPLIT64   sdlTmp;
1162
1163         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1164         num[0] = sdlTmp.u.Lo;
1165         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1166         num[1] = sdlTmp.u.Lo;
1167         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1168         num[2] = sdlTmp.u.Lo;
1169         return sdlTmp.u.Hi;
1170 }
1171
1172 /**
1173  * Div96By64:
1174  *
1175  * Entry:
1176  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1177  *   sdlDen  - 64-bit divisor.
1178  *
1179  * Purpose:
1180  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1181  *   Divisor must be larger than upper 64 bits of dividend.
1182  *
1183  * Exit:
1184  *   Remainder overwrites lower 64-bits of dividend.
1185  *   Returns quotient.
1186  *
1187  * Exceptions:
1188  *   None.
1189  *
1190  */
1191 static uint32_t
1192 Div96By64(uint32_t *num, SPLIT64 den)
1193 {
1194         SPLIT64 quo;
1195         SPLIT64 sdlNum;
1196         SPLIT64 prod;
1197
1198         sdlNum.u.Lo = num[0];
1199
1200         if (num[2] >= den.u.Hi) {
1201                 // Divide would overflow.  Assume a quotient of 2^32, and set
1202                 // up remainder accordingly.  Then jump to loop which reduces
1203                 // the quotient.
1204                 //
1205                 sdlNum.u.Hi = num[1] - den.u.Lo;
1206                 quo.u.Lo = 0;
1207                 goto NegRem;
1208         }
1209
1210         // Hardware divide won't overflow
1211         //
1212         if (num[2] == 0 && num[1] < den.u.Hi)
1213                 // Result is zero.  Entire dividend is remainder.
1214                 //
1215                 return 0;
1216
1217         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1218         //
1219         quo.u.Lo = num[1];
1220         quo.u.Hi = num[2];
1221         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1222         sdlNum.u.Hi = quo.u.Hi; // remainder
1223
1224         // Compute full remainder, rem = dividend - (quo * divisor).
1225         //
1226         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1227         sdlNum.int64 -= prod.int64;
1228
1229         if (sdlNum.int64 > ~prod.int64) {
1230         NegRem:
1231                 // Remainder went negative.  Add divisor back in until it's positive,
1232                 // a max of 2 times.
1233                 //
1234                 do {
1235                         quo.u.Lo--;
1236                         sdlNum.int64 += den.int64;
1237                 }while (sdlNum.int64 >= den.int64);
1238         }
1239
1240         num[0] = sdlNum.u.Lo;
1241         num[1] = sdlNum.u.Hi;
1242         return quo.u.Lo;
1243 }
1244
1245 /***
1246 * Div128By96
1247 *
1248 * Entry:
1249 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1250 *   den - Pointer to 96-bit divisor.
1251 *
1252 * Purpose:
1253 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1254 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1255 *   assured in the initial call because the divisor is normalized
1256 *   and the dividend can't be.  In subsequent calls, the remainder
1257 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1258 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1259 *
1260 * Exit:
1261 *   Remainder overwrites lower 96-bits of dividend.
1262 *   Returns quotient.
1263 *
1264 * Exceptions:
1265 *   None.
1266 *
1267 ***********************************************************************/
1268
1269 static uint32_t
1270 Div128By96(uint32_t *num, uint32_t *den)
1271 {
1272         SPLIT64 sdlQuo;
1273         SPLIT64 sdlNum;
1274         SPLIT64 sdlProd1;
1275         SPLIT64 sdlProd2;
1276
1277         sdlNum.u.Lo = num[0];
1278         sdlNum.u.Hi = num[1];
1279
1280         if (num[3] == 0 && num[2] < den[2]){
1281                 // Result is zero.  Entire dividend is remainder.
1282                 //
1283                 return 0;
1284         }
1285
1286         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1287         //
1288         sdlQuo.u.Lo = num[2];
1289         sdlQuo.u.Hi = num[3];
1290         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1291
1292         // Compute full remainder, rem = dividend - (quo * divisor).
1293         //
1294         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1295         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1296         sdlProd2.int64 += sdlProd1.u.Hi;
1297         sdlProd1.u.Hi = sdlProd2.u.Lo;
1298
1299         sdlNum.int64 -= sdlProd1.int64;
1300         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1301
1302         // Propagate carries
1303         //
1304         if (sdlNum.int64 > ~sdlProd1.int64) {
1305                 num[2]--;
1306                 if (num[2] >= ~sdlProd2.u.Hi)
1307                         goto NegRem;
1308         } else if (num[2] > ~sdlProd2.u.Hi) {
1309         NegRem:
1310                 // Remainder went negative.  Add divisor back in until it's positive,
1311                 // a max of 2 times.
1312                 //
1313                 sdlProd1.u.Lo = den[0];
1314                 sdlProd1.u.Hi = den[1];
1315
1316                 for (;;) {
1317                         sdlQuo.u.Lo--;
1318                         sdlNum.int64 += sdlProd1.int64;
1319                         num[2] += den[2];
1320
1321                         if (sdlNum.int64 < sdlProd1.int64) {
1322                                 // Detected carry. Check for carry out of top
1323                                 // before adding it in.
1324                                 //
1325                                 if (num[2]++ < den[2])
1326                                         break;
1327                         }
1328                         if (num[2] < den[2])
1329                                 break; // detected carry
1330                 }
1331         }
1332
1333         num[0] = sdlNum.u.Lo;
1334         num[1] = sdlNum.u.Hi;
1335         return sdlQuo.u.Lo;
1336 }
1337
1338 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1339 // Returns FALSE if there is an overflow
1340 static gboolean
1341 Add32To96(uint32_t *num, uint32_t value)
1342 {
1343         num[0] += value;
1344         if (num[0] < value) {
1345                 if (++num[1] == 0) {
1346                         if (++num[2] == 0) {
1347                                 return FALSE;
1348                         }
1349                 }
1350         }
1351         return TRUE;
1352 }
1353
1354 static void
1355 OverflowUnscale (uint32_t *quo, gboolean remainder)
1356 {
1357         SPLIT64  sdlTmp;
1358
1359         // We have overflown, so load the high bit with a one.
1360         sdlTmp.u.Hi = 1u;
1361         sdlTmp.u.Lo = quo[2];
1362         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1363         quo[2] = sdlTmp.u.Lo;
1364         sdlTmp.u.Lo = quo[1];
1365         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1366         quo[1] = sdlTmp.u.Lo;
1367         sdlTmp.u.Lo = quo[0];
1368         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1369         quo[0] = sdlTmp.u.Lo;
1370         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1371         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1372                 Add32To96(quo, 1u);
1373         }
1374 }
1375
1376 // mono_decimal_divide - Decimal divide
1377 static MonoDecimalStatus G_GNUC_UNUSED
1378 mono_decimal_divide_result(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1379 {
1380         uint32_t   quo[3];
1381         uint32_t   quoSave[3];
1382         uint32_t   rem[4];
1383         uint32_t   divisor[3];
1384         uint32_t   pwr;
1385         uint32_t   utmp;
1386         uint32_t   utmp1;
1387         SPLIT64 sdlTmp;
1388         SPLIT64 sdlDivisor;
1389         int     scale;
1390         int     cur_scale;
1391
1392         scale = left->u.u.scale - right->u.u.scale;
1393         divisor[0] = right->v.v.Lo32;
1394         divisor[1] = right->v.v.Mid32;
1395         divisor[2] = right->Hi32;
1396
1397         if (divisor[1] == 0 && divisor[2] == 0) {
1398                 // Divisor is only 32 bits.  Easy divide.
1399                 //
1400                 if (divisor[0] == 0)
1401                         return MONO_DECIMAL_DIVBYZERO;
1402
1403                 quo[0] = left->v.v.Lo32;
1404                 quo[1] = left->v.v.Mid32;
1405                 quo[2] = left->Hi32;
1406                 rem[0] = Div96By32(quo, divisor[0]);
1407
1408                 for (;;) {
1409                         if (rem[0] == 0) {
1410                                 if (scale < 0) {
1411                                         cur_scale = min(9, -scale);
1412                                         goto HaveScale;
1413                                 }
1414                                 break;
1415                         }
1416
1417                         // We have computed a quotient based on the natural scale
1418                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1419                         // remainder, so now we should increase the scale if possible to
1420                         // include more quotient bits.
1421                         //
1422                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1423                         // computing more quotient bits as long as the remainder stays
1424                         // non-zero.  If scaling by that much would cause overflow, we'll
1425                         // drop out of the loop and scale by as much as we can.
1426                         //
1427                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1428                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1429                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1430                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
1431                         // is the largest value in quo[1] (when quo[2] == 4) that is
1432                         // assured not to overflow.
1433                         //
1434                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1435                         if (cur_scale == 0) {
1436                                 // No more scaling to be done, but remainder is non-zero.
1437                                 // Round quotient.
1438                                 //
1439                                 utmp = rem[0] << 1;
1440                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1441                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1442                                 RoundUp:
1443                                         if (++quo[0] == 0)
1444                                                 if (++quo[1] == 0)
1445                                                         quo[2]++;
1446                                 }
1447                                 break;
1448                         }
1449
1450                         if (cur_scale == -1)
1451                                 return MONO_DECIMAL_OVERFLOW;
1452
1453                 HaveScale:
1454                         pwr = power10[cur_scale];
1455                         scale += cur_scale;
1456
1457                         if (IncreaseScale(quo, pwr) != 0)
1458                                 return MONO_DECIMAL_OVERFLOW;
1459
1460                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1461                         rem[0] = sdlTmp.u.Hi;
1462
1463                         quo[0] += sdlTmp.u.Lo;
1464                         if (quo[0] < sdlTmp.u.Lo) {
1465                                 if (++quo[1] == 0)
1466                                         quo[2]++;
1467                         }
1468                 } // for (;;)
1469         }
1470         else {
1471                 // Divisor has bits set in the upper 64 bits.
1472                 //
1473                 // Divisor must be fully normalized (shifted so bit 31 of the most
1474                 // significant uint32_t is 1).  Locate the MSB so we know how much to
1475                 // normalize by.  The dividend will be shifted by the same amount so
1476                 // the quotient is not changed.
1477                 //
1478                 if (divisor[2] == 0)
1479                         utmp = divisor[1];
1480                 else
1481                         utmp = divisor[2];
1482
1483                 cur_scale = 0;
1484                 if (!(utmp & 0xFFFF0000)) {
1485                         cur_scale += 16;
1486                         utmp <<= 16;
1487                 }
1488                 if (!(utmp & 0xFF000000)) {
1489                         cur_scale += 8;
1490                         utmp <<= 8;
1491                 }
1492                 if (!(utmp & 0xF0000000)) {
1493                         cur_scale += 4;
1494                         utmp <<= 4;
1495                 }
1496                 if (!(utmp & 0xC0000000)) {
1497                         cur_scale += 2;
1498                         utmp <<= 2;
1499                 }
1500                 if (!(utmp & 0x80000000)) {
1501                         cur_scale++;
1502                         utmp <<= 1;
1503                 }
1504
1505                 // Shift both dividend and divisor left by cur_scale.
1506                 //
1507                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1508                 rem[0] = sdlTmp.u.Lo;
1509                 rem[1] = sdlTmp.u.Hi;
1510                 sdlTmp.u.Lo = left->v.v.Mid32;
1511                 sdlTmp.u.Hi = left->Hi32;
1512                 sdlTmp.int64 <<= cur_scale;
1513                 rem[2] = sdlTmp.u.Hi;
1514                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1515
1516                 sdlDivisor.u.Lo = divisor[0];
1517                 sdlDivisor.u.Hi = divisor[1];
1518                 sdlDivisor.int64 <<= cur_scale;
1519
1520                 if (divisor[2] == 0) {
1521                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1522                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1523                         //
1524                         sdlTmp.u.Lo = rem[2];
1525                         sdlTmp.u.Hi = rem[3];
1526
1527                         quo[2] = 0;
1528                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1529                         quo[0] = Div96By64(rem, sdlDivisor);
1530
1531                         for (;;) {
1532                                 if ((rem[0] | rem[1]) == 0) {
1533                                         if (scale < 0) {
1534                                                 cur_scale = min(9, -scale);
1535                                                 goto HaveScale64;
1536                                         }
1537                                         break;
1538                                 }
1539
1540                                 // Remainder is non-zero.  Scale up quotient and remainder by
1541                                 // powers of 10 so we can compute more significant bits.
1542                                 //
1543                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1544                                 if (cur_scale == 0) {
1545                                         // No more scaling to be done, but remainder is non-zero.
1546                                         // Round quotient.
1547                                         //
1548                                         sdlTmp.u.Lo = rem[0];
1549                                         sdlTmp.u.Hi = rem[1];
1550                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1551                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1552                                                 goto RoundUp;
1553                                         break;
1554                                 }
1555
1556                                 if (cur_scale == -1)
1557                                         return MONO_DECIMAL_OVERFLOW;
1558
1559                         HaveScale64:
1560                                 pwr = power10[cur_scale];
1561                                 scale += cur_scale;
1562
1563                                 if (IncreaseScale(quo, pwr) != 0)
1564                                         return MONO_DECIMAL_OVERFLOW;
1565
1566                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1567                                 IncreaseScale(rem, pwr);
1568                                 utmp = Div96By64(rem, sdlDivisor);
1569                                 quo[0] += utmp;
1570                                 if (quo[0] < utmp)
1571                                         if (++quo[1] == 0)
1572                                                 quo[2]++;
1573
1574                         } // for (;;)
1575                 }
1576                 else {
1577                         // Have a 96-bit divisor in divisor[].
1578                         //
1579                         // Start by finishing the shift left by cur_scale.
1580                         //
1581                         sdlTmp.u.Lo = divisor[1];
1582                         sdlTmp.u.Hi = divisor[2];
1583                         sdlTmp.int64 <<= cur_scale;
1584                         divisor[0] = sdlDivisor.u.Lo;
1585                         divisor[1] = sdlDivisor.u.Hi;
1586                         divisor[2] = sdlTmp.u.Hi;
1587
1588                         // The remainder (currently 96 bits spread over 4 uint32_ts)
1589                         // will be < divisor.
1590                         //
1591                         quo[2] = 0;
1592                         quo[1] = 0;
1593                         quo[0] = Div128By96(rem, divisor);
1594
1595                         for (;;) {
1596                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1597                                         if (scale < 0) {
1598                                                 cur_scale = min(9, -scale);
1599                                                 goto HaveScale96;
1600                                         }
1601                                         break;
1602                                 }
1603
1604                                 // Remainder is non-zero.  Scale up quotient and remainder by
1605                                 // powers of 10 so we can compute more significant bits.
1606                                 //
1607                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1608                                 if (cur_scale == 0) {
1609                                         // No more scaling to be done, but remainder is non-zero.
1610                                         // Round quotient.
1611                                         //
1612                                         if (rem[2] >= 0x80000000)
1613                                                 goto RoundUp;
1614
1615                                         utmp = rem[0] > 0x80000000;
1616                                         utmp1 = rem[1] > 0x80000000;
1617                                         rem[0] <<= 1;
1618                                         rem[1] = (rem[1] << 1) + utmp;
1619                                         rem[2] = (rem[2] << 1) + utmp1;
1620
1621                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1622                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1623                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1624                                               (quo[0] & 1))))
1625                                                 goto RoundUp;
1626                                         break;
1627                                 }
1628
1629                                 if (cur_scale == -1)
1630                                         return MONO_DECIMAL_OVERFLOW;
1631
1632                         HaveScale96:
1633                                 pwr = power10[cur_scale];
1634                                 scale += cur_scale;
1635
1636                                 if (IncreaseScale(quo, pwr) != 0)
1637                                         return MONO_DECIMAL_OVERFLOW;
1638
1639                                 rem[3] = IncreaseScale(rem, pwr);
1640                                 utmp = Div128By96(rem, divisor);
1641                                 quo[0] += utmp;
1642                                 if (quo[0] < utmp)
1643                                         if (++quo[1] == 0)
1644                                                 quo[2]++;
1645
1646                         } // for (;;)
1647                 }
1648         }
1649
1650         // No more remainder.  Try extracting any extra powers of 10 we may have
1651         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1652         // If a division by one of these powers returns a zero remainder, then
1653         // we keep the quotient.  If the remainder is not zero, then we restore
1654         // the previous value.
1655         //
1656         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1657         // we can extract.  We use this as a quick test on whether to try a
1658         // given power.
1659         //
1660         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1661                 quoSave[0] = quo[0];
1662                 quoSave[1] = quo[1];
1663                 quoSave[2] = quo[2];
1664
1665                 if (Div96By32(quoSave, 100000000) == 0) {
1666                         quo[0] = quoSave[0];
1667                         quo[1] = quoSave[1];
1668                         quo[2] = quoSave[2];
1669                         scale -= 8;
1670                 }
1671                 else
1672                         break;
1673         }
1674
1675         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1676                 quoSave[0] = quo[0];
1677                 quoSave[1] = quo[1];
1678                 quoSave[2] = quo[2];
1679
1680                 if (Div96By32(quoSave, 10000) == 0) {
1681                         quo[0] = quoSave[0];
1682                         quo[1] = quoSave[1];
1683                         quo[2] = quoSave[2];
1684                         scale -= 4;
1685                 }
1686         }
1687
1688         if ((quo[0] & 3) == 0 && scale >= 2) {
1689                 quoSave[0] = quo[0];
1690                 quoSave[1] = quo[1];
1691                 quoSave[2] = quo[2];
1692
1693                 if (Div96By32(quoSave, 100) == 0) {
1694                         quo[0] = quoSave[0];
1695                         quo[1] = quoSave[1];
1696                         quo[2] = quoSave[2];
1697                         scale -= 2;
1698                 }
1699         }
1700
1701         if ((quo[0] & 1) == 0 && scale >= 1) {
1702                 quoSave[0] = quo[0];
1703                 quoSave[1] = quo[1];
1704                 quoSave[2] = quo[2];
1705
1706                 if (Div96By32(quoSave, 10) == 0) {
1707                         quo[0] = quoSave[0];
1708                         quo[1] = quoSave[1];
1709                         quo[2] = quoSave[2];
1710                         scale -= 1;
1711                 }
1712         }
1713
1714         result->Hi32 = quo[2];
1715         result->v.v.Mid32 = quo[1];
1716         result->v.v.Lo32 = quo[0];
1717         result->u.u.scale = scale;
1718         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1719         return MONO_DECIMAL_OK;
1720 }
1721
1722 // mono_decimal_absolute - Decimal Absolute Value
1723 static void G_GNUC_UNUSED
1724 mono_decimal_absolute (MonoDecimal *pdecOprd, MonoDecimal *result)
1725 {
1726         COPYDEC(*result, *pdecOprd);
1727         result->u.u.sign &= ~DECIMAL_NEG;
1728         // Microsoft does not set reserved here
1729 }
1730
1731 // mono_decimal_fix - Decimal Fix (chop to integer)
1732 static void
1733 mono_decimal_fix (MonoDecimal *pdecOprd, MonoDecimal *result)
1734 {
1735         DecFixInt(result, pdecOprd);
1736 }
1737
1738 // mono_decimal_round_to_int - Decimal Int (round down to integer)
1739 static void
1740 mono_decimal_round_to_int (MonoDecimal *pdecOprd, MonoDecimal *result)
1741 {
1742         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1743                 // We have chopped off a non-zero amount from a negative value.  Since
1744                 // we round toward -infinity, we must increase the integer result by
1745                 // 1 to make it more negative.  This will never overflow because
1746                 // in order to have a remainder, we must have had a non-zero scale factor.
1747                 // Our scale factor is back to zero now.
1748                 //
1749                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1750                 if (DECIMAL_LO64_GET(*result) == 0)
1751                         result->Hi32++;
1752         }
1753 }
1754
1755 // mono_decimal_negate - Decimal Negate
1756 static void G_GNUC_UNUSED
1757 mono_decimal_negate (MonoDecimal *pdecOprd, MonoDecimal *result)
1758 {
1759         COPYDEC(*result, *pdecOprd);
1760         // Microsoft does not set result->reserved to zero on this case.
1761         result->u.u.sign ^= DECIMAL_NEG;
1762 }
1763
1764 //
1765 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1766 //
1767 static MonoDecimalStatus
1768 mono_decimal_round_result(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1769 {
1770         uint32_t num[3];
1771         uint32_t rem;
1772         uint32_t sticky;
1773         uint32_t pwr;
1774         int scale;
1775
1776         if (cDecimals < 0)
1777                 return MONO_DECIMAL_INVALID_ARGUMENT;
1778
1779         scale = input->u.u.scale - cDecimals;
1780         if (scale > 0) {
1781                 num[0] = input->v.v.Lo32;
1782                 num[1] = input->v.v.Mid32;
1783                 num[2] = input->Hi32;
1784                 result->u.u.sign = input->u.u.sign;
1785                 rem = sticky = 0;
1786
1787                 do {
1788                         sticky |= rem;
1789                         if (scale > POWER10_MAX)
1790                                 pwr = ten_to_nine;
1791                         else
1792                                 pwr = power10[scale];
1793
1794                         rem = Div96By32(num, pwr);
1795                         scale -= 9;
1796                 }while (scale > 0);
1797
1798                 // Now round.  rem has last remainder, sticky has sticky bits.
1799                 // To do IEEE rounding, we add LSB of result to sticky bits so
1800                 // either causes round up if remainder * 2 == last divisor.
1801                 //
1802                 sticky |= num[0] & 1;
1803                 rem = (rem << 1) + (sticky != 0);
1804                 if (pwr < rem &&
1805                     ++num[0] == 0 &&
1806                     ++num[1] == 0
1807                         )
1808                         ++num[2];
1809
1810                 result->v.v.Lo32 = num[0];
1811                 result->v.v.Mid32 = num[1];
1812                 result->Hi32 = num[2];
1813                 result->u.u.scale = cDecimals;
1814                 return MONO_DECIMAL_OK;
1815         }
1816
1817         COPYDEC(*result, *input);
1818         // Odd, the Microsoft source does not set the result->reserved to zero here.
1819         return MONO_DECIMAL_OK;
1820 }
1821
1822 //
1823 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1824 static MonoDecimalStatus
1825 mono_decimal_from_float (float input, MonoDecimal* result)
1826 {
1827         int         exp;    // number of bits to left of binary point
1828         int         power;
1829         uint32_t       mant;
1830         double      dbl;
1831         SPLIT64     sdlLo;
1832         SPLIT64     sdlHi;
1833         int         lmax, cur;  // temps used during scale reduction
1834
1835         // The most we can scale by is 10^28, which is just slightly more
1836         // than 2^93.  So a float with an exponent of -94 could just
1837         // barely reach 0.5, but smaller exponents will always round to zero.
1838         //
1839         if ((exp = ((SingleStructure *)&input)->exp - SNGBIAS) < -94 ) {
1840                 DECIMAL_SETZERO(*result);
1841                 return MONO_DECIMAL_OK;
1842         }
1843
1844         if (exp > 96)
1845                 return MONO_DECIMAL_OVERFLOW;
1846
1847         // Round the input to a 7-digit integer.  The R4 format has
1848         // only 7 digits of precision, and we want to keep garbage digits
1849         // out of the Decimal were making.
1850         //
1851         // Calculate max power of 10 input value could have by multiplying
1852         // the exponent by log10(2).  Using scaled integer multiplcation,
1853         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1854         //
1855         dbl = fabs(input);
1856         power = 6 - ((exp * 19728) >> 16);
1857
1858         if (power >= 0) {
1859                 // We have less than 7 digits, scale input up.
1860                 //
1861                 if (power > DECMAX)
1862                         power = DECMAX;
1863
1864                 dbl = dbl * double_power10[power];
1865         } else {
1866                 if (power != -1 || dbl >= 1E7)
1867                         dbl = dbl / fnDblPower10(-power);
1868                 else
1869                         power = 0; // didn't scale it
1870         }
1871
1872         g_assert (dbl < 1E7);
1873         if (dbl < 1E6 && power < DECMAX) {
1874                 dbl *= 10;
1875                 power++;
1876                 g_assert(dbl >= 1E6);
1877         }
1878
1879         // Round to integer
1880         //
1881         mant = (int32_t)dbl;
1882         dbl -= (double)mant;  // difference between input & integer
1883         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1884                 mant++;
1885
1886         if (mant == 0) {
1887                 DECIMAL_SETZERO(*result);
1888                 return MONO_DECIMAL_OK;
1889         }
1890
1891         if (power < 0) {
1892                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1893                 //
1894                 power = -power;
1895                 if (power < 10) {
1896                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1897
1898                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1899                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1900                         DECIMAL_HI32(*result) = 0;
1901                 } else {
1902                         // Have a big power of 10.
1903                         //
1904                         if (power > 18) {
1905                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1906                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1907
1908                                 if (sdlHi.u.Hi != 0)
1909                                         return MONO_DECIMAL_OVERFLOW;
1910                         }
1911                         else {
1912                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1913                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1914                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1915                                 sdlHi.int64 += sdlLo.u.Hi;
1916                                 sdlLo.u.Hi = sdlHi.u.Lo;
1917                                 sdlHi.u.Lo = sdlHi.u.Hi;
1918                         }
1919                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1920                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1921                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1922                 }
1923                 DECIMAL_SCALE(*result) = 0;
1924         } else {
1925                 // Factor out powers of 10 to reduce the scale, if possible.
1926                 // The maximum number we could factor out would be 6.  This
1927                 // comes from the fact we have a 7-digit number, and the
1928                 // MSD must be non-zero -- but the lower 6 digits could be
1929                 // zero.  Note also the scale factor is never negative, so
1930                 // we can't scale by any more than the power we used to
1931                 // get the integer.
1932                 //
1933                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1934                 //
1935                 lmax = min(power, 6);
1936
1937                 // lmax is the largest power of 10 to try, lmax <= 6.
1938                 // We'll try powers 4, 2, and 1 unless they're too big.
1939                 //
1940                 for (cur = 4; cur > 0; cur >>= 1)
1941                 {
1942                         if (cur > lmax)
1943                                 continue;
1944
1945                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1946
1947                         if (sdlLo.u.Hi == 0) {
1948                                 mant = sdlLo.u.Lo;
1949                                 power -= cur;
1950                                 lmax -= cur;
1951                         }
1952                 }
1953                 DECIMAL_LO32(*result) = mant;
1954                 DECIMAL_MID32(*result) = 0;
1955                 DECIMAL_HI32(*result) = 0;
1956                 DECIMAL_SCALE(*result) = power;
1957         }
1958
1959         DECIMAL_SIGN(*result) = (char)((SingleStructure *)&input)->sign << 7;
1960         return MONO_DECIMAL_OK;
1961 }
1962
1963 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1964 static MonoDecimalStatus
1965 mono_decimal_from_double (double input, MonoDecimal *result)
1966 {
1967         int         exp;    // number of bits to left of binary point
1968         int         power;  // power-of-10 scale factor
1969         SPLIT64     sdlMant;
1970         SPLIT64     sdlLo;
1971         double      dbl;
1972         int         lmax, cur;  // temps used during scale reduction
1973         uint32_t       pwr_cur;
1974         uint32_t       quo;
1975
1976
1977         // The most we can scale by is 10^28, which is just slightly more
1978         // than 2^93.  So a float with an exponent of -94 could just
1979         // barely reach 0.5, but smaller exponents will always round to zero.
1980         //
1981         if ((exp = ((DoubleStructure *)&input)->u.exp - DBLBIAS) < -94) {
1982                 DECIMAL_SETZERO(*result);
1983                 return MONO_DECIMAL_OK;
1984         }
1985
1986         if (exp > 96)
1987                 return MONO_DECIMAL_OVERFLOW;
1988
1989         // Round the input to a 15-digit integer.  The R8 format has
1990         // only 15 digits of precision, and we want to keep garbage digits
1991         // out of the Decimal were making.
1992         //
1993         // Calculate max power of 10 input value could have by multiplying
1994         // the exponent by log10(2).  Using scaled integer multiplcation,
1995         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1996         //
1997         dbl = fabs(input);
1998         power = 14 - ((exp * 19728) >> 16);
1999
2000         if (power >= 0) {
2001                 // We have less than 15 digits, scale input up.
2002                 //
2003                 if (power > DECMAX)
2004                         power = DECMAX;
2005
2006                 dbl = dbl * double_power10[power];
2007         } else {
2008                 if (power != -1 || dbl >= 1E15)
2009                         dbl = dbl / fnDblPower10(-power);
2010                 else
2011                         power = 0; // didn't scale it
2012         }
2013
2014         g_assert (dbl < 1E15);
2015         if (dbl < 1E14 && power < DECMAX) {
2016                 dbl *= 10;
2017                 power++;
2018                 g_assert(dbl >= 1E14);
2019         }
2020
2021         // Round to int64
2022         //
2023         sdlMant.int64 = (int64_t)dbl;
2024         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
2025         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
2026                 sdlMant.int64++;
2027
2028         if (sdlMant.int64 == 0) {
2029                 DECIMAL_SETZERO(*result);
2030                 return MONO_DECIMAL_OK;
2031         }
2032
2033         if (power < 0) {
2034                 // Add -power factors of 10, -power <= (29 - 15) = 14.
2035                 //
2036                 power = -power;
2037                 if (power < 10) {
2038                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2039                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2040                         sdlMant.int64 += sdlLo.u.Hi;
2041                         sdlLo.u.Hi = sdlMant.u.Lo;
2042                         sdlMant.u.Lo = sdlMant.u.Hi;
2043                 }
2044                 else {
2045                         // Have a big power of 10.
2046                         //
2047                         g_assert(power <= 14);
2048                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2049
2050                         if (sdlMant.u.Hi != 0)
2051                                 return MONO_DECIMAL_OVERFLOW;
2052                 }
2053                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2054                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2055                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2056                 DECIMAL_SCALE(*result) = 0;
2057         }
2058         else {
2059                 // Factor out powers of 10 to reduce the scale, if possible.
2060                 // The maximum number we could factor out would be 14.  This
2061                 // comes from the fact we have a 15-digit number, and the
2062                 // MSD must be non-zero -- but the lower 14 digits could be
2063                 // zero.  Note also the scale factor is never negative, so
2064                 // we can't scale by any more than the power we used to
2065                 // get the integer.
2066                 //
2067                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2068                 //
2069                 lmax = min(power, 14);
2070
2071                 // lmax is the largest power of 10 to try, lmax <= 14.
2072                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2073                 //
2074                 for (cur = 8; cur > 0; cur >>= 1)
2075                 {
2076                         if (cur > lmax)
2077                                 continue;
2078
2079                         pwr_cur = (uint32_t)long_power10[cur];
2080
2081                         if (sdlMant.u.Hi >= pwr_cur) {
2082                                 // Overflow if we try to divide in one step.
2083                                 //
2084                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2085                                 quo = sdlLo.u.Lo;
2086                                 sdlLo.u.Lo = sdlMant.u.Lo;
2087                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2088                         }
2089                         else {
2090                                 quo = 0;
2091                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2092                         }
2093
2094                         if (sdlLo.u.Hi == 0) {
2095                                 sdlMant.u.Hi = quo;
2096                                 sdlMant.u.Lo = sdlLo.u.Lo;
2097                                 power -= cur;
2098                                 lmax -= cur;
2099                         }
2100                 }
2101
2102                 DECIMAL_HI32(*result) = 0;
2103                 DECIMAL_SCALE(*result) = power;
2104                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2105                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2106         }
2107
2108         DECIMAL_SIGN(*result) = (char)((DoubleStructure *)&input)->u.sign << 7;
2109         return MONO_DECIMAL_OK;
2110 }
2111
2112 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2113 static MonoDecimalStatus
2114 mono_decimal_to_double_result(MonoDecimal *input, double *result)
2115 {
2116         SPLIT64  tmp;
2117         double   dbl;
2118
2119         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2120                 return MONO_DECIMAL_INVALID_ARGUMENT;
2121
2122         tmp.u.Lo = DECIMAL_LO32(*input);
2123         tmp.u.Hi = DECIMAL_MID32(*input);
2124
2125         if ((int32_t)DECIMAL_MID32(*input) < 0)
2126                 dbl = (ds2to64.dbl + (double)(int64_t)tmp.int64 +
2127                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2128         else
2129                 dbl = ((double)(int64_t)tmp.int64 +
2130                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input));
2131
2132         if (DECIMAL_SIGN(*input))
2133                 dbl = -dbl;
2134
2135         *result = dbl;
2136         return MONO_DECIMAL_OK;
2137 }
2138
2139 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2140 static MonoDecimalStatus
2141 mono_decimal_to_float_result(MonoDecimal *input, float *result)
2142 {
2143         double   dbl;
2144
2145         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2146                 return MONO_DECIMAL_INVALID_ARGUMENT;
2147
2148         // Can't overflow; no errors possible.
2149         //
2150         mono_decimal_to_double_result(input, &dbl);
2151         *result = (float)dbl;
2152         return MONO_DECIMAL_OK;
2153 }
2154
2155 static void
2156 DecShiftLeft(MonoDecimal* value)
2157 {
2158         unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2159     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2160     g_assert(value != NULL);
2161
2162     DECIMAL_LO32(*value) <<= 1;
2163     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2164     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2165 }
2166
2167 static int
2168 D32AddCarry(uint32_t* value, uint32_t i)
2169 {
2170     uint32_t v = *value;
2171     uint32_t sum = v + i;
2172     *value = sum;
2173     return sum < v || sum < i? 1: 0;
2174 }
2175
2176 static void
2177 DecAdd(MonoDecimal *value, MonoDecimal* d)
2178 {
2179         g_assert(value != NULL && d != NULL);
2180
2181         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2182                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2183                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2184                 }
2185         }
2186         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2187                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2188         }
2189         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2190 }
2191
2192 static void
2193 DecMul10(MonoDecimal* value)
2194 {
2195         MonoDecimal d = *value;
2196         g_assert (value != NULL);
2197
2198         DecShiftLeft(value);
2199         DecShiftLeft(value);
2200         DecAdd(value, &d);
2201         DecShiftLeft(value);
2202 }
2203
2204 static void
2205 DecAddInt32(MonoDecimal* value, unsigned int i)
2206 {
2207         g_assert(value != NULL);
2208
2209         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2210                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2211                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2212                 }
2213         }
2214 }
2215
2216 MonoDecimalCompareResult
2217 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2218 {
2219         uint32_t   left_sign;
2220         uint32_t   right_sign;
2221         MonoDecimal result;
2222
2223         result.Hi32 = 0;        // Just to shut up the compiler
2224
2225         // First check signs and whether either are zero.  If both are
2226         // non-zero and of the same sign, just use subtraction to compare.
2227         //
2228         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2229         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2230         if (left_sign != 0)
2231                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2232
2233         if (right_sign != 0)
2234                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2235
2236         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2237         // operand is +, 0, or -.
2238         //
2239         if (left_sign == right_sign) {
2240                 if (left_sign == 0)    // both are zero
2241                         return MONO_DECIMAL_CMP_EQ; // return equal
2242
2243                 DecAddSub(left, right, &result, DECIMAL_NEG);
2244                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2245                         return MONO_DECIMAL_CMP_EQ;
2246                 if (result.u.u.sign & DECIMAL_NEG)
2247                         return MONO_DECIMAL_CMP_LT;
2248                 return MONO_DECIMAL_CMP_GT;
2249         }
2250
2251         //
2252         // Signs are different.  Use signed byte comparison
2253         //
2254         if ((signed char)left_sign > (signed char)right_sign)
2255                 return MONO_DECIMAL_CMP_GT;
2256         return MONO_DECIMAL_CMP_LT;
2257 }
2258
2259 void
2260 mono_decimal_init_single (MonoDecimal *_this, float value)
2261 {
2262         if (mono_decimal_from_float (value, _this) == MONO_DECIMAL_OVERFLOW) {
2263                 mono_set_pending_exception (mono_get_exception_overflow ());
2264                 return;
2265         }
2266         _this->reserved = 0;
2267 }
2268
2269 void
2270 mono_decimal_init_double (MonoDecimal *_this, double value)
2271 {
2272         if (mono_decimal_from_double (value, _this) == MONO_DECIMAL_OVERFLOW) {
2273                 mono_set_pending_exception (mono_get_exception_overflow ());
2274                 return;
2275         }
2276         _this->reserved = 0;
2277 }
2278
2279 void
2280 mono_decimal_floor (MonoDecimal *d)
2281 {
2282         MonoDecimal decRes;
2283
2284         mono_decimal_round_to_int(d, &decRes);
2285
2286         // copy decRes into d
2287         COPYDEC(*d, decRes);
2288         d->reserved = 0;
2289         FC_GC_POLL ();
2290 }
2291
2292 int32_t
2293 mono_decimal_get_hash_code (MonoDecimal *d)
2294 {
2295         double dbl;
2296
2297         if (mono_decimal_to_double_result(d, &dbl) != MONO_DECIMAL_OK)
2298                 return 0;
2299
2300         if (dbl == 0.0) {
2301                 // Ensure 0 and -0 have the same hash code
2302                 return 0;
2303         }
2304         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2305         //
2306         // For example these two numerically equal decimals with different internal representations produce
2307         // slightly different results when converted to double:
2308         //
2309         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2310         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2311         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2312         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2313         //
2314         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2315
2316 }
2317
2318 void
2319 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2320 {
2321         MonoDecimal decRes;
2322
2323         MonoDecimalStatus status = mono_decimal_multiply_result(d1, d2, &decRes);
2324         if (status != MONO_DECIMAL_OK) {
2325                 mono_set_pending_exception (mono_get_exception_overflow ());
2326                 return;
2327         }
2328
2329         COPYDEC(*d1, decRes);
2330         d1->reserved = 0;
2331
2332         FC_GC_POLL ();
2333 }
2334
2335 void
2336 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2337 {
2338         MonoDecimal decRes;
2339
2340         // GC is only triggered for throwing, no need to protect result
2341         if (decimals < 0 || decimals > 28) {
2342                 mono_set_pending_exception (mono_get_exception_argument_out_of_range ("d"));
2343                 return;
2344         }
2345
2346         mono_decimal_round_result(d, decimals, &decRes);
2347
2348         // copy decRes into d
2349         COPYDEC(*d, decRes);
2350         d->reserved = 0;
2351
2352         FC_GC_POLL();
2353 }
2354
2355 void
2356 mono_decimal_tocurrency (MonoDecimal *decimal)
2357 {
2358         // TODO
2359 }
2360
2361 double
2362 mono_decimal_to_double (MonoDecimal d)
2363 {
2364         double result = 0.0;
2365         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2366         mono_decimal_to_double_result(&d, &result);
2367         return result;
2368 }
2369
2370 int32_t
2371 mono_decimal_to_int32 (MonoDecimal d)
2372 {
2373         MonoDecimal result;
2374
2375         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2376         mono_decimal_round_result(&d, 0, &result);
2377
2378         if (DECIMAL_SCALE(result) != 0) {
2379                 d = result;
2380                 mono_decimal_fix (&d, &result);
2381         }
2382
2383         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2384                 int32_t i = DECIMAL_LO32(result);
2385                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2386                         if (i >= 0)
2387                                 return i;
2388                 } else {
2389                         i = -i;
2390                         if (i <= 0)
2391                                 return i;
2392                 }
2393         }
2394
2395         mono_set_pending_exception (mono_get_exception_overflow ());
2396         return 0;
2397 }
2398
2399 float
2400 mono_decimal_to_float (MonoDecimal d)
2401 {
2402         float result = 0.0f;
2403         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2404         mono_decimal_to_float_result(&d, &result);
2405         return result;
2406 }
2407
2408 void
2409 mono_decimal_truncate (MonoDecimal *d)
2410 {
2411         MonoDecimal decRes;
2412
2413         mono_decimal_fix(d, &decRes);
2414
2415         // copy decRes into d
2416         COPYDEC(*d, decRes);
2417         d->reserved = 0;
2418         FC_GC_POLL();
2419 }
2420
2421 void
2422 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2423 {
2424         MonoDecimal result, decTmp;
2425         MonoDecimal *pdecTmp, *leftOriginal;
2426         uint32_t    num[6], pwr;
2427         int         scale, hi_prod, cur;
2428         SPLIT64     sdlTmp;
2429
2430         g_assert(sign == 0 || sign == DECIMAL_NEG);
2431
2432         leftOriginal = left;
2433
2434         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2435
2436         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2437                 // Scale factors are equal, no alignment necessary.
2438                 //
2439                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2440
2441         AlignedAdd:
2442                 if (sign) {
2443                         // Signs differ - subtract
2444                         //
2445                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2446                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2447
2448                         // Propagate carry
2449                         //
2450                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2451                                 DECIMAL_HI32(result)--;
2452                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2453                                         goto SignFlip;
2454                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2455                                 // Got negative result.  Flip its sign.
2456                                 //
2457                         SignFlip:
2458                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2459                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2460                                 if (DECIMAL_LO64_GET(result) == 0)
2461                                         DECIMAL_HI32(result)++;
2462                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2463                         }
2464
2465                 } else {
2466                         // Signs are the same - add
2467                         //
2468                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2469                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2470
2471                         // Propagate carry
2472                         //
2473                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2474                                 DECIMAL_HI32(result)++;
2475                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2476                                         goto AlignedScale;
2477                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2478                         AlignedScale:
2479                                 // The addition carried above 96 bits.  Divide the result by 10,
2480                                 // dropping the scale factor.
2481                                 //
2482                                 if (DECIMAL_SCALE(result) == 0) {
2483                                         mono_set_pending_exception (mono_get_exception_overflow ());
2484                                         return;
2485                                 }
2486                                 DECIMAL_SCALE(result)--;
2487
2488                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2489                                 sdlTmp.u.Hi = 1;
2490                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2491                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2492
2493                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2494                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2495                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2496
2497                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2498                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2499                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2500
2501                                 // See if we need to round up.
2502                                 //
2503                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2504                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2505                                         if (DECIMAL_LO64_GET(result) == 0)
2506                                                 DECIMAL_HI32(result)++;
2507                                 }
2508                         }
2509                 }
2510         } else {
2511                 // Scale factors are not equal.  Assume that a larger scale
2512                 // factor (more decimal places) is likely to mean that number
2513                 // is smaller.  Start by guessing that the right operand has
2514                 // the larger scale factor.  The result will have the larger
2515                 // scale factor.
2516                 //
2517                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2518                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2519                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2520
2521                 if (scale < 0) {
2522                         // Guessed scale factor wrong. Swap operands.
2523                         //
2524                         scale = -scale;
2525                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2526                         DECIMAL_SIGN(result) ^= sign;
2527                         pdecTmp = right;
2528                         right = left;
2529                         left = pdecTmp;
2530                 }
2531
2532                 // *left will need to be multiplied by 10^scale so
2533                 // it will have the same scale as *right.  We could be
2534                 // extending it to up to 192 bits of precision.
2535                 //
2536                 if (scale <= POWER10_MAX) {
2537                         // Scaling won't make it larger than 4 uint32_ts
2538                         //
2539                         pwr = power10[scale];
2540                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2541                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2542                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2543                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2544                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2545                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2546                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2547                         if (sdlTmp.u.Hi == 0) {
2548                                 // Result fits in 96 bits.  Use standard aligned add.
2549                                 //
2550                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2551                                 left = &decTmp;
2552                                 goto AlignedAdd;
2553                         }
2554                         num[0] = DECIMAL_LO32(decTmp);
2555                         num[1] = DECIMAL_MID32(decTmp);
2556                         num[2] = sdlTmp.u.Lo;
2557                         num[3] = sdlTmp.u.Hi;
2558                         hi_prod = 3;
2559                 } else {
2560                         // Have to scale by a bunch.  Move the number to a buffer
2561                         // where it has room to grow as it's scaled.
2562                         //
2563                         num[0] = DECIMAL_LO32(*left);
2564                         num[1] = DECIMAL_MID32(*left);
2565                         num[2] = DECIMAL_HI32(*left);
2566                         hi_prod = 2;
2567
2568                         // Scan for zeros in the upper words.
2569                         //
2570                         if (num[2] == 0) {
2571                                 hi_prod = 1;
2572                                 if (num[1] == 0) {
2573                                         hi_prod = 0;
2574                                         if (num[0] == 0) {
2575                                                 // Left arg is zero, return right.
2576                                                 //
2577                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2578                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2579                                                 DECIMAL_SIGN(result) ^= sign;
2580                                                 goto RetDec;
2581                                         }
2582                                 }
2583                         }
2584
2585                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2586                         // with index of highest non-zero uint32_t.
2587                         //
2588                         for (; scale > 0; scale -= POWER10_MAX) {
2589                                 if (scale > POWER10_MAX)
2590                                         pwr = ten_to_nine;
2591                                 else
2592                                         pwr = power10[scale];
2593
2594                                 sdlTmp.u.Hi = 0;
2595                                 for (cur = 0; cur <= hi_prod; cur++) {
2596                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2597                                         num[cur] = sdlTmp.u.Lo;
2598                                 }
2599
2600                                 if (sdlTmp.u.Hi != 0)
2601                                         // We're extending the result by another uint32_t.
2602                                         num[++hi_prod] = sdlTmp.u.Hi;
2603                         }
2604                 }
2605
2606                 // Scaling complete, do the add.  Could be subtract if signs differ.
2607                 //
2608                 sdlTmp.u.Lo = num[0];
2609                 sdlTmp.u.Hi = num[1];
2610
2611                 if (sign) {
2612                         // Signs differ, subtract.
2613                         //
2614                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2615                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2616
2617                         // Propagate carry
2618                         //
2619                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2620                                 DECIMAL_HI32(result)--;
2621                                 if (DECIMAL_HI32(result) >= num[2])
2622                                         goto LongSub;
2623                         } else if (DECIMAL_HI32(result) > num[2]) {
2624                         LongSub:
2625                                 // If num has more than 96 bits of precision, then we need to
2626                                 // carry the subtraction into the higher bits.  If it doesn't,
2627                                 // then we subtracted in the wrong order and have to flip the
2628                                 // sign of the result.
2629                                 //
2630                                 if (hi_prod <= 2)
2631                                         goto SignFlip;
2632
2633                                 cur = 3;
2634                                 while(num[cur++]-- == 0);
2635                                 if (num[hi_prod] == 0)
2636                                         hi_prod--;
2637                         }
2638                 } else {
2639                         // Signs the same, add.
2640                         //
2641                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2642                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2643
2644                         // Propagate carry
2645                         //
2646                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2647                                 DECIMAL_HI32(result)++;
2648                                 if (DECIMAL_HI32(result) <= num[2])
2649                                         goto LongAdd;
2650                         } else if (DECIMAL_HI32(result) < num[2]) {
2651                         LongAdd:
2652                                 // Had a carry above 96 bits.
2653                                 //
2654                                 cur = 3;
2655                                 do {
2656                                         if (hi_prod < cur) {
2657                                                 num[cur] = 1;
2658                                                 hi_prod = cur;
2659                                                 break;
2660                                         }
2661                                 }while (++num[cur++] == 0);
2662                         }
2663                 }
2664
2665                 if (hi_prod > 2) {
2666                         num[0] = DECIMAL_LO32(result);
2667                         num[1] = DECIMAL_MID32(result);
2668                         num[2] = DECIMAL_HI32(result);
2669                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2670                         if (DECIMAL_SCALE(result) == (uint8_t)-1) {
2671                                 mono_set_pending_exception (mono_get_exception_overflow ());
2672                                 return;
2673                         }
2674
2675                         DECIMAL_LO32(result) = num[0];
2676                         DECIMAL_MID32(result) = num[1];
2677                         DECIMAL_HI32(result) = num[2];
2678                 }
2679         }
2680
2681 RetDec:
2682         left = leftOriginal;
2683         COPYDEC(*left, result);
2684         left->reserved = 0;
2685 }
2686
2687 void
2688 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2689 {
2690         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2691         uint32_t pwr, tmp, tmp1;
2692         SPLIT64  sdlTmp, sdlDivisor;
2693         int      scale, cur_scale;
2694         gboolean unscale;
2695
2696         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2697         unscale = FALSE;
2698         divisor[0] = DECIMAL_LO32(*right);
2699         divisor[1] = DECIMAL_MID32(*right);
2700         divisor[2] = DECIMAL_HI32(*right);
2701
2702         if (divisor[1] == 0 && divisor[2] == 0) {
2703                 // Divisor is only 32 bits.  Easy divide.
2704                 //
2705                 if (divisor[0] == 0) {
2706                         mono_set_pending_exception (mono_get_exception_divide_by_zero ());
2707                         return;
2708                 }
2709
2710                 quo[0] = DECIMAL_LO32(*left);
2711                 quo[1] = DECIMAL_MID32(*left);
2712                 quo[2] = DECIMAL_HI32(*left);
2713                 rem[0] = Div96By32(quo, divisor[0]);
2714
2715                 for (;;) {
2716                         if (rem[0] == 0) {
2717                                 if (scale < 0) {
2718                                         cur_scale = min(9, -scale);
2719                                         goto HaveScale;
2720                                 }
2721                                 break;
2722                         }
2723                         // We need to unscale if and only if we have a non-zero remainder
2724                         unscale = TRUE;
2725
2726                         // We have computed a quotient based on the natural scale
2727                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
2728                         // remainder, so now we should increase the scale if possible to
2729                         // include more quotient bits.
2730                         //
2731                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
2732                         // computing more quotient bits as long as the remainder stays
2733                         // non-zero.  If scaling by that much would cause overflow, we'll
2734                         // drop out of the loop and scale by as much as we can.
2735                         //
2736                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2737                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2738                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2739                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
2740                         // is the largest value in quo[1] (when quo[2] == 4) that is
2741                         // assured not to overflow.
2742                         //
2743                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2744                         if (cur_scale == 0) {
2745                                 // No more scaling to be done, but remainder is non-zero.
2746                                 // Round quotient.
2747                                 //
2748                                 tmp = rem[0] << 1;
2749                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2750                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2751                                 RoundUp:
2752                                         if (!Add32To96(quo, 1)) {
2753                                                 if (scale == 0) {
2754                                                         mono_set_pending_exception (mono_get_exception_overflow ());
2755                                                         return;
2756                                                 }
2757                                                 scale--;
2758                                                 OverflowUnscale(quo, TRUE);
2759                                                 break;
2760                                         }
2761                                 }
2762                                 break;
2763                         }
2764
2765                         if (cur_scale < 0) {
2766                                 mono_set_pending_exception (mono_get_exception_overflow ());
2767                                 return;
2768                         }
2769
2770                 HaveScale:
2771                         pwr = power10[cur_scale];
2772                         scale += cur_scale;
2773
2774                         if (IncreaseScale(quo, pwr) != 0) {
2775                                 mono_set_pending_exception (mono_get_exception_overflow ());
2776                                 return;
2777                         }
2778
2779                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2780                         rem[0] = sdlTmp.u.Hi;
2781
2782                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2783                                 if (scale == 0) {
2784                                         mono_set_pending_exception (mono_get_exception_overflow ());
2785                                         return;
2786                                 }
2787                                 scale--;
2788                                 OverflowUnscale(quo, (rem[0] != 0));
2789                                 break;
2790                         }
2791                 } // for (;;)
2792         } else {
2793                 // Divisor has bits set in the upper 64 bits.
2794                 //
2795                 // Divisor must be fully normalized (shifted so bit 31 of the most
2796                 // significant uint32_t is 1).  Locate the MSB so we know how much to
2797                 // normalize by.  The dividend will be shifted by the same amount so
2798                 // the quotient is not changed.
2799                 //
2800                 if (divisor[2] == 0)
2801                         tmp = divisor[1];
2802                 else
2803                         tmp = divisor[2];
2804
2805                 cur_scale = 0;
2806                 if (!(tmp & 0xFFFF0000)) {
2807                         cur_scale += 16;
2808                         tmp <<= 16;
2809                 }
2810                 if (!(tmp & 0xFF000000)) {
2811                         cur_scale += 8;
2812                         tmp <<= 8;
2813                 }
2814                 if (!(tmp & 0xF0000000)) {
2815                         cur_scale += 4;
2816                         tmp <<= 4;
2817                 }
2818                 if (!(tmp & 0xC0000000)) {
2819                         cur_scale += 2;
2820                         tmp <<= 2;
2821                 }
2822                 if (!(tmp & 0x80000000)) {
2823                         cur_scale++;
2824                         tmp <<= 1;
2825                 }
2826
2827                 // Shift both dividend and divisor left by cur_scale.
2828                 //
2829                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2830                 rem[0] = sdlTmp.u.Lo;
2831                 rem[1] = sdlTmp.u.Hi;
2832                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2833                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2834                 sdlTmp.int64 <<= cur_scale;
2835                 rem[2] = sdlTmp.u.Hi;
2836                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2837
2838                 sdlDivisor.u.Lo = divisor[0];
2839                 sdlDivisor.u.Hi = divisor[1];
2840                 sdlDivisor.int64 <<= cur_scale;
2841
2842                 if (divisor[2] == 0) {
2843                         // Have a 64-bit divisor in sdlDivisor.  The remainder
2844                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2845                         //
2846                         sdlTmp.u.Lo = rem[2];
2847                         sdlTmp.u.Hi = rem[3];
2848
2849                         quo[2] = 0;
2850                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2851                         quo[0] = Div96By64(rem, sdlDivisor);
2852
2853                         for (;;) {
2854                                 if ((rem[0] | rem[1]) == 0) {
2855                                         if (scale < 0) {
2856                                                 cur_scale = min(9, -scale);
2857                                                 goto HaveScale64;
2858                                         }
2859                                         break;
2860                                 }
2861
2862                                 // We need to unscale if and only if we have a non-zero remainder
2863                                 unscale = TRUE;
2864
2865                                 // Remainder is non-zero.  Scale up quotient and remainder by
2866                                 // powers of 10 so we can compute more significant bits.
2867                                 //
2868                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2869                                 if (cur_scale == 0) {
2870                                         // No more scaling to be done, but remainder is non-zero.
2871                                         // Round quotient.
2872                                         //
2873                                         sdlTmp.u.Lo = rem[0];
2874                                         sdlTmp.u.Hi = rem[1];
2875                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2876                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2877                                                 goto RoundUp;
2878                                         break;
2879                                 }
2880
2881                                 if (cur_scale < 0) {
2882                                         mono_set_pending_exception (mono_get_exception_overflow ());
2883                                         return;
2884                                 }
2885
2886                         HaveScale64:
2887                                 pwr = power10[cur_scale];
2888                                 scale += cur_scale;
2889
2890                                 if (IncreaseScale(quo, pwr) != 0) {
2891                                         mono_set_pending_exception (mono_get_exception_overflow ());
2892                                         return;
2893                                 }
2894
2895                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2896                                 IncreaseScale(rem, pwr);
2897                                 tmp = Div96By64(rem, sdlDivisor);
2898                                 if (!Add32To96(quo, tmp)) {
2899                                         if (scale == 0) {
2900                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2901                                                 return;
2902                                         }
2903                                         scale--;
2904                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2905                                         break;
2906                                 }
2907
2908                         } // for (;;)
2909                 } else {
2910                         // Have a 96-bit divisor in divisor[].
2911                         //
2912                         // Start by finishing the shift left by cur_scale.
2913                         //
2914                         sdlTmp.u.Lo = divisor[1];
2915                         sdlTmp.u.Hi = divisor[2];
2916                         sdlTmp.int64 <<= cur_scale;
2917                         divisor[0] = sdlDivisor.u.Lo;
2918                         divisor[1] = sdlDivisor.u.Hi;
2919                         divisor[2] = sdlTmp.u.Hi;
2920
2921                         // The remainder (currently 96 bits spread over 4 uint32_ts)
2922                         // will be < divisor.
2923                         //
2924                         quo[2] = 0;
2925                         quo[1] = 0;
2926                         quo[0] = Div128By96(rem, divisor);
2927
2928                         for (;;) {
2929                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2930                                         if (scale < 0) {
2931                                                 cur_scale = min(9, -scale);
2932                                                 goto HaveScale96;
2933                                         }
2934                                         break;
2935                                 }
2936
2937                                 // We need to unscale if and only if we have a non-zero remainder
2938                                 unscale = TRUE;
2939
2940                                 // Remainder is non-zero.  Scale up quotient and remainder by
2941                                 // powers of 10 so we can compute more significant bits.
2942                                 //
2943                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2944                                 if (cur_scale == 0) {
2945                                         // No more scaling to be done, but remainder is non-zero.
2946                                         // Round quotient.
2947                                         //
2948                                         if (rem[2] >= 0x80000000)
2949                                                 goto RoundUp;
2950
2951                                         tmp = rem[0] > 0x80000000;
2952                                         tmp1 = rem[1] > 0x80000000;
2953                                         rem[0] <<= 1;
2954                                         rem[1] = (rem[1] << 1) + tmp;
2955                                         rem[2] = (rem[2] << 1) + tmp1;
2956
2957                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2958                                                 goto RoundUp;
2959                                         break;
2960                                 }
2961
2962                                 if (cur_scale < 0) {
2963                                         mono_set_pending_exception (mono_get_exception_overflow ());
2964                                         return;
2965                                 }
2966
2967                         HaveScale96:
2968                                 pwr = power10[cur_scale];
2969                                 scale += cur_scale;
2970
2971                                 if (IncreaseScale(quo, pwr) != 0) {
2972                                         mono_set_pending_exception (mono_get_exception_overflow ());
2973                                         return;
2974                                 }
2975
2976                                 rem[3] = IncreaseScale(rem, pwr);
2977                                 tmp = Div128By96(rem, divisor);
2978                                 if (!Add32To96(quo, tmp)) {
2979                                         if (scale == 0) {
2980                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2981                                                 return;
2982                                         }
2983
2984                                         scale--;
2985                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2986                                         break;
2987                                 }
2988
2989                         } // for (;;)
2990                 }
2991         }
2992
2993         // We need to unscale if and only if we have a non-zero remainder
2994         if (unscale) {
2995                 // Try extracting any extra powers of 10 we may have
2996                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2997                 // If a division by one of these powers returns a zero remainder, then
2998                 // we keep the quotient.  If the remainder is not zero, then we restore
2999                 // the previous value.
3000                 //
3001                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
3002                 // we can extract.  We use this as a quick test on whether to try a
3003                 // given power.
3004                 //
3005                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
3006                         quo_save[0] = quo[0];
3007                         quo_save[1] = quo[1];
3008                         quo_save[2] = quo[2];
3009
3010                         if (Div96By32(quo_save, 100000000) == 0) {
3011                                 quo[0] = quo_save[0];
3012                                 quo[1] = quo_save[1];
3013                                 quo[2] = quo_save[2];
3014                                 scale -= 8;
3015                         } else
3016                                 break;
3017                 }
3018
3019                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
3020                         quo_save[0] = quo[0];
3021                         quo_save[1] = quo[1];
3022                         quo_save[2] = quo[2];
3023
3024                         if (Div96By32(quo_save, 10000) == 0) {
3025                                 quo[0] = quo_save[0];
3026                                 quo[1] = quo_save[1];
3027                                 quo[2] = quo_save[2];
3028                                 scale -= 4;
3029                         }
3030                 }
3031
3032                 if ((quo[0] & 3) == 0 && scale >= 2) {
3033                         quo_save[0] = quo[0];
3034                         quo_save[1] = quo[1];
3035                         quo_save[2] = quo[2];
3036
3037                         if (Div96By32(quo_save, 100) == 0) {
3038                                 quo[0] = quo_save[0];
3039                                 quo[1] = quo_save[1];
3040                                 quo[2] = quo_save[2];
3041                                 scale -= 2;
3042                         }
3043                 }
3044
3045                 if ((quo[0] & 1) == 0 && scale >= 1) {
3046                         quo_save[0] = quo[0];
3047                         quo_save[1] = quo[1];
3048                         quo_save[2] = quo[2];
3049
3050                         if (Div96By32(quo_save, 10) == 0) {
3051                                 quo[0] = quo_save[0];
3052                                 quo[1] = quo_save[1];
3053                                 quo[2] = quo_save[2];
3054                                 scale -= 1;
3055                         }
3056                 }
3057         }
3058
3059         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3060         DECIMAL_HI32(*left) = quo[2];
3061         DECIMAL_MID32(*left) = quo[1];
3062         DECIMAL_LO32(*left) = quo[0];
3063         DECIMAL_SCALE(*left) = (uint8_t)scale;
3064         left->reserved = 0;
3065
3066 }
3067
3068 #define DECIMAL_PRECISION 29
3069 #define NUMBER_MAXDIGITS 50
3070 typedef struct  {
3071         int32_t precision;
3072         int32_t scale;
3073         int32_t sign;
3074         uint16_t digits[NUMBER_MAXDIGITS + 1];
3075         uint16_t* allDigits;
3076 } CLRNumber;
3077
3078 int
3079 mono_decimal_from_number (void *from, MonoDecimal *target)
3080 {
3081         CLRNumber *number = (CLRNumber *) from;
3082         uint16_t* p = number->digits;
3083         MonoDecimal d;
3084         int e = number->scale;
3085         g_assert(number != NULL);
3086         g_assert(target != NULL);
3087
3088         d.reserved = 0;
3089         DECIMAL_SIGNSCALE(d) = 0;
3090         DECIMAL_HI32(d) = 0;
3091         DECIMAL_LO32(d) = 0;
3092         DECIMAL_MID32(d) = 0;
3093         g_assert(p != NULL);
3094         if (!*p) {
3095                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3096                 // the scale to 0 if the scale was previously positive
3097                 if (e > 0) {
3098                         e = 0;
3099                 }
3100         } else {
3101                 if (e > DECIMAL_PRECISION) return 0;
3102                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3103                         DecMul10(&d);
3104                         if (*p)
3105                                 DecAddInt32(&d, *p++ - '0');
3106                         e--;
3107                 }
3108                 if (*p++ >= '5') {
3109                         gboolean round = TRUE;
3110                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3111                                 // For digits > 5 we will be roundinp up anyway.
3112                                 int count = 20; // Look at the next 20 digits to check to round
3113                                 while (*p == '0' && count != 0) {
3114                                         p++;
3115                                         count--;
3116                                 }
3117                                 if (*p == '\0' || count == 0)
3118                                         round = FALSE;// Do nothing
3119                         }
3120
3121                         if (round) {
3122                                 DecAddInt32(&d, 1);
3123                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3124                                         DECIMAL_HI32(d) = 0x19999999;
3125                                         DECIMAL_MID32(d) = 0x99999999;
3126                                         DECIMAL_LO32(d) = 0x9999999A;
3127                                         e++;
3128                                 }
3129                         }
3130                 }
3131         }
3132         if (e > 0)
3133                 return 0;
3134         if (e <= -DECIMAL_PRECISION) {
3135                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3136                 // This should only happen for actual zeros or very small numbers that round to zero.
3137                 DECIMAL_SIGNSCALE(d) = 0;
3138                 DECIMAL_HI32(d) = 0;
3139                 DECIMAL_LO32(d) = 0;
3140                 DECIMAL_MID32(d) = 0;
3141                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3142         } else {
3143                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3144         }
3145
3146         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3147         *target = d;
3148         return 1;
3149 }
3150
3151
3152 #endif