mono/metadata/decimal-ms.c

   1 /**
   2  * \file
   3  * Copyright (c) Microsoft. All rights reserved.
   4  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
   5  *
   6  * Copyright 2015 Xamarin Inc
   7  *
   8  * File: decimal.c
   9  *
  10  * Ported from C++ to C and adjusted to Mono runtime
  11  *
  12  * Pending:
  13  *   DoToCurrency (they look like new methods we do not have)
  14  */
  15 #ifndef DISABLE_DECIMAL
  16 #include "config.h"
  17 #include <stdint.h>
  18 #include <glib.h>
  19 #include <mono/utils/mono-compiler.h>
  20 #include <mono/metadata/exception.h>
  21 #include <mono/metadata/object-internals.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <math.h>
  26 #ifdef HAVE_MEMORY_H
  27 #include <memory.h>
  28 #endif
  29 #ifdef _MSC_VER
  30 #include <intrin.h>
  31 #endif
  32 #include "decimal-ms.h"
  33 #include "number-ms.h"
  34
  35 #define min(a, b) (((a) < (b)) ? (a) : (b))
  36
  37 typedef enum {
  38         MONO_DECIMAL_OK,
  39         MONO_DECIMAL_OVERFLOW,
  40         MONO_DECIMAL_INVALID_ARGUMENT,
  41         MONO_DECIMAL_DIVBYZERO,
  42         MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
  43 } MonoDecimalStatus;
  44
  45 #ifndef FC_GC_POLL
  46 #   define FC_GC_POLL()
  47 #endif
  48
  49 static const uint32_t ten_to_nine    = 1000000000U;
  50 static const uint32_t ten_to_ten_div_4 = 2500000000U;
  51 #define POWER10_MAX     9
  52 #define DECIMAL_NEG ((uint8_t)0x80)
  53 #define DECMAX 28
  54 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
  55 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
  56 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
  57 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
  58 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
  59 #define DECIMAL_HI32(dec)        ((dec).Hi32)
  60 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
  61 # define DECIMAL_LO64_GET(dec)   (((uint64_t)((dec).v.v.Mid32) << 32) | (dec).v.v.Lo32)
  62 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.v.Lo32 = (value); (dec).v.v.Mid32 = ((value) >> 32); }
  63 #else
  64 # define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
  65 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
  66 #endif
  67
  68 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
  69 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
  70     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
  71
  72 #define DEC_SCALE_MAX   28
  73 #define POWER10_MAX     9
  74
  75 #define OVFL_MAX_9_HI   4
  76 #define OVFL_MAX_9_MID  1266874889
  77 #define OVFL_MAX_9_LO   3047500985u
  78
  79 #define OVFL_MAX_5_HI   42949
  80 #define OVFL_MAX_5_MID  2890341191
  81
  82 #define OVFL_MAX_1_HI   429496729
  83
  84 typedef union {
  85         uint64_t int64;
  86         struct {
  87 #if BYTE_ORDER == G_BIG_ENDIAN
  88         uint32_t Hi;
  89         uint32_t Lo;
  90 #else
  91         uint32_t Lo;
  92         uint32_t Hi;
  93 #endif
  94     } u;
  95 } SPLIT64;
  96
  97 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
  98
  99 const MonoDouble_double ds2to64 = { .s = { .sign = 0, .exp = MONO_DOUBLE_BIAS + 65, .mantHi = 0, .mantLo = 0 } };
 100
 101 //
 102 // Data tables
 103 //
 104
 105 static const uint32_t power10 [POWER10_MAX+1] = {
 106         1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
 107 };
 108
 109
 110 static const double double_power10[] = {
 111         1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
 112         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
 113         1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
 114         1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
 115         1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
 116         1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
 117         1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
 118         1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
 119         1e80 };
 120
 121 const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
 122                                 {100000000000ULL},         // 1E11
 123                                 {1000000000000ULL},        // 1E12
 124                                 {10000000000000ULL},       // 1E13
 125                                 {100000000000000ULL} };    // 1E14
 126
 127 static const uint64_t long_power10[] = {
 128         1,
 129         10ULL,
 130         100ULL,
 131         1000ULL,
 132         10000ULL,
 133         100000ULL,
 134         1000000ULL,
 135         10000000ULL,
 136         100000000ULL,
 137         1000000000ULL,
 138         10000000000ULL,
 139         100000000000ULL,
 140         1000000000000ULL,
 141         10000000000000ULL,
 142         100000000000000ULL,
 143         1000000000000000ULL,
 144         10000000000000000ULL,
 145         100000000000000000ULL,
 146         1000000000000000000ULL,
 147         10000000000000000000ULL};
 148
 149 typedef struct  {
 150         uint32_t Hi, Mid, Lo;
 151 } DECOVFL;
 152
 153 const DECOVFL power_overflow[] = {
 154 // This is a table of the largest values that can be in the upper two
 155 // ULONGs of a 96-bit number that will not overflow when multiplied
 156 // by a given power.  For the upper word, this is a table of
 157 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
 158 // remaining fraction part * 2^32.  2^32 = 4294967296.
 159 //
 160     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
 161     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
 162     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
 163     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
 164     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
 165     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
 166     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
 167     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
 168     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
 169 };
 170
 171
 172 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
 173 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
 174 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
 175
 176 static double
 177 fnDblPower10(int ix)
 178 {
 179     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
 180     g_assert(ix >= 0);
 181     if (ix < maxIx)
 182         return double_power10[ix];
 183     return pow(10.0, ix);
 184 } // double fnDblPower10()
 185
 186
 187 static inline int64_t
 188 DivMod32by32(int32_t num, int32_t den)
 189 {
 190     SPLIT64  sdl;
 191
 192     sdl.u.Lo = num / den;
 193     sdl.u.Hi = num % den;
 194     return sdl.int64;
 195 }
 196
 197 static inline int64_t
 198 DivMod64by32(int64_t num, int32_t den)
 199 {
 200     SPLIT64  sdl;
 201
 202     sdl.u.Lo = Div64by32(num, den);
 203     sdl.u.Hi = Mod64by32(num, den);
 204     return sdl.int64;
 205 }
 206
 207 static uint64_t
 208 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
 209 {
 210         SPLIT64  tmp1;
 211         SPLIT64  tmp2;
 212         SPLIT64  tmp3;
 213
 214         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
 215         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
 216         tmp1.u.Hi += tmp2.u.Lo;
 217         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 218                 tmp2.u.Hi++;
 219         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
 220         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
 221         tmp1.u.Hi += tmp2.u.Lo;
 222         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
 223                 tmp2.u.Hi++;
 224         tmp3.int64 += (uint64_t)tmp2.u.Hi;
 225
 226         *hi = tmp3.int64;
 227         return tmp1.int64;
 228 }
 229
 230 /**
 231 * FullDiv64By32:
 232 *
 233 * Entry:
 234 *   pdlNum  - Pointer to 64-bit dividend
 235 *   ulDen   - 32-bit divisor
 236 *
 237 * Purpose:
 238 *   Do full divide, yielding 64-bit result and 32-bit remainder.
 239 *
 240 * Exit:
 241 *   Quotient overwrites dividend.
 242 *   Returns remainder.
 243 *
 244 * Exceptions:
 245 *   None.
 246 */
 247 // Was: FullDiv64By32
 248 static uint32_t
 249 FullDiv64By32 (uint64_t *num, uint32_t den)
 250 {
 251         SPLIT64  tmp;
 252         SPLIT64  res;
 253
 254         tmp.int64 = *num;
 255         res.u.Hi = 0;
 256
 257         if (tmp.u.Hi >= den) {
 258                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
 259                 //
 260                 res.u.Lo = tmp.u.Hi;
 261                 res.int64 = DivMod64by32(res.int64, den);
 262                 tmp.u.Hi = res.u.Hi;
 263                 res.u.Hi = res.u.Lo;
 264         }
 265
 266         tmp.int64 = DivMod64by32(tmp.int64, den);
 267         res.u.Lo = tmp.u.Lo;
 268         *num = res.int64;
 269         return tmp.u.Hi;
 270 }
 271
 272 /***
 273  * SearchScale
 274  *
 275  * Entry:
 276  *   res_hi - Top uint32_t of quotient
 277  *   res_mid - Middle uint32_t of quotient
 278  *   res_lo - Bottom uint32_t of quotient
 279  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 280  *
 281  * Purpose:
 282  *   Determine the max power of 10, <= 9, that the quotient can be scaled
 283  *   up by and still fit in 96 bits.
 284  *
 285  * Exit:
 286  *   Returns power of 10 to scale by, -1 if overflow error.
 287  *
 288  ***********************************************************************/
 289
 290 static int
 291 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
 292 {
 293         int   cur_scale;
 294
 295         // Quick check to stop us from trying to scale any more.
 296         //
 297         if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
 298                 cur_scale = 0;
 299                 goto HaveScale;
 300         }
 301
 302         if (scale > DEC_SCALE_MAX - 9) {
 303                 // We can't scale by 10^9 without exceeding the max scale factor.
 304                 // See if we can scale to the max.  If not, we'll fall into
 305                 // standard search for scale factor.
 306                 //
 307                 cur_scale = DEC_SCALE_MAX - scale;
 308                 if (res_hi < power_overflow[cur_scale - 1].Hi)
 309                         goto HaveScale;
 310
 311                 if (res_hi == power_overflow[cur_scale - 1].Hi) {
 312                 UpperEq:
 313                         if (res_mid > power_overflow[cur_scale - 1].Mid ||
 314                             (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
 315                                 cur_scale--;
 316                         }
 317                         goto HaveScale;
 318                 }
 319         } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
 320                 return 9;
 321
 322         // Search for a power to scale by < 9.  Do a binary search
 323         // on power_overflow[].
 324         //
 325         cur_scale = 5;
 326         if (res_hi < OVFL_MAX_5_HI)
 327                 cur_scale = 7;
 328         else if (res_hi > OVFL_MAX_5_HI)
 329                 cur_scale = 3;
 330         else
 331                 goto UpperEq;
 332
 333         // cur_scale is 3 or 7.
 334         //
 335         if (res_hi < power_overflow[cur_scale - 1].Hi)
 336                 cur_scale++;
 337         else if (res_hi > power_overflow[cur_scale - 1].Hi)
 338                 cur_scale--;
 339         else
 340                 goto UpperEq;
 341
 342         // cur_scale is 2, 4, 6, or 8.
 343         //
 344         // In all cases, we already found we could not use the power one larger.
 345         // So if we can use this power, it is the biggest, and we're done.  If
 346         // we can't use this power, the one below it is correct for all cases
 347         // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
 348         //
 349         if (res_hi > power_overflow[cur_scale - 1].Hi)
 350                 cur_scale--;
 351
 352         if (res_hi == power_overflow[cur_scale - 1].Hi)
 353                 goto UpperEq;
 354
 355 HaveScale:
 356         // cur_scale = largest power of 10 we can scale by without overflow,
 357         // cur_scale < 9.  See if this is enough to make scale factor
 358         // positive if it isn't already.
 359         //
 360         if (cur_scale + scale < 0)
 361                 cur_scale = -1;
 362
 363         return cur_scale;
 364 }
 365
 366
 367 /**
 368 * Div96By32
 369 *
 370 * Entry:
 371 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
 372 *   ulDen   - 32-bit divisor.
 373 *
 374 * Purpose:
 375 *   Do full divide, yielding 96-bit result and 32-bit remainder.
 376 *
 377 * Exit:
 378 *   Quotient overwrites dividend.
 379 *   Returns remainder.
 380 *
 381 * Exceptions:
 382 *   None.
 383 *
 384 */
 385 static uint32_t
 386 Div96By32(uint32_t *num, uint32_t den)
 387 {
 388         SPLIT64  tmp;
 389
 390         tmp.u.Hi = 0;
 391
 392         if (num[2] != 0)
 393                 goto Div3Word;
 394
 395         if (num[1] >= den)
 396                 goto Div2Word;
 397
 398         tmp.u.Hi = num[1];
 399         num[1] = 0;
 400         goto Div1Word;
 401
 402 Div3Word:
 403         tmp.u.Lo = num[2];
 404         tmp.int64 = DivMod64by32(tmp.int64, den);
 405         num[2] = tmp.u.Lo;
 406 Div2Word:
 407         tmp.u.Lo = num[1];
 408         tmp.int64 = DivMod64by32(tmp.int64, den);
 409         num[1] = tmp.u.Lo;
 410 Div1Word:
 411         tmp.u.Lo = num[0];
 412         tmp.int64 = DivMod64by32(tmp.int64, den);
 413         num[0] = tmp.u.Lo;
 414         return tmp.u.Hi;
 415 }
 416
 417 /***
 418  * DecFixInt
 419  *
 420  * Entry:
 421  *   pdecRes - Pointer to Decimal result location
 422  *   operand  - Pointer to Decimal operand
 423  *
 424  * Purpose:
 425  *   Chop the value to integer.  Return remainder so Int() function
 426  *   can round down if non-zero.
 427  *
 428  * Exit:
 429  *   Returns remainder.
 430  *
 431  * Exceptions:
 432  *   None.
 433  *
 434  ***********************************************************************/
 435
 436 static uint32_t
 437 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
 438 {
 439         uint32_t   num[3];
 440         uint32_t   rem;
 441         uint32_t   pwr;
 442         int     scale;
 443
 444         if (operand->u.u.scale > 0) {
 445                 num[0] = operand->v.v.Lo32;
 446                 num[1] = operand->v.v.Mid32;
 447                 num[2] = operand->Hi32;
 448                 scale = operand->u.u.scale;
 449                 result->u.u.sign = operand->u.u.sign;
 450                 rem = 0;
 451
 452                 do {
 453                         if (scale > POWER10_MAX)
 454                                 pwr = ten_to_nine;
 455                         else
 456                                 pwr = power10[scale];
 457
 458                         rem |= Div96By32(num, pwr);
 459                         scale -= 9;
 460                 }while (scale > 0);
 461
 462                 result->v.v.Lo32 = num[0];
 463                 result->v.v.Mid32 = num[1];
 464                 result->Hi32 = num[2];
 465                 result->u.u.scale = 0;
 466
 467                 return rem;
 468         }
 469
 470         COPYDEC(*result, *operand);
 471         // Odd, the Microsoft code does not set result->reserved to zero on this case
 472         return 0;
 473 }
 474
 475 /**
 476  * ScaleResult:
 477  *
 478  * Entry:
 479  *   res - Array of uint32_ts with value, least-significant first.
 480  *   hi_res  - Index of last non-zero value in res.
 481  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 482  *
 483  * Purpose:
 484  *   See if we need to scale the result to fit it in 96 bits.
 485  *   Perform needed scaling.  Adjust scale factor accordingly.
 486  *
 487  * Exit:
 488  *   res updated in place, always 3 uint32_ts.
 489  *   New scale factor returned, -1 if overflow error.
 490  *
 491  */
 492 static int
 493 ScaleResult(uint32_t *res, int hi_res, int scale)
 494 {
 495         int     new_scale;
 496         int     cur;
 497         uint32_t   pwr;
 498         uint32_t   tmp;
 499         uint32_t   sticky;
 500         SPLIT64 sdlTmp;
 501
 502         // See if we need to scale the result.  The combined scale must
 503         // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
 504         //
 505         // Start by figuring a lower bound on the scaling needed to make
 506         // the upper 96 bits zero.  hi_res is the index into res[]
 507         // of the highest non-zero uint32_t.
 508         //
 509         new_scale =   hi_res * 32 - 64 - 1;
 510         if (new_scale > 0) {
 511
 512                 // Find the MSB.
 513                 //
 514                 tmp = res[hi_res];
 515                 if (!(tmp & 0xFFFF0000)) {
 516                         new_scale -= 16;
 517                         tmp <<= 16;
 518                 }
 519                 if (!(tmp & 0xFF000000)) {
 520                         new_scale -= 8;
 521                         tmp <<= 8;
 522                 }
 523                 if (!(tmp & 0xF0000000)) {
 524                         new_scale -= 4;
 525                         tmp <<= 4;
 526                 }
 527                 if (!(tmp & 0xC0000000)) {
 528                         new_scale -= 2;
 529                         tmp <<= 2;
 530                 }
 531                 if (!(tmp & 0x80000000)) {
 532                         new_scale--;
 533                         tmp <<= 1;
 534                 }
 535
 536                 // Multiply bit position by log10(2) to figure it's power of 10.
 537                 // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
 538                 // with a multiply saves a 96-byte lookup table.  The power returned
 539                 // is <= the power of the number, so we must add one power of 10
 540                 // to make it's integer part zero after dividing by 256.
 541                 //
 542                 // Note: the result of this multiplication by an approximation of
 543                 // log10(2) have been exhaustively checked to verify it gives the
 544                 // correct result.  (There were only 95 to check...)
 545                 //
 546                 new_scale = ((new_scale * 77) >> 8) + 1;
 547
 548                 // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
 549                 // This reduces the scale factor of the result.  If it exceeds the
 550                 // current scale of the result, we'll overflow.
 551                 //
 552                 if (new_scale > scale)
 553                         return -1;
 554         }
 555         else
 556                 new_scale = 0;
 557
 558         // Make sure we scale by enough to bring the current scale factor
 559         // into valid range.
 560         //
 561         if (new_scale < scale - DEC_SCALE_MAX)
 562                 new_scale = scale - DEC_SCALE_MAX;
 563
 564         if (new_scale != 0) {
 565                 // Scale by the power of 10 given by new_scale.  Note that this is
 566                 // NOT guaranteed to bring the number within 96 bits -- it could
 567                 // be 1 power of 10 short.
 568                 //
 569                 scale -= new_scale;
 570                 sticky = 0;
 571                 sdlTmp.u.Hi = 0; // initialize remainder
 572
 573                 for (;;) {
 574
 575                         sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
 576
 577                         if (new_scale > POWER10_MAX)
 578                                 pwr = ten_to_nine;
 579                         else
 580                                 pwr = power10[new_scale];
 581
 582                         // Compute first quotient.
 583                         // DivMod64by32 returns quotient in Lo, remainder in Hi.
 584                         //
 585                         sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
 586                         res[hi_res] = sdlTmp.u.Lo;
 587                         cur = hi_res - 1;
 588
 589                         if (cur >= 0) {
 590                                 // If first quotient was 0, update hi_res.
 591                                 //
 592                                 if (sdlTmp.u.Lo == 0)
 593                                         hi_res--;
 594
 595                                 // Compute subsequent quotients.
 596                                 //
 597                                 do {
 598                                         sdlTmp.u.Lo = res[cur];
 599                                         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
 600                                         res[cur] = sdlTmp.u.Lo;
 601                                         cur--;
 602                                 } while (cur >= 0);
 603
 604                         }
 605
 606                         new_scale -= POWER10_MAX;
 607                         if (new_scale > 0)
 608                                 continue; // scale some more
 609
 610                         // If we scaled enough, hi_res would be 2 or less.  If not,
 611                         // divide by 10 more.
 612                         //
 613                         if (hi_res > 2) {
 614                                 new_scale = 1;
 615                                 scale--;
 616                                 continue; // scale by 10
 617                         }
 618
 619                         // Round final result.  See if remainder >= 1/2 of divisor.
 620                         // If remainder == 1/2 divisor, round up if odd or sticky bit set.
 621                         //
 622                         pwr >>= 1;  // power of 10 always even
 623                         if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
 624                                                     ((res[0] & 1) | sticky)) ) {
 625                                 cur = -1;
 626                                 while (++res[++cur] == 0);
 627
 628                                 if (cur > 2) {
 629                                         // The rounding caused us to carry beyond 96 bits.
 630                                         // Scale by 10 more.
 631                                         //
 632                                         hi_res = cur;
 633                                         sticky = 0;  // no sticky bit
 634                                         sdlTmp.u.Hi = 0; // or remainder
 635                                         new_scale = 1;
 636                                         scale--;
 637                                         continue; // scale by 10
 638                                 }
 639                         }
 640
 641                         // We may have scaled it more than we planned.  Make sure the scale
 642                         // factor hasn't gone negative, indicating overflow.
 643                         //
 644                         if (scale < 0)
 645                                 return -1;
 646
 647                         return scale;
 648                 } // for(;;)
 649         }
 650         return scale;
 651 }
 652
 653 // Decimal multiply
 654 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
 655 static MonoDecimalStatus
 656 mono_decimal_multiply_result(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
 657 {
 658         SPLIT64 tmp;
 659         SPLIT64 tmp2;
 660         SPLIT64 tmp3;
 661         int     scale;
 662         int     hi_prod;
 663         uint32_t   pwr;
 664         uint32_t   rem_lo;
 665         uint32_t   rem_hi;
 666         uint32_t   prod[6];
 667
 668         scale = left->u.u.scale + right->u.u.scale;
 669
 670         if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
 671                 // Upper 64 bits are zero.
 672                 //
 673                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 674                 if (scale > DEC_SCALE_MAX)
 675                 {
 676                         // Result scale is too big.  Divide result by power of 10 to reduce it.
 677                         // If the amount to divide by is > 19 the result is guaranteed
 678                         // less than 1/2.  [max value in 64 bits = 1.84E19]
 679                         //
 680                         scale -= DEC_SCALE_MAX;
 681                         if (scale > 19) {
 682                         ReturnZero:
 683                                 DECIMAL_SETZERO(*result);
 684                                 return MONO_DECIMAL_OK;
 685                         }
 686
 687                         if (scale > POWER10_MAX) {
 688                                 // Divide by 1E10 first, to get the power down to a 32-bit quantity.
 689                                 // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
 690                                 // then multiply the next divisor by 4 (which will be a max of 4E9).
 691                                 //
 692                                 rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
 693                                 pwr = power10[scale - 10] << 2;
 694                         } else {
 695                                 pwr = power10[scale];
 696                                 rem_lo = 0;
 697                         }
 698
 699                         // Power to divide by fits in 32 bits.
 700                         //
 701                         rem_hi = FullDiv64By32(&tmp.int64, pwr);
 702
 703                         // Round result.  See if remainder >= 1/2 of divisor.
 704                         // Divisor is a power of 10, so it is always even.
 705                         //
 706                         pwr >>= 1;
 707                         if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
 708                                 tmp.int64++;
 709
 710                         scale = DEC_SCALE_MAX;
 711                 }
 712                 DECIMAL_LO32(*result) = tmp.u.Lo;
 713                 DECIMAL_MID32(*result) = tmp.u.Hi;
 714                 DECIMAL_HI32(*result) = 0;
 715         } else {
 716                 // At least one operand has bits set in the upper 64 bits.
 717                 //
 718                 // Compute and accumulate the 9 partial products into a
 719                 // 192-bit (24-byte) result.
 720                 //
 721                 //                [l-h][l-m][l-l]   left high, middle, low
 722                 //             x  [r-h][r-m][r-l]   right high, middle, low
 723                 // ------------------------------
 724                 //
 725                 //                     [0-h][0-l]   l-l * r-l
 726                 //                [1ah][1al]        l-l * r-m
 727                 //                [1bh][1bl]        l-m * r-l
 728                 //           [2ah][2al]             l-m * r-m
 729                 //           [2bh][2bl]             l-l * r-h
 730                 //           [2ch][2cl]             l-h * r-l
 731                 //      [3ah][3al]                  l-m * r-h
 732                 //      [3bh][3bl]                  l-h * r-m
 733                 // [4-h][4-l]                       l-h * r-h
 734                 // ------------------------------
 735                 // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
 736                 //
 737                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
 738                 prod[0] = tmp.u.Lo;
 739
 740                 tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
 741
 742                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
 743                 tmp.int64 += tmp2.int64; // this could generate carry
 744                 prod[1] = tmp.u.Lo;
 745                 if (tmp.int64 < tmp2.int64) // detect carry
 746                         tmp2.u.Hi = 1;
 747                 else
 748                         tmp2.u.Hi = 0;
 749                 tmp2.u.Lo = tmp.u.Hi;
 750
 751                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
 752
 753                 if (left->Hi32 | right->Hi32) {
 754                         // Highest 32 bits is non-zero.  Calculate 5 more partial products.
 755                         //
 756                         tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
 757                         tmp.int64 += tmp2.int64; // this could generate carry
 758                         if (tmp.int64 < tmp2.int64) // detect carry
 759                                 tmp3.u.Hi = 1;
 760                         else
 761                                 tmp3.u.Hi = 0;
 762
 763                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
 764                         tmp.int64 += tmp2.int64; // this could generate carry
 765                         prod[2] = tmp.u.Lo;
 766                         if (tmp.int64 < tmp2.int64) // detect carry
 767                                 tmp3.u.Hi++;
 768                         tmp3.u.Lo = tmp.u.Hi;
 769
 770                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
 771                         tmp.int64 += tmp3.int64; // this could generate carry
 772                         if (tmp.int64 < tmp3.int64) // detect carry
 773                                 tmp3.u.Hi = 1;
 774                         else
 775                                 tmp3.u.Hi = 0;
 776
 777                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
 778                         tmp.int64 += tmp2.int64; // this could generate carry
 779                         prod[3] = tmp.u.Lo;
 780                         if (tmp.int64 < tmp2.int64) // detect carry
 781                                 tmp3.u.Hi++;
 782                         tmp3.u.Lo = tmp.u.Hi;
 783
 784                         tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
 785                         prod[4] = tmp.u.Lo;
 786                         prod[5] = tmp.u.Hi;
 787
 788                         hi_prod = 5;
 789                 }
 790                 else {
 791                         prod[2] = tmp.u.Lo;
 792                         prod[3] = tmp.u.Hi;
 793                         hi_prod = 3;
 794                 }
 795
 796                 // Check for leading zero uint32_ts on the product
 797                 //
 798                 while (prod[hi_prod] == 0) {
 799                         hi_prod--;
 800                         if (hi_prod < 0)
 801                                 goto ReturnZero;
 802                 }
 803
 804                 scale = ScaleResult(prod, hi_prod, scale);
 805                 if (scale == -1)
 806                         return MONO_DECIMAL_OVERFLOW;
 807
 808                 result->v.v.Lo32 = prod[0];
 809                 result->v.v.Mid32 = prod[1];
 810                 result->Hi32 = prod[2];
 811         }
 812
 813         result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
 814         result->u.u.scale = (char)scale;
 815         return MONO_DECIMAL_OK;
 816 }
 817
 818 // Addition and subtraction
 819 static MonoDecimalStatus
 820 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
 821 {
 822         uint32_t     num[6];
 823         uint32_t     pwr;
 824         int       scale;
 825         int       hi_prod;
 826         int       cur;
 827         SPLIT64   tmp;
 828         MonoDecimal decRes;
 829         MonoDecimal decTmp;
 830         MonoDecimal *pdecTmp;
 831
 832         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
 833
 834         if (right->u.u.scale == left->u.u.scale) {
 835                 // Scale factors are equal, no alignment necessary.
 836                 //
 837                 decRes.u.signscale = left->u.signscale;
 838
 839         AlignedAdd:
 840                 if (sign) {
 841                         // Signs differ - subtract
 842                         //
 843                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
 844                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
 845
 846                         // Propagate carry
 847                         //
 848                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
 849                                 decRes.Hi32--;
 850                                 if (decRes.Hi32 >= left->Hi32)
 851                                         goto SignFlip;
 852                         } else if (decRes.Hi32 > left->Hi32) {
 853                                 // Got negative result.  Flip its sign.
 854                                 //
 855                         SignFlip:
 856                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
 857                                 decRes.Hi32 = ~decRes.Hi32;
 858                                 if (DECIMAL_LO64_GET(decRes) == 0)
 859                                         decRes.Hi32++;
 860                                 decRes.u.u.sign ^= DECIMAL_NEG;
 861                         }
 862
 863                 } else {
 864                         // Signs are the same - add
 865                         //
 866                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
 867                         decRes.Hi32 = left->Hi32 + right->Hi32;
 868
 869                         // Propagate carry
 870                         //
 871                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
 872                                 decRes.Hi32++;
 873                                 if (decRes.Hi32 <= left->Hi32)
 874                                         goto AlignedScale;
 875                         } else if (decRes.Hi32 < left->Hi32) {
 876                         AlignedScale:
 877                                 // The addition carried above 96 bits.  Divide the result by 10,
 878                                 // dropping the scale factor.
 879                                 //
 880                                 if (decRes.u.u.scale == 0)
 881                                         return MONO_DECIMAL_OVERFLOW;
 882                                 decRes.u.u.scale--;
 883
 884                                 tmp.u.Lo = decRes.Hi32;
 885                                 tmp.u.Hi = 1;
 886                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 887                                 decRes.Hi32 = tmp.u.Lo;
 888
 889                                 tmp.u.Lo = decRes.v.v.Mid32;
 890                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 891                                 decRes.v.v.Mid32 = tmp.u.Lo;
 892
 893                                 tmp.u.Lo = decRes.v.v.Lo32;
 894                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
 895                                 decRes.v.v.Lo32 = tmp.u.Lo;
 896
 897                                 // See if we need to round up.
 898                                 //
 899                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
 900                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
 901                                                 if (DECIMAL_LO64_GET(decRes) == 0)
 902                                                         decRes.Hi32++;
 903                                 }
 904                         }
 905                 }
 906         }
 907         else {
 908                 // Scale factors are not equal.  Assume that a larger scale
 909                 // factor (more decimal places) is likely to mean that number
 910                 // is smaller.  Start by guessing that the right operand has
 911                 // the larger scale factor.  The result will have the larger
 912                 // scale factor.
 913                 //
 914                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
 915                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
 916                 scale = decRes.u.u.scale - left->u.u.scale;
 917
 918                 if (scale < 0) {
 919                         // Guessed scale factor wrong. Swap operands.
 920                         //
 921                         scale = -scale;
 922                         decRes.u.u.scale = left->u.u.scale;
 923                         decRes.u.u.sign ^= sign;
 924                         pdecTmp = right;
 925                         right = left;
 926                         left = pdecTmp;
 927                 }
 928
 929                 // *left will need to be multiplied by 10^scale so
 930                 // it will have the same scale as *right.  We could be
 931                 // extending it to up to 192 bits of precision.
 932                 //
 933                 if (scale <= POWER10_MAX) {
 934                         // Scaling won't make it larger than 4 uint32_ts
 935                         //
 936                         pwr = power10[scale];
 937                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
 938                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
 939                         tmp.int64 += decTmp.v.v.Mid32;
 940                         decTmp.v.v.Mid32 = tmp.u.Lo;
 941                         decTmp.Hi32 = tmp.u.Hi;
 942                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
 943                         tmp.int64 += decTmp.Hi32;
 944                         if (tmp.u.Hi == 0) {
 945                                 // Result fits in 96 bits.  Use standard aligned add.
 946                                 //
 947                                 decTmp.Hi32 = tmp.u.Lo;
 948                                 left = &decTmp;
 949                                 goto AlignedAdd;
 950                         }
 951                         num[0] = decTmp.v.v.Lo32;
 952                         num[1] = decTmp.v.v.Mid32;
 953                         num[2] = tmp.u.Lo;
 954                         num[3] = tmp.u.Hi;
 955                         hi_prod = 3;
 956                 }
 957                 else {
 958                         // Have to scale by a bunch.  Move the number to a buffer
 959                         // where it has room to grow as it's scaled.
 960                         //
 961                         num[0] = left->v.v.Lo32;
 962                         num[1] = left->v.v.Mid32;
 963                         num[2] = left->Hi32;
 964                         hi_prod = 2;
 965
 966                         // Scan for zeros in the upper words.
 967                         //
 968                         if (num[2] == 0) {
 969                                 hi_prod = 1;
 970                                 if (num[1] == 0) {
 971                                         hi_prod = 0;
 972                                         if (num[0] == 0) {
 973                                                 // Left arg is zero, return right.
 974                                                 //
 975                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
 976                                                 decRes.Hi32 = right->Hi32;
 977                                                 decRes.u.u.sign ^= sign;
 978                                                 goto RetDec;
 979                                         }
 980                                 }
 981                         }
 982
 983                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
 984                         // with index of highest non-zero uint32_t.
 985                         //
 986                         for (; scale > 0; scale -= POWER10_MAX) {
 987                                 if (scale > POWER10_MAX)
 988                                         pwr = ten_to_nine;
 989                                 else
 990                                         pwr = power10[scale];
 991
 992                                 tmp.u.Hi = 0;
 993                                 for (cur = 0; cur <= hi_prod; cur++) {
 994                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
 995                                         num[cur] = tmp.u.Lo;
 996                                 }
 997
 998                                 if (tmp.u.Hi != 0)
 999                                         // We're extending the result by another uint32_t.
1000                                         num[++hi_prod] = tmp.u.Hi;
1001                         }
1002                 }
1003
1004                 // Scaling complete, do the add.  Could be subtract if signs differ.
1005                 //
1006                 tmp.u.Lo = num[0];
1007                 tmp.u.Hi = num[1];
1008
1009                 if (sign) {
1010                         // Signs differ, subtract.
1011                         //
1012                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1013                         decRes.Hi32 = num[2] - right->Hi32;
1014
1015                         // Propagate carry
1016                         //
1017                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1018                                 decRes.Hi32--;
1019                                 if (decRes.Hi32 >= num[2])
1020                                         goto LongSub;
1021                         }
1022                         else if (decRes.Hi32 > num[2]) {
1023                         LongSub:
1024                                 // If num has more than 96 bits of precision, then we need to
1025                                 // carry the subtraction into the higher bits.  If it doesn't,
1026                                 // then we subtracted in the wrong order and have to flip the
1027                                 // sign of the result.
1028                                 //
1029                                 if (hi_prod <= 2)
1030                                         goto SignFlip;
1031
1032                                 cur = 3;
1033                                 while(num[cur++]-- == 0);
1034                                 if (num[hi_prod] == 0)
1035                                         hi_prod--;
1036                         }
1037                 }
1038                 else {
1039                         // Signs the same, add.
1040                         //
1041                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1042                         decRes.Hi32 = num[2] + right->Hi32;
1043
1044                         // Propagate carry
1045                         //
1046                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1047                                 decRes.Hi32++;
1048                                 if (decRes.Hi32 <= num[2])
1049                                         goto LongAdd;
1050                         }
1051                         else if (decRes.Hi32 < num[2]) {
1052                         LongAdd:
1053                                 // Had a carry above 96 bits.
1054                                 //
1055                                 cur = 3;
1056                                 do {
1057                                         if (hi_prod < cur) {
1058                                                 num[cur] = 1;
1059                                                 hi_prod = cur;
1060                                                 break;
1061                                         }
1062                                 }while (++num[cur++] == 0);
1063                         }
1064                 }
1065
1066                 if (hi_prod > 2) {
1067                         num[0] = decRes.v.v.Lo32;
1068                         num[1] = decRes.v.v.Mid32;
1069                         num[2] = decRes.Hi32;
1070                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1071                         if (decRes.u.u.scale == (uint8_t) -1)
1072                                 return MONO_DECIMAL_OVERFLOW;
1073
1074                         decRes.v.v.Lo32 = num[0];
1075                         decRes.v.v.Mid32 = num[1];
1076                         decRes.Hi32 = num[2];
1077                 }
1078         }
1079
1080 RetDec:
1081         COPYDEC(*result, decRes);
1082         // Odd, the Microsoft code does not set result->reserved to zero on this case
1083         return MONO_DECIMAL_OK;
1084 }
1085
1086 // Decimal addition
1087 static MonoDecimalStatus G_GNUC_UNUSED
1088 mono_decimal_add(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1089 {
1090     return DecAddSub (left, right, result, 0);
1091 }
1092
1093 // Decimal subtraction
1094 static MonoDecimalStatus G_GNUC_UNUSED
1095 mono_decimal_sub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1096 {
1097     return DecAddSub (left, right, result, DECIMAL_NEG);
1098 }
1099
1100 /**
1101  * IncreaseScale:
1102  *
1103  * Entry:
1104  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1105  *   pwr   - Scale factor to multiply by
1106  *
1107  * Purpose:
1108  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1109  *   the input.  The last 32 bits of the product are the return value.
1110  *
1111  * Exit:
1112  *   Returns highest 32 bits of product.
1113  *
1114  * Exceptions:
1115  *   None.
1116  *
1117  */
1118 static uint32_t
1119 IncreaseScale(uint32_t *num, uint32_t pwr)
1120 {
1121         SPLIT64   sdlTmp;
1122
1123         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1124         num[0] = sdlTmp.u.Lo;
1125         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1126         num[1] = sdlTmp.u.Lo;
1127         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1128         num[2] = sdlTmp.u.Lo;
1129         return sdlTmp.u.Hi;
1130 }
1131
1132 /**
1133  * Div96By64:
1134  *
1135  * Entry:
1136  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1137  *   sdlDen  - 64-bit divisor.
1138  *
1139  * Purpose:
1140  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1141  *   Divisor must be larger than upper 64 bits of dividend.
1142  *
1143  * Exit:
1144  *   Remainder overwrites lower 64-bits of dividend.
1145  *   Returns quotient.
1146  *
1147  * Exceptions:
1148  *   None.
1149  *
1150  */
1151 static uint32_t
1152 Div96By64(uint32_t *num, SPLIT64 den)
1153 {
1154         SPLIT64 quo;
1155         SPLIT64 sdlNum;
1156         SPLIT64 prod;
1157
1158         sdlNum.u.Lo = num[0];
1159
1160         if (num[2] >= den.u.Hi) {
1161                 // Divide would overflow.  Assume a quotient of 2^32, and set
1162                 // up remainder accordingly.  Then jump to loop which reduces
1163                 // the quotient.
1164                 //
1165                 sdlNum.u.Hi = num[1] - den.u.Lo;
1166                 quo.u.Lo = 0;
1167                 goto NegRem;
1168         }
1169
1170         // Hardware divide won't overflow
1171         //
1172         if (num[2] == 0 && num[1] < den.u.Hi)
1173                 // Result is zero.  Entire dividend is remainder.
1174                 //
1175                 return 0;
1176
1177         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1178         //
1179         quo.u.Lo = num[1];
1180         quo.u.Hi = num[2];
1181         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1182         sdlNum.u.Hi = quo.u.Hi; // remainder
1183
1184         // Compute full remainder, rem = dividend - (quo * divisor).
1185         //
1186         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1187         sdlNum.int64 -= prod.int64;
1188
1189         if (sdlNum.int64 > ~prod.int64) {
1190         NegRem:
1191                 // Remainder went negative.  Add divisor back in until it's positive,
1192                 // a max of 2 times.
1193                 //
1194                 do {
1195                         quo.u.Lo--;
1196                         sdlNum.int64 += den.int64;
1197                 }while (sdlNum.int64 >= den.int64);
1198         }
1199
1200         num[0] = sdlNum.u.Lo;
1201         num[1] = sdlNum.u.Hi;
1202         return quo.u.Lo;
1203 }
1204
1205 /***
1206 * Div128By96
1207 *
1208 * Entry:
1209 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1210 *   den - Pointer to 96-bit divisor.
1211 *
1212 * Purpose:
1213 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1214 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1215 *   assured in the initial call because the divisor is normalized
1216 *   and the dividend can't be.  In subsequent calls, the remainder
1217 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1218 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1219 *
1220 * Exit:
1221 *   Remainder overwrites lower 96-bits of dividend.
1222 *   Returns quotient.
1223 *
1224 * Exceptions:
1225 *   None.
1226 *
1227 ***********************************************************************/
1228
1229 static uint32_t
1230 Div128By96(uint32_t *num, uint32_t *den)
1231 {
1232         SPLIT64 sdlQuo;
1233         SPLIT64 sdlNum;
1234         SPLIT64 sdlProd1;
1235         SPLIT64 sdlProd2;
1236
1237         sdlNum.u.Lo = num[0];
1238         sdlNum.u.Hi = num[1];
1239
1240         if (num[3] == 0 && num[2] < den[2]){
1241                 // Result is zero.  Entire dividend is remainder.
1242                 //
1243                 return 0;
1244         }
1245
1246         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1247         //
1248         sdlQuo.u.Lo = num[2];
1249         sdlQuo.u.Hi = num[3];
1250         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1251
1252         // Compute full remainder, rem = dividend - (quo * divisor).
1253         //
1254         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1255         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1256         sdlProd2.int64 += sdlProd1.u.Hi;
1257         sdlProd1.u.Hi = sdlProd2.u.Lo;
1258
1259         sdlNum.int64 -= sdlProd1.int64;
1260         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1261
1262         // Propagate carries
1263         //
1264         if (sdlNum.int64 > ~sdlProd1.int64) {
1265                 num[2]--;
1266                 if (num[2] >= ~sdlProd2.u.Hi)
1267                         goto NegRem;
1268         } else if (num[2] > ~sdlProd2.u.Hi) {
1269         NegRem:
1270                 // Remainder went negative.  Add divisor back in until it's positive,
1271                 // a max of 2 times.
1272                 //
1273                 sdlProd1.u.Lo = den[0];
1274                 sdlProd1.u.Hi = den[1];
1275
1276                 for (;;) {
1277                         sdlQuo.u.Lo--;
1278                         sdlNum.int64 += sdlProd1.int64;
1279                         num[2] += den[2];
1280
1281                         if (sdlNum.int64 < sdlProd1.int64) {
1282                                 // Detected carry. Check for carry out of top
1283                                 // before adding it in.
1284                                 //
1285                                 if (num[2]++ < den[2])
1286                                         break;
1287                         }
1288                         if (num[2] < den[2])
1289                                 break; // detected carry
1290                 }
1291         }
1292
1293         num[0] = sdlNum.u.Lo;
1294         num[1] = sdlNum.u.Hi;
1295         return sdlQuo.u.Lo;
1296 }
1297
1298 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1299 // Returns FALSE if there is an overflow
1300 static gboolean
1301 Add32To96(uint32_t *num, uint32_t value)
1302 {
1303         num[0] += value;
1304         if (num[0] < value) {
1305                 if (++num[1] == 0) {
1306                         if (++num[2] == 0) {
1307                                 return FALSE;
1308                         }
1309                 }
1310         }
1311         return TRUE;
1312 }
1313
1314 static void
1315 OverflowUnscale (uint32_t *quo, gboolean remainder)
1316 {
1317         SPLIT64  sdlTmp;
1318
1319         // We have overflown, so load the high bit with a one.
1320         sdlTmp.u.Hi = 1u;
1321         sdlTmp.u.Lo = quo[2];
1322         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1323         quo[2] = sdlTmp.u.Lo;
1324         sdlTmp.u.Lo = quo[1];
1325         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1326         quo[1] = sdlTmp.u.Lo;
1327         sdlTmp.u.Lo = quo[0];
1328         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1329         quo[0] = sdlTmp.u.Lo;
1330         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1331         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1332                 Add32To96(quo, 1u);
1333         }
1334 }
1335
1336 // mono_decimal_divide - Decimal divide
1337 static MonoDecimalStatus G_GNUC_UNUSED
1338 mono_decimal_divide_result(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1339 {
1340         uint32_t   quo[3];
1341         uint32_t   quoSave[3];
1342         uint32_t   rem[4];
1343         uint32_t   divisor[3];
1344         uint32_t   pwr;
1345         uint32_t   utmp;
1346         uint32_t   utmp1;
1347         SPLIT64 sdlTmp;
1348         SPLIT64 sdlDivisor;
1349         int     scale;
1350         int     cur_scale;
1351
1352         scale = left->u.u.scale - right->u.u.scale;
1353         divisor[0] = right->v.v.Lo32;
1354         divisor[1] = right->v.v.Mid32;
1355         divisor[2] = right->Hi32;
1356
1357         if (divisor[1] == 0 && divisor[2] == 0) {
1358                 // Divisor is only 32 bits.  Easy divide.
1359                 //
1360                 if (divisor[0] == 0)
1361                         return MONO_DECIMAL_DIVBYZERO;
1362
1363                 quo[0] = left->v.v.Lo32;
1364                 quo[1] = left->v.v.Mid32;
1365                 quo[2] = left->Hi32;
1366                 rem[0] = Div96By32(quo, divisor[0]);
1367
1368                 for (;;) {
1369                         if (rem[0] == 0) {
1370                                 if (scale < 0) {
1371                                         cur_scale = min(9, -scale);
1372                                         goto HaveScale;
1373                                 }
1374                                 break;
1375                         }
1376
1377                         // We have computed a quotient based on the natural scale
1378                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1379                         // remainder, so now we should increase the scale if possible to
1380                         // include more quotient bits.
1381                         //
1382                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1383                         // computing more quotient bits as long as the remainder stays
1384                         // non-zero.  If scaling by that much would cause overflow, we'll
1385                         // drop out of the loop and scale by as much as we can.
1386                         //
1387                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
1388                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
1389                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1390                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
1391                         // is the largest value in quo[1] (when quo[2] == 4) that is
1392                         // assured not to overflow.
1393                         //
1394                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1395                         if (cur_scale == 0) {
1396                                 // No more scaling to be done, but remainder is non-zero.
1397                                 // Round quotient.
1398                                 //
1399                                 utmp = rem[0] << 1;
1400                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1401                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1402                                 RoundUp:
1403                                         if (++quo[0] == 0)
1404                                                 if (++quo[1] == 0)
1405                                                         quo[2]++;
1406                                 }
1407                                 break;
1408                         }
1409
1410                         if (cur_scale == -1)
1411                                 return MONO_DECIMAL_OVERFLOW;
1412
1413                 HaveScale:
1414                         pwr = power10[cur_scale];
1415                         scale += cur_scale;
1416
1417                         if (IncreaseScale(quo, pwr) != 0)
1418                                 return MONO_DECIMAL_OVERFLOW;
1419
1420                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1421                         rem[0] = sdlTmp.u.Hi;
1422
1423                         quo[0] += sdlTmp.u.Lo;
1424                         if (quo[0] < sdlTmp.u.Lo) {
1425                                 if (++quo[1] == 0)
1426                                         quo[2]++;
1427                         }
1428                 } // for (;;)
1429         }
1430         else {
1431                 // Divisor has bits set in the upper 64 bits.
1432                 //
1433                 // Divisor must be fully normalized (shifted so bit 31 of the most
1434                 // significant uint32_t is 1).  Locate the MSB so we know how much to
1435                 // normalize by.  The dividend will be shifted by the same amount so
1436                 // the quotient is not changed.
1437                 //
1438                 if (divisor[2] == 0)
1439                         utmp = divisor[1];
1440                 else
1441                         utmp = divisor[2];
1442
1443                 cur_scale = 0;
1444                 if (!(utmp & 0xFFFF0000)) {
1445                         cur_scale += 16;
1446                         utmp <<= 16;
1447                 }
1448                 if (!(utmp & 0xFF000000)) {
1449                         cur_scale += 8;
1450                         utmp <<= 8;
1451                 }
1452                 if (!(utmp & 0xF0000000)) {
1453                         cur_scale += 4;
1454                         utmp <<= 4;
1455                 }
1456                 if (!(utmp & 0xC0000000)) {
1457                         cur_scale += 2;
1458                         utmp <<= 2;
1459                 }
1460                 if (!(utmp & 0x80000000)) {
1461                         cur_scale++;
1462                         utmp <<= 1;
1463                 }
1464
1465                 // Shift both dividend and divisor left by cur_scale.
1466                 //
1467                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1468                 rem[0] = sdlTmp.u.Lo;
1469                 rem[1] = sdlTmp.u.Hi;
1470                 sdlTmp.u.Lo = left->v.v.Mid32;
1471                 sdlTmp.u.Hi = left->Hi32;
1472                 sdlTmp.int64 <<= cur_scale;
1473                 rem[2] = sdlTmp.u.Hi;
1474                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1475
1476                 sdlDivisor.u.Lo = divisor[0];
1477                 sdlDivisor.u.Hi = divisor[1];
1478                 sdlDivisor.int64 <<= cur_scale;
1479
1480                 if (divisor[2] == 0) {
1481                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1482                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1483                         //
1484                         sdlTmp.u.Lo = rem[2];
1485                         sdlTmp.u.Hi = rem[3];
1486
1487                         quo[2] = 0;
1488                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1489                         quo[0] = Div96By64(rem, sdlDivisor);
1490
1491                         for (;;) {
1492                                 if ((rem[0] | rem[1]) == 0) {
1493                                         if (scale < 0) {
1494                                                 cur_scale = min(9, -scale);
1495                                                 goto HaveScale64;
1496                                         }
1497                                         break;
1498                                 }
1499
1500                                 // Remainder is non-zero.  Scale up quotient and remainder by
1501                                 // powers of 10 so we can compute more significant bits.
1502                                 //
1503                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1504                                 if (cur_scale == 0) {
1505                                         // No more scaling to be done, but remainder is non-zero.
1506                                         // Round quotient.
1507                                         //
1508                                         sdlTmp.u.Lo = rem[0];
1509                                         sdlTmp.u.Hi = rem[1];
1510                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1511                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1512                                                 goto RoundUp;
1513                                         break;
1514                                 }
1515
1516                                 if (cur_scale == -1)
1517                                         return MONO_DECIMAL_OVERFLOW;
1518
1519                         HaveScale64:
1520                                 pwr = power10[cur_scale];
1521                                 scale += cur_scale;
1522
1523                                 if (IncreaseScale(quo, pwr) != 0)
1524                                         return MONO_DECIMAL_OVERFLOW;
1525
1526                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1527                                 IncreaseScale(rem, pwr);
1528                                 utmp = Div96By64(rem, sdlDivisor);
1529                                 quo[0] += utmp;
1530                                 if (quo[0] < utmp)
1531                                         if (++quo[1] == 0)
1532                                                 quo[2]++;
1533
1534                         } // for (;;)
1535                 }
1536                 else {
1537                         // Have a 96-bit divisor in divisor[].
1538                         //
1539                         // Start by finishing the shift left by cur_scale.
1540                         //
1541                         sdlTmp.u.Lo = divisor[1];
1542                         sdlTmp.u.Hi = divisor[2];
1543                         sdlTmp.int64 <<= cur_scale;
1544                         divisor[0] = sdlDivisor.u.Lo;
1545                         divisor[1] = sdlDivisor.u.Hi;
1546                         divisor[2] = sdlTmp.u.Hi;
1547
1548                         // The remainder (currently 96 bits spread over 4 uint32_ts)
1549                         // will be < divisor.
1550                         //
1551                         quo[2] = 0;
1552                         quo[1] = 0;
1553                         quo[0] = Div128By96(rem, divisor);
1554
1555                         for (;;) {
1556                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1557                                         if (scale < 0) {
1558                                                 cur_scale = min(9, -scale);
1559                                                 goto HaveScale96;
1560                                         }
1561                                         break;
1562                                 }
1563
1564                                 // Remainder is non-zero.  Scale up quotient and remainder by
1565                                 // powers of 10 so we can compute more significant bits.
1566                                 //
1567                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1568                                 if (cur_scale == 0) {
1569                                         // No more scaling to be done, but remainder is non-zero.
1570                                         // Round quotient.
1571                                         //
1572                                         if (rem[2] >= 0x80000000)
1573                                                 goto RoundUp;
1574
1575                                         utmp = rem[0] > 0x80000000;
1576                                         utmp1 = rem[1] > 0x80000000;
1577                                         rem[0] <<= 1;
1578                                         rem[1] = (rem[1] << 1) + utmp;
1579                                         rem[2] = (rem[2] << 1) + utmp1;
1580
1581                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1582                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1583                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1584                                               (quo[0] & 1))))
1585                                                 goto RoundUp;
1586                                         break;
1587                                 }
1588
1589                                 if (cur_scale == -1)
1590                                         return MONO_DECIMAL_OVERFLOW;
1591
1592                         HaveScale96:
1593                                 pwr = power10[cur_scale];
1594                                 scale += cur_scale;
1595
1596                                 if (IncreaseScale(quo, pwr) != 0)
1597                                         return MONO_DECIMAL_OVERFLOW;
1598
1599                                 rem[3] = IncreaseScale(rem, pwr);
1600                                 utmp = Div128By96(rem, divisor);
1601                                 quo[0] += utmp;
1602                                 if (quo[0] < utmp)
1603                                         if (++quo[1] == 0)
1604                                                 quo[2]++;
1605
1606                         } // for (;;)
1607                 }
1608         }
1609
1610         // No more remainder.  Try extracting any extra powers of 10 we may have
1611         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1612         // If a division by one of these powers returns a zero remainder, then
1613         // we keep the quotient.  If the remainder is not zero, then we restore
1614         // the previous value.
1615         //
1616         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1617         // we can extract.  We use this as a quick test on whether to try a
1618         // given power.
1619         //
1620         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1621                 quoSave[0] = quo[0];
1622                 quoSave[1] = quo[1];
1623                 quoSave[2] = quo[2];
1624
1625                 if (Div96By32(quoSave, 100000000) == 0) {
1626                         quo[0] = quoSave[0];
1627                         quo[1] = quoSave[1];
1628                         quo[2] = quoSave[2];
1629                         scale -= 8;
1630                 }
1631                 else
1632                         break;
1633         }
1634
1635         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1636                 quoSave[0] = quo[0];
1637                 quoSave[1] = quo[1];
1638                 quoSave[2] = quo[2];
1639
1640                 if (Div96By32(quoSave, 10000) == 0) {
1641                         quo[0] = quoSave[0];
1642                         quo[1] = quoSave[1];
1643                         quo[2] = quoSave[2];
1644                         scale -= 4;
1645                 }
1646         }
1647
1648         if ((quo[0] & 3) == 0 && scale >= 2) {
1649                 quoSave[0] = quo[0];
1650                 quoSave[1] = quo[1];
1651                 quoSave[2] = quo[2];
1652
1653                 if (Div96By32(quoSave, 100) == 0) {
1654                         quo[0] = quoSave[0];
1655                         quo[1] = quoSave[1];
1656                         quo[2] = quoSave[2];
1657                         scale -= 2;
1658                 }
1659         }
1660
1661         if ((quo[0] & 1) == 0 && scale >= 1) {
1662                 quoSave[0] = quo[0];
1663                 quoSave[1] = quo[1];
1664                 quoSave[2] = quo[2];
1665
1666                 if (Div96By32(quoSave, 10) == 0) {
1667                         quo[0] = quoSave[0];
1668                         quo[1] = quoSave[1];
1669                         quo[2] = quoSave[2];
1670                         scale -= 1;
1671                 }
1672         }
1673
1674         result->Hi32 = quo[2];
1675         result->v.v.Mid32 = quo[1];
1676         result->v.v.Lo32 = quo[0];
1677         result->u.u.scale = scale;
1678         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1679         return MONO_DECIMAL_OK;
1680 }
1681
1682 // mono_decimal_absolute - Decimal Absolute Value
1683 static void G_GNUC_UNUSED
1684 mono_decimal_absolute (MonoDecimal *pdecOprd, MonoDecimal *result)
1685 {
1686         COPYDEC(*result, *pdecOprd);
1687         result->u.u.sign &= ~DECIMAL_NEG;
1688         // Microsoft does not set reserved here
1689 }
1690
1691 // mono_decimal_fix - Decimal Fix (chop to integer)
1692 static void
1693 mono_decimal_fix (MonoDecimal *pdecOprd, MonoDecimal *result)
1694 {
1695         DecFixInt(result, pdecOprd);
1696 }
1697
1698 // mono_decimal_round_to_int - Decimal Int (round down to integer)
1699 static void
1700 mono_decimal_round_to_int (MonoDecimal *pdecOprd, MonoDecimal *result)
1701 {
1702         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1703                 // We have chopped off a non-zero amount from a negative value.  Since
1704                 // we round toward -infinity, we must increase the integer result by
1705                 // 1 to make it more negative.  This will never overflow because
1706                 // in order to have a remainder, we must have had a non-zero scale factor.
1707                 // Our scale factor is back to zero now.
1708                 //
1709                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1710                 if (DECIMAL_LO64_GET(*result) == 0)
1711                         result->Hi32++;
1712         }
1713 }
1714
1715 // mono_decimal_negate - Decimal Negate
1716 static void G_GNUC_UNUSED
1717 mono_decimal_negate (MonoDecimal *pdecOprd, MonoDecimal *result)
1718 {
1719         COPYDEC(*result, *pdecOprd);
1720         // Microsoft does not set result->reserved to zero on this case.
1721         result->u.u.sign ^= DECIMAL_NEG;
1722 }
1723
1724 //
1725 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1726 //
1727 static MonoDecimalStatus
1728 mono_decimal_round_result(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1729 {
1730         uint32_t num[3];
1731         uint32_t rem;
1732         uint32_t sticky;
1733         uint32_t pwr;
1734         int scale;
1735
1736         if (cDecimals < 0)
1737                 return MONO_DECIMAL_INVALID_ARGUMENT;
1738
1739         scale = input->u.u.scale - cDecimals;
1740         if (scale > 0) {
1741                 num[0] = input->v.v.Lo32;
1742                 num[1] = input->v.v.Mid32;
1743                 num[2] = input->Hi32;
1744                 result->u.u.sign = input->u.u.sign;
1745                 rem = sticky = 0;
1746
1747                 do {
1748                         sticky |= rem;
1749                         if (scale > POWER10_MAX)
1750                                 pwr = ten_to_nine;
1751                         else
1752                                 pwr = power10[scale];
1753
1754                         rem = Div96By32(num, pwr);
1755                         scale -= 9;
1756                 }while (scale > 0);
1757
1758                 // Now round.  rem has last remainder, sticky has sticky bits.
1759                 // To do IEEE rounding, we add LSB of result to sticky bits so
1760                 // either causes round up if remainder * 2 == last divisor.
1761                 //
1762                 sticky |= num[0] & 1;
1763                 rem = (rem << 1) + (sticky != 0);
1764                 if (pwr < rem &&
1765                     ++num[0] == 0 &&
1766                     ++num[1] == 0
1767                         )
1768                         ++num[2];
1769
1770                 result->v.v.Lo32 = num[0];
1771                 result->v.v.Mid32 = num[1];
1772                 result->Hi32 = num[2];
1773                 result->u.u.scale = cDecimals;
1774                 return MONO_DECIMAL_OK;
1775         }
1776
1777         COPYDEC(*result, *input);
1778         // Odd, the Microsoft source does not set the result->reserved to zero here.
1779         return MONO_DECIMAL_OK;
1780 }
1781
1782 //
1783 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1784 static MonoDecimalStatus
1785 mono_decimal_from_float (float input_f, MonoDecimal* result)
1786 {
1787         int         exp;    // number of bits to left of binary point
1788         int         power;
1789         uint32_t       mant;
1790         double      dbl;
1791         SPLIT64     sdlLo;
1792         SPLIT64     sdlHi;
1793         int         lmax, cur;  // temps used during scale reduction
1794         MonoSingle_float input = { .f = input_f };
1795
1796         // The most we can scale by is 10^28, which is just slightly more
1797         // than 2^93.  So a float with an exponent of -94 could just
1798         // barely reach 0.5, but smaller exponents will always round to zero.
1799         //
1800         if ((exp = input.s.exp - MONO_SINGLE_BIAS) < -94 ) {
1801                 DECIMAL_SETZERO(*result);
1802                 return MONO_DECIMAL_OK;
1803         }
1804
1805         if (exp > 96)
1806                 return MONO_DECIMAL_OVERFLOW;
1807
1808         // Round the input to a 7-digit integer.  The R4 format has
1809         // only 7 digits of precision, and we want to keep garbage digits
1810         // out of the Decimal were making.
1811         //
1812         // Calculate max power of 10 input value could have by multiplying
1813         // the exponent by log10(2).  Using scaled integer multiplcation,
1814         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1815         //
1816         dbl = fabs(input.f);
1817         power = 6 - ((exp * 19728) >> 16);
1818
1819         if (power >= 0) {
1820                 // We have less than 7 digits, scale input up.
1821                 //
1822                 if (power > DECMAX)
1823                         power = DECMAX;
1824
1825                 dbl = dbl * double_power10[power];
1826         } else {
1827                 if (power != -1 || dbl >= 1E7)
1828                         dbl = dbl / fnDblPower10(-power);
1829                 else
1830                         power = 0; // didn't scale it
1831         }
1832
1833         g_assert (dbl < 1E7);
1834         if (dbl < 1E6 && power < DECMAX) {
1835                 dbl *= 10;
1836                 power++;
1837                 g_assert(dbl >= 1E6);
1838         }
1839
1840         // Round to integer
1841         //
1842         mant = (int32_t)dbl;
1843         dbl -= (double)mant;  // difference between input & integer
1844         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1845                 mant++;
1846
1847         if (mant == 0) {
1848                 DECIMAL_SETZERO(*result);
1849                 return MONO_DECIMAL_OK;
1850         }
1851
1852         if (power < 0) {
1853                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1854                 //
1855                 power = -power;
1856                 if (power < 10) {
1857                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1858
1859                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1860                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1861                         DECIMAL_HI32(*result) = 0;
1862                 } else {
1863                         // Have a big power of 10.
1864                         //
1865                         if (power > 18) {
1866                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1867                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1868
1869                                 if (sdlHi.u.Hi != 0)
1870                                         return MONO_DECIMAL_OVERFLOW;
1871                         }
1872                         else {
1873                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1874                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1875                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1876                                 sdlHi.int64 += sdlLo.u.Hi;
1877                                 sdlLo.u.Hi = sdlHi.u.Lo;
1878                                 sdlHi.u.Lo = sdlHi.u.Hi;
1879                         }
1880                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1881                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1882                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1883                 }
1884                 DECIMAL_SCALE(*result) = 0;
1885         } else {
1886                 // Factor out powers of 10 to reduce the scale, if possible.
1887                 // The maximum number we could factor out would be 6.  This
1888                 // comes from the fact we have a 7-digit number, and the
1889                 // MSD must be non-zero -- but the lower 6 digits could be
1890                 // zero.  Note also the scale factor is never negative, so
1891                 // we can't scale by any more than the power we used to
1892                 // get the integer.
1893                 //
1894                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1895                 //
1896                 lmax = min(power, 6);
1897
1898                 // lmax is the largest power of 10 to try, lmax <= 6.
1899                 // We'll try powers 4, 2, and 1 unless they're too big.
1900                 //
1901                 for (cur = 4; cur > 0; cur >>= 1)
1902                 {
1903                         if (cur > lmax)
1904                                 continue;
1905
1906                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1907
1908                         if (sdlLo.u.Hi == 0) {
1909                                 mant = sdlLo.u.Lo;
1910                                 power -= cur;
1911                                 lmax -= cur;
1912                         }
1913                 }
1914                 DECIMAL_LO32(*result) = mant;
1915                 DECIMAL_MID32(*result) = 0;
1916                 DECIMAL_HI32(*result) = 0;
1917                 DECIMAL_SCALE(*result) = power;
1918         }
1919
1920         DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
1921         return MONO_DECIMAL_OK;
1922 }
1923
1924 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1925 static MonoDecimalStatus
1926 mono_decimal_from_double (double input_d, MonoDecimal *result)
1927 {
1928         int         exp;    // number of bits to left of binary point
1929         int         power;  // power-of-10 scale factor
1930         SPLIT64     sdlMant;
1931         SPLIT64     sdlLo;
1932         double      dbl;
1933         int         lmax, cur;  // temps used during scale reduction
1934         uint32_t       pwr_cur;
1935         uint32_t       quo;
1936         MonoDouble_double input = { .d = input_d };
1937
1938         // The most we can scale by is 10^28, which is just slightly more
1939         // than 2^93.  So a float with an exponent of -94 could just
1940         // barely reach 0.5, but smaller exponents will always round to zero.
1941         //
1942         if ((exp = input.s.exp - MONO_DOUBLE_BIAS) < -94) {
1943                 DECIMAL_SETZERO(*result);
1944                 return MONO_DECIMAL_OK;
1945         }
1946
1947         if (exp > 96)
1948                 return MONO_DECIMAL_OVERFLOW;
1949
1950         // Round the input to a 15-digit integer.  The R8 format has
1951         // only 15 digits of precision, and we want to keep garbage digits
1952         // out of the Decimal were making.
1953         //
1954         // Calculate max power of 10 input value could have by multiplying
1955         // the exponent by log10(2).  Using scaled integer multiplcation,
1956         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1957         //
1958         dbl = fabs(input.d);
1959         power = 14 - ((exp * 19728) >> 16);
1960
1961         if (power >= 0) {
1962                 // We have less than 15 digits, scale input up.
1963                 //
1964                 if (power > DECMAX)
1965                         power = DECMAX;
1966
1967                 dbl = dbl * double_power10[power];
1968         } else {
1969                 if (power != -1 || dbl >= 1E15)
1970                         dbl = dbl / fnDblPower10(-power);
1971                 else
1972                         power = 0; // didn't scale it
1973         }
1974
1975         g_assert (dbl < 1E15);
1976         if (dbl < 1E14 && power < DECMAX) {
1977                 dbl *= 10;
1978                 power++;
1979                 g_assert(dbl >= 1E14);
1980         }
1981
1982         // Round to int64
1983         //
1984         sdlMant.int64 = (int64_t)dbl;
1985         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
1986         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
1987                 sdlMant.int64++;
1988
1989         if (sdlMant.int64 == 0) {
1990                 DECIMAL_SETZERO(*result);
1991                 return MONO_DECIMAL_OK;
1992         }
1993
1994         if (power < 0) {
1995                 // Add -power factors of 10, -power <= (29 - 15) = 14.
1996                 //
1997                 power = -power;
1998                 if (power < 10) {
1999                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2000                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2001                         sdlMant.int64 += sdlLo.u.Hi;
2002                         sdlLo.u.Hi = sdlMant.u.Lo;
2003                         sdlMant.u.Lo = sdlMant.u.Hi;
2004                 }
2005                 else {
2006                         // Have a big power of 10.
2007                         //
2008                         g_assert(power <= 14);
2009                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2010
2011                         if (sdlMant.u.Hi != 0)
2012                                 return MONO_DECIMAL_OVERFLOW;
2013                 }
2014                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2015                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2016                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2017                 DECIMAL_SCALE(*result) = 0;
2018         }
2019         else {
2020                 // Factor out powers of 10 to reduce the scale, if possible.
2021                 // The maximum number we could factor out would be 14.  This
2022                 // comes from the fact we have a 15-digit number, and the
2023                 // MSD must be non-zero -- but the lower 14 digits could be
2024                 // zero.  Note also the scale factor is never negative, so
2025                 // we can't scale by any more than the power we used to
2026                 // get the integer.
2027                 //
2028                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2029                 //
2030                 lmax = min(power, 14);
2031
2032                 // lmax is the largest power of 10 to try, lmax <= 14.
2033                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2034                 //
2035                 for (cur = 8; cur > 0; cur >>= 1)
2036                 {
2037                         if (cur > lmax)
2038                                 continue;
2039
2040                         pwr_cur = (uint32_t)long_power10[cur];
2041
2042                         if (sdlMant.u.Hi >= pwr_cur) {
2043                                 // Overflow if we try to divide in one step.
2044                                 //
2045                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2046                                 quo = sdlLo.u.Lo;
2047                                 sdlLo.u.Lo = sdlMant.u.Lo;
2048                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2049                         }
2050                         else {
2051                                 quo = 0;
2052                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2053                         }
2054
2055                         if (sdlLo.u.Hi == 0) {
2056                                 sdlMant.u.Hi = quo;
2057                                 sdlMant.u.Lo = sdlLo.u.Lo;
2058                                 power -= cur;
2059                                 lmax -= cur;
2060                         }
2061                 }
2062
2063                 DECIMAL_HI32(*result) = 0;
2064                 DECIMAL_SCALE(*result) = power;
2065                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2066                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2067         }
2068
2069         DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
2070         return MONO_DECIMAL_OK;
2071 }
2072
2073 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2074 static MonoDecimalStatus
2075 mono_decimal_to_double_result(MonoDecimal *input, double *result)
2076 {
2077         SPLIT64  tmp;
2078         double   dbl;
2079
2080         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2081                 return MONO_DECIMAL_INVALID_ARGUMENT;
2082
2083         tmp.u.Lo = DECIMAL_LO32(*input);
2084         tmp.u.Hi = DECIMAL_MID32(*input);
2085
2086         if ((int32_t)DECIMAL_MID32(*input) < 0)
2087                 dbl = (ds2to64.d + (double)(int64_t)tmp.int64 +
2088                        (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2089         else
2090                 dbl = ((double)(int64_t)tmp.int64 +
2091                        (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input));
2092
2093         if (DECIMAL_SIGN(*input))
2094                 dbl = -dbl;
2095
2096         *result = dbl;
2097         return MONO_DECIMAL_OK;
2098 }
2099
2100 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2101 static MonoDecimalStatus
2102 mono_decimal_to_float_result(MonoDecimal *input, float *result)
2103 {
2104         double   dbl;
2105
2106         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2107                 return MONO_DECIMAL_INVALID_ARGUMENT;
2108
2109         // Can't overflow; no errors possible.
2110         //
2111         mono_decimal_to_double_result(input, &dbl);
2112         *result = (float)dbl;
2113         return MONO_DECIMAL_OK;
2114 }
2115
2116 static void
2117 DecShiftLeft(MonoDecimal* value)
2118 {
2119         unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2120     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2121     g_assert(value != NULL);
2122
2123     DECIMAL_LO32(*value) <<= 1;
2124     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2125     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2126 }
2127
2128 static int
2129 D32AddCarry(uint32_t* value, uint32_t i)
2130 {
2131     uint32_t v = *value;
2132     uint32_t sum = v + i;
2133     *value = sum;
2134     return sum < v || sum < i? 1: 0;
2135 }
2136
2137 static void
2138 DecAdd(MonoDecimal *value, MonoDecimal* d)
2139 {
2140         g_assert(value != NULL && d != NULL);
2141
2142         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2143                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2144                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2145                 }
2146         }
2147         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2148                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2149         }
2150         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2151 }
2152
2153 static void
2154 DecMul10(MonoDecimal* value)
2155 {
2156         MonoDecimal d = *value;
2157         g_assert (value != NULL);
2158
2159         DecShiftLeft(value);
2160         DecShiftLeft(value);
2161         DecAdd(value, &d);
2162         DecShiftLeft(value);
2163 }
2164
2165 static void
2166 DecAddInt32(MonoDecimal* value, unsigned int i)
2167 {
2168         g_assert(value != NULL);
2169
2170         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2171                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2172                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2173                 }
2174         }
2175 }
2176
2177 MonoDecimalCompareResult
2178 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2179 {
2180         uint32_t   left_sign;
2181         uint32_t   right_sign;
2182         MonoDecimal result;
2183
2184         result.Hi32 = 0;        // Just to shut up the compiler
2185
2186         // First check signs and whether either are zero.  If both are
2187         // non-zero and of the same sign, just use subtraction to compare.
2188         //
2189         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2190         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2191         if (left_sign != 0)
2192                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2193
2194         if (right_sign != 0)
2195                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2196
2197         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2198         // operand is +, 0, or -.
2199         //
2200         if (left_sign == right_sign) {
2201                 if (left_sign == 0)    // both are zero
2202                         return MONO_DECIMAL_CMP_EQ; // return equal
2203
2204                 DecAddSub(left, right, &result, DECIMAL_NEG);
2205                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2206                         return MONO_DECIMAL_CMP_EQ;
2207                 if (result.u.u.sign & DECIMAL_NEG)
2208                         return MONO_DECIMAL_CMP_LT;
2209                 return MONO_DECIMAL_CMP_GT;
2210         }
2211
2212         //
2213         // Signs are different.  Use signed byte comparison
2214         //
2215         if ((signed char)left_sign > (signed char)right_sign)
2216                 return MONO_DECIMAL_CMP_GT;
2217         return MONO_DECIMAL_CMP_LT;
2218 }
2219
2220 void
2221 mono_decimal_init_single (MonoDecimal *_this, float value)
2222 {
2223         if (mono_decimal_from_float (value, _this) == MONO_DECIMAL_OVERFLOW) {
2224                 mono_set_pending_exception (mono_get_exception_overflow ());
2225                 return;
2226         }
2227         _this->reserved = 0;
2228 }
2229
2230 void
2231 mono_decimal_init_double (MonoDecimal *_this, double value)
2232 {
2233         if (mono_decimal_from_double (value, _this) == MONO_DECIMAL_OVERFLOW) {
2234                 mono_set_pending_exception (mono_get_exception_overflow ());
2235                 return;
2236         }
2237         _this->reserved = 0;
2238 }
2239
2240 void
2241 mono_decimal_floor (MonoDecimal *d)
2242 {
2243         MonoDecimal decRes;
2244
2245         mono_decimal_round_to_int(d, &decRes);
2246
2247         // copy decRes into d
2248         COPYDEC(*d, decRes);
2249         d->reserved = 0;
2250         FC_GC_POLL ();
2251 }
2252
2253 int32_t
2254 mono_decimal_get_hash_code (MonoDecimal *d)
2255 {
2256         double dbl;
2257
2258         if (mono_decimal_to_double_result(d, &dbl) != MONO_DECIMAL_OK)
2259                 return 0;
2260
2261         if (dbl == 0.0) {
2262                 // Ensure 0 and -0 have the same hash code
2263                 return 0;
2264         }
2265         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2266         //
2267         // For example these two numerically equal decimals with different internal representations produce
2268         // slightly different results when converted to double:
2269         //
2270         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2271         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2272         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 });
2273         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2274         //
2275         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2276
2277 }
2278
2279 void
2280 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2281 {
2282         MonoDecimal decRes;
2283
2284         MonoDecimalStatus status = mono_decimal_multiply_result(d1, d2, &decRes);
2285         if (status != MONO_DECIMAL_OK) {
2286                 mono_set_pending_exception (mono_get_exception_overflow ());
2287                 return;
2288         }
2289
2290         COPYDEC(*d1, decRes);
2291         d1->reserved = 0;
2292
2293         FC_GC_POLL ();
2294 }
2295
2296 void
2297 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2298 {
2299         MonoDecimal decRes;
2300
2301         // GC is only triggered for throwing, no need to protect result
2302         if (decimals < 0 || decimals > 28) {
2303                 mono_set_pending_exception (mono_get_exception_argument_out_of_range ("d"));
2304                 return;
2305         }
2306
2307         mono_decimal_round_result(d, decimals, &decRes);
2308
2309         // copy decRes into d
2310         COPYDEC(*d, decRes);
2311         d->reserved = 0;
2312
2313         FC_GC_POLL();
2314 }
2315
2316 void
2317 mono_decimal_tocurrency (MonoDecimal *decimal)
2318 {
2319         // TODO
2320 }
2321
2322 double
2323 mono_decimal_to_double (MonoDecimal d)
2324 {
2325         double result = 0.0;
2326         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2327         mono_decimal_to_double_result(&d, &result);
2328         return result;
2329 }
2330
2331 int32_t
2332 mono_decimal_to_int32 (MonoDecimal d)
2333 {
2334         MonoDecimal result;
2335
2336         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2337         mono_decimal_round_result(&d, 0, &result);
2338
2339         if (DECIMAL_SCALE(result) != 0) {
2340                 d = result;
2341                 mono_decimal_fix (&d, &result);
2342         }
2343
2344         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2345                 int32_t i = DECIMAL_LO32(result);
2346                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2347                         if (i >= 0)
2348                                 return i;
2349                 } else {
2350                         i = -i;
2351                         if (i <= 0)
2352                                 return i;
2353                 }
2354         }
2355
2356         mono_set_pending_exception (mono_get_exception_overflow ());
2357         return 0;
2358 }
2359
2360 float
2361 mono_decimal_to_float (MonoDecimal d)
2362 {
2363         float result = 0.0f;
2364         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2365         mono_decimal_to_float_result(&d, &result);
2366         return result;
2367 }
2368
2369 void
2370 mono_decimal_truncate (MonoDecimal *d)
2371 {
2372         MonoDecimal decRes;
2373
2374         mono_decimal_fix(d, &decRes);
2375
2376         // copy decRes into d
2377         COPYDEC(*d, decRes);
2378         d->reserved = 0;
2379         FC_GC_POLL();
2380 }
2381
2382 void
2383 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2384 {
2385         MonoDecimal result, decTmp;
2386         MonoDecimal *pdecTmp, *leftOriginal;
2387         uint32_t    num[6], pwr;
2388         int         scale, hi_prod, cur;
2389         SPLIT64     sdlTmp;
2390
2391         g_assert(sign == 0 || sign == DECIMAL_NEG);
2392
2393         leftOriginal = left;
2394
2395         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2396
2397         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2398                 // Scale factors are equal, no alignment necessary.
2399                 //
2400                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2401
2402         AlignedAdd:
2403                 if (sign) {
2404                         // Signs differ - subtract
2405                         //
2406                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2407                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2408
2409                         // Propagate carry
2410                         //
2411                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2412                                 DECIMAL_HI32(result)--;
2413                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2414                                         goto SignFlip;
2415                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2416                                 // Got negative result.  Flip its sign.
2417                                 //
2418                         SignFlip:
2419                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2420                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2421                                 if (DECIMAL_LO64_GET(result) == 0)
2422                                         DECIMAL_HI32(result)++;
2423                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2424                         }
2425
2426                 } else {
2427                         // Signs are the same - add
2428                         //
2429                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2430                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2431
2432                         // Propagate carry
2433                         //
2434                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2435                                 DECIMAL_HI32(result)++;
2436                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2437                                         goto AlignedScale;
2438                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2439                         AlignedScale:
2440                                 // The addition carried above 96 bits.  Divide the result by 10,
2441                                 // dropping the scale factor.
2442                                 //
2443                                 if (DECIMAL_SCALE(result) == 0) {
2444                                         mono_set_pending_exception (mono_get_exception_overflow ());
2445                                         return;
2446                                 }
2447                                 DECIMAL_SCALE(result)--;
2448
2449                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2450                                 sdlTmp.u.Hi = 1;
2451                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2452                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2453
2454                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2455                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2456                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2457
2458                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2459                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2460                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2461
2462                                 // See if we need to round up.
2463                                 //
2464                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2465                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2466                                         if (DECIMAL_LO64_GET(result) == 0)
2467                                                 DECIMAL_HI32(result)++;
2468                                 }
2469                         }
2470                 }
2471         } else {
2472                 // Scale factors are not equal.  Assume that a larger scale
2473                 // factor (more decimal places) is likely to mean that number
2474                 // is smaller.  Start by guessing that the right operand has
2475                 // the larger scale factor.  The result will have the larger
2476                 // scale factor.
2477                 //
2478                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2479                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2480                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2481
2482                 if (scale < 0) {
2483                         // Guessed scale factor wrong. Swap operands.
2484                         //
2485                         scale = -scale;
2486                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2487                         DECIMAL_SIGN(result) ^= sign;
2488                         pdecTmp = right;
2489                         right = left;
2490                         left = pdecTmp;
2491                 }
2492
2493                 // *left will need to be multiplied by 10^scale so
2494                 // it will have the same scale as *right.  We could be
2495                 // extending it to up to 192 bits of precision.
2496                 //
2497                 if (scale <= POWER10_MAX) {
2498                         // Scaling won't make it larger than 4 uint32_ts
2499                         //
2500                         pwr = power10[scale];
2501                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2502                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2503                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2504                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2505                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2506                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2507                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2508                         if (sdlTmp.u.Hi == 0) {
2509                                 // Result fits in 96 bits.  Use standard aligned add.
2510                                 //
2511                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2512                                 left = &decTmp;
2513                                 goto AlignedAdd;
2514                         }
2515                         num[0] = DECIMAL_LO32(decTmp);
2516                         num[1] = DECIMAL_MID32(decTmp);
2517                         num[2] = sdlTmp.u.Lo;
2518                         num[3] = sdlTmp.u.Hi;
2519                         hi_prod = 3;
2520                 } else {
2521                         // Have to scale by a bunch.  Move the number to a buffer
2522                         // where it has room to grow as it's scaled.
2523                         //
2524                         num[0] = DECIMAL_LO32(*left);
2525                         num[1] = DECIMAL_MID32(*left);
2526                         num[2] = DECIMAL_HI32(*left);
2527                         hi_prod = 2;
2528
2529                         // Scan for zeros in the upper words.
2530                         //
2531                         if (num[2] == 0) {
2532                                 hi_prod = 1;
2533                                 if (num[1] == 0) {
2534                                         hi_prod = 0;
2535                                         if (num[0] == 0) {
2536                                                 // Left arg is zero, return right.
2537                                                 //
2538                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2539                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2540                                                 DECIMAL_SIGN(result) ^= sign;
2541                                                 goto RetDec;
2542                                         }
2543                                 }
2544                         }
2545
2546                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2547                         // with index of highest non-zero uint32_t.
2548                         //
2549                         for (; scale > 0; scale -= POWER10_MAX) {
2550                                 if (scale > POWER10_MAX)
2551                                         pwr = ten_to_nine;
2552                                 else
2553                                         pwr = power10[scale];
2554
2555                                 sdlTmp.u.Hi = 0;
2556                                 for (cur = 0; cur <= hi_prod; cur++) {
2557                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2558                                         num[cur] = sdlTmp.u.Lo;
2559                                 }
2560
2561                                 if (sdlTmp.u.Hi != 0)
2562                                         // We're extending the result by another uint32_t.
2563                                         num[++hi_prod] = sdlTmp.u.Hi;
2564                         }
2565                 }
2566
2567                 // Scaling complete, do the add.  Could be subtract if signs differ.
2568                 //
2569                 sdlTmp.u.Lo = num[0];
2570                 sdlTmp.u.Hi = num[1];
2571
2572                 if (sign) {
2573                         // Signs differ, subtract.
2574                         //
2575                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2576                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2577
2578                         // Propagate carry
2579                         //
2580                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2581                                 DECIMAL_HI32(result)--;
2582                                 if (DECIMAL_HI32(result) >= num[2])
2583                                         goto LongSub;
2584                         } else if (DECIMAL_HI32(result) > num[2]) {
2585                         LongSub:
2586                                 // If num has more than 96 bits of precision, then we need to
2587                                 // carry the subtraction into the higher bits.  If it doesn't,
2588                                 // then we subtracted in the wrong order and have to flip the
2589                                 // sign of the result.
2590                                 //
2591                                 if (hi_prod <= 2)
2592                                         goto SignFlip;
2593
2594                                 cur = 3;
2595                                 while(num[cur++]-- == 0);
2596                                 if (num[hi_prod] == 0)
2597                                         hi_prod--;
2598                         }
2599                 } else {
2600                         // Signs the same, add.
2601                         //
2602                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2603                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2604
2605                         // Propagate carry
2606                         //
2607                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2608                                 DECIMAL_HI32(result)++;
2609                                 if (DECIMAL_HI32(result) <= num[2])
2610                                         goto LongAdd;
2611                         } else if (DECIMAL_HI32(result) < num[2]) {
2612                         LongAdd:
2613                                 // Had a carry above 96 bits.
2614                                 //
2615                                 cur = 3;
2616                                 do {
2617                                         if (hi_prod < cur) {
2618                                                 num[cur] = 1;
2619                                                 hi_prod = cur;
2620                                                 break;
2621                                         }
2622                                 }while (++num[cur++] == 0);
2623                         }
2624                 }
2625
2626                 if (hi_prod > 2) {
2627                         num[0] = DECIMAL_LO32(result);
2628                         num[1] = DECIMAL_MID32(result);
2629                         num[2] = DECIMAL_HI32(result);
2630                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2631                         if (DECIMAL_SCALE(result) == (uint8_t)-1) {
2632                                 mono_set_pending_exception (mono_get_exception_overflow ());
2633                                 return;
2634                         }
2635
2636                         DECIMAL_LO32(result) = num[0];
2637                         DECIMAL_MID32(result) = num[1];
2638                         DECIMAL_HI32(result) = num[2];
2639                 }
2640         }
2641
2642 RetDec:
2643         left = leftOriginal;
2644         COPYDEC(*left, result);
2645         left->reserved = 0;
2646 }
2647
2648 void
2649 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2650 {
2651         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2652         uint32_t pwr, tmp, tmp1;
2653         SPLIT64  sdlTmp, sdlDivisor;
2654         int      scale, cur_scale;
2655         gboolean unscale;
2656
2657         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2658         unscale = FALSE;
2659         divisor[0] = DECIMAL_LO32(*right);
2660         divisor[1] = DECIMAL_MID32(*right);
2661         divisor[2] = DECIMAL_HI32(*right);
2662
2663         if (divisor[1] == 0 && divisor[2] == 0) {
2664                 // Divisor is only 32 bits.  Easy divide.
2665                 //
2666                 if (divisor[0] == 0) {
2667                         mono_set_pending_exception (mono_get_exception_divide_by_zero ());
2668                         return;
2669                 }
2670
2671                 quo[0] = DECIMAL_LO32(*left);
2672                 quo[1] = DECIMAL_MID32(*left);
2673                 quo[2] = DECIMAL_HI32(*left);
2674                 rem[0] = Div96By32(quo, divisor[0]);
2675
2676                 for (;;) {
2677                         if (rem[0] == 0) {
2678                                 if (scale < 0) {
2679                                         cur_scale = min(9, -scale);
2680                                         goto HaveScale;
2681                                 }
2682                                 break;
2683                         }
2684                         // We need to unscale if and only if we have a non-zero remainder
2685                         unscale = TRUE;
2686
2687                         // We have computed a quotient based on the natural scale
2688                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
2689                         // remainder, so now we should increase the scale if possible to
2690                         // include more quotient bits.
2691                         //
2692                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
2693                         // computing more quotient bits as long as the remainder stays
2694                         // non-zero.  If scaling by that much would cause overflow, we'll
2695                         // drop out of the loop and scale by as much as we can.
2696                         //
2697                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9
2698                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and
2699                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
2700                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888
2701                         // is the largest value in quo[1] (when quo[2] == 4) that is
2702                         // assured not to overflow.
2703                         //
2704                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2705                         if (cur_scale == 0) {
2706                                 // No more scaling to be done, but remainder is non-zero.
2707                                 // Round quotient.
2708                                 //
2709                                 tmp = rem[0] << 1;
2710                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2711                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2712                                 RoundUp:
2713                                         if (!Add32To96(quo, 1)) {
2714                                                 if (scale == 0) {
2715                                                         mono_set_pending_exception (mono_get_exception_overflow ());
2716                                                         return;
2717                                                 }
2718                                                 scale--;
2719                                                 OverflowUnscale(quo, TRUE);
2720                                                 break;
2721                                         }
2722                                 }
2723                                 break;
2724                         }
2725
2726                         if (cur_scale < 0) {
2727                                 mono_set_pending_exception (mono_get_exception_overflow ());
2728                                 return;
2729                         }
2730
2731                 HaveScale:
2732                         pwr = power10[cur_scale];
2733                         scale += cur_scale;
2734
2735                         if (IncreaseScale(quo, pwr) != 0) {
2736                                 mono_set_pending_exception (mono_get_exception_overflow ());
2737                                 return;
2738                         }
2739
2740                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2741                         rem[0] = sdlTmp.u.Hi;
2742
2743                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2744                                 if (scale == 0) {
2745                                         mono_set_pending_exception (mono_get_exception_overflow ());
2746                                         return;
2747                                 }
2748                                 scale--;
2749                                 OverflowUnscale(quo, (rem[0] != 0));
2750                                 break;
2751                         }
2752                 } // for (;;)
2753         } else {
2754                 // Divisor has bits set in the upper 64 bits.
2755                 //
2756                 // Divisor must be fully normalized (shifted so bit 31 of the most
2757                 // significant uint32_t is 1).  Locate the MSB so we know how much to
2758                 // normalize by.  The dividend will be shifted by the same amount so
2759                 // the quotient is not changed.
2760                 //
2761                 if (divisor[2] == 0)
2762                         tmp = divisor[1];
2763                 else
2764                         tmp = divisor[2];
2765
2766                 cur_scale = 0;
2767                 if (!(tmp & 0xFFFF0000)) {
2768                         cur_scale += 16;
2769                         tmp <<= 16;
2770                 }
2771                 if (!(tmp & 0xFF000000)) {
2772                         cur_scale += 8;
2773                         tmp <<= 8;
2774                 }
2775                 if (!(tmp & 0xF0000000)) {
2776                         cur_scale += 4;
2777                         tmp <<= 4;
2778                 }
2779                 if (!(tmp & 0xC0000000)) {
2780                         cur_scale += 2;
2781                         tmp <<= 2;
2782                 }
2783                 if (!(tmp & 0x80000000)) {
2784                         cur_scale++;
2785                         tmp <<= 1;
2786                 }
2787
2788                 // Shift both dividend and divisor left by cur_scale.
2789                 //
2790                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2791                 rem[0] = sdlTmp.u.Lo;
2792                 rem[1] = sdlTmp.u.Hi;
2793                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2794                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2795                 sdlTmp.int64 <<= cur_scale;
2796                 rem[2] = sdlTmp.u.Hi;
2797                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2798
2799                 sdlDivisor.u.Lo = divisor[0];
2800                 sdlDivisor.u.Hi = divisor[1];
2801                 sdlDivisor.int64 <<= cur_scale;
2802
2803                 if (divisor[2] == 0) {
2804                         // Have a 64-bit divisor in sdlDivisor.  The remainder
2805                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2806                         //
2807                         sdlTmp.u.Lo = rem[2];
2808                         sdlTmp.u.Hi = rem[3];
2809
2810                         quo[2] = 0;
2811                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2812                         quo[0] = Div96By64(rem, sdlDivisor);
2813
2814                         for (;;) {
2815                                 if ((rem[0] | rem[1]) == 0) {
2816                                         if (scale < 0) {
2817                                                 cur_scale = min(9, -scale);
2818                                                 goto HaveScale64;
2819                                         }
2820                                         break;
2821                                 }
2822
2823                                 // We need to unscale if and only if we have a non-zero remainder
2824                                 unscale = TRUE;
2825
2826                                 // Remainder is non-zero.  Scale up quotient and remainder by
2827                                 // powers of 10 so we can compute more significant bits.
2828                                 //
2829                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2830                                 if (cur_scale == 0) {
2831                                         // No more scaling to be done, but remainder is non-zero.
2832                                         // Round quotient.
2833                                         //
2834                                         sdlTmp.u.Lo = rem[0];
2835                                         sdlTmp.u.Hi = rem[1];
2836                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2837                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2838                                                 goto RoundUp;
2839                                         break;
2840                                 }
2841
2842                                 if (cur_scale < 0) {
2843                                         mono_set_pending_exception (mono_get_exception_overflow ());
2844                                         return;
2845                                 }
2846
2847                         HaveScale64:
2848                                 pwr = power10[cur_scale];
2849                                 scale += cur_scale;
2850
2851                                 if (IncreaseScale(quo, pwr) != 0) {
2852                                         mono_set_pending_exception (mono_get_exception_overflow ());
2853                                         return;
2854                                 }
2855
2856                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2857                                 IncreaseScale(rem, pwr);
2858                                 tmp = Div96By64(rem, sdlDivisor);
2859                                 if (!Add32To96(quo, tmp)) {
2860                                         if (scale == 0) {
2861                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2862                                                 return;
2863                                         }
2864                                         scale--;
2865                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2866                                         break;
2867                                 }
2868
2869                         } // for (;;)
2870                 } else {
2871                         // Have a 96-bit divisor in divisor[].
2872                         //
2873                         // Start by finishing the shift left by cur_scale.
2874                         //
2875                         sdlTmp.u.Lo = divisor[1];
2876                         sdlTmp.u.Hi = divisor[2];
2877                         sdlTmp.int64 <<= cur_scale;
2878                         divisor[0] = sdlDivisor.u.Lo;
2879                         divisor[1] = sdlDivisor.u.Hi;
2880                         divisor[2] = sdlTmp.u.Hi;
2881
2882                         // The remainder (currently 96 bits spread over 4 uint32_ts)
2883                         // will be < divisor.
2884                         //
2885                         quo[2] = 0;
2886                         quo[1] = 0;
2887                         quo[0] = Div128By96(rem, divisor);
2888
2889                         for (;;) {
2890                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2891                                         if (scale < 0) {
2892                                                 cur_scale = min(9, -scale);
2893                                                 goto HaveScale96;
2894                                         }
2895                                         break;
2896                                 }
2897
2898                                 // We need to unscale if and only if we have a non-zero remainder
2899                                 unscale = TRUE;
2900
2901                                 // Remainder is non-zero.  Scale up quotient and remainder by
2902                                 // powers of 10 so we can compute more significant bits.
2903                                 //
2904                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2905                                 if (cur_scale == 0) {
2906                                         // No more scaling to be done, but remainder is non-zero.
2907                                         // Round quotient.
2908                                         //
2909                                         if (rem[2] >= 0x80000000)
2910                                                 goto RoundUp;
2911
2912                                         tmp = rem[0] > 0x80000000;
2913                                         tmp1 = rem[1] > 0x80000000;
2914                                         rem[0] <<= 1;
2915                                         rem[1] = (rem[1] << 1) + tmp;
2916                                         rem[2] = (rem[2] << 1) + tmp1;
2917
2918                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2919                                                 goto RoundUp;
2920                                         break;
2921                                 }
2922
2923                                 if (cur_scale < 0) {
2924                                         mono_set_pending_exception (mono_get_exception_overflow ());
2925                                         return;
2926                                 }
2927
2928                         HaveScale96:
2929                                 pwr = power10[cur_scale];
2930                                 scale += cur_scale;
2931
2932                                 if (IncreaseScale(quo, pwr) != 0) {
2933                                         mono_set_pending_exception (mono_get_exception_overflow ());
2934                                         return;
2935                                 }
2936
2937                                 rem[3] = IncreaseScale(rem, pwr);
2938                                 tmp = Div128By96(rem, divisor);
2939                                 if (!Add32To96(quo, tmp)) {
2940                                         if (scale == 0) {
2941                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2942                                                 return;
2943                                         }
2944
2945                                         scale--;
2946                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2947                                         break;
2948                                 }
2949
2950                         } // for (;;)
2951                 }
2952         }
2953
2954         // We need to unscale if and only if we have a non-zero remainder
2955         if (unscale) {
2956                 // Try extracting any extra powers of 10 we may have
2957                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2958                 // If a division by one of these powers returns a zero remainder, then
2959                 // we keep the quotient.  If the remainder is not zero, then we restore
2960                 // the previous value.
2961                 //
2962                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2963                 // we can extract.  We use this as a quick test on whether to try a
2964                 // given power.
2965                 //
2966                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2967                         quo_save[0] = quo[0];
2968                         quo_save[1] = quo[1];
2969                         quo_save[2] = quo[2];
2970
2971                         if (Div96By32(quo_save, 100000000) == 0) {
2972                                 quo[0] = quo_save[0];
2973                                 quo[1] = quo_save[1];
2974                                 quo[2] = quo_save[2];
2975                                 scale -= 8;
2976                         } else
2977                                 break;
2978                 }
2979
2980                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
2981                         quo_save[0] = quo[0];
2982                         quo_save[1] = quo[1];
2983                         quo_save[2] = quo[2];
2984
2985                         if (Div96By32(quo_save, 10000) == 0) {
2986                                 quo[0] = quo_save[0];
2987                                 quo[1] = quo_save[1];
2988                                 quo[2] = quo_save[2];
2989                                 scale -= 4;
2990                         }
2991                 }
2992
2993                 if ((quo[0] & 3) == 0 && scale >= 2) {
2994                         quo_save[0] = quo[0];
2995                         quo_save[1] = quo[1];
2996                         quo_save[2] = quo[2];
2997
2998                         if (Div96By32(quo_save, 100) == 0) {
2999                                 quo[0] = quo_save[0];
3000                                 quo[1] = quo_save[1];
3001                                 quo[2] = quo_save[2];
3002                                 scale -= 2;
3003                         }
3004                 }
3005
3006                 if ((quo[0] & 1) == 0 && scale >= 1) {
3007                         quo_save[0] = quo[0];
3008                         quo_save[1] = quo[1];
3009                         quo_save[2] = quo[2];
3010
3011                         if (Div96By32(quo_save, 10) == 0) {
3012                                 quo[0] = quo_save[0];
3013                                 quo[1] = quo_save[1];
3014                                 quo[2] = quo_save[2];
3015                                 scale -= 1;
3016                         }
3017                 }
3018         }
3019
3020         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3021         DECIMAL_HI32(*left) = quo[2];
3022         DECIMAL_MID32(*left) = quo[1];
3023         DECIMAL_LO32(*left) = quo[0];
3024         DECIMAL_SCALE(*left) = (uint8_t)scale;
3025         left->reserved = 0;
3026
3027 }
3028
3029 #define DECIMAL_PRECISION 29
3030
3031 int
3032 mono_decimal_from_number (void *from, MonoDecimal *target)
3033 {
3034         MonoNumber *number = (MonoNumber *) from;
3035         uint16_t* p = number->digits;
3036         MonoDecimal d;
3037         int e = number->scale;
3038         g_assert(number != NULL);
3039         g_assert(target != NULL);
3040
3041         d.reserved = 0;
3042         DECIMAL_SIGNSCALE(d) = 0;
3043         DECIMAL_HI32(d) = 0;
3044         DECIMAL_LO32(d) = 0;
3045         DECIMAL_MID32(d) = 0;
3046         g_assert(p != NULL);
3047         if (!*p) {
3048                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3049                 // the scale to 0 if the scale was previously positive
3050                 if (e > 0) {
3051                         e = 0;
3052                 }
3053         } else {
3054                 if (e > DECIMAL_PRECISION) return 0;
3055                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3056                         DecMul10(&d);
3057                         if (*p)
3058                                 DecAddInt32(&d, *p++ - '0');
3059                         e--;
3060                 }
3061                 if (*p++ >= '5') {
3062                         gboolean round = TRUE;
3063                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3064                                 // For digits > 5 we will be roundinp up anyway.
3065                                 int count = 20; // Look at the next 20 digits to check to round
3066                                 while (*p == '0' && count != 0) {
3067                                         p++;
3068                                         count--;
3069                                 }
3070                                 if (*p == '\0' || count == 0)
3071                                         round = FALSE;// Do nothing
3072                         }
3073
3074                         if (round) {
3075                                 DecAddInt32(&d, 1);
3076                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3077                                         DECIMAL_HI32(d) = 0x19999999;
3078                                         DECIMAL_MID32(d) = 0x99999999;
3079                                         DECIMAL_LO32(d) = 0x9999999A;
3080                                         e++;
3081                                 }
3082                         }
3083                 }
3084         }
3085         if (e > 0)
3086                 return 0;
3087         if (e <= -DECIMAL_PRECISION) {
3088                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3089                 // This should only happen for actual zeros or very small numbers that round to zero.
3090                 DECIMAL_SIGNSCALE(d) = 0;
3091                 DECIMAL_HI32(d) = 0;
3092                 DECIMAL_LO32(d) = 0;
3093                 DECIMAL_MID32(d) = 0;
3094                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3095         } else {
3096                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3097         }
3098
3099         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3100         *target = d;
3101         return 1;
3102 }
3103
3104
3105 #endif