Merge pull request #853 from echampet/onclick
[mono.git] / mono / metadata / decimal-ms.c
1 //
2 // Copyright (c) Microsoft. All rights reserved.
3 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
4 //
5 // Copyright 2015 Xamarin Inc
6 //
7 // File: decimal.c
8 //
9 // Ported from C++ to C and adjusted to Mono runtime
10 //
11 // Pending:
12 //   DoToCurrency (they look like new methods we do not have)
13 //
14 #ifndef DISABLE_DECIMAL
15 #include "config.h"
16 #include <stdint.h>
17 #include <glib.h>
18 #include <mono/utils/mono-compiler.h>
19 #include <mono/metadata/exception.h>
20 #include <mono/metadata/object-internals.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <math.h>
25 #ifdef HAVE_MEMORY_H
26 #include <memory.h>
27 #endif
28 #ifdef _MSC_VER
29 #include <intrin.h>
30 #endif
31 #include "decimal-ms.h"
32 #include "number-ms.h"
33
34 #define min(a, b) (((a) < (b)) ? (a) : (b))
35
36 typedef enum {
37         MONO_DECIMAL_OK,
38         MONO_DECIMAL_OVERFLOW,
39         MONO_DECIMAL_INVALID_ARGUMENT,
40         MONO_DECIMAL_DIVBYZERO,
41         MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
42 } MonoDecimalStatus;
43
44 #ifndef FC_GC_POLL
45 #   define FC_GC_POLL() 
46 #endif
47
48 static const uint32_t ten_to_nine    = 1000000000U;
49 static const uint32_t ten_to_ten_div_4 = 2500000000U;
50 #define POWER10_MAX     9
51 #define DECIMAL_NEG ((uint8_t)0x80)
52 #define DECMAX 28
53 #define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
54 #define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
55 #define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
56 #define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
57 #define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
58 #define DECIMAL_HI32(dec)        ((dec).Hi32)
59 #if G_BYTE_ORDER != G_LITTLE_ENDIAN
60 # define DECIMAL_LO64_GET(dec)   (((uint64_t)((dec).v.v.Mid32) << 32) | (dec).v.v.Lo32)
61 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.v.Lo32 = (value); (dec).v.v.Mid32 = ((value) >> 32); }
62 #else
63 # define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
64 # define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }
65 #endif
66
67 #define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
68 #define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
69     DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }
70
71 #define DEC_SCALE_MAX   28
72 #define POWER10_MAX     9
73
74 #define OVFL_MAX_9_HI   4
75 #define OVFL_MAX_9_MID  1266874889
76 #define OVFL_MAX_9_LO   3047500985u
77
78 #define OVFL_MAX_5_HI   42949
79 #define OVFL_MAX_5_MID  2890341191
80
81 #define OVFL_MAX_1_HI   429496729
82
83 typedef union {
84         uint64_t int64;
85         struct {
86 #if BYTE_ORDER == G_BIG_ENDIAN
87         uint32_t Hi;
88         uint32_t Lo;
89 #else
90         uint32_t Lo;
91         uint32_t Hi;
92 #endif
93     } u;
94 } SPLIT64;
95
96 static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
97
98 const MonoDouble_double ds2to64 = { .s = { .sign = 0, .exp = MONO_DOUBLE_BIAS + 65, .mantHi = 0, .mantLo = 0 } };
99
100 //
101 // Data tables
102 //
103
104 static const uint32_t power10 [POWER10_MAX+1] = {
105         1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
106 };
107
108
109 static const double double_power10[] = {
110         1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 
111         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 
112         1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 
113         1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 
114         1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 
115         1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
116         1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, 
117         1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
118         1e80 };
119
120 const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
121                                 {100000000000ULL},         // 1E11
122                                 {1000000000000ULL},        // 1E12
123                                 {10000000000000ULL},       // 1E13
124                                 {100000000000000ULL} };    // 1E14
125
126 static const uint64_t long_power10[] = {
127         1,
128         10ULL,
129         100ULL,
130         1000ULL,
131         10000ULL,
132         100000ULL,
133         1000000ULL,
134         10000000ULL,
135         100000000ULL,
136         1000000000ULL,
137         10000000000ULL,
138         100000000000ULL,
139         1000000000000ULL,
140         10000000000000ULL,
141         100000000000000ULL,
142         1000000000000000ULL,
143         10000000000000000ULL,
144         100000000000000000ULL,
145         1000000000000000000ULL,
146         10000000000000000000ULL};
147
148 typedef struct  {
149         uint32_t Hi, Mid, Lo;
150 } DECOVFL;
151
152 const DECOVFL power_overflow[] = {
153 // This is a table of the largest values that can be in the upper two
154 // ULONGs of a 96-bit number that will not overflow when multiplied
155 // by a given power.  For the upper word, this is a table of 
156 // 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
157 // remaining fraction part * 2^32.  2^32 = 4294967296.
158 // 
159     { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
160     { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
161     { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
162     { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
163     { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
164     { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
165     { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
166     { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
167     { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
168 };
169
170
171 #define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
172 #define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
173 #define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
174
175 static double
176 fnDblPower10(int ix)
177 {
178     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
179     g_assert(ix >= 0);
180     if (ix < maxIx)
181         return double_power10[ix];
182     return pow(10.0, ix);
183 } // double fnDblPower10()
184
185
186 static inline int64_t
187 DivMod32by32(int32_t num, int32_t den)
188 {
189     SPLIT64  sdl;
190
191     sdl.u.Lo = num / den;
192     sdl.u.Hi = num % den;
193     return sdl.int64;
194 }
195
196 static inline int64_t
197 DivMod64by32(int64_t num, int32_t den)
198 {
199     SPLIT64  sdl;
200
201     sdl.u.Lo = Div64by32(num, den);
202     sdl.u.Hi = Mod64by32(num, den);
203     return sdl.int64;
204 }
205
206 static uint64_t
207 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
208 {
209         SPLIT64  tmp1;
210         SPLIT64  tmp2;
211         SPLIT64  tmp3;
212
213         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
214         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
215         tmp1.u.Hi += tmp2.u.Lo;
216         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
217                 tmp2.u.Hi++;
218         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
219         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
220         tmp1.u.Hi += tmp2.u.Lo;
221         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
222                 tmp2.u.Hi++;
223         tmp3.int64 += (uint64_t)tmp2.u.Hi;
224
225         *hi = tmp3.int64;
226         return tmp1.int64;
227 }
228
229 /**
230 * FullDiv64By32:
231 *
232 * Entry:
233 *   pdlNum  - Pointer to 64-bit dividend
234 *   ulDen   - 32-bit divisor
235 *
236 * Purpose:
237 *   Do full divide, yielding 64-bit result and 32-bit remainder.
238 *
239 * Exit:
240 *   Quotient overwrites dividend.
241 *   Returns remainder.
242 *
243 * Exceptions:
244 *   None.
245 */
246 // Was: FullDiv64By32
247 static uint32_t
248 FullDiv64By32 (uint64_t *num, uint32_t den)
249 {
250         SPLIT64  tmp;
251         SPLIT64  res;
252         
253         tmp.int64 = *num;
254         res.u.Hi = 0;
255         
256         if (tmp.u.Hi >= den) {
257                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
258                 //
259                 res.u.Lo = tmp.u.Hi;
260                 res.int64 = DivMod64by32(res.int64, den);
261                 tmp.u.Hi = res.u.Hi;
262                 res.u.Hi = res.u.Lo;
263         }
264         
265         tmp.int64 = DivMod64by32(tmp.int64, den);
266         res.u.Lo = tmp.u.Lo;
267         *num = res.int64;
268         return tmp.u.Hi;
269 }
270
271 /***
272  * SearchScale
273  *
274  * Entry:
275  *   res_hi - Top uint32_t of quotient
276  *   res_mid - Middle uint32_t of quotient
277  *   res_lo - Bottom uint32_t of quotient
278  *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
279  *
280  * Purpose:
281  *   Determine the max power of 10, <= 9, that the quotient can be scaled
282  *   up by and still fit in 96 bits.
283  *
284  * Exit:
285  *   Returns power of 10 to scale by, -1 if overflow error.
286  *
287  ***********************************************************************/
288
289 static int
290 SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
291 {
292         int   cur_scale;
293
294         // Quick check to stop us from trying to scale any more.
295         //
296         if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
297                 cur_scale = 0;
298                 goto HaveScale;
299         }
300
301         if (scale > DEC_SCALE_MAX - 9) {
302                 // We can't scale by 10^9 without exceeding the max scale factor.
303                 // See if we can scale to the max.  If not, we'll fall into
304                 // standard search for scale factor.
305                 //
306                 cur_scale = DEC_SCALE_MAX - scale;
307                 if (res_hi < power_overflow[cur_scale - 1].Hi)
308                         goto HaveScale;
309
310                 if (res_hi == power_overflow[cur_scale - 1].Hi) {
311                 UpperEq:
312                         if (res_mid > power_overflow[cur_scale - 1].Mid ||
313                             (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
314                                 cur_scale--;
315                         }
316                         goto HaveScale;
317                 }
318         } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
319                 return 9;
320
321         // Search for a power to scale by < 9.  Do a binary search
322         // on power_overflow[].
323         //
324         cur_scale = 5;
325         if (res_hi < OVFL_MAX_5_HI)
326                 cur_scale = 7;
327         else if (res_hi > OVFL_MAX_5_HI)
328                 cur_scale = 3;
329         else
330                 goto UpperEq;
331
332         // cur_scale is 3 or 7.
333         //
334         if (res_hi < power_overflow[cur_scale - 1].Hi)
335                 cur_scale++;
336         else if (res_hi > power_overflow[cur_scale - 1].Hi)
337                 cur_scale--;
338         else
339                 goto UpperEq;
340
341         // cur_scale is 2, 4, 6, or 8.
342         //
343         // In all cases, we already found we could not use the power one larger.
344         // So if we can use this power, it is the biggest, and we're done.  If
345         // we can't use this power, the one below it is correct for all cases 
346         // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
347         // 
348         if (res_hi > power_overflow[cur_scale - 1].Hi)
349                 cur_scale--;
350
351         if (res_hi == power_overflow[cur_scale - 1].Hi)
352                 goto UpperEq;
353
354 HaveScale:
355         // cur_scale = largest power of 10 we can scale by without overflow, 
356         // cur_scale < 9.  See if this is enough to make scale factor 
357         // positive if it isn't already.
358         // 
359         if (cur_scale + scale < 0)
360                 cur_scale = -1;
361
362         return cur_scale;
363 }
364
365
366 /**
367 * Div96By32
368 *
369 * Entry:
370 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
371 *   ulDen   - 32-bit divisor.
372 *
373 * Purpose:
374 *   Do full divide, yielding 96-bit result and 32-bit remainder.
375 *
376 * Exit:
377 *   Quotient overwrites dividend.
378 *   Returns remainder.
379 *
380 * Exceptions:
381 *   None.
382 *
383 */
384 static uint32_t
385 Div96By32(uint32_t *num, uint32_t den)
386 {
387         SPLIT64  tmp;
388
389         tmp.u.Hi = 0;
390
391         if (num[2] != 0)
392                 goto Div3Word;
393
394         if (num[1] >= den)
395                 goto Div2Word;
396
397         tmp.u.Hi = num[1];
398         num[1] = 0;
399         goto Div1Word;
400
401 Div3Word:
402         tmp.u.Lo = num[2];
403         tmp.int64 = DivMod64by32(tmp.int64, den);
404         num[2] = tmp.u.Lo;
405 Div2Word:
406         tmp.u.Lo = num[1];
407         tmp.int64 = DivMod64by32(tmp.int64, den);
408         num[1] = tmp.u.Lo;
409 Div1Word:
410         tmp.u.Lo = num[0];
411         tmp.int64 = DivMod64by32(tmp.int64, den);
412         num[0] = tmp.u.Lo;
413         return tmp.u.Hi;
414 }
415
416 /***
417  * DecFixInt
418  *
419  * Entry:
420  *   pdecRes - Pointer to Decimal result location
421  *   operand  - Pointer to Decimal operand
422  *
423  * Purpose:
424  *   Chop the value to integer.  Return remainder so Int() function
425  *   can round down if non-zero.
426  *
427  * Exit:
428  *   Returns remainder.
429  *
430  * Exceptions:
431  *   None.
432  *
433  ***********************************************************************/
434
435 static uint32_t
436 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
437 {
438         uint32_t   num[3];
439         uint32_t   rem;
440         uint32_t   pwr;
441         int     scale;
442
443         if (operand->u.u.scale > 0) {
444                 num[0] = operand->v.v.Lo32;
445                 num[1] = operand->v.v.Mid32;
446                 num[2] = operand->Hi32;
447                 scale = operand->u.u.scale;
448                 result->u.u.sign = operand->u.u.sign;
449                 rem = 0;
450
451                 do {
452                         if (scale > POWER10_MAX)
453                                 pwr = ten_to_nine;
454                         else
455                                 pwr = power10[scale];
456
457                         rem |= Div96By32(num, pwr);
458                         scale -= 9;
459                 }while (scale > 0);
460
461                 result->v.v.Lo32 = num[0];
462                 result->v.v.Mid32 = num[1];
463                 result->Hi32 = num[2];
464                 result->u.u.scale = 0;
465
466                 return rem;
467         }
468
469         COPYDEC(*result, *operand);
470         // Odd, the Microsoft code does not set result->reserved to zero on this case
471         return 0;
472 }
473
474 /**
475  * ScaleResult:
476  *
477  * Entry:
478  *   res - Array of uint32_ts with value, least-significant first.
479  *   hi_res  - Index of last non-zero value in res.
480  *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
481  *
482  * Purpose:
483  *   See if we need to scale the result to fit it in 96 bits.
484  *   Perform needed scaling.  Adjust scale factor accordingly.
485  *
486  * Exit:
487  *   res updated in place, always 3 uint32_ts.
488  *   New scale factor returned, -1 if overflow error.
489  *
490  */
491 static int
492 ScaleResult(uint32_t *res, int hi_res, int scale)
493 {
494         int     new_scale;
495         int     cur;
496         uint32_t   pwr;
497         uint32_t   tmp;
498         uint32_t   sticky;
499         SPLIT64 sdlTmp;
500
501         // See if we need to scale the result.  The combined scale must
502         // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
503         // 
504         // Start by figuring a lower bound on the scaling needed to make
505         // the upper 96 bits zero.  hi_res is the index into res[]
506         // of the highest non-zero uint32_t.
507         // 
508         new_scale =   hi_res * 32 - 64 - 1;
509         if (new_scale > 0) {
510
511                 // Find the MSB.
512                 //
513                 tmp = res[hi_res];
514                 if (!(tmp & 0xFFFF0000)) {
515                         new_scale -= 16;
516                         tmp <<= 16;
517                 }
518                 if (!(tmp & 0xFF000000)) {
519                         new_scale -= 8;
520                         tmp <<= 8;
521                 }
522                 if (!(tmp & 0xF0000000)) {
523                         new_scale -= 4;
524                         tmp <<= 4;
525                 }
526                 if (!(tmp & 0xC0000000)) {
527                         new_scale -= 2;
528                         tmp <<= 2;
529                 }
530                 if (!(tmp & 0x80000000)) {
531                         new_scale--;
532                         tmp <<= 1;
533                 }
534     
535                 // Multiply bit position by log10(2) to figure it's power of 10.
536                 // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this 
537                 // with a multiply saves a 96-byte lookup table.  The power returned
538                 // is <= the power of the number, so we must add one power of 10
539                 // to make it's integer part zero after dividing by 256.
540                 // 
541                 // Note: the result of this multiplication by an approximation of
542                 // log10(2) have been exhaustively checked to verify it gives the 
543                 // correct result.  (There were only 95 to check...)
544                 // 
545                 new_scale = ((new_scale * 77) >> 8) + 1;
546
547                 // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
548                 // This reduces the scale factor of the result.  If it exceeds the
549                 // current scale of the result, we'll overflow.
550                 // 
551                 if (new_scale > scale)
552                         return -1;
553         }
554         else
555                 new_scale = 0;
556
557         // Make sure we scale by enough to bring the current scale factor
558         // into valid range.
559         //
560         if (new_scale < scale - DEC_SCALE_MAX)
561                 new_scale = scale - DEC_SCALE_MAX;
562
563         if (new_scale != 0) {
564                 // Scale by the power of 10 given by new_scale.  Note that this is 
565                 // NOT guaranteed to bring the number within 96 bits -- it could 
566                 // be 1 power of 10 short.
567                 //
568                 scale -= new_scale;
569                 sticky = 0;
570                 sdlTmp.u.Hi = 0; // initialize remainder
571
572                 for (;;) {
573
574                         sticky |= sdlTmp.u.Hi; // record remainder as sticky bit
575
576                         if (new_scale > POWER10_MAX)
577                                 pwr = ten_to_nine;
578                         else
579                                 pwr = power10[new_scale];
580
581                         // Compute first quotient.
582                         // DivMod64by32 returns quotient in Lo, remainder in Hi.
583                         //
584                         sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
585                         res[hi_res] = sdlTmp.u.Lo;
586                         cur = hi_res - 1;
587
588                         if (cur >= 0) {
589                                 // If first quotient was 0, update hi_res.
590                                 //
591                                 if (sdlTmp.u.Lo == 0)
592                                         hi_res--;
593
594                                 // Compute subsequent quotients.
595                                 //
596                                 do {
597                                         sdlTmp.u.Lo = res[cur];
598                                         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
599                                         res[cur] = sdlTmp.u.Lo;
600                                         cur--;
601                                 } while (cur >= 0);
602
603                         }
604
605                         new_scale -= POWER10_MAX;
606                         if (new_scale > 0)
607                                 continue; // scale some more
608
609                         // If we scaled enough, hi_res would be 2 or less.  If not,
610                         // divide by 10 more.
611                         //
612                         if (hi_res > 2) {
613                                 new_scale = 1;
614                                 scale--;
615                                 continue; // scale by 10
616                         }
617
618                         // Round final result.  See if remainder >= 1/2 of divisor.
619                         // If remainder == 1/2 divisor, round up if odd or sticky bit set.
620                         //
621                         pwr >>= 1;  // power of 10 always even
622                         if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
623                                                     ((res[0] & 1) | sticky)) ) {
624                                 cur = -1;
625                                 while (++res[++cur] == 0);
626                                 
627                                 if (cur > 2) {
628                                         // The rounding caused us to carry beyond 96 bits. 
629                                         // Scale by 10 more.
630                                         //
631                                         hi_res = cur;
632                                         sticky = 0;  // no sticky bit
633                                         sdlTmp.u.Hi = 0; // or remainder
634                                         new_scale = 1;
635                                         scale--;
636                                         continue; // scale by 10
637                                 }
638                         }
639                         
640                         // We may have scaled it more than we planned.  Make sure the scale 
641                         // factor hasn't gone negative, indicating overflow.
642                         // 
643                         if (scale < 0)
644                                 return -1;
645                         
646                         return scale;
647                 } // for(;;)
648         }
649         return scale;
650 }
651
652 // Decimal multiply
653 // Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
654 static MonoDecimalStatus
655 mono_decimal_multiply_result(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
656 {
657         SPLIT64 tmp;
658         SPLIT64 tmp2;
659         SPLIT64 tmp3;
660         int     scale;
661         int     hi_prod;
662         uint32_t   pwr;
663         uint32_t   rem_lo;
664         uint32_t   rem_hi;
665         uint32_t   prod[6];
666
667         scale = left->u.u.scale + right->u.u.scale;
668
669         if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
670                 // Upper 64 bits are zero.
671                 //
672                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
673                 if (scale > DEC_SCALE_MAX)
674                 {
675                         // Result scale is too big.  Divide result by power of 10 to reduce it.
676                         // If the amount to divide by is > 19 the result is guaranteed
677                         // less than 1/2.  [max value in 64 bits = 1.84E19]
678                         //
679                         scale -= DEC_SCALE_MAX;
680                         if (scale > 19) {
681                         ReturnZero:
682                                 DECIMAL_SETZERO(*result);
683                                 return MONO_DECIMAL_OK;
684                         }
685
686                         if (scale > POWER10_MAX) {
687                                 // Divide by 1E10 first, to get the power down to a 32-bit quantity.
688                                 // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
689                                 // then multiply the next divisor by 4 (which will be a max of 4E9).
690                                 // 
691                                 rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
692                                 pwr = power10[scale - 10] << 2;
693                         } else {
694                                 pwr = power10[scale];
695                                 rem_lo = 0;
696                         }
697
698                         // Power to divide by fits in 32 bits.
699                         //
700                         rem_hi = FullDiv64By32(&tmp.int64, pwr);
701
702                         // Round result.  See if remainder >= 1/2 of divisor.
703                         // Divisor is a power of 10, so it is always even.
704                         //
705                         pwr >>= 1;
706                         if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
707                                 tmp.int64++;
708
709                         scale = DEC_SCALE_MAX;
710                 }
711                 DECIMAL_LO32(*result) = tmp.u.Lo;
712                 DECIMAL_MID32(*result) = tmp.u.Hi;
713                 DECIMAL_HI32(*result) = 0;
714         } else {
715                 // At least one operand has bits set in the upper 64 bits.
716                 //
717                 // Compute and accumulate the 9 partial products into a 
718                 // 192-bit (24-byte) result.
719                 //
720                 //                [l-h][l-m][l-l]   left high, middle, low
721                 //             x  [r-h][r-m][r-l]   right high, middle, low
722                 // ------------------------------
723                 //
724                 //                     [0-h][0-l]   l-l * r-l
725                 //                [1ah][1al]        l-l * r-m
726                 //                [1bh][1bl]        l-m * r-l
727                 //           [2ah][2al]             l-m * r-m
728                 //           [2bh][2bl]             l-l * r-h
729                 //           [2ch][2cl]             l-h * r-l
730                 //      [3ah][3al]                  l-m * r-h
731                 //      [3bh][3bl]                  l-h * r-m
732                 // [4-h][4-l]                       l-h * r-h
733                 // ------------------------------
734                 // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
735                 //
736                 tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
737                 prod[0] = tmp.u.Lo;
738
739                 tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;
740
741                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
742                 tmp.int64 += tmp2.int64; // this could generate carry
743                 prod[1] = tmp.u.Lo;
744                 if (tmp.int64 < tmp2.int64) // detect carry
745                         tmp2.u.Hi = 1;
746                 else
747                         tmp2.u.Hi = 0;
748                 tmp2.u.Lo = tmp.u.Hi;
749
750                 tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;
751
752                 if (left->Hi32 | right->Hi32) {
753                         // Highest 32 bits is non-zero.  Calculate 5 more partial products.
754                         //
755                         tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
756                         tmp.int64 += tmp2.int64; // this could generate carry
757                         if (tmp.int64 < tmp2.int64) // detect carry
758                                 tmp3.u.Hi = 1;
759                         else
760                                 tmp3.u.Hi = 0;
761
762                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
763                         tmp.int64 += tmp2.int64; // this could generate carry
764                         prod[2] = tmp.u.Lo;
765                         if (tmp.int64 < tmp2.int64) // detect carry
766                                 tmp3.u.Hi++;
767                         tmp3.u.Lo = tmp.u.Hi;
768
769                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
770                         tmp.int64 += tmp3.int64; // this could generate carry
771                         if (tmp.int64 < tmp3.int64) // detect carry
772                                 tmp3.u.Hi = 1;
773                         else
774                                 tmp3.u.Hi = 0;
775
776                         tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
777                         tmp.int64 += tmp2.int64; // this could generate carry
778                         prod[3] = tmp.u.Lo;
779                         if (tmp.int64 < tmp2.int64) // detect carry
780                                 tmp3.u.Hi++;
781                         tmp3.u.Lo = tmp.u.Hi;
782
783                         tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
784                         prod[4] = tmp.u.Lo;
785                         prod[5] = tmp.u.Hi;
786
787                         hi_prod = 5;
788                 }
789                 else {
790                         prod[2] = tmp.u.Lo;
791                         prod[3] = tmp.u.Hi;
792                         hi_prod = 3;
793                 }
794
795                 // Check for leading zero uint32_ts on the product
796                 //
797                 while (prod[hi_prod] == 0) {
798                         hi_prod--;
799                         if (hi_prod < 0)
800                                 goto ReturnZero;
801                 }
802
803                 scale = ScaleResult(prod, hi_prod, scale);
804                 if (scale == -1)
805                         return MONO_DECIMAL_OVERFLOW;
806
807                 result->v.v.Lo32 = prod[0];
808                 result->v.v.Mid32 = prod[1];
809                 result->Hi32 = prod[2];
810         }
811
812         result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
813         result->u.u.scale = (char)scale;
814         return MONO_DECIMAL_OK;
815 }
816
817 // Addition and subtraction
818 static MonoDecimalStatus
819 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
820 {
821         uint32_t     num[6];
822         uint32_t     pwr;
823         int       scale;
824         int       hi_prod;
825         int       cur;
826         SPLIT64   tmp;
827         MonoDecimal decRes;
828         MonoDecimal decTmp;
829         MonoDecimal *pdecTmp;
830
831         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
832
833         if (right->u.u.scale == left->u.u.scale) {
834                 // Scale factors are equal, no alignment necessary.
835                 //
836                 decRes.u.signscale = left->u.signscale;
837
838         AlignedAdd:
839                 if (sign) {
840                         // Signs differ - subtract
841                         //
842                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
843                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
844
845                         // Propagate carry
846                         //
847                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
848                                 decRes.Hi32--;
849                                 if (decRes.Hi32 >= left->Hi32)
850                                         goto SignFlip;
851                         } else if (decRes.Hi32 > left->Hi32) {
852                                 // Got negative result.  Flip its sign.
853                                 //
854                         SignFlip:
855                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
856                                 decRes.Hi32 = ~decRes.Hi32;
857                                 if (DECIMAL_LO64_GET(decRes) == 0)
858                                         decRes.Hi32++;
859                                 decRes.u.u.sign ^= DECIMAL_NEG;
860                         }
861
862                 } else {
863                         // Signs are the same - add
864                         //
865                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
866                         decRes.Hi32 = left->Hi32 + right->Hi32;
867
868                         // Propagate carry
869                         //
870                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
871                                 decRes.Hi32++;
872                                 if (decRes.Hi32 <= left->Hi32)
873                                         goto AlignedScale;
874                         } else if (decRes.Hi32 < left->Hi32) {
875                         AlignedScale:
876                                 // The addition carried above 96 bits.  Divide the result by 10,
877                                 // dropping the scale factor.
878                                 //
879                                 if (decRes.u.u.scale == 0)
880                                         return MONO_DECIMAL_OVERFLOW;
881                                 decRes.u.u.scale--;
882
883                                 tmp.u.Lo = decRes.Hi32;
884                                 tmp.u.Hi = 1;
885                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
886                                 decRes.Hi32 = tmp.u.Lo;
887
888                                 tmp.u.Lo = decRes.v.v.Mid32;
889                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
890                                 decRes.v.v.Mid32 = tmp.u.Lo;
891
892                                 tmp.u.Lo = decRes.v.v.Lo32;
893                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
894                                 decRes.v.v.Lo32 = tmp.u.Lo;
895
896                                 // See if we need to round up.
897                                 //
898                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
899                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
900                                                 if (DECIMAL_LO64_GET(decRes) == 0)
901                                                         decRes.Hi32++;
902                                 }
903                         }
904                 }
905         }
906         else {
907                 // Scale factors are not equal.  Assume that a larger scale
908                 // factor (more decimal places) is likely to mean that number
909                 // is smaller.  Start by guessing that the right operand has
910                 // the larger scale factor.  The result will have the larger
911                 // scale factor.
912                 //
913                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
914                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
915                 scale = decRes.u.u.scale - left->u.u.scale;
916
917                 if (scale < 0) {
918                         // Guessed scale factor wrong. Swap operands.
919                         //
920                         scale = -scale;
921                         decRes.u.u.scale = left->u.u.scale;
922                         decRes.u.u.sign ^= sign;
923                         pdecTmp = right;
924                         right = left;
925                         left = pdecTmp;
926                 }
927
928                 // *left will need to be multiplied by 10^scale so
929                 // it will have the same scale as *right.  We could be
930                 // extending it to up to 192 bits of precision.
931                 //
932                 if (scale <= POWER10_MAX) {
933                         // Scaling won't make it larger than 4 uint32_ts
934                         //
935                         pwr = power10[scale];
936                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
937                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
938                         tmp.int64 += decTmp.v.v.Mid32;
939                         decTmp.v.v.Mid32 = tmp.u.Lo;
940                         decTmp.Hi32 = tmp.u.Hi;
941                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
942                         tmp.int64 += decTmp.Hi32;
943                         if (tmp.u.Hi == 0) {
944                                 // Result fits in 96 bits.  Use standard aligned add.
945                                 //
946                                 decTmp.Hi32 = tmp.u.Lo;
947                                 left = &decTmp;
948                                 goto AlignedAdd;
949                         }
950                         num[0] = decTmp.v.v.Lo32;
951                         num[1] = decTmp.v.v.Mid32;
952                         num[2] = tmp.u.Lo;
953                         num[3] = tmp.u.Hi;
954                         hi_prod = 3;
955                 }
956                 else {
957                         // Have to scale by a bunch.  Move the number to a buffer
958                         // where it has room to grow as it's scaled.
959                         //
960                         num[0] = left->v.v.Lo32;
961                         num[1] = left->v.v.Mid32;
962                         num[2] = left->Hi32;
963                         hi_prod = 2;
964
965                         // Scan for zeros in the upper words.
966                         //
967                         if (num[2] == 0) {
968                                 hi_prod = 1;
969                                 if (num[1] == 0) {
970                                         hi_prod = 0;
971                                         if (num[0] == 0) {
972                                                 // Left arg is zero, return right.
973                                                 //
974                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
975                                                 decRes.Hi32 = right->Hi32;
976                                                 decRes.u.u.sign ^= sign;
977                                                 goto RetDec;
978                                         }
979                                 }
980                         }
981
982                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
983                         // with index of highest non-zero uint32_t.
984                         //
985                         for (; scale > 0; scale -= POWER10_MAX) {
986                                 if (scale > POWER10_MAX)
987                                         pwr = ten_to_nine;
988                                 else
989                                         pwr = power10[scale];
990
991                                 tmp.u.Hi = 0;
992                                 for (cur = 0; cur <= hi_prod; cur++) {
993                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
994                                         num[cur] = tmp.u.Lo;
995                                 }
996
997                                 if (tmp.u.Hi != 0)
998                                         // We're extending the result by another uint32_t.
999                                         num[++hi_prod] = tmp.u.Hi;
1000                         }
1001                 }
1002
1003                 // Scaling complete, do the add.  Could be subtract if signs differ.
1004                 //
1005                 tmp.u.Lo = num[0];
1006                 tmp.u.Hi = num[1];
1007
1008                 if (sign) {
1009                         // Signs differ, subtract.
1010                         //
1011                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1012                         decRes.Hi32 = num[2] - right->Hi32;
1013
1014                         // Propagate carry
1015                         //
1016                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1017                                 decRes.Hi32--;
1018                                 if (decRes.Hi32 >= num[2])
1019                                         goto LongSub;
1020                         }
1021                         else if (decRes.Hi32 > num[2]) {
1022                         LongSub:
1023                                 // If num has more than 96 bits of precision, then we need to
1024                                 // carry the subtraction into the higher bits.  If it doesn't,
1025                                 // then we subtracted in the wrong order and have to flip the 
1026                                 // sign of the result.
1027                                 // 
1028                                 if (hi_prod <= 2)
1029                                         goto SignFlip;
1030
1031                                 cur = 3;
1032                                 while(num[cur++]-- == 0);
1033                                 if (num[hi_prod] == 0)
1034                                         hi_prod--;
1035                         }
1036                 }
1037                 else {
1038                         // Signs the same, add.
1039                         //
1040                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1041                         decRes.Hi32 = num[2] + right->Hi32;
1042
1043                         // Propagate carry
1044                         //
1045                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1046                                 decRes.Hi32++;
1047                                 if (decRes.Hi32 <= num[2])
1048                                         goto LongAdd;
1049                         }
1050                         else if (decRes.Hi32 < num[2]) {
1051                         LongAdd:
1052                                 // Had a carry above 96 bits.
1053                                 //
1054                                 cur = 3;
1055                                 do {
1056                                         if (hi_prod < cur) {
1057                                                 num[cur] = 1;
1058                                                 hi_prod = cur;
1059                                                 break;
1060                                         }
1061                                 }while (++num[cur++] == 0);
1062                         }
1063                 }
1064
1065                 if (hi_prod > 2) {
1066                         num[0] = decRes.v.v.Lo32;
1067                         num[1] = decRes.v.v.Mid32;
1068                         num[2] = decRes.Hi32;
1069                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1070                         if (decRes.u.u.scale == (uint8_t) -1)
1071                                 return MONO_DECIMAL_OVERFLOW;
1072
1073                         decRes.v.v.Lo32 = num[0];
1074                         decRes.v.v.Mid32 = num[1];
1075                         decRes.Hi32 = num[2];
1076                 }
1077         }
1078
1079 RetDec:
1080         COPYDEC(*result, decRes);
1081         // Odd, the Microsoft code does not set result->reserved to zero on this case
1082         return MONO_DECIMAL_OK;
1083 }
1084
1085 // Decimal addition
1086 static MonoDecimalStatus G_GNUC_UNUSED
1087 mono_decimal_add(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1088 {
1089     return DecAddSub (left, right, result, 0);
1090 }
1091
1092 // Decimal subtraction
1093 static MonoDecimalStatus G_GNUC_UNUSED
1094 mono_decimal_sub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1095 {
1096     return DecAddSub (left, right, result, DECIMAL_NEG);
1097 }
1098
1099 /**
1100  * IncreaseScale:
1101  *
1102  * Entry:
1103  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1104  *   pwr   - Scale factor to multiply by
1105  *
1106  * Purpose:
1107  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1108  *   the input.  The last 32 bits of the product are the return value.
1109  *
1110  * Exit:
1111  *   Returns highest 32 bits of product.
1112  *
1113  * Exceptions:
1114  *   None.
1115  *
1116  */
1117 static uint32_t
1118 IncreaseScale(uint32_t *num, uint32_t pwr)
1119 {
1120         SPLIT64   sdlTmp;
1121
1122         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1123         num[0] = sdlTmp.u.Lo;
1124         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1125         num[1] = sdlTmp.u.Lo;
1126         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1127         num[2] = sdlTmp.u.Lo;
1128         return sdlTmp.u.Hi;
1129 }
1130
1131 /**
1132  * Div96By64:
1133  *
1134  * Entry:
1135  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1136  *   sdlDen  - 64-bit divisor.
1137  *
1138  * Purpose:
1139  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1140  *   Divisor must be larger than upper 64 bits of dividend.
1141  *
1142  * Exit:
1143  *   Remainder overwrites lower 64-bits of dividend.
1144  *   Returns quotient.
1145  *
1146  * Exceptions:
1147  *   None.
1148  *
1149  */
1150 static uint32_t
1151 Div96By64(uint32_t *num, SPLIT64 den)
1152 {
1153         SPLIT64 quo;
1154         SPLIT64 sdlNum;
1155         SPLIT64 prod;
1156
1157         sdlNum.u.Lo = num[0];
1158
1159         if (num[2] >= den.u.Hi) {
1160                 // Divide would overflow.  Assume a quotient of 2^32, and set
1161                 // up remainder accordingly.  Then jump to loop which reduces
1162                 // the quotient.
1163                 //
1164                 sdlNum.u.Hi = num[1] - den.u.Lo;
1165                 quo.u.Lo = 0;
1166                 goto NegRem;
1167         }
1168
1169         // Hardware divide won't overflow
1170         //
1171         if (num[2] == 0 && num[1] < den.u.Hi)
1172                 // Result is zero.  Entire dividend is remainder.
1173                 //
1174                 return 0;
1175
1176         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1177         //
1178         quo.u.Lo = num[1];
1179         quo.u.Hi = num[2];
1180         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1181         sdlNum.u.Hi = quo.u.Hi; // remainder
1182
1183         // Compute full remainder, rem = dividend - (quo * divisor).
1184         //
1185         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1186         sdlNum.int64 -= prod.int64;
1187
1188         if (sdlNum.int64 > ~prod.int64) {
1189         NegRem:
1190                 // Remainder went negative.  Add divisor back in until it's positive,
1191                 // a max of 2 times.
1192                 //
1193                 do {
1194                         quo.u.Lo--;
1195                         sdlNum.int64 += den.int64;
1196                 }while (sdlNum.int64 >= den.int64);
1197         }
1198
1199         num[0] = sdlNum.u.Lo;
1200         num[1] = sdlNum.u.Hi;
1201         return quo.u.Lo;
1202 }
1203
1204 /***
1205 * Div128By96
1206 *
1207 * Entry:
1208 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1209 *   den - Pointer to 96-bit divisor.
1210 *
1211 * Purpose:
1212 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1213 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1214 *   assured in the initial call because the divisor is normalized
1215 *   and the dividend can't be.  In subsequent calls, the remainder
1216 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1217 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1218 *
1219 * Exit:
1220 *   Remainder overwrites lower 96-bits of dividend.
1221 *   Returns quotient.
1222 *
1223 * Exceptions:
1224 *   None.
1225 *
1226 ***********************************************************************/
1227
1228 static uint32_t
1229 Div128By96(uint32_t *num, uint32_t *den)
1230 {
1231         SPLIT64 sdlQuo;
1232         SPLIT64 sdlNum;
1233         SPLIT64 sdlProd1;
1234         SPLIT64 sdlProd2;
1235
1236         sdlNum.u.Lo = num[0];
1237         sdlNum.u.Hi = num[1];
1238
1239         if (num[3] == 0 && num[2] < den[2]){
1240                 // Result is zero.  Entire dividend is remainder.
1241                 //
1242                 return 0;
1243         }
1244
1245         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1246         //
1247         sdlQuo.u.Lo = num[2];
1248         sdlQuo.u.Hi = num[3];
1249         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1250
1251         // Compute full remainder, rem = dividend - (quo * divisor).
1252         //
1253         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1254         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1255         sdlProd2.int64 += sdlProd1.u.Hi;
1256         sdlProd1.u.Hi = sdlProd2.u.Lo;
1257
1258         sdlNum.int64 -= sdlProd1.int64;
1259         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1260
1261         // Propagate carries
1262         //
1263         if (sdlNum.int64 > ~sdlProd1.int64) {
1264                 num[2]--;
1265                 if (num[2] >= ~sdlProd2.u.Hi)
1266                         goto NegRem;
1267         } else if (num[2] > ~sdlProd2.u.Hi) {
1268         NegRem:
1269                 // Remainder went negative.  Add divisor back in until it's positive,
1270                 // a max of 2 times.
1271                 //
1272                 sdlProd1.u.Lo = den[0];
1273                 sdlProd1.u.Hi = den[1];
1274
1275                 for (;;) {
1276                         sdlQuo.u.Lo--;
1277                         sdlNum.int64 += sdlProd1.int64;
1278                         num[2] += den[2];
1279
1280                         if (sdlNum.int64 < sdlProd1.int64) {
1281                                 // Detected carry. Check for carry out of top
1282                                 // before adding it in.
1283                                 //
1284                                 if (num[2]++ < den[2])
1285                                         break;
1286                         }
1287                         if (num[2] < den[2])
1288                                 break; // detected carry
1289                 }
1290         }
1291
1292         num[0] = sdlNum.u.Lo;
1293         num[1] = sdlNum.u.Hi;
1294         return sdlQuo.u.Lo;
1295 }
1296
1297 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1298 // Returns FALSE if there is an overflow
1299 static gboolean
1300 Add32To96(uint32_t *num, uint32_t value)
1301 {
1302         num[0] += value;
1303         if (num[0] < value) {
1304                 if (++num[1] == 0) {                
1305                         if (++num[2] == 0) {                
1306                                 return FALSE;
1307                         }            
1308                 }
1309         }
1310         return TRUE;
1311 }
1312
1313 static void
1314 OverflowUnscale (uint32_t *quo, gboolean remainder)
1315 {
1316         SPLIT64  sdlTmp;
1317         
1318         // We have overflown, so load the high bit with a one.
1319         sdlTmp.u.Hi = 1u;
1320         sdlTmp.u.Lo = quo[2];
1321         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1322         quo[2] = sdlTmp.u.Lo;
1323         sdlTmp.u.Lo = quo[1];
1324         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1325         quo[1] = sdlTmp.u.Lo;
1326         sdlTmp.u.Lo = quo[0];
1327         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1328         quo[0] = sdlTmp.u.Lo;
1329         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1330         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1331                 Add32To96(quo, 1u);
1332         }
1333 }
1334
1335 // mono_decimal_divide - Decimal divide
1336 static MonoDecimalStatus G_GNUC_UNUSED
1337 mono_decimal_divide_result(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1338 {
1339         uint32_t   quo[3];
1340         uint32_t   quoSave[3];
1341         uint32_t   rem[4];
1342         uint32_t   divisor[3];
1343         uint32_t   pwr;
1344         uint32_t   utmp;
1345         uint32_t   utmp1;
1346         SPLIT64 sdlTmp;
1347         SPLIT64 sdlDivisor;
1348         int     scale;
1349         int     cur_scale;
1350
1351         scale = left->u.u.scale - right->u.u.scale;
1352         divisor[0] = right->v.v.Lo32;
1353         divisor[1] = right->v.v.Mid32;
1354         divisor[2] = right->Hi32;
1355
1356         if (divisor[1] == 0 && divisor[2] == 0) {
1357                 // Divisor is only 32 bits.  Easy divide.
1358                 //
1359                 if (divisor[0] == 0)
1360                         return MONO_DECIMAL_DIVBYZERO;
1361
1362                 quo[0] = left->v.v.Lo32;
1363                 quo[1] = left->v.v.Mid32;
1364                 quo[2] = left->Hi32;
1365                 rem[0] = Div96By32(quo, divisor[0]);
1366
1367                 for (;;) {
1368                         if (rem[0] == 0) {
1369                                 if (scale < 0) {
1370                                         cur_scale = min(9, -scale);
1371                                         goto HaveScale;
1372                                 }
1373                                 break;
1374                         }
1375
1376                         // We have computed a quotient based on the natural scale 
1377                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero 
1378                         // remainder, so now we should increase the scale if possible to 
1379                         // include more quotient bits.
1380                         // 
1381                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and 
1382                         // computing more quotient bits as long as the remainder stays 
1383                         // non-zero.  If scaling by that much would cause overflow, we'll 
1384                         // drop out of the loop and scale by as much as we can.
1385                         // 
1386                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9 
1387                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and 
1388                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since 
1389                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888 
1390                         // is the largest value in quo[1] (when quo[2] == 4) that is 
1391                         // assured not to overflow.
1392                         // 
1393                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1394                         if (cur_scale == 0) {
1395                                 // No more scaling to be done, but remainder is non-zero.
1396                                 // Round quotient.
1397                                 //
1398                                 utmp = rem[0] << 1;
1399                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1400                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1401                                 RoundUp:
1402                                         if (++quo[0] == 0)
1403                                                 if (++quo[1] == 0)
1404                                                         quo[2]++;
1405                                 }
1406                                 break;
1407                         }
1408
1409                         if (cur_scale == -1)
1410                                 return MONO_DECIMAL_OVERFLOW;
1411
1412                 HaveScale:
1413                         pwr = power10[cur_scale];
1414                         scale += cur_scale;
1415
1416                         if (IncreaseScale(quo, pwr) != 0)
1417                                 return MONO_DECIMAL_OVERFLOW;
1418
1419                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1420                         rem[0] = sdlTmp.u.Hi;
1421
1422                         quo[0] += sdlTmp.u.Lo;
1423                         if (quo[0] < sdlTmp.u.Lo) {
1424                                 if (++quo[1] == 0)
1425                                         quo[2]++;
1426                         }
1427                 } // for (;;)
1428         }
1429         else {
1430                 // Divisor has bits set in the upper 64 bits.
1431                 //
1432                 // Divisor must be fully normalized (shifted so bit 31 of the most 
1433                 // significant uint32_t is 1).  Locate the MSB so we know how much to 
1434                 // normalize by.  The dividend will be shifted by the same amount so 
1435                 // the quotient is not changed.
1436                 //
1437                 if (divisor[2] == 0)
1438                         utmp = divisor[1];
1439                 else
1440                         utmp = divisor[2];
1441
1442                 cur_scale = 0;
1443                 if (!(utmp & 0xFFFF0000)) {
1444                         cur_scale += 16;
1445                         utmp <<= 16;
1446                 }
1447                 if (!(utmp & 0xFF000000)) {
1448                         cur_scale += 8;
1449                         utmp <<= 8;
1450                 }
1451                 if (!(utmp & 0xF0000000)) {
1452                         cur_scale += 4;
1453                         utmp <<= 4;
1454                 }
1455                 if (!(utmp & 0xC0000000)) {
1456                         cur_scale += 2;
1457                         utmp <<= 2;
1458                 }
1459                 if (!(utmp & 0x80000000)) {
1460                         cur_scale++;
1461                         utmp <<= 1;
1462                 }
1463     
1464                 // Shift both dividend and divisor left by cur_scale.
1465                 // 
1466                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1467                 rem[0] = sdlTmp.u.Lo;
1468                 rem[1] = sdlTmp.u.Hi;
1469                 sdlTmp.u.Lo = left->v.v.Mid32;
1470                 sdlTmp.u.Hi = left->Hi32;
1471                 sdlTmp.int64 <<= cur_scale;
1472                 rem[2] = sdlTmp.u.Hi;
1473                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1474
1475                 sdlDivisor.u.Lo = divisor[0];
1476                 sdlDivisor.u.Hi = divisor[1];
1477                 sdlDivisor.int64 <<= cur_scale;
1478
1479                 if (divisor[2] == 0) {
1480                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1481                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1482                         //
1483                         sdlTmp.u.Lo = rem[2];
1484                         sdlTmp.u.Hi = rem[3];
1485
1486                         quo[2] = 0;
1487                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1488                         quo[0] = Div96By64(rem, sdlDivisor);
1489
1490                         for (;;) {
1491                                 if ((rem[0] | rem[1]) == 0) {
1492                                         if (scale < 0) {
1493                                                 cur_scale = min(9, -scale);
1494                                                 goto HaveScale64;
1495                                         }
1496                                         break;
1497                                 }
1498
1499                                 // Remainder is non-zero.  Scale up quotient and remainder by 
1500                                 // powers of 10 so we can compute more significant bits.
1501                                 // 
1502                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1503                                 if (cur_scale == 0) {
1504                                         // No more scaling to be done, but remainder is non-zero.
1505                                         // Round quotient.
1506                                         //
1507                                         sdlTmp.u.Lo = rem[0];
1508                                         sdlTmp.u.Hi = rem[1];
1509                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1510                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1511                                                 goto RoundUp;
1512                                         break;
1513                                 }
1514
1515                                 if (cur_scale == -1)
1516                                         return MONO_DECIMAL_OVERFLOW;
1517
1518                         HaveScale64:
1519                                 pwr = power10[cur_scale];
1520                                 scale += cur_scale;
1521
1522                                 if (IncreaseScale(quo, pwr) != 0)
1523                                         return MONO_DECIMAL_OVERFLOW;
1524
1525                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1526                                 IncreaseScale(rem, pwr);
1527                                 utmp = Div96By64(rem, sdlDivisor);
1528                                 quo[0] += utmp;
1529                                 if (quo[0] < utmp)
1530                                         if (++quo[1] == 0)
1531                                                 quo[2]++;
1532
1533                         } // for (;;)
1534                 }
1535                 else {
1536                         // Have a 96-bit divisor in divisor[].
1537                         //
1538                         // Start by finishing the shift left by cur_scale.
1539                         //
1540                         sdlTmp.u.Lo = divisor[1];
1541                         sdlTmp.u.Hi = divisor[2];
1542                         sdlTmp.int64 <<= cur_scale;
1543                         divisor[0] = sdlDivisor.u.Lo;
1544                         divisor[1] = sdlDivisor.u.Hi;
1545                         divisor[2] = sdlTmp.u.Hi;
1546
1547                         // The remainder (currently 96 bits spread over 4 uint32_ts) 
1548                         // will be < divisor.
1549                         // 
1550                         quo[2] = 0;
1551                         quo[1] = 0;
1552                         quo[0] = Div128By96(rem, divisor);
1553
1554                         for (;;) {
1555                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1556                                         if (scale < 0) {
1557                                                 cur_scale = min(9, -scale);
1558                                                 goto HaveScale96;
1559                                         }
1560                                         break;
1561                                 }
1562
1563                                 // Remainder is non-zero.  Scale up quotient and remainder by 
1564                                 // powers of 10 so we can compute more significant bits.
1565                                 // 
1566                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1567                                 if (cur_scale == 0) {
1568                                         // No more scaling to be done, but remainder is non-zero.
1569                                         // Round quotient.
1570                                         //
1571                                         if (rem[2] >= 0x80000000)
1572                                                 goto RoundUp;
1573
1574                                         utmp = rem[0] > 0x80000000;
1575                                         utmp1 = rem[1] > 0x80000000;
1576                                         rem[0] <<= 1;
1577                                         rem[1] = (rem[1] << 1) + utmp;
1578                                         rem[2] = (rem[2] << 1) + utmp1;
1579
1580                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1581                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1582                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1583                                               (quo[0] & 1))))
1584                                                 goto RoundUp;
1585                                         break;
1586                                 }
1587
1588                                 if (cur_scale == -1)
1589                                         return MONO_DECIMAL_OVERFLOW;
1590
1591                         HaveScale96:
1592                                 pwr = power10[cur_scale];
1593                                 scale += cur_scale;
1594
1595                                 if (IncreaseScale(quo, pwr) != 0)
1596                                         return MONO_DECIMAL_OVERFLOW;
1597
1598                                 rem[3] = IncreaseScale(rem, pwr);
1599                                 utmp = Div128By96(rem, divisor);
1600                                 quo[0] += utmp;
1601                                 if (quo[0] < utmp)
1602                                         if (++quo[1] == 0)
1603                                                 quo[2]++;
1604
1605                         } // for (;;)
1606                 }
1607         }
1608
1609         // No more remainder.  Try extracting any extra powers of 10 we may have 
1610         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1611         // If a division by one of these powers returns a zero remainder, then
1612         // we keep the quotient.  If the remainder is not zero, then we restore
1613         // the previous value.
1614         // 
1615         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1616         // we can extract.  We use this as a quick test on whether to try a
1617         // given power.
1618         // 
1619         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1620                 quoSave[0] = quo[0];
1621                 quoSave[1] = quo[1];
1622                 quoSave[2] = quo[2];
1623
1624                 if (Div96By32(quoSave, 100000000) == 0) {
1625                         quo[0] = quoSave[0];
1626                         quo[1] = quoSave[1];
1627                         quo[2] = quoSave[2];
1628                         scale -= 8;
1629                 }
1630                 else
1631                         break;
1632         }
1633
1634         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1635                 quoSave[0] = quo[0];
1636                 quoSave[1] = quo[1];
1637                 quoSave[2] = quo[2];
1638
1639                 if (Div96By32(quoSave, 10000) == 0) {
1640                         quo[0] = quoSave[0];
1641                         quo[1] = quoSave[1];
1642                         quo[2] = quoSave[2];
1643                         scale -= 4;
1644                 }
1645         }
1646
1647         if ((quo[0] & 3) == 0 && scale >= 2) {
1648                 quoSave[0] = quo[0];
1649                 quoSave[1] = quo[1];
1650                 quoSave[2] = quo[2];
1651
1652                 if (Div96By32(quoSave, 100) == 0) {
1653                         quo[0] = quoSave[0];
1654                         quo[1] = quoSave[1];
1655                         quo[2] = quoSave[2];
1656                         scale -= 2;
1657                 }
1658         }
1659
1660         if ((quo[0] & 1) == 0 && scale >= 1) {
1661                 quoSave[0] = quo[0];
1662                 quoSave[1] = quo[1];
1663                 quoSave[2] = quo[2];
1664
1665                 if (Div96By32(quoSave, 10) == 0) {
1666                         quo[0] = quoSave[0];
1667                         quo[1] = quoSave[1];
1668                         quo[2] = quoSave[2];
1669                         scale -= 1;
1670                 }
1671         }
1672
1673         result->Hi32 = quo[2];
1674         result->v.v.Mid32 = quo[1];
1675         result->v.v.Lo32 = quo[0];
1676         result->u.u.scale = scale;
1677         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1678         return MONO_DECIMAL_OK;
1679 }
1680
1681 // mono_decimal_absolute - Decimal Absolute Value
1682 static void G_GNUC_UNUSED
1683 mono_decimal_absolute (MonoDecimal *pdecOprd, MonoDecimal *result)
1684 {
1685         COPYDEC(*result, *pdecOprd);
1686         result->u.u.sign &= ~DECIMAL_NEG;
1687         // Microsoft does not set reserved here
1688 }
1689
1690 // mono_decimal_fix - Decimal Fix (chop to integer)
1691 static void
1692 mono_decimal_fix (MonoDecimal *pdecOprd, MonoDecimal *result)
1693 {
1694         DecFixInt(result, pdecOprd);
1695 }
1696
1697 // mono_decimal_round_to_int - Decimal Int (round down to integer)
1698 static void
1699 mono_decimal_round_to_int (MonoDecimal *pdecOprd, MonoDecimal *result)
1700 {
1701         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1702                 // We have chopped off a non-zero amount from a negative value.  Since
1703                 // we round toward -infinity, we must increase the integer result by
1704                 // 1 to make it more negative.  This will never overflow because
1705                 // in order to have a remainder, we must have had a non-zero scale factor.
1706                 // Our scale factor is back to zero now.
1707                 //
1708                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
1709                 if (DECIMAL_LO64_GET(*result) == 0)
1710                         result->Hi32++;
1711         }
1712 }
1713
1714 // mono_decimal_negate - Decimal Negate
1715 static void G_GNUC_UNUSED
1716 mono_decimal_negate (MonoDecimal *pdecOprd, MonoDecimal *result)
1717 {
1718         COPYDEC(*result, *pdecOprd);
1719         // Microsoft does not set result->reserved to zero on this case.
1720         result->u.u.sign ^= DECIMAL_NEG;
1721 }
1722
1723 //
1724 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
1725 //
1726 static MonoDecimalStatus
1727 mono_decimal_round_result(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1728 {
1729         uint32_t num[3];
1730         uint32_t rem;
1731         uint32_t sticky;
1732         uint32_t pwr;
1733         int scale;
1734
1735         if (cDecimals < 0)
1736                 return MONO_DECIMAL_INVALID_ARGUMENT;
1737
1738         scale = input->u.u.scale - cDecimals;
1739         if (scale > 0) {
1740                 num[0] = input->v.v.Lo32;
1741                 num[1] = input->v.v.Mid32;
1742                 num[2] = input->Hi32;
1743                 result->u.u.sign = input->u.u.sign;
1744                 rem = sticky = 0;
1745
1746                 do {
1747                         sticky |= rem;
1748                         if (scale > POWER10_MAX)
1749                                 pwr = ten_to_nine;
1750                         else
1751                                 pwr = power10[scale];
1752
1753                         rem = Div96By32(num, pwr);
1754                         scale -= 9;
1755                 }while (scale > 0);
1756
1757                 // Now round.  rem has last remainder, sticky has sticky bits.
1758                 // To do IEEE rounding, we add LSB of result to sticky bits so
1759                 // either causes round up if remainder * 2 == last divisor.
1760                 //
1761                 sticky |= num[0] & 1;
1762                 rem = (rem << 1) + (sticky != 0);
1763                 if (pwr < rem &&
1764                     ++num[0] == 0 &&
1765                     ++num[1] == 0
1766                         )
1767                         ++num[2];
1768
1769                 result->v.v.Lo32 = num[0];
1770                 result->v.v.Mid32 = num[1];
1771                 result->Hi32 = num[2];
1772                 result->u.u.scale = cDecimals;
1773                 return MONO_DECIMAL_OK;
1774         }
1775
1776         COPYDEC(*result, *input);
1777         // Odd, the Microsoft source does not set the result->reserved to zero here.
1778         return MONO_DECIMAL_OK;
1779 }
1780
1781 //
1782 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1783 static MonoDecimalStatus
1784 mono_decimal_from_float (float input_f, MonoDecimal* result)
1785 {
1786         int         exp;    // number of bits to left of binary point
1787         int         power;
1788         uint32_t       mant;
1789         double      dbl;
1790         SPLIT64     sdlLo;
1791         SPLIT64     sdlHi;
1792         int         lmax, cur;  // temps used during scale reduction
1793         MonoSingle_float input = { .f = input_f };
1794
1795         // The most we can scale by is 10^28, which is just slightly more
1796         // than 2^93.  So a float with an exponent of -94 could just
1797         // barely reach 0.5, but smaller exponents will always round to zero.
1798         //
1799         if ((exp = input.s.exp - MONO_SINGLE_BIAS) < -94 ) {
1800                 DECIMAL_SETZERO(*result);
1801                 return MONO_DECIMAL_OK;
1802         }
1803
1804         if (exp > 96)
1805                 return MONO_DECIMAL_OVERFLOW;
1806
1807         // Round the input to a 7-digit integer.  The R4 format has
1808         // only 7 digits of precision, and we want to keep garbage digits
1809         // out of the Decimal were making.
1810         //
1811         // Calculate max power of 10 input value could have by multiplying 
1812         // the exponent by log10(2).  Using scaled integer multiplcation, 
1813         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1814         //
1815         dbl = fabs(input.f);
1816         power = 6 - ((exp * 19728) >> 16);
1817         
1818         if (power >= 0) {
1819                 // We have less than 7 digits, scale input up.
1820                 //
1821                 if (power > DECMAX)
1822                         power = DECMAX;
1823                 
1824                 dbl = dbl * double_power10[power];
1825         } else {
1826                 if (power != -1 || dbl >= 1E7)
1827                         dbl = dbl / fnDblPower10(-power);
1828                 else 
1829                         power = 0; // didn't scale it
1830         }
1831         
1832         g_assert (dbl < 1E7);
1833         if (dbl < 1E6 && power < DECMAX) {
1834                 dbl *= 10;
1835                 power++;
1836                 g_assert(dbl >= 1E6);
1837         }
1838         
1839         // Round to integer
1840         //
1841         mant = (int32_t)dbl;
1842         dbl -= (double)mant;  // difference between input & integer
1843         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1844                 mant++;
1845         
1846         if (mant == 0) {
1847                 DECIMAL_SETZERO(*result);
1848                 return MONO_DECIMAL_OK;
1849         }
1850         
1851         if (power < 0) {
1852                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1853                 //
1854                 power = -power;
1855                 if (power < 10) {
1856                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1857                         
1858                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1859                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1860                         DECIMAL_HI32(*result) = 0;
1861                 } else {
1862                         // Have a big power of 10.
1863                         //
1864                         if (power > 18) {
1865                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1866                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1867                                 
1868                                 if (sdlHi.u.Hi != 0)
1869                                         return MONO_DECIMAL_OVERFLOW;
1870                         }
1871                         else {
1872                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1873                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1874                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1875                                 sdlHi.int64 += sdlLo.u.Hi;
1876                                 sdlLo.u.Hi = sdlHi.u.Lo;
1877                                 sdlHi.u.Lo = sdlHi.u.Hi;
1878                         }
1879                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1880                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1881                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1882                 }
1883                 DECIMAL_SCALE(*result) = 0;
1884         } else {
1885                 // Factor out powers of 10 to reduce the scale, if possible.
1886                 // The maximum number we could factor out would be 6.  This
1887                 // comes from the fact we have a 7-digit number, and the
1888                 // MSD must be non-zero -- but the lower 6 digits could be
1889                 // zero.  Note also the scale factor is never negative, so
1890                 // we can't scale by any more than the power we used to
1891                 // get the integer.
1892                 //
1893                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1894                 //
1895                 lmax = min(power, 6);
1896                 
1897                 // lmax is the largest power of 10 to try, lmax <= 6.
1898                 // We'll try powers 4, 2, and 1 unless they're too big.
1899                 //
1900                 for (cur = 4; cur > 0; cur >>= 1)
1901                 {
1902                         if (cur > lmax)
1903                                 continue;
1904                         
1905                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1906                         
1907                         if (sdlLo.u.Hi == 0) {
1908                                 mant = sdlLo.u.Lo;
1909                                 power -= cur;
1910                                 lmax -= cur;
1911                         }
1912                 }
1913                 DECIMAL_LO32(*result) = mant;
1914                 DECIMAL_MID32(*result) = 0;
1915                 DECIMAL_HI32(*result) = 0;
1916                 DECIMAL_SCALE(*result) = power;
1917         }
1918         
1919         DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
1920         return MONO_DECIMAL_OK;
1921 }
1922
1923 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
1924 static MonoDecimalStatus
1925 mono_decimal_from_double (double input_d, MonoDecimal *result)
1926 {
1927         int         exp;    // number of bits to left of binary point
1928         int         power;  // power-of-10 scale factor
1929         SPLIT64     sdlMant;
1930         SPLIT64     sdlLo;
1931         double      dbl;
1932         int         lmax, cur;  // temps used during scale reduction
1933         uint32_t       pwr_cur;
1934         uint32_t       quo;
1935         MonoDouble_double input = { .d = input_d };
1936         
1937         // The most we can scale by is 10^28, which is just slightly more
1938         // than 2^93.  So a float with an exponent of -94 could just
1939         // barely reach 0.5, but smaller exponents will always round to zero.
1940         //
1941         if ((exp = input.s.exp - MONO_DOUBLE_BIAS) < -94) {
1942                 DECIMAL_SETZERO(*result);
1943                 return MONO_DECIMAL_OK;
1944         }
1945
1946         if (exp > 96)
1947                 return MONO_DECIMAL_OVERFLOW;
1948
1949         // Round the input to a 15-digit integer.  The R8 format has
1950         // only 15 digits of precision, and we want to keep garbage digits
1951         // out of the Decimal were making.
1952         //
1953         // Calculate max power of 10 input value could have by multiplying 
1954         // the exponent by log10(2).  Using scaled integer multiplcation, 
1955         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1956         //
1957         dbl = fabs(input.d);
1958         power = 14 - ((exp * 19728) >> 16);
1959         
1960         if (power >= 0) {
1961                 // We have less than 15 digits, scale input up.
1962                 //
1963                 if (power > DECMAX)
1964                         power = DECMAX;
1965
1966                 dbl = dbl * double_power10[power];
1967         } else {
1968                 if (power != -1 || dbl >= 1E15)
1969                         dbl = dbl / fnDblPower10(-power);
1970                 else 
1971                         power = 0; // didn't scale it
1972         }
1973
1974         g_assert (dbl < 1E15);
1975         if (dbl < 1E14 && power < DECMAX) {
1976                 dbl *= 10;
1977                 power++;
1978                 g_assert(dbl >= 1E14);
1979         }
1980
1981         // Round to int64
1982         //
1983         sdlMant.int64 = (int64_t)dbl;
1984         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
1985         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
1986                 sdlMant.int64++;
1987
1988         if (sdlMant.int64 == 0) {
1989                 DECIMAL_SETZERO(*result);
1990                 return MONO_DECIMAL_OK;
1991         }
1992
1993         if (power < 0) {
1994                 // Add -power factors of 10, -power <= (29 - 15) = 14.
1995                 //
1996                 power = -power;
1997                 if (power < 10) {
1998                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
1999                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2000                         sdlMant.int64 += sdlLo.u.Hi;
2001                         sdlLo.u.Hi = sdlMant.u.Lo;
2002                         sdlMant.u.Lo = sdlMant.u.Hi;
2003                 }
2004                 else {
2005                         // Have a big power of 10.
2006                         //
2007                         g_assert(power <= 14);
2008                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2009
2010                         if (sdlMant.u.Hi != 0)
2011                                 return MONO_DECIMAL_OVERFLOW;
2012                 }
2013                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2014                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2015                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2016                 DECIMAL_SCALE(*result) = 0;
2017         }
2018         else {
2019                 // Factor out powers of 10 to reduce the scale, if possible.
2020                 // The maximum number we could factor out would be 14.  This
2021                 // comes from the fact we have a 15-digit number, and the 
2022                 // MSD must be non-zero -- but the lower 14 digits could be 
2023                 // zero.  Note also the scale factor is never negative, so
2024                 // we can't scale by any more than the power we used to
2025                 // get the integer.
2026                 //
2027                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2028                 //
2029                 lmax = min(power, 14);
2030
2031                 // lmax is the largest power of 10 to try, lmax <= 14.
2032                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2033                 //
2034                 for (cur = 8; cur > 0; cur >>= 1)
2035                 {
2036                         if (cur > lmax)
2037                                 continue;
2038
2039                         pwr_cur = (uint32_t)long_power10[cur];
2040
2041                         if (sdlMant.u.Hi >= pwr_cur) {
2042                                 // Overflow if we try to divide in one step.
2043                                 //
2044                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2045                                 quo = sdlLo.u.Lo;
2046                                 sdlLo.u.Lo = sdlMant.u.Lo;
2047                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2048                         }
2049                         else {
2050                                 quo = 0;
2051                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2052                         }
2053
2054                         if (sdlLo.u.Hi == 0) {
2055                                 sdlMant.u.Hi = quo;
2056                                 sdlMant.u.Lo = sdlLo.u.Lo;
2057                                 power -= cur;
2058                                 lmax -= cur;
2059                         }
2060                 }
2061
2062                 DECIMAL_HI32(*result) = 0;
2063                 DECIMAL_SCALE(*result) = power;
2064                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2065                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2066         }
2067
2068         DECIMAL_SIGN(*result) = (char)input.s.sign << 7;
2069         return MONO_DECIMAL_OK;
2070 }
2071
2072 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2073 static MonoDecimalStatus
2074 mono_decimal_to_double_result(MonoDecimal *input, double *result)
2075 {
2076         SPLIT64  tmp;
2077         double   dbl;
2078         
2079         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2080                 return MONO_DECIMAL_INVALID_ARGUMENT;
2081         
2082         tmp.u.Lo = DECIMAL_LO32(*input);
2083         tmp.u.Hi = DECIMAL_MID32(*input);
2084         
2085         if ((int32_t)DECIMAL_MID32(*input) < 0)
2086                 dbl = (ds2to64.d + (double)(int64_t)tmp.int64 +
2087                        (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2088         else
2089                 dbl = ((double)(int64_t)tmp.int64 +
2090                        (double)DECIMAL_HI32(*input) * ds2to64.d) / fnDblPower10(DECIMAL_SCALE(*input));
2091         
2092         if (DECIMAL_SIGN(*input))
2093                 dbl = -dbl;
2094         
2095         *result = dbl;
2096         return MONO_DECIMAL_OK;
2097 }
2098
2099 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
2100 static MonoDecimalStatus
2101 mono_decimal_to_float_result(MonoDecimal *input, float *result)
2102 {
2103         double   dbl;
2104         
2105         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2106                 return MONO_DECIMAL_INVALID_ARGUMENT;
2107         
2108         // Can't overflow; no errors possible.
2109         //
2110         mono_decimal_to_double_result(input, &dbl);
2111         *result = (float)dbl;
2112         return MONO_DECIMAL_OK;
2113 }
2114
2115 static void
2116 DecShiftLeft(MonoDecimal* value)
2117 {
2118         unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2119     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2120     g_assert(value != NULL);
2121
2122     DECIMAL_LO32(*value) <<= 1;
2123     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2124     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2125 }
2126
2127 static int
2128 D32AddCarry(uint32_t* value, uint32_t i)
2129 {
2130     uint32_t v = *value;
2131     uint32_t sum = v + i;
2132     *value = sum;
2133     return sum < v || sum < i? 1: 0;
2134 }
2135
2136 static void
2137 DecAdd(MonoDecimal *value, MonoDecimal* d)
2138 {
2139         g_assert(value != NULL && d != NULL);
2140
2141         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2142                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2143                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2144                 }
2145         }
2146         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2147                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2148         }
2149         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2150 }
2151
2152 static void
2153 DecMul10(MonoDecimal* value)
2154 {
2155         MonoDecimal d = *value;
2156         g_assert (value != NULL);
2157
2158         DecShiftLeft(value);
2159         DecShiftLeft(value);
2160         DecAdd(value, &d);
2161         DecShiftLeft(value);
2162 }
2163
2164 static void
2165 DecAddInt32(MonoDecimal* value, unsigned int i)
2166 {
2167         g_assert(value != NULL);
2168
2169         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2170                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2171                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2172                 }
2173         }
2174 }
2175
2176 MonoDecimalCompareResult
2177 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2178 {
2179         uint32_t   left_sign;
2180         uint32_t   right_sign;
2181         MonoDecimal result;
2182
2183         result.Hi32 = 0;        // Just to shut up the compiler
2184
2185         // First check signs and whether either are zero.  If both are
2186         // non-zero and of the same sign, just use subtraction to compare.
2187         //
2188         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2189         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2190         if (left_sign != 0)
2191                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2192
2193         if (right_sign != 0)
2194                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2195
2196         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2197         // operand is +, 0, or -.
2198         //
2199         if (left_sign == right_sign) {
2200                 if (left_sign == 0)    // both are zero
2201                         return MONO_DECIMAL_CMP_EQ; // return equal
2202
2203                 DecAddSub(left, right, &result, DECIMAL_NEG);
2204                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2205                         return MONO_DECIMAL_CMP_EQ;
2206                 if (result.u.u.sign & DECIMAL_NEG)
2207                         return MONO_DECIMAL_CMP_LT;
2208                 return MONO_DECIMAL_CMP_GT;
2209         }
2210
2211         //
2212         // Signs are different.  Use signed byte comparison
2213         //
2214         if ((signed char)left_sign > (signed char)right_sign)
2215                 return MONO_DECIMAL_CMP_GT;
2216         return MONO_DECIMAL_CMP_LT;
2217 }
2218
2219 void
2220 mono_decimal_init_single (MonoDecimal *_this, float value)
2221 {
2222         if (mono_decimal_from_float (value, _this) == MONO_DECIMAL_OVERFLOW) {
2223                 mono_set_pending_exception (mono_get_exception_overflow ());
2224                 return;
2225         }
2226         _this->reserved = 0;
2227 }
2228
2229 void
2230 mono_decimal_init_double (MonoDecimal *_this, double value)
2231 {
2232         if (mono_decimal_from_double (value, _this) == MONO_DECIMAL_OVERFLOW) {
2233                 mono_set_pending_exception (mono_get_exception_overflow ());
2234                 return;
2235         }
2236         _this->reserved = 0;
2237 }
2238
2239 void
2240 mono_decimal_floor (MonoDecimal *d)
2241 {
2242         MonoDecimal decRes;
2243
2244         mono_decimal_round_to_int(d, &decRes);
2245         
2246         // copy decRes into d
2247         COPYDEC(*d, decRes);
2248         d->reserved = 0;
2249         FC_GC_POLL ();
2250 }
2251
2252 int32_t
2253 mono_decimal_get_hash_code (MonoDecimal *d)
2254 {
2255         double dbl;
2256
2257         if (mono_decimal_to_double_result(d, &dbl) != MONO_DECIMAL_OK)
2258                 return 0;
2259         
2260         if (dbl == 0.0) {
2261                 // Ensure 0 and -0 have the same hash code
2262                 return 0;
2263         }
2264         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2265         // 
2266         // For example these two numerically equal decimals with different internal representations produce
2267         // slightly different results when converted to double:
2268         //
2269         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2270         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2271         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 }); 
2272         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2273         //
2274         return ((((int *)&dbl)[0]) & 0xFFFFFFF0) ^ ((int *)&dbl)[1];
2275         
2276 }
2277
2278 void
2279 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2280 {
2281         MonoDecimal decRes;
2282
2283         MonoDecimalStatus status = mono_decimal_multiply_result(d1, d2, &decRes);
2284         if (status != MONO_DECIMAL_OK) {
2285                 mono_set_pending_exception (mono_get_exception_overflow ());
2286                 return;
2287         }
2288
2289         COPYDEC(*d1, decRes);
2290         d1->reserved = 0;
2291
2292         FC_GC_POLL ();
2293 }
2294
2295 void
2296 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2297 {
2298         MonoDecimal decRes;
2299         
2300         // GC is only triggered for throwing, no need to protect result 
2301         if (decimals < 0 || decimals > 28) {
2302                 mono_set_pending_exception (mono_get_exception_argument_out_of_range ("d"));
2303                 return;
2304         }
2305
2306         mono_decimal_round_result(d, decimals, &decRes);
2307
2308         // copy decRes into d
2309         COPYDEC(*d, decRes);
2310         d->reserved = 0;
2311
2312         FC_GC_POLL();
2313 }
2314
2315 void
2316 mono_decimal_tocurrency (MonoDecimal *decimal)
2317 {
2318         // TODO
2319 }
2320
2321 double
2322 mono_decimal_to_double (MonoDecimal d)
2323 {
2324         double result = 0.0;
2325         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2326         mono_decimal_to_double_result(&d, &result);
2327         return result;
2328 }
2329
2330 int32_t
2331 mono_decimal_to_int32 (MonoDecimal d)
2332 {
2333         MonoDecimal result;
2334         
2335         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2336         mono_decimal_round_result(&d, 0, &result);
2337         
2338         if (DECIMAL_SCALE(result) != 0) {
2339                 d = result;
2340                 mono_decimal_fix (&d, &result);
2341         }
2342         
2343         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2344                 int32_t i = DECIMAL_LO32(result);
2345                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2346                         if (i >= 0)
2347                                 return i;
2348                 } else {
2349                         i = -i;
2350                         if (i <= 0)
2351                                 return i;
2352                 }
2353         }
2354         
2355         mono_set_pending_exception (mono_get_exception_overflow ());
2356         return 0;
2357 }
2358
2359 float
2360 mono_decimal_to_float (MonoDecimal d)
2361 {
2362         float result = 0.0f;
2363         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2364         mono_decimal_to_float_result(&d, &result);
2365         return result;
2366 }
2367
2368 void
2369 mono_decimal_truncate (MonoDecimal *d)
2370 {
2371         MonoDecimal decRes;
2372
2373         mono_decimal_fix(d, &decRes);
2374
2375         // copy decRes into d
2376         COPYDEC(*d, decRes);
2377         d->reserved = 0;
2378         FC_GC_POLL();
2379 }
2380
2381 void
2382 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2383 {
2384         MonoDecimal result, decTmp;
2385         MonoDecimal *pdecTmp, *leftOriginal;
2386         uint32_t    num[6], pwr;
2387         int         scale, hi_prod, cur;
2388         SPLIT64     sdlTmp;
2389         
2390         g_assert(sign == 0 || sign == DECIMAL_NEG);
2391
2392         leftOriginal = left;
2393
2394         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2395
2396         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2397                 // Scale factors are equal, no alignment necessary.
2398                 //
2399                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2400
2401         AlignedAdd:
2402                 if (sign) {
2403                         // Signs differ - subtract
2404                         //
2405                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2406                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2407
2408                         // Propagate carry
2409                         //
2410                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2411                                 DECIMAL_HI32(result)--;
2412                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2413                                         goto SignFlip;
2414                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2415                                 // Got negative result.  Flip its sign.
2416                                 // 
2417                         SignFlip:
2418                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2419                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2420                                 if (DECIMAL_LO64_GET(result) == 0)
2421                                         DECIMAL_HI32(result)++;
2422                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2423                         }
2424
2425                 } else {
2426                         // Signs are the same - add
2427                         //
2428                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2429                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2430
2431                         // Propagate carry
2432                         //
2433                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2434                                 DECIMAL_HI32(result)++;
2435                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2436                                         goto AlignedScale;
2437                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2438                         AlignedScale:
2439                                 // The addition carried above 96 bits.  Divide the result by 10,
2440                                 // dropping the scale factor.
2441                                 // 
2442                                 if (DECIMAL_SCALE(result) == 0) {
2443                                         mono_set_pending_exception (mono_get_exception_overflow ());
2444                                         return;
2445                                 }
2446                                 DECIMAL_SCALE(result)--;
2447
2448                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2449                                 sdlTmp.u.Hi = 1;
2450                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2451                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2452
2453                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2454                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2455                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2456
2457                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2458                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2459                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2460
2461                                 // See if we need to round up.
2462                                 //
2463                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2464                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2465                                         if (DECIMAL_LO64_GET(result) == 0)
2466                                                 DECIMAL_HI32(result)++;
2467                                 }
2468                         }
2469                 }
2470         } else {
2471                 // Scale factors are not equal.  Assume that a larger scale
2472                 // factor (more decimal places) is likely to mean that number
2473                 // is smaller.  Start by guessing that the right operand has
2474                 // the larger scale factor.  The result will have the larger
2475                 // scale factor.
2476                 //
2477                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2478                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2479                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2480
2481                 if (scale < 0) {
2482                         // Guessed scale factor wrong. Swap operands.
2483                         //
2484                         scale = -scale;
2485                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2486                         DECIMAL_SIGN(result) ^= sign;
2487                         pdecTmp = right;
2488                         right = left;
2489                         left = pdecTmp;
2490                 }
2491
2492                 // *left will need to be multiplied by 10^scale so
2493                 // it will have the same scale as *right.  We could be
2494                 // extending it to up to 192 bits of precision.
2495                 //
2496                 if (scale <= POWER10_MAX) {
2497                         // Scaling won't make it larger than 4 uint32_ts
2498                         //
2499                         pwr = power10[scale];
2500                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2501                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2502                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2503                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2504                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2505                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2506                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2507                         if (sdlTmp.u.Hi == 0) {
2508                                 // Result fits in 96 bits.  Use standard aligned add.
2509                                 //
2510                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2511                                 left = &decTmp;
2512                                 goto AlignedAdd;
2513                         }
2514                         num[0] = DECIMAL_LO32(decTmp);
2515                         num[1] = DECIMAL_MID32(decTmp);
2516                         num[2] = sdlTmp.u.Lo;
2517                         num[3] = sdlTmp.u.Hi;
2518                         hi_prod = 3;
2519                 } else {
2520                         // Have to scale by a bunch.  Move the number to a buffer
2521                         // where it has room to grow as it's scaled.
2522                         //
2523                         num[0] = DECIMAL_LO32(*left);
2524                         num[1] = DECIMAL_MID32(*left);
2525                         num[2] = DECIMAL_HI32(*left);
2526                         hi_prod = 2;
2527
2528                         // Scan for zeros in the upper words.
2529                         //
2530                         if (num[2] == 0) {
2531                                 hi_prod = 1;
2532                                 if (num[1] == 0) {
2533                                         hi_prod = 0;
2534                                         if (num[0] == 0) {
2535                                                 // Left arg is zero, return right.
2536                                                 //
2537                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2538                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2539                                                 DECIMAL_SIGN(result) ^= sign;
2540                                                 goto RetDec;
2541                                         }
2542                                 }
2543                         }
2544
2545                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2546                         // with index of highest non-zero uint32_t.
2547                         //
2548                         for (; scale > 0; scale -= POWER10_MAX) {
2549                                 if (scale > POWER10_MAX)
2550                                         pwr = ten_to_nine;
2551                                 else
2552                                         pwr = power10[scale];
2553
2554                                 sdlTmp.u.Hi = 0;
2555                                 for (cur = 0; cur <= hi_prod; cur++) {
2556                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2557                                         num[cur] = sdlTmp.u.Lo;
2558                                 }
2559
2560                                 if (sdlTmp.u.Hi != 0)
2561                                         // We're extending the result by another uint32_t.
2562                                         num[++hi_prod] = sdlTmp.u.Hi;
2563                         }
2564                 }
2565
2566                 // Scaling complete, do the add.  Could be subtract if signs differ.
2567                 //
2568                 sdlTmp.u.Lo = num[0];
2569                 sdlTmp.u.Hi = num[1];
2570
2571                 if (sign) {
2572                         // Signs differ, subtract.
2573                         //
2574                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2575                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2576
2577                         // Propagate carry
2578                         //
2579                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2580                                 DECIMAL_HI32(result)--;
2581                                 if (DECIMAL_HI32(result) >= num[2])
2582                                         goto LongSub;
2583                         } else if (DECIMAL_HI32(result) > num[2]) {
2584                         LongSub:
2585                                 // If num has more than 96 bits of precision, then we need to 
2586                                 // carry the subtraction into the higher bits.  If it doesn't, 
2587                                 // then we subtracted in the wrong order and have to flip the 
2588                                 // sign of the result.
2589                                 // 
2590                                 if (hi_prod <= 2)
2591                                         goto SignFlip;
2592
2593                                 cur = 3;
2594                                 while(num[cur++]-- == 0);
2595                                 if (num[hi_prod] == 0)
2596                                         hi_prod--;
2597                         }
2598                 } else {
2599                         // Signs the same, add.
2600                         //
2601                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2602                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2603
2604                         // Propagate carry
2605                         //
2606                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2607                                 DECIMAL_HI32(result)++;
2608                                 if (DECIMAL_HI32(result) <= num[2])
2609                                         goto LongAdd;
2610                         } else if (DECIMAL_HI32(result) < num[2]) {
2611                         LongAdd:
2612                                 // Had a carry above 96 bits.
2613                                 //
2614                                 cur = 3;
2615                                 do {
2616                                         if (hi_prod < cur) {
2617                                                 num[cur] = 1;
2618                                                 hi_prod = cur;
2619                                                 break;
2620                                         }
2621                                 }while (++num[cur++] == 0);
2622                         }
2623                 }
2624
2625                 if (hi_prod > 2) {
2626                         num[0] = DECIMAL_LO32(result);
2627                         num[1] = DECIMAL_MID32(result);
2628                         num[2] = DECIMAL_HI32(result);
2629                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2630                         if (DECIMAL_SCALE(result) == (uint8_t)-1) {
2631                                 mono_set_pending_exception (mono_get_exception_overflow ());
2632                                 return;
2633                         }
2634
2635                         DECIMAL_LO32(result) = num[0];
2636                         DECIMAL_MID32(result) = num[1];
2637                         DECIMAL_HI32(result) = num[2];
2638                 }
2639         }
2640
2641 RetDec:
2642         left = leftOriginal;
2643         COPYDEC(*left, result);
2644         left->reserved = 0;
2645 }
2646
2647 void
2648 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2649 {
2650         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2651         uint32_t pwr, tmp, tmp1;
2652         SPLIT64  sdlTmp, sdlDivisor;
2653         int      scale, cur_scale;
2654         gboolean unscale;
2655
2656         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2657         unscale = FALSE;
2658         divisor[0] = DECIMAL_LO32(*right);
2659         divisor[1] = DECIMAL_MID32(*right);
2660         divisor[2] = DECIMAL_HI32(*right);
2661
2662         if (divisor[1] == 0 && divisor[2] == 0) {
2663                 // Divisor is only 32 bits.  Easy divide.
2664                 //
2665                 if (divisor[0] == 0) {
2666                         mono_set_pending_exception (mono_get_exception_divide_by_zero ());
2667                         return;
2668                 }
2669
2670                 quo[0] = DECIMAL_LO32(*left);
2671                 quo[1] = DECIMAL_MID32(*left);
2672                 quo[2] = DECIMAL_HI32(*left);
2673                 rem[0] = Div96By32(quo, divisor[0]);
2674
2675                 for (;;) {
2676                         if (rem[0] == 0) {
2677                                 if (scale < 0) {
2678                                         cur_scale = min(9, -scale);
2679                                         goto HaveScale;
2680                                 }
2681                                 break;
2682                         }
2683                         // We need to unscale if and only if we have a non-zero remainder
2684                         unscale = TRUE;
2685
2686                         // We have computed a quotient based on the natural scale 
2687                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero 
2688                         // remainder, so now we should increase the scale if possible to 
2689                         // include more quotient bits.
2690                         // 
2691                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and 
2692                         // computing more quotient bits as long as the remainder stays 
2693                         // non-zero.  If scaling by that much would cause overflow, we'll 
2694                         // drop out of the loop and scale by as much as we can.
2695                         // 
2696                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9 
2697                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and 
2698                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since 
2699                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888 
2700                         // is the largest value in quo[1] (when quo[2] == 4) that is 
2701                         // assured not to overflow.
2702                         // 
2703                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2704                         if (cur_scale == 0) {
2705                                 // No more scaling to be done, but remainder is non-zero.
2706                                 // Round quotient.
2707                                 //
2708                                 tmp = rem[0] << 1;
2709                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2710                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2711                                 RoundUp:
2712                                         if (!Add32To96(quo, 1)) {
2713                                                 if (scale == 0) {
2714                                                         mono_set_pending_exception (mono_get_exception_overflow ());
2715                                                         return;
2716                                                 }
2717                                                 scale--;
2718                                                 OverflowUnscale(quo, TRUE);
2719                                                 break;
2720                                         }      
2721                                 }
2722                                 break;
2723                         }
2724
2725                         if (cur_scale < 0) {
2726                                 mono_set_pending_exception (mono_get_exception_overflow ());
2727                                 return;
2728                         }
2729
2730                 HaveScale:
2731                         pwr = power10[cur_scale];
2732                         scale += cur_scale;
2733
2734                         if (IncreaseScale(quo, pwr) != 0) {
2735                                 mono_set_pending_exception (mono_get_exception_overflow ());
2736                                 return;
2737                         }
2738
2739                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2740                         rem[0] = sdlTmp.u.Hi;
2741
2742                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2743                                 if (scale == 0) {
2744                                         mono_set_pending_exception (mono_get_exception_overflow ());
2745                                         return;
2746                                 }
2747                                 scale--;
2748                                 OverflowUnscale(quo, (rem[0] != 0));
2749                                 break;
2750                         }
2751                 } // for (;;)
2752         } else {
2753                 // Divisor has bits set in the upper 64 bits.
2754                 //
2755                 // Divisor must be fully normalized (shifted so bit 31 of the most 
2756                 // significant uint32_t is 1).  Locate the MSB so we know how much to 
2757                 // normalize by.  The dividend will be shifted by the same amount so 
2758                 // the quotient is not changed.
2759                 //
2760                 if (divisor[2] == 0)
2761                         tmp = divisor[1];
2762                 else
2763                         tmp = divisor[2];
2764
2765                 cur_scale = 0;
2766                 if (!(tmp & 0xFFFF0000)) {
2767                         cur_scale += 16;
2768                         tmp <<= 16;
2769                 }
2770                 if (!(tmp & 0xFF000000)) {
2771                         cur_scale += 8;
2772                         tmp <<= 8;
2773                 }
2774                 if (!(tmp & 0xF0000000)) {
2775                         cur_scale += 4;
2776                         tmp <<= 4;
2777                 }
2778                 if (!(tmp & 0xC0000000)) {
2779                         cur_scale += 2;
2780                         tmp <<= 2;
2781                 }
2782                 if (!(tmp & 0x80000000)) {
2783                         cur_scale++;
2784                         tmp <<= 1;
2785                 }
2786     
2787                 // Shift both dividend and divisor left by cur_scale.
2788                 // 
2789                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2790                 rem[0] = sdlTmp.u.Lo;
2791                 rem[1] = sdlTmp.u.Hi;
2792                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2793                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2794                 sdlTmp.int64 <<= cur_scale;
2795                 rem[2] = sdlTmp.u.Hi;
2796                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2797
2798                 sdlDivisor.u.Lo = divisor[0];
2799                 sdlDivisor.u.Hi = divisor[1];
2800                 sdlDivisor.int64 <<= cur_scale;
2801
2802                 if (divisor[2] == 0) {
2803                         // Have a 64-bit divisor in sdlDivisor.  The remainder 
2804                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2805                         // 
2806                         sdlTmp.u.Lo = rem[2];
2807                         sdlTmp.u.Hi = rem[3];
2808
2809                         quo[2] = 0;
2810                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2811                         quo[0] = Div96By64(rem, sdlDivisor);
2812
2813                         for (;;) {
2814                                 if ((rem[0] | rem[1]) == 0) {
2815                                         if (scale < 0) {
2816                                                 cur_scale = min(9, -scale);
2817                                                 goto HaveScale64;
2818                                         }
2819                                         break;
2820                                 }
2821
2822                                 // We need to unscale if and only if we have a non-zero remainder
2823                                 unscale = TRUE;
2824
2825                                 // Remainder is non-zero.  Scale up quotient and remainder by 
2826                                 // powers of 10 so we can compute more significant bits.
2827                                 // 
2828                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2829                                 if (cur_scale == 0) {
2830                                         // No more scaling to be done, but remainder is non-zero.
2831                                         // Round quotient.
2832                                         //
2833                                         sdlTmp.u.Lo = rem[0];
2834                                         sdlTmp.u.Hi = rem[1];
2835                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2836                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2837                                                 goto RoundUp;
2838                                         break;
2839                                 }
2840
2841                                 if (cur_scale < 0) {
2842                                         mono_set_pending_exception (mono_get_exception_overflow ());
2843                                         return;
2844                                 }
2845
2846                         HaveScale64:
2847                                 pwr = power10[cur_scale];
2848                                 scale += cur_scale;
2849
2850                                 if (IncreaseScale(quo, pwr) != 0) {
2851                                         mono_set_pending_exception (mono_get_exception_overflow ());
2852                                         return;
2853                                 }
2854                                 
2855                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2856                                 IncreaseScale(rem, pwr);
2857                                 tmp = Div96By64(rem, sdlDivisor);
2858                                 if (!Add32To96(quo, tmp)) {
2859                                         if (scale == 0) {
2860                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2861                                                 return;
2862                                         }
2863                                         scale--;
2864                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2865                                         break;
2866                                 }      
2867
2868                         } // for (;;)
2869                 } else {
2870                         // Have a 96-bit divisor in divisor[].
2871                         //
2872                         // Start by finishing the shift left by cur_scale.
2873                         //
2874                         sdlTmp.u.Lo = divisor[1];
2875                         sdlTmp.u.Hi = divisor[2];
2876                         sdlTmp.int64 <<= cur_scale;
2877                         divisor[0] = sdlDivisor.u.Lo;
2878                         divisor[1] = sdlDivisor.u.Hi;
2879                         divisor[2] = sdlTmp.u.Hi;
2880
2881                         // The remainder (currently 96 bits spread over 4 uint32_ts) 
2882                         // will be < divisor.
2883                         // 
2884                         quo[2] = 0;
2885                         quo[1] = 0;
2886                         quo[0] = Div128By96(rem, divisor);
2887
2888                         for (;;) {
2889                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2890                                         if (scale < 0) {
2891                                                 cur_scale = min(9, -scale);
2892                                                 goto HaveScale96;
2893                                         }
2894                                         break;
2895                                 }
2896
2897                                 // We need to unscale if and only if we have a non-zero remainder
2898                                 unscale = TRUE;
2899
2900                                 // Remainder is non-zero.  Scale up quotient and remainder by 
2901                                 // powers of 10 so we can compute more significant bits.
2902                                 // 
2903                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2904                                 if (cur_scale == 0) {
2905                                         // No more scaling to be done, but remainder is non-zero.
2906                                         // Round quotient.
2907                                         //
2908                                         if (rem[2] >= 0x80000000)
2909                                                 goto RoundUp;
2910
2911                                         tmp = rem[0] > 0x80000000;
2912                                         tmp1 = rem[1] > 0x80000000;
2913                                         rem[0] <<= 1;
2914                                         rem[1] = (rem[1] << 1) + tmp;
2915                                         rem[2] = (rem[2] << 1) + tmp1;
2916
2917                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2918                                                 goto RoundUp;
2919                                         break;
2920                                 }
2921
2922                                 if (cur_scale < 0) {
2923                                         mono_set_pending_exception (mono_get_exception_overflow ());
2924                                         return;
2925                                 }
2926                                 
2927                         HaveScale96:
2928                                 pwr = power10[cur_scale];
2929                                 scale += cur_scale;
2930
2931                                 if (IncreaseScale(quo, pwr) != 0) {
2932                                         mono_set_pending_exception (mono_get_exception_overflow ());
2933                                         return;
2934                                 }
2935
2936                                 rem[3] = IncreaseScale(rem, pwr);
2937                                 tmp = Div128By96(rem, divisor);
2938                                 if (!Add32To96(quo, tmp)) {
2939                                         if (scale == 0) {
2940                                                 mono_set_pending_exception (mono_get_exception_overflow ());
2941                                                 return;
2942                                         }
2943                                         
2944                                         scale--;
2945                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2946                                         break;
2947                                 }      
2948
2949                         } // for (;;)
2950                 }
2951         }
2952
2953         // We need to unscale if and only if we have a non-zero remainder
2954         if (unscale) {
2955                 // Try extracting any extra powers of 10 we may have 
2956                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2957                 // If a division by one of these powers returns a zero remainder, then
2958                 // we keep the quotient.  If the remainder is not zero, then we restore
2959                 // the previous value.
2960                 // 
2961                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2962                 // we can extract.  We use this as a quick test on whether to try a
2963                 // given power.
2964                 // 
2965                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2966                         quo_save[0] = quo[0];
2967                         quo_save[1] = quo[1];
2968                         quo_save[2] = quo[2];
2969
2970                         if (Div96By32(quo_save, 100000000) == 0) {
2971                                 quo[0] = quo_save[0];
2972                                 quo[1] = quo_save[1];
2973                                 quo[2] = quo_save[2];
2974                                 scale -= 8;
2975                         } else
2976                                 break;
2977                 }
2978
2979                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
2980                         quo_save[0] = quo[0];
2981                         quo_save[1] = quo[1];
2982                         quo_save[2] = quo[2];
2983
2984                         if (Div96By32(quo_save, 10000) == 0) {
2985                                 quo[0] = quo_save[0];
2986                                 quo[1] = quo_save[1];
2987                                 quo[2] = quo_save[2];
2988                                 scale -= 4;
2989                         }
2990                 }
2991
2992                 if ((quo[0] & 3) == 0 && scale >= 2) {
2993                         quo_save[0] = quo[0];
2994                         quo_save[1] = quo[1];
2995                         quo_save[2] = quo[2];
2996
2997                         if (Div96By32(quo_save, 100) == 0) {
2998                                 quo[0] = quo_save[0];
2999                                 quo[1] = quo_save[1];
3000                                 quo[2] = quo_save[2];
3001                                 scale -= 2;
3002                         }
3003                 }
3004
3005                 if ((quo[0] & 1) == 0 && scale >= 1) {
3006                         quo_save[0] = quo[0];
3007                         quo_save[1] = quo[1];
3008                         quo_save[2] = quo[2];
3009
3010                         if (Div96By32(quo_save, 10) == 0) {
3011                                 quo[0] = quo_save[0];
3012                                 quo[1] = quo_save[1];
3013                                 quo[2] = quo_save[2];
3014                                 scale -= 1;
3015                         }
3016                 }
3017         }
3018
3019         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3020         DECIMAL_HI32(*left) = quo[2];
3021         DECIMAL_MID32(*left) = quo[1];
3022         DECIMAL_LO32(*left) = quo[0];
3023         DECIMAL_SCALE(*left) = (uint8_t)scale;
3024         left->reserved = 0;
3025
3026 }
3027
3028 #define DECIMAL_PRECISION 29
3029
3030 int
3031 mono_decimal_from_number (void *from, MonoDecimal *target)
3032 {
3033         MonoNumber *number = (MonoNumber *) from;
3034         uint16_t* p = number->digits;
3035         MonoDecimal d;
3036         int e = number->scale;
3037         g_assert(number != NULL);
3038         g_assert(target != NULL);
3039
3040         d.reserved = 0;
3041         DECIMAL_SIGNSCALE(d) = 0;
3042         DECIMAL_HI32(d) = 0;
3043         DECIMAL_LO32(d) = 0;
3044         DECIMAL_MID32(d) = 0;
3045         g_assert(p != NULL);
3046         if (!*p) {
3047                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3048                 // the scale to 0 if the scale was previously positive
3049                 if (e > 0) {
3050                         e = 0;
3051                 }
3052         } else {
3053                 if (e > DECIMAL_PRECISION) return 0;
3054                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3055                         DecMul10(&d);
3056                         if (*p)
3057                                 DecAddInt32(&d, *p++ - '0');
3058                         e--;
3059                 }
3060                 if (*p++ >= '5') {
3061                         gboolean round = TRUE;
3062                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3063                                 // For digits > 5 we will be roundinp up anyway.
3064                                 int count = 20; // Look at the next 20 digits to check to round
3065                                 while (*p == '0' && count != 0) {
3066                                         p++;
3067                                         count--;
3068                                 }
3069                                 if (*p == '\0' || count == 0) 
3070                                         round = FALSE;// Do nothing
3071                         }
3072                         
3073                         if (round) {
3074                                 DecAddInt32(&d, 1);
3075                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3076                                         DECIMAL_HI32(d) = 0x19999999;
3077                                         DECIMAL_MID32(d) = 0x99999999;
3078                                         DECIMAL_LO32(d) = 0x9999999A;
3079                                         e++;
3080                                 }
3081                         }
3082                 }
3083         }
3084         if (e > 0)
3085                 return 0;
3086         if (e <= -DECIMAL_PRECISION) {
3087                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3088                 // This should only happen for actual zeros or very small numbers that round to zero.
3089                 DECIMAL_SIGNSCALE(d) = 0;
3090                 DECIMAL_HI32(d) = 0;
3091                 DECIMAL_LO32(d) = 0;
3092                 DECIMAL_MID32(d) = 0;
3093                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3094         } else {
3095                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3096         }
3097         
3098         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3099         *target = d;
3100         return 1;
3101 }
3102
3103
3104 #endif