Merge branch 'master' into ms-decimal
[mono.git] / mono / metadata / decimal-ms.c
1 //
2 // Copyright (c) Microsoft. All rights reserved.
3 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
4 //
5 // Copyright 2015 Xamarin Inc
6 //
7 // File: decimal.c
8 //
9 // Ported from C++ to C and adjusted to Mono runtime
10 //
11 // Pending:
12 //   DoToCurrency (they look like new methods we do not have)
13 //
14 #ifndef DISABLE_DECIMAL
15 #include "config.h"
16 #include <stdint.h>
17 #include <glib.h>
18 #include <mono/utils/mono-compiler.h>
19 #include <mono/metadata/exception.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <math.h>
24 #ifdef HAVE_MEMORY_H
25 #include <memory.h>
26 #endif
27 #ifdef _MSC_VER
28 #include <intrin.h>
29 #endif
30 #include "decimal-ms.h"
31
// Smaller of two values.  Function-like macro: the selected argument is
// evaluated twice, so do not pass expressions with side effects.
#define min(a, b) (((a) < (b)) ? (a) : (b))
33
// Status codes returned by the internal decimal arithmetic routines in this
// file (for example VarDecMul returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW).
typedef enum {
        MONO_DECIMAL_OK,                   // operation succeeded
        MONO_DECIMAL_OVERFLOW,             // result cannot be represented
        MONO_DECIMAL_INVALID_ARGUMENT,
        MONO_DECIMAL_DIVBYZERO,
        MONO_DECIMAL_ARGUMENT_OUT_OF_RANGE
} MonoDecimalStatus;
41
// FC_GC_POLL expands to nothing unless something included earlier already
// defined it.
#ifndef FC_GC_POLL
#   define FC_GC_POLL() 
#endif
45
// 10^9 is the largest power of ten that fits in 32 bits.  10^10 / 4 is used by
// VarDecMul to divide by 10^10 in two steps that each fit in 32 bits.
static const uint32_t ten_to_nine    = 1000000000U;
static const uint32_t ten_to_ten_div_4 = 2500000000U;
// Largest exponent n for which power10[n] is defined.
#define POWER10_MAX     9
// Bit mask for the sign byte of a decimal.
#define DECIMAL_NEG ((uint8_t)0x80)
#define DECMAX 28

// Field accessors for a MonoDecimal value (not a pointer).
// NOTE(review): MonoDecimal itself is not declared in this part of the file;
// presumably it comes from decimal-ms.h -- confirm the field layout there.
#define DECIMAL_SCALE(dec)       ((dec).u.u.scale)
#define DECIMAL_SIGN(dec)        ((dec).u.u.sign)
#define DECIMAL_SIGNSCALE(dec)   ((dec).u.signscale)
#define DECIMAL_LO32(dec)        ((dec).v.v.Lo32)
#define DECIMAL_MID32(dec)       ((dec).v.v.Mid32)
#define DECIMAL_HI32(dec)        ((dec).Hi32)
#define DECIMAL_LO64_GET(dec)    ((dec).v.Lo64)
#define DECIMAL_LO64_SET(dec,value)   {(dec).v.Lo64 = value; }

// Both macros below expand to a brace-enclosed statement block, not an
// expression.  DECIMAL_SETZERO clears the three 32-bit words and the combined
// sign/scale field; COPYDEC copies those same four fields from src to dest.
#define DECIMAL_SETZERO(dec) {DECIMAL_LO32(dec) = 0; DECIMAL_MID32(dec) = 0; DECIMAL_HI32(dec) = 0; DECIMAL_SIGNSCALE(dec) = 0;}
#define COPYDEC(dest, src) {DECIMAL_SIGNSCALE(dest) = DECIMAL_SIGNSCALE(src); DECIMAL_HI32(dest) = DECIMAL_HI32(src); \
    DECIMAL_MID32(dest) = DECIMAL_MID32(src); DECIMAL_LO32(dest) = DECIMAL_LO32(src); }

// Maximum scale (number of decimal places) a result may carry.
#define DEC_SCALE_MAX   28
// Same value as the earlier POWER10_MAX definition; an identical redefinition.
#define POWER10_MAX     9

// Largest 96-bit value (Hi, Mid, Lo) that can still be multiplied by 10^9;
// these equal the 10^9 row of power_overflow[].
#define OVFL_MAX_9_HI   4
#define OVFL_MAX_9_MID  1266874889
#define OVFL_MAX_9_LO   3047500985u

// Hi and Mid words of the 10^5 row of power_overflow[].
#define OVFL_MAX_5_HI   42949
#define OVFL_MAX_5_MID  2890341191

// Hi word of the 10^1 row of power_overflow[].
#define OVFL_MAX_1_HI   429496729
75
// A 64-bit unsigned value that can also be addressed as two 32-bit halves.
// The member order is chosen per byte order so that u.Lo always aliases the
// least-significant 32 bits of int64 and u.Hi the most-significant 32 bits.
typedef union {
        uint64_t int64;
        struct {
#if BYTE_ORDER == G_BIG_ENDIAN
        uint32_t Hi;
        uint32_t Lo;
#else
        uint32_t Lo;
        uint32_t Hi;
#endif
    } u;
} SPLIT64;
88
// 10^18 as a SPLIT64 (initialises the first union member, int64).
static const SPLIT64    ten_to_eighteen = { 1000000000000000000ULL };
// Exponent bias used together with DoubleStructure.u.exp in this file.
// Note that it is 1022, one less than the IEEE 754 binary64 bias of 1023.
// NOTE(review): presumably the mantissa is treated as a fraction in [0.5, 1)
// by the conversion routines -- confirm where DBLBIAS is used.
#define DBLBIAS 1022

// Structure to access an encoded double floating point.
// The bit-fields (1 sign bit, 11 exponent bits, 20 high mantissa bits) plus a
// separate 32-bit low mantissa word overlay the double `dbl`; the field order
// is reversed depending on the byte order.
typedef union{
    struct {
#if BYTE_ORDER == G_BIG_ENDIAN
      unsigned int sign:1;
      unsigned int exp:11;
      unsigned int mantHi:20;
      unsigned int mantLo;
#else // little endian
      unsigned int mantLo;
      unsigned int mantHi:20;
      unsigned int exp:11;
      unsigned int sign:1;
#endif
    } u;
    double dbl;
} DoubleStructure;

// Brace initializer for a DoubleStructure that lists the bit-fields in the
// declaration order used for the current byte order.
#if BYTE_ORDER == G_BIG_ENDIAN
#define DEFDS(Lo, Hi, exp, sign) { {sign, exp, Hi, Lo } }
#else
#define DEFDS(Lo, Hi, exp, sign) { {Lo, Hi, exp, sign} }
#endif

// Positive value with zero mantissa and exponent field DBLBIAS + 65 = 1087.
// Under IEEE 754 binary64 (bias 1023) that encodes 2^64, as the name says.
const DoubleStructure ds2to64 = DEFDS(0, 0, DBLBIAS + 65, 0);
118
// Exponent bias used together with SingleStructure.exp in this file.
// It is 126, one less than the IEEE 754 binary32 bias of 127.
// NOTE(review): presumably for the same reason as DBLBIAS -- confirm at the
// point of use.
#define SNGBIAS 126

// Structure to access an encoded single floating point:
// 1 sign bit, 8 exponent bits and 23 mantissa bits, declared in the order
// that matches the current byte order.
typedef struct {
#if BYTE_ORDER == G_BIG_ENDIAN
    unsigned int sign:1;
    unsigned int exp:8;
    unsigned int mant:23;
#else
    unsigned int mant:23;
    unsigned int exp:8;
    unsigned int sign:1;
#endif
} SingleStructure;
134
135 //
136 // Data tables
137 //
138
// power10[n] == 10^n for 0 <= n <= POWER10_MAX; every entry fits in 32 bits.
static const uint32_t power10 [POWER10_MAX+1] = {
        1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
};
142
143
// double_power10[n] == 10^n as a double for 0 <= n <= 80 (81 entries).
// fnDblPower10() falls back to pow() for larger exponents.
static const double double_power10[] = {
        1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 
        1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 
        1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 
        1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 
        1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 
        1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
        1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, 
        1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
        1e80 };
154
// Powers of ten that no longer fit in 32 bits, stored as SPLIT64:
// sdl_power10[i] == 10^(10 + i) for 0 <= i <= 4.
const SPLIT64 sdl_power10[] = { {10000000000ULL},          // 1E10
                                {100000000000ULL},         // 1E11
                                {1000000000000ULL},        // 1E12
                                {10000000000000ULL},       // 1E13
                                {100000000000000ULL} };    // 1E14
160
// long_power10[n] == 10^n for 0 <= n <= 19.  10^19 is the largest power of
// ten representable in an unsigned 64-bit integer.
static const uint64_t long_power10[] = {
        1,
        10ULL,
        100ULL,
        1000ULL,
        10000ULL,
        100000ULL,
        1000000ULL,
        10000000ULL,
        100000000ULL,
        1000000000ULL,
        10000000000ULL,
        100000000000ULL,
        1000000000000ULL,
        10000000000000ULL,
        100000000000000ULL,
        1000000000000000ULL,
        10000000000000000ULL,
        100000000000000000ULL,
        1000000000000000000ULL,
        10000000000000000000ULL};
182
// One row of the power_overflow[] table: a 96-bit limit split into three
// 32-bit words, most-significant word first.
typedef struct  {
        uint32_t Hi, Mid, Lo;
} DECOVFL;
186
// power_overflow[n - 1] is the overflow limit for scaling by 10^n, 1 <= n <= 9.
const DECOVFL power_overflow[] = {
// This is a table of the largest values that can be in the upper two
// ULONGs of a 96-bit number that will not overflow when multiplied
// by a given power.  For the upper word, this is a table of 
// 2^32 / 10^n for 1 <= n <= 9.  For the lower word, this is the
// remaining fraction part * 2^32.  2^32 = 4294967296.
//
// The "remainder" noted on each row is the fraction that is left over after
// the Mid word has been taken; multiplied by 2^32 it gives the Lo word.
// 
    { 429496729u, 2576980377u, 2576980377u }, // 10^1 remainder 0.6
    { 42949672u,  4123168604u, 687194767u  }, // 10^2 remainder 0.16
    { 4294967u,   1271310319u, 2645699854u }, // 10^3 remainder 0.616
    { 429496u,    3133608139u, 694066715u  }, // 10^4 remainder 0.1616
    { 42949u,     2890341191u, 2216890319u }, // 10^5 remainder 0.51616
    { 4294u,      4154504685u, 2369172679u }, // 10^6 remainder 0.551616
    { 429u,       2133437386u, 4102387834u }, // 10^7 remainder 0.9551616
    { 42u,        4078814305u, 410238783u  }, // 10^8 remainder 0.09991616
    { 4u,         1266874889u, 3047500985u }, // 10^9 remainder 0.709551616
};
204
205
// Unsigned helpers.  Each argument is first truncated/converted to the stated
// unsigned width, so signed or wider arguments are reinterpreted, not checked.
//   UInt32x32To64: full 64-bit product of two 32-bit values.
//   Div64by32:     low 32 bits of the quotient of a 64-bit by a 32-bit value.
//   Mod64by32:     remainder of a 64-bit value divided by a 32-bit value.
#define UInt32x32To64(a, b) ((uint64_t)((uint32_t)(a)) * (uint64_t)((uint32_t)(b)))
#define Div64by32(num, den) ((uint32_t)((uint64_t)(num) / (uint32_t)(den)))
#define Mod64by32(num, den) ((uint32_t)((uint64_t)(num) % (uint32_t)(den)))
209
210 static double
211 fnDblPower10(int ix)
212 {
213     const int maxIx = (sizeof(double_power10)/sizeof(double_power10[0]));
214     g_assert(ix >= 0);
215     if (ix < maxIx)
216         return double_power10[ix];
217     return pow(10.0, ix);
218 } // double fnDblPower10()
219
220
// DivMod32by32:
//   Divides two 32-bit quantities and returns both results packed into one
//   64-bit value: quotient in the low 32 bits, remainder in the high 32 bits
//   (the same layout SPLIT64 exposes as u.Lo / u.Hi).
//
//   The operands are treated as UNSIGNED 32-bit values, matching the other
//   Div*/Mod* helpers in this file.  Dividing them as signed ints gave wrong
//   results for operands >= 2^31 and was undefined for INT32_MIN / -1.
//
//   den must not be zero.
static inline int64_t
DivMod32by32(int32_t num, int32_t den)
{
    const uint32_t dividend = (uint32_t)num;
    const uint32_t divisor = (uint32_t)den;
    const uint64_t quot = dividend / divisor;
    const uint64_t rem = dividend % divisor;

    return (int64_t)((rem << 32) | quot);
}
230
231 static inline int64_t
232 DivMod64by32(int64_t num, int32_t den)
233 {
234     SPLIT64  sdl;
235
236     sdl.u.Lo = Div64by32(num, den);
237     sdl.u.Hi = Mod64by32(num, den);
238     return sdl.int64;
239 }
240
241 static uint64_t
242 UInt64x64To128(SPLIT64 op1, SPLIT64 op2, uint64_t *hi)
243 {
244         SPLIT64  tmp1;
245         SPLIT64  tmp2;
246         SPLIT64  tmp3;
247
248         tmp1.int64 = UInt32x32To64(op1.u.Lo, op2.u.Lo); // lo partial prod
249         tmp2.int64 = UInt32x32To64(op1.u.Lo, op2.u.Hi); // mid 1 partial prod
250         tmp1.u.Hi += tmp2.u.Lo;
251         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
252                 tmp2.u.Hi++;
253         tmp3.int64 = UInt32x32To64(op1.u.Hi, op2.u.Hi) + (uint64_t)tmp2.u.Hi;
254         tmp2.int64 = UInt32x32To64(op1.u.Hi, op2.u.Lo);
255         tmp1.u.Hi += tmp2.u.Lo;
256         if (tmp1.u.Hi < tmp2.u.Lo)  // test for carry
257                 tmp2.u.Hi++;
258         tmp3.int64 += (uint64_t)tmp2.u.Hi;
259
260         *hi = tmp3.int64;
261         return tmp1.int64;
262 }
263
264 /**
265 * FullDiv64By32:
266 *
267 * Entry:
268 *   pdlNum  - Pointer to 64-bit dividend
269 *   ulDen   - 32-bit divisor
270 *
271 * Purpose:
272 *   Do full divide, yielding 64-bit result and 32-bit remainder.
273 *
274 * Exit:
275 *   Quotient overwrites dividend.
276 *   Returns remainder.
277 *
278 * Exceptions:
279 *   None.
280 */
281 // Was: FullDiv64By32
282 static uint32_t
283 FullDiv64By32 (uint64_t *num, uint32_t den)
284 {
285         SPLIT64  tmp;
286         SPLIT64  res;
287         
288         tmp.int64 = *num;
289         res.u.Hi = 0;
290         
291         if (tmp.u.Hi >= den) {
292                 // DivMod64by32 returns quotient in Lo, remainder in Hi.
293                 //
294                 res.u.Lo = tmp.u.Hi;
295                 res.int64 = DivMod64by32(res.int64, den);
296                 tmp.u.Hi = res.u.Hi;
297                 res.u.Hi = res.u.Lo;
298         }
299         
300         tmp.int64 = DivMod64by32(tmp.int64, den);
301         res.u.Lo = tmp.u.Lo;
302         *num = res.int64;
303         return tmp.u.Hi;
304 }
305
/***
 * SearchScale
 *
 * Entry:
 *   res_hi - Top uint32_t of quotient
 *   res_mid - Middle uint32_t of quotient
 *   res_lo - Bottom uint32_t of quotient
 *   scale  - Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX
 *
 * Purpose:
 *   Determine the max power of 10, <= 9, that the quotient can be scaled
 *   up by and still fit in 96 bits.  The limits come from power_overflow[],
 *   where entry [n - 1] is the largest value that survives a multiply by 10^n.
 *
 * Exit:
 *   Returns power of 10 to scale by, -1 if overflow error (the best power
 *   found still leaves scale negative).  Note that the early "return 9"
 *   path does not go through that final check.
 *
 ***********************************************************************/

static int
SearchScale(uint32_t res_hi, uint32_t res_mid, uint32_t res_lo, int scale)
{
        int   cur_scale;

        // Quick check to stop us from trying to scale any more:
        // either the value cannot even take a factor of 10, or the scale
        // is already at its maximum.
        //
        if (res_hi > OVFL_MAX_1_HI || scale >= DEC_SCALE_MAX) {
                cur_scale = 0;
                goto HaveScale;
        }

        if (scale > DEC_SCALE_MAX - 9) {
                // We can't scale by 10^9 without exceeding the max scale factor.
                // See if we can scale to the max.  If not, we'll fall into
                // standard search for scale factor.
                //
                cur_scale = DEC_SCALE_MAX - scale;
                if (res_hi < power_overflow[cur_scale - 1].Hi)
                        goto HaveScale;

                if (res_hi == power_overflow[cur_scale - 1].Hi) {
                // Shared tail, also entered by goto from the search below:
                // the high words tie, so the middle/low words decide whether
                // cur_scale fits or must be reduced by one.
                UpperEq:
                        if (res_mid > power_overflow[cur_scale - 1].Mid ||
                            (res_mid == power_overflow[cur_scale - 1].Mid && res_lo > power_overflow[cur_scale - 1].Lo)) {
                                cur_scale--;
                        }
                        goto HaveScale;
                }
        } else if (res_hi < OVFL_MAX_9_HI || (res_hi == OVFL_MAX_9_HI && res_mid < OVFL_MAX_9_MID) || (res_hi == OVFL_MAX_9_HI && res_mid == OVFL_MAX_9_MID && res_lo <= OVFL_MAX_9_LO))
                return 9;       // value is within the 10^9 limit: use the full 10^9

        // Search for a power to scale by < 9.  Do a binary search
        // on power_overflow[].
        //
        cur_scale = 5;
        if (res_hi < OVFL_MAX_5_HI)
                cur_scale = 7;
        else if (res_hi > OVFL_MAX_5_HI)
                cur_scale = 3;
        else
                goto UpperEq;

        // cur_scale is 3 or 7.
        //
        if (res_hi < power_overflow[cur_scale - 1].Hi)
                cur_scale++;
        else if (res_hi > power_overflow[cur_scale - 1].Hi)
                cur_scale--;
        else
                goto UpperEq;

        // cur_scale is 2, 4, 6, or 8.
        //
        // In all cases, we already found we could not use the power one larger.
        // So if we can use this power, it is the biggest, and we're done.  If
        // we can't use this power, the one below it is correct for all cases 
        // unless it's 10^1 -- we might have to go to 10^0 (no scaling).
        // 
        if (res_hi > power_overflow[cur_scale - 1].Hi)
                cur_scale--;

        if (res_hi == power_overflow[cur_scale - 1].Hi)
                goto UpperEq;

HaveScale:
        // cur_scale = largest power of 10 we can scale by without overflow, 
        // cur_scale < 9.  See if this is enough to make scale factor 
        // positive if it isn't already.
        // 
        if (cur_scale + scale < 0)
                cur_scale = -1;

        return cur_scale;
}
399
400
401 /**
402 * Div96By32
403 *
404 * Entry:
405 *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
406 *   ulDen   - 32-bit divisor.
407 *
408 * Purpose:
409 *   Do full divide, yielding 96-bit result and 32-bit remainder.
410 *
411 * Exit:
412 *   Quotient overwrites dividend.
413 *   Returns remainder.
414 *
415 * Exceptions:
416 *   None.
417 *
418 */
419 static uint32_t
420 Div96By32(uint32_t *num, uint32_t den)
421 {
422         SPLIT64  tmp;
423
424         tmp.u.Hi = 0;
425
426         if (num[2] != 0)
427                 goto Div3Word;
428
429         if (num[1] >= den)
430                 goto Div2Word;
431
432         tmp.u.Hi = num[1];
433         num[1] = 0;
434         goto Div1Word;
435
436 Div3Word:
437         tmp.u.Lo = num[2];
438         tmp.int64 = DivMod64by32(tmp.int64, den);
439         num[2] = tmp.u.Lo;
440 Div2Word:
441         tmp.u.Lo = num[1];
442         tmp.int64 = DivMod64by32(tmp.int64, den);
443         num[1] = tmp.u.Lo;
444 Div1Word:
445         tmp.u.Lo = num[0];
446         tmp.int64 = DivMod64by32(tmp.int64, den);
447         num[0] = tmp.u.Lo;
448         return tmp.u.Hi;
449 }
450
451 /***
452  * DecFixInt
453  *
454  * Entry:
455  *   pdecRes - Pointer to Decimal result location
456  *   operand  - Pointer to Decimal operand
457  *
458  * Purpose:
459  *   Chop the value to integer.  Return remainder so Int() function
460  *   can round down if non-zero.
461  *
462  * Exit:
463  *   Returns remainder.
464  *
465  * Exceptions:
466  *   None.
467  *
468  ***********************************************************************/
469
470 static uint32_t
471 DecFixInt(MonoDecimal * result, MonoDecimal * operand)
472 {
473         uint32_t   num[3];
474         uint32_t   rem;
475         uint32_t   pwr;
476         int     scale;
477
478         if (operand->u.u.scale > 0) {
479                 num[0] = operand->v.v.Lo32;
480                 num[1] = operand->v.v.Mid32;
481                 num[2] = operand->Hi32;
482                 scale = operand->u.u.scale;
483                 result->u.u.sign = operand->u.u.sign;
484                 rem = 0;
485
486                 do {
487                         if (scale > POWER10_MAX)
488                                 pwr = ten_to_nine;
489                         else
490                                 pwr = power10[scale];
491
492                         rem |= Div96By32(num, pwr);
493                         scale -= 9;
494                 }while (scale > 0);
495
496                 result->v.v.Lo32 = num[0];
497                 result->v.v.Mid32 = num[1];
498                 result->Hi32 = num[2];
499                 result->u.u.scale = 0;
500
501                 return rem;
502         }
503
504         COPYDEC(*result, *operand);
505         // Odd, the Microsoft code does not set result->reserved to zero on this case
506         return 0;
507 }
508
/**
 * ScaleResult:
 *
 * Entry:
 *   res - Array of uint32_ts with value, least-significant first.
 *   hi_res  - Index of last non-zero value in res.
 *   scale  - Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX
 *
 * Purpose:
 *   See if we need to scale the result to fit it in 96 bits.
 *   Perform needed scaling.  Adjust scale factor accordingly.
 *   Scaling means dividing by a power of 10 (at most 10^9 per pass); the
 *   final quotient is rounded half-to-even, with a sticky bit that remembers
 *   non-zero remainders from earlier passes.
 *
 * Exit:
 *   res updated in place, always 3 uint32_ts.
 *   New scale factor returned, -1 if overflow error.
 *
 */
static int
ScaleResult(uint32_t *res, int hi_res, int scale)
{
        int     new_scale;
        int     cur;
        uint32_t   pwr;
        uint32_t   tmp;
        uint32_t   sticky;
        SPLIT64 sdlTmp;

        // See if we need to scale the result.  The combined scale must
        // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
        // 
        // Start by figuring a lower bound on the scaling needed to make
        // the upper 96 bits zero.  hi_res is the index into res[]
        // of the highest non-zero uint32_t.
        // 
        new_scale =   hi_res * 32 - 64 - 1;
        if (new_scale > 0) {

                // Find the MSB: shift the top word left until its high bit is
                // set, reducing the bit-position estimate by the same amount.
                //
                tmp = res[hi_res];
                if (!(tmp & 0xFFFF0000)) {
                        new_scale -= 16;
                        tmp <<= 16;
                }
                if (!(tmp & 0xFF000000)) {
                        new_scale -= 8;
                        tmp <<= 8;
                }
                if (!(tmp & 0xF0000000)) {
                        new_scale -= 4;
                        tmp <<= 4;
                }
                if (!(tmp & 0xC0000000)) {
                        new_scale -= 2;
                        tmp <<= 2;
                }
                if (!(tmp & 0x80000000)) {
                        new_scale--;
                        tmp <<= 1;
                }
    
                // Multiply bit position by log10(2) to figure its power of 10.
                // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this 
                // with a multiply saves a 96-byte lookup table.  The power returned
                // is <= the power of the number, so we must add one power of 10
                // to make its integer part zero after dividing by 256.
                // 
                // Note: the result of this multiplication by an approximation of
                // log10(2) has been exhaustively checked to verify it gives the 
                // correct result.  (There were only 95 to check...)
                // 
                new_scale = ((new_scale * 77) >> 8) + 1;

                // new_scale = min scale factor to make high 96 bits zero, 0 - 29.
                // This reduces the scale factor of the result.  If it exceeds the
                // current scale of the result, we'll overflow.
                // 
                if (new_scale > scale)
                        return -1;
        }
        else
                new_scale = 0;

        // Make sure we scale by enough to bring the current scale factor
        // into valid range.
        //
        if (new_scale < scale - DEC_SCALE_MAX)
                new_scale = scale - DEC_SCALE_MAX;

        if (new_scale != 0) {
                // Scale by the power of 10 given by new_scale.  Note that this is 
                // NOT guaranteed to bring the number within 96 bits -- it could 
                // be 1 power of 10 short.
                //
                scale -= new_scale;
                sticky = 0;
                sdlTmp.u.Hi = 0; // initialize remainder

                // Each pass divides res[] by up to 10^9.  new_scale counts the
                // powers of ten still to be removed.
                for (;;) {

                        sticky |= sdlTmp.u.Hi; // record remainder as sticky bit

                        if (new_scale > POWER10_MAX)
                                pwr = ten_to_nine;
                        else
                                pwr = power10[new_scale];

                        // Compute first quotient.
                        // DivMod64by32 returns quotient in Lo, remainder in Hi.
                        //
                        sdlTmp.int64 = DivMod64by32(res[hi_res], pwr);
                        res[hi_res] = sdlTmp.u.Lo;
                        cur = hi_res - 1;

                        if (cur >= 0) {
                                // If first quotient was 0, update hi_res.
                                //
                                if (sdlTmp.u.Lo == 0)
                                        hi_res--;

                                // Compute subsequent quotients.  The remainder left
                                // in Hi is the upper half of the next dividend.
                                //
                                do {
                                        sdlTmp.u.Lo = res[cur];
                                        sdlTmp.int64 = DivMod64by32(sdlTmp.int64, pwr);
                                        res[cur] = sdlTmp.u.Lo;
                                        cur--;
                                } while (cur >= 0);

                        }

                        new_scale -= POWER10_MAX;
                        if (new_scale > 0)
                                continue; // scale some more

                        // If we scaled enough, hi_res would be 2 or less.  If not,
                        // divide by 10 more.
                        //
                        if (hi_res > 2) {
                                new_scale = 1;
                                scale--;
                                continue; // scale by 10
                        }

                        // Round final result.  See if remainder >= 1/2 of divisor.
                        // If remainder == 1/2 divisor, round up if odd or sticky bit set.
                        //
                        pwr >>= 1;  // power of 10 always even
                        if ( pwr <= sdlTmp.u.Hi && (pwr < sdlTmp.u.Hi ||
                                                    ((res[0] & 1) | sticky)) ) {
                                // Add one, propagating the carry upward: keep
                                // incrementing the next word while the current
                                // one wrapped around to 0.
                                cur = -1;
                                while (++res[++cur] == 0);
                                
                                if (cur > 2) {
                                        // The rounding caused us to carry beyond 96 bits. 
                                        // Scale by 10 more.
                                        //
                                        hi_res = cur;
                                        sticky = 0;  // no sticky bit
                                        sdlTmp.u.Hi = 0; // or remainder
                                        new_scale = 1;
                                        scale--;
                                        continue; // scale by 10
                                }
                        }
                        
                        // We may have scaled it more than we planned.  Make sure the scale 
                        // factor hasn't gone negative, indicating overflow.
                        // 
                        if (scale < 0)
                                return -1;
                        
                        return scale;
                } // for(;;)
        }
        return scale;
}
686
// Decimal multiply
//
// Multiplies *left by *right and stores the product in *result.
//   - The result scale starts as the sum of the operand scales and is
//     reduced (with rounding) when it exceeds DEC_SCALE_MAX or when the
//     product does not fit in 96 bits.
//   - The result sign is the XOR of the operand signs, except on the
//     ReturnZero path, which stores an all-zero decimal.
//
// Returns: MONO_DECIMAL_OVERFLOW or MONO_DECIMAL_OK
//   (overflow is reported when ScaleResult cannot fit the product)
static MonoDecimalStatus
VarDecMul(MonoDecimal * left, MonoDecimal * right, MonoDecimal * result)
{
        SPLIT64 tmp;
        SPLIT64 tmp2;
        SPLIT64 tmp3;
        int     scale;
        int     hi_prod;
        uint32_t   pwr;
        uint32_t   rem_lo;
        uint32_t   rem_hi;
        uint32_t   prod[6];

        scale = left->u.u.scale + right->u.u.scale;

        if ((left->Hi32 | left->v.v.Mid32 | right->Hi32 | right->v.v.Mid32) == 0) {
                // Upper 64 bits are zero, so both operands fit in 32 bits and
                // the product fits in 64 bits.
                //
                tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
                if (scale > DEC_SCALE_MAX)
                {
                        // Result scale is too big.  Divide result by power of 10 to reduce it.
                        // If the amount to divide by is > 19 the result is guaranteed
                        // less than 1/2.  [max value in 64 bits = 1.84E19]
                        //
                        scale -= DEC_SCALE_MAX;
                        if (scale > 19) {
                        // Also reached by goto from the wide path below when
                        // the whole product is zero.
                        ReturnZero:
                                DECIMAL_SETZERO(*result);
                                return MONO_DECIMAL_OK;
                        }

                        if (scale > POWER10_MAX) {
                                // Divide by 1E10 first, to get the power down to a 32-bit quantity.
                                // 1E10 itself doesn't fit in 32 bits, so we'll divide by 2.5E9 now
                                // then multiply the next divisor by 4 (which will be a max of 4E9).
                                // 
                                rem_lo = FullDiv64By32(&tmp.int64, ten_to_ten_div_4);
                                pwr = power10[scale - 10] << 2;
                        } else {
                                pwr = power10[scale];
                                rem_lo = 0;
                        }

                        // Power to divide by fits in 32 bits.
                        //
                        rem_hi = FullDiv64By32(&tmp.int64, pwr);

                        // Round result.  See if remainder >= 1/2 of divisor.
                        // Divisor is a power of 10, so it is always even.
                        // On an exact half, round up only if the first-stage
                        // remainder was non-zero or the quotient is odd.
                        //
                        pwr >>= 1;
                        if (rem_hi >= pwr && (rem_hi > pwr || (rem_lo | (tmp.u.Lo & 1))))
                                tmp.int64++;

                        scale = DEC_SCALE_MAX;
                }
                DECIMAL_LO32(*result) = tmp.u.Lo;
                DECIMAL_MID32(*result) = tmp.u.Hi;
                DECIMAL_HI32(*result) = 0;
        } else {
                // At least one operand has bits set in the upper 64 bits.
                //
                // Compute and accumulate the 9 partial products into a 
                // 192-bit (24-byte) result.
                //
                //                [l-h][l-m][l-l]   left high, middle, low
                //             x  [r-h][r-m][r-l]   right high, middle, low
                // ------------------------------
                //
                //                     [0-h][0-l]   l-l * r-l
                //                [1ah][1al]        l-l * r-m
                //                [1bh][1bl]        l-m * r-l
                //           [2ah][2al]             l-m * r-m
                //           [2bh][2bl]             l-l * r-h
                //           [2ch][2cl]             l-h * r-l
                //      [3ah][3al]                  l-m * r-h
                //      [3bh][3bl]                  l-h * r-m
                // [4-h][4-l]                       l-h * r-h
                // ------------------------------
                // [p-5][p-4][p-3][p-2][p-1][p-0]   prod[] array
                //
                tmp.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Lo32);
                prod[0] = tmp.u.Lo;

                tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->v.v.Mid32) + tmp.u.Hi;

                tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Lo32);
                tmp.int64 += tmp2.int64; // this could generate carry
                prod[1] = tmp.u.Lo;
                if (tmp.int64 < tmp2.int64) // detect carry
                        tmp2.u.Hi = 1;
                else
                        tmp2.u.Hi = 0;
                tmp2.u.Lo = tmp.u.Hi;   // tmp2 now holds the carry into the next column

                tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->v.v.Mid32) + tmp2.int64;

                if (left->Hi32 | right->Hi32) {
                        // Highest 32 bits is non-zero.  Calculate 5 more partial products.
                        // tmp3 collects the carry for each column the same way
                        // tmp2 did above.
                        //
                        tmp2.int64 = UInt32x32To64(left->v.v.Lo32, right->Hi32);
                        tmp.int64 += tmp2.int64; // this could generate carry
                        if (tmp.int64 < tmp2.int64) // detect carry
                                tmp3.u.Hi = 1;
                        else
                                tmp3.u.Hi = 0;

                        tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Lo32);
                        tmp.int64 += tmp2.int64; // this could generate carry
                        prod[2] = tmp.u.Lo;
                        if (tmp.int64 < tmp2.int64) // detect carry
                                tmp3.u.Hi++;
                        tmp3.u.Lo = tmp.u.Hi;

                        tmp.int64 = UInt32x32To64(left->v.v.Mid32, right->Hi32);
                        tmp.int64 += tmp3.int64; // this could generate carry
                        if (tmp.int64 < tmp3.int64) // detect carry
                                tmp3.u.Hi = 1;
                        else
                                tmp3.u.Hi = 0;

                        tmp2.int64 = UInt32x32To64(left->Hi32, right->v.v.Mid32);
                        tmp.int64 += tmp2.int64; // this could generate carry
                        prod[3] = tmp.u.Lo;
                        if (tmp.int64 < tmp2.int64) // detect carry
                                tmp3.u.Hi++;
                        tmp3.u.Lo = tmp.u.Hi;

                        tmp.int64 = UInt32x32To64(left->Hi32, right->Hi32) + tmp3.int64;
                        prod[4] = tmp.u.Lo;
                        prod[5] = tmp.u.Hi;

                        hi_prod = 5;
                }
                else {
                        // Both high words are zero: the product has at most
                        // four 32-bit words.
                        prod[2] = tmp.u.Lo;
                        prod[3] = tmp.u.Hi;
                        hi_prod = 3;
                }

                // Check for leading zero uint32_ts on the product
                //
                while (prod[hi_prod] == 0) {
                        hi_prod--;
                        if (hi_prod < 0)
                                goto ReturnZero;
                }

                // Bring the product back into 96 bits / a valid scale.
                scale = ScaleResult(prod, hi_prod, scale);
                if (scale == -1)
                        return MONO_DECIMAL_OVERFLOW;

                result->v.v.Lo32 = prod[0];
                result->v.v.Mid32 = prod[1];
                result->Hi32 = prod[2];
        }

        result->u.u.sign = right->u.u.sign ^ left->u.u.sign;
        result->u.u.scale = (char)scale;
        return MONO_DECIMAL_OK;
}
851
852 // Addition and subtraction
853 static MonoDecimalStatus
854 DecAddSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result, int8_t sign)
855 {
856         uint32_t     num[6];
857         uint32_t     pwr;
858         int       scale;
859         int       hi_prod;
860         int       cur;
861         SPLIT64   tmp;
862         MonoDecimal decRes;
863         MonoDecimal decTmp;
864         MonoDecimal *pdecTmp;
865
866         sign ^= (right->u.u.sign ^ left->u.u.sign) & DECIMAL_NEG;
867
868         if (right->u.u.scale == left->u.u.scale) {
869                 // Scale factors are equal, no alignment necessary.
870                 //
871                 decRes.u.signscale = left->u.signscale;
872
873         AlignedAdd:
874                 if (sign) {
875                         // Signs differ - subtract
876                         //
877                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right));
878                         DECIMAL_HI32(decRes) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
879
880                         // Propagate carry
881                         //
882                         if (DECIMAL_LO64_GET(decRes) > DECIMAL_LO64_GET(*left)) {
883                                 decRes.Hi32--;
884                                 if (decRes.Hi32 >= left->Hi32)
885                                         goto SignFlip;
886                         } else if (decRes.Hi32 > left->Hi32) {
887                                 // Got negative result.  Flip its sign.
888                                 //
889                         SignFlip:
890                                 DECIMAL_LO64_SET(decRes, -(uint64_t)DECIMAL_LO64_GET(decRes));
891                                 decRes.Hi32 = ~decRes.Hi32;
892                                 if (DECIMAL_LO64_GET(decRes) == 0)
893                                         decRes.Hi32++;
894                                 decRes.u.u.sign ^= DECIMAL_NEG;
895                         }
896
897                 } else {
898                         // Signs are the same - add
899                         //
900                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right));
901                         decRes.Hi32 = left->Hi32 + right->Hi32;
902
903                         // Propagate carry
904                         //
905                         if (DECIMAL_LO64_GET(decRes) < DECIMAL_LO64_GET(*left)) {
906                                 decRes.Hi32++;
907                                 if (decRes.Hi32 <= left->Hi32)
908                                         goto AlignedScale;
909                         } else if (decRes.Hi32 < left->Hi32) {
910                         AlignedScale:
911                                 // The addition carried above 96 bits.  Divide the result by 10,
912                                 // dropping the scale factor.
913                                 //
914                                 if (decRes.u.u.scale == 0)
915                                         return MONO_DECIMAL_OVERFLOW;
916                                 decRes.u.u.scale--;
917
918                                 tmp.u.Lo = decRes.Hi32;
919                                 tmp.u.Hi = 1;
920                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
921                                 decRes.Hi32 = tmp.u.Lo;
922
923                                 tmp.u.Lo = decRes.v.v.Mid32;
924                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
925                                 decRes.v.v.Mid32 = tmp.u.Lo;
926
927                                 tmp.u.Lo = decRes.v.v.Lo32;
928                                 tmp.int64 = DivMod64by32(tmp.int64, 10);
929                                 decRes.v.v.Lo32 = tmp.u.Lo;
930
931                                 // See if we need to round up.
932                                 //
933                                 if (tmp.u.Hi >= 5 && (tmp.u.Hi > 5 || (decRes.v.v.Lo32 & 1))) {
934                                         DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(decRes)+1)
935                                                 if (DECIMAL_LO64_GET(decRes) == 0)
936                                                         decRes.Hi32++;
937                                 }
938                         }
939                 }
940         }
941         else {
942                 // Scale factors are not equal.  Assume that a larger scale
943                 // factor (more decimal places) is likely to mean that number
944                 // is smaller.  Start by guessing that the right operand has
945                 // the larger scale factor.  The result will have the larger
946                 // scale factor.
947                 //
948                 decRes.u.u.scale = right->u.u.scale;  // scale factor of "smaller"
949                 decRes.u.u.sign = left->u.u.sign;    // but sign of "larger"
950                 scale = decRes.u.u.scale - left->u.u.scale;
951
952                 if (scale < 0) {
953                         // Guessed scale factor wrong. Swap operands.
954                         //
955                         scale = -scale;
956                         decRes.u.u.scale = left->u.u.scale;
957                         decRes.u.u.sign ^= sign;
958                         pdecTmp = right;
959                         right = left;
960                         left = pdecTmp;
961                 }
962
963                 // *left will need to be multiplied by 10^scale so
964                 // it will have the same scale as *right.  We could be
965                 // extending it to up to 192 bits of precision.
966                 //
967                 if (scale <= POWER10_MAX) {
968                         // Scaling won't make it larger than 4 uint32_ts
969                         //
970                         pwr = power10[scale];
971                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(left->v.v.Lo32, pwr));
972                         tmp.int64 = UInt32x32To64(left->v.v.Mid32, pwr);
973                         tmp.int64 += decTmp.v.v.Mid32;
974                         decTmp.v.v.Mid32 = tmp.u.Lo;
975                         decTmp.Hi32 = tmp.u.Hi;
976                         tmp.int64 = UInt32x32To64(left->Hi32, pwr);
977                         tmp.int64 += decTmp.Hi32;
978                         if (tmp.u.Hi == 0) {
979                                 // Result fits in 96 bits.  Use standard aligned add.
980                                 //
981                                 decTmp.Hi32 = tmp.u.Lo;
982                                 left = &decTmp;
983                                 goto AlignedAdd;
984                         }
985                         num[0] = decTmp.v.v.Lo32;
986                         num[1] = decTmp.v.v.Mid32;
987                         num[2] = tmp.u.Lo;
988                         num[3] = tmp.u.Hi;
989                         hi_prod = 3;
990                 }
991                 else {
992                         // Have to scale by a bunch.  Move the number to a buffer
993                         // where it has room to grow as it's scaled.
994                         //
995                         num[0] = left->v.v.Lo32;
996                         num[1] = left->v.v.Mid32;
997                         num[2] = left->Hi32;
998                         hi_prod = 2;
999
1000                         // Scan for zeros in the upper words.
1001                         //
1002                         if (num[2] == 0) {
1003                                 hi_prod = 1;
1004                                 if (num[1] == 0) {
1005                                         hi_prod = 0;
1006                                         if (num[0] == 0) {
1007                                                 // Left arg is zero, return right.
1008                                                 //
1009                                                 DECIMAL_LO64_SET(decRes, DECIMAL_LO64_GET(*right));
1010                                                 decRes.Hi32 = right->Hi32;
1011                                                 decRes.u.u.sign ^= sign;
1012                                                 goto RetDec;
1013                                         }
1014                                 }
1015                         }
1016
1017                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
1018                         // with index of highest non-zero uint32_t.
1019                         //
1020                         for (; scale > 0; scale -= POWER10_MAX) {
1021                                 if (scale > POWER10_MAX)
1022                                         pwr = ten_to_nine;
1023                                 else
1024                                         pwr = power10[scale];
1025
1026                                 tmp.u.Hi = 0;
1027                                 for (cur = 0; cur <= hi_prod; cur++) {
1028                                         tmp.int64 = UInt32x32To64(num[cur], pwr) + tmp.u.Hi;
1029                                         num[cur] = tmp.u.Lo;
1030                                 }
1031
1032                                 if (tmp.u.Hi != 0)
1033                                         // We're extending the result by another uint32_t.
1034                                         num[++hi_prod] = tmp.u.Hi;
1035                         }
1036                 }
1037
1038                 // Scaling complete, do the add.  Could be subtract if signs differ.
1039                 //
1040                 tmp.u.Lo = num[0];
1041                 tmp.u.Hi = num[1];
1042
1043                 if (sign) {
1044                         // Signs differ, subtract.
1045                         //
1046                         DECIMAL_LO64_SET(decRes, tmp.int64 - DECIMAL_LO64_GET(*right));
1047                         decRes.Hi32 = num[2] - right->Hi32;
1048
1049                         // Propagate carry
1050                         //
1051                         if (DECIMAL_LO64_GET(decRes) > tmp.int64) {
1052                                 decRes.Hi32--;
1053                                 if (decRes.Hi32 >= num[2])
1054                                         goto LongSub;
1055                         }
1056                         else if (decRes.Hi32 > num[2]) {
1057                         LongSub:
1058                                 // If num has more than 96 bits of precision, then we need to
1059                                 // carry the subtraction into the higher bits.  If it doesn't,
1060                                 // then we subtracted in the wrong order and have to flip the 
1061                                 // sign of the result.
1062                                 // 
1063                                 if (hi_prod <= 2)
1064                                         goto SignFlip;
1065
1066                                 cur = 3;
1067                                 while(num[cur++]-- == 0);
1068                                 if (num[hi_prod] == 0)
1069                                         hi_prod--;
1070                         }
1071                 }
1072                 else {
1073                         // Signs the same, add.
1074                         //
1075                         DECIMAL_LO64_SET(decRes, tmp.int64 + DECIMAL_LO64_GET(*right));
1076                         decRes.Hi32 = num[2] + right->Hi32;
1077
1078                         // Propagate carry
1079                         //
1080                         if (DECIMAL_LO64_GET(decRes) < tmp.int64) {
1081                                 decRes.Hi32++;
1082                                 if (decRes.Hi32 <= num[2])
1083                                         goto LongAdd;
1084                         }
1085                         else if (decRes.Hi32 < num[2]) {
1086                         LongAdd:
1087                                 // Had a carry above 96 bits.
1088                                 //
1089                                 cur = 3;
1090                                 do {
1091                                         if (hi_prod < cur) {
1092                                                 num[cur] = 1;
1093                                                 hi_prod = cur;
1094                                                 break;
1095                                         }
1096                                 }while (++num[cur++] == 0);
1097                         }
1098                 }
1099
1100                 if (hi_prod > 2) {
1101                         num[0] = decRes.v.v.Lo32;
1102                         num[1] = decRes.v.v.Mid32;
1103                         num[2] = decRes.Hi32;
1104                         decRes.u.u.scale = ScaleResult(num, hi_prod, decRes.u.u.scale);
1105                         if (decRes.u.u.scale == (uint8_t) -1)
1106                                 return MONO_DECIMAL_OVERFLOW;
1107
1108                         decRes.v.v.Lo32 = num[0];
1109                         decRes.v.v.Mid32 = num[1];
1110                         decRes.Hi32 = num[2];
1111                 }
1112         }
1113
1114 RetDec:
1115         COPYDEC(*result, decRes);
1116         // Odd, the Microsoft code does not set result->reserved to zero on this case
1117         return MONO_DECIMAL_OK;
1118 }
1119
1120 // Decimal addition
1121 static MonoDecimalStatus
1122 VarDecAdd(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1123 {
1124     return DecAddSub (left, right, result, 0);
1125 }
1126
1127 // Decimal subtraction
1128 static MonoDecimalStatus
1129 VarDecSub(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1130 {
1131     return DecAddSub (left, right, result, DECIMAL_NEG);
1132 }
1133
1134 /**
1135  * IncreaseScale:
1136  *
1137  * Entry:
1138  *   num - Pointer to 96-bit number as array of uint32_ts, least-sig first
1139  *   pwr   - Scale factor to multiply by
1140  *
1141  * Purpose:
1142  *   Multiply the two numbers.  The low 96 bits of the result overwrite
1143  *   the input.  The last 32 bits of the product are the return value.
1144  *
1145  * Exit:
1146  *   Returns highest 32 bits of product.
1147  *
1148  * Exceptions:
1149  *   None.
1150  *
1151  */
1152 static uint32_t
1153 IncreaseScale(uint32_t *num, uint32_t pwr)
1154 {
1155         SPLIT64   sdlTmp;
1156
1157         sdlTmp.int64 = UInt32x32To64(num[0], pwr);
1158         num[0] = sdlTmp.u.Lo;
1159         sdlTmp.int64 = UInt32x32To64(num[1], pwr) + sdlTmp.u.Hi;
1160         num[1] = sdlTmp.u.Lo;
1161         sdlTmp.int64 = UInt32x32To64(num[2], pwr) + sdlTmp.u.Hi;
1162         num[2] = sdlTmp.u.Lo;
1163         return sdlTmp.u.Hi;
1164 }
1165
1166 /**
1167  * Div96By64:
1168  *
1169  * Entry:
1170  *   rgulNum - Pointer to 96-bit dividend as array of uint32_ts, least-sig first
1171  *   sdlDen  - 64-bit divisor.
1172  *
1173  * Purpose:
1174  *   Do partial divide, yielding 32-bit result and 64-bit remainder.
1175  *   Divisor must be larger than upper 64 bits of dividend.
1176  *
1177  * Exit:
1178  *   Remainder overwrites lower 64-bits of dividend.
1179  *   Returns quotient.
1180  *
1181  * Exceptions:
1182  *   None.
1183  *
1184  */
1185 static uint32_t
1186 Div96By64(uint32_t *num, SPLIT64 den)
1187 {
1188         SPLIT64 quo;
1189         SPLIT64 sdlNum;
1190         SPLIT64 prod;
1191
1192         sdlNum.u.Lo = num[0];
1193
1194         if (num[2] >= den.u.Hi) {
1195                 // Divide would overflow.  Assume a quotient of 2^32, and set
1196                 // up remainder accordingly.  Then jump to loop which reduces
1197                 // the quotient.
1198                 //
1199                 sdlNum.u.Hi = num[1] - den.u.Lo;
1200                 quo.u.Lo = 0;
1201                 goto NegRem;
1202         }
1203
1204         // Hardware divide won't overflow
1205         //
1206         if (num[2] == 0 && num[1] < den.u.Hi)
1207                 // Result is zero.  Entire dividend is remainder.
1208                 //
1209                 return 0;
1210
1211         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1212         //
1213         quo.u.Lo = num[1];
1214         quo.u.Hi = num[2];
1215         quo.int64 = DivMod64by32(quo.int64, den.u.Hi);
1216         sdlNum.u.Hi = quo.u.Hi; // remainder
1217
1218         // Compute full remainder, rem = dividend - (quo * divisor).
1219         //
1220         prod.int64 = UInt32x32To64(quo.u.Lo, den.u.Lo); // quo * lo divisor
1221         sdlNum.int64 -= prod.int64;
1222
1223         if (sdlNum.int64 > ~prod.int64) {
1224         NegRem:
1225                 // Remainder went negative.  Add divisor back in until it's positive,
1226                 // a max of 2 times.
1227                 //
1228                 do {
1229                         quo.u.Lo--;
1230                         sdlNum.int64 += den.int64;
1231                 }while (sdlNum.int64 >= den.int64);
1232         }
1233
1234         num[0] = sdlNum.u.Lo;
1235         num[1] = sdlNum.u.Hi;
1236         return quo.u.Lo;
1237 }
1238
1239 /***
1240 * Div128By96
1241 *
1242 * Entry:
1243 *   rgulNum - Pointer to 128-bit dividend as array of uint32_ts, least-sig first
1244 *   den - Pointer to 96-bit divisor.
1245 *
1246 * Purpose:
1247 *   Do partial divide, yielding 32-bit result and 96-bit remainder.
1248 *   Top divisor uint32_t must be larger than top dividend uint32_t.  This is
1249 *   assured in the initial call because the divisor is normalized
1250 *   and the dividend can't be.  In subsequent calls, the remainder
1251 *   is multiplied by 10^9 (max), so it can be no more than 1/4 of
1252 *   the divisor which is effectively multiplied by 2^32 (4 * 10^9).
1253 *
1254 * Exit:
1255 *   Remainder overwrites lower 96-bits of dividend.
1256 *   Returns quotient.
1257 *
1258 * Exceptions:
1259 *   None.
1260 *
1261 ***********************************************************************/
1262
1263 static uint32_t
1264 Div128By96(uint32_t *num, uint32_t *den)
1265 {
1266         SPLIT64 sdlQuo;
1267         SPLIT64 sdlNum;
1268         SPLIT64 sdlProd1;
1269         SPLIT64 sdlProd2;
1270
1271         sdlNum.u.Lo = num[0];
1272         sdlNum.u.Hi = num[1];
1273
1274         if (num[3] == 0 && num[2] < den[2]){
1275                 // Result is zero.  Entire dividend is remainder.
1276                 //
1277                 return 0;
1278         }
1279
1280         // DivMod64by32 returns quotient in Lo, remainder in Hi.
1281         //
1282         sdlQuo.u.Lo = num[2];
1283         sdlQuo.u.Hi = num[3];
1284         sdlQuo.int64 = DivMod64by32(sdlQuo.int64, den[2]);
1285
1286         // Compute full remainder, rem = dividend - (quo * divisor).
1287         //
1288         sdlProd1.int64 = UInt32x32To64(sdlQuo.u.Lo, den[0]); // quo * lo divisor
1289         sdlProd2.int64 = UInt32x32To64(sdlQuo.u.Lo, den[1]); // quo * mid divisor
1290         sdlProd2.int64 += sdlProd1.u.Hi;
1291         sdlProd1.u.Hi = sdlProd2.u.Lo;
1292
1293         sdlNum.int64 -= sdlProd1.int64;
1294         num[2] = sdlQuo.u.Hi - sdlProd2.u.Hi; // sdlQuo.Hi is remainder
1295
1296         // Propagate carries
1297         //
1298         if (sdlNum.int64 > ~sdlProd1.int64) {
1299                 num[2]--;
1300                 if (num[2] >= ~sdlProd2.u.Hi)
1301                         goto NegRem;
1302         } else if (num[2] > ~sdlProd2.u.Hi) {
1303         NegRem:
1304                 // Remainder went negative.  Add divisor back in until it's positive,
1305                 // a max of 2 times.
1306                 //
1307                 sdlProd1.u.Lo = den[0];
1308                 sdlProd1.u.Hi = den[1];
1309
1310                 for (;;) {
1311                         sdlQuo.u.Lo--;
1312                         sdlNum.int64 += sdlProd1.int64;
1313                         num[2] += den[2];
1314
1315                         if (sdlNum.int64 < sdlProd1.int64) {
1316                                 // Detected carry. Check for carry out of top
1317                                 // before adding it in.
1318                                 //
1319                                 if (num[2]++ < den[2])
1320                                         break;
1321                         }
1322                         if (num[2] < den[2])
1323                                 break; // detected carry
1324                 }
1325         }
1326
1327         num[0] = sdlNum.u.Lo;
1328         num[1] = sdlNum.u.Hi;
1329         return sdlQuo.u.Lo;
1330 }
1331
1332 // Add a 32 bit unsigned long to an array of 3 unsigned longs representing a 96 integer
1333 // Returns FALSE if there is an overflow
1334 static gboolean
1335 Add32To96(uint32_t *num, uint32_t value)
1336 {
1337         num[0] += value;
1338         if (num[0] < value) {
1339                 if (++num[1] == 0) {                
1340                         if (++num[2] == 0) {                
1341                                 return FALSE;
1342                         }            
1343                 }
1344         }
1345         return TRUE;
1346 }
1347
1348 static void
1349 OverflowUnscale (uint32_t *quo, gboolean remainder)
1350 {
1351         SPLIT64  sdlTmp;
1352         
1353         // We have overflown, so load the high bit with a one.
1354         sdlTmp.u.Hi = 1u;
1355         sdlTmp.u.Lo = quo[2];
1356         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1357         quo[2] = sdlTmp.u.Lo;
1358         sdlTmp.u.Lo = quo[1];
1359         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1360         quo[1] = sdlTmp.u.Lo;
1361         sdlTmp.u.Lo = quo[0];
1362         sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10u);
1363         quo[0] = sdlTmp.u.Lo;
1364         // The remainder is the last digit that does not fit, so we can use it to work out if we need to round up
1365         if ((sdlTmp.u.Hi > 5) || ((sdlTmp.u.Hi == 5) && ( remainder || (quo[0] & 1)))) {
1366                 Add32To96(quo, 1u);
1367         }
1368 }
1369
1370 // VarDecDiv - Decimal divide
1371 static MonoDecimalStatus
1372 VarDecDiv(MonoDecimal *left, MonoDecimal *right, MonoDecimal *result)
1373 {
1374         uint32_t   quo[3];
1375         uint32_t   quoSave[3];
1376         uint32_t   rem[4];
1377         uint32_t   divisor[3];
1378         uint32_t   pwr;
1379         uint32_t   utmp;
1380         uint32_t   utmp1;
1381         SPLIT64 sdlTmp;
1382         SPLIT64 sdlDivisor;
1383         int     scale;
1384         int     cur_scale;
1385
1386         scale = left->u.u.scale - right->u.u.scale;
1387         divisor[0] = right->v.v.Lo32;
1388         divisor[1] = right->v.v.Mid32;
1389         divisor[2] = right->Hi32;
1390
1391         if (divisor[1] == 0 && divisor[2] == 0) {
1392                 // Divisor is only 32 bits.  Easy divide.
1393                 //
1394                 if (divisor[0] == 0)
1395                         return MONO_DECIMAL_DIVBYZERO;
1396
1397                 quo[0] = left->v.v.Lo32;
1398                 quo[1] = left->v.v.Mid32;
1399                 quo[2] = left->Hi32;
1400                 rem[0] = Div96By32(quo, divisor[0]);
1401
1402                 for (;;) {
1403                         if (rem[0] == 0) {
1404                                 if (scale < 0) {
1405                                         cur_scale = min(9, -scale);
1406                                         goto HaveScale;
1407                                 }
1408                                 break;
1409                         }
1410
1411                         // We have computed a quotient based on the natural scale 
1412                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero 
1413                         // remainder, so now we should increase the scale if possible to 
1414                         // include more quotient bits.
1415                         // 
1416                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and 
1417                         // computing more quotient bits as long as the remainder stays 
1418                         // non-zero.  If scaling by that much would cause overflow, we'll 
1419                         // drop out of the loop and scale by as much as we can.
1420                         // 
1421                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9 
1422                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and 
1423                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since 
1424                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888 
1425                         // is the largest value in quo[1] (when quo[2] == 4) that is 
1426                         // assured not to overflow.
1427                         // 
1428                         cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1429                         if (cur_scale == 0) {
1430                                 // No more scaling to be done, but remainder is non-zero.
1431                                 // Round quotient.
1432                                 //
1433                                 utmp = rem[0] << 1;
1434                                 if (utmp < rem[0] || (utmp >= divisor[0] &&
1435                                                       (utmp > divisor[0] || (quo[0] & 1)))) {
1436                                 RoundUp:
1437                                         if (++quo[0] == 0)
1438                                                 if (++quo[1] == 0)
1439                                                         quo[2]++;
1440                                 }
1441                                 break;
1442                         }
1443
1444                         if (cur_scale == -1)
1445                                 return MONO_DECIMAL_OVERFLOW;
1446
1447                 HaveScale:
1448                         pwr = power10[cur_scale];
1449                         scale += cur_scale;
1450
1451                         if (IncreaseScale(quo, pwr) != 0)
1452                                 return MONO_DECIMAL_OVERFLOW;
1453
1454                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
1455                         rem[0] = sdlTmp.u.Hi;
1456
1457                         quo[0] += sdlTmp.u.Lo;
1458                         if (quo[0] < sdlTmp.u.Lo) {
1459                                 if (++quo[1] == 0)
1460                                         quo[2]++;
1461                         }
1462                 } // for (;;)
1463         }
1464         else {
1465                 // Divisor has bits set in the upper 64 bits.
1466                 //
1467                 // Divisor must be fully normalized (shifted so bit 31 of the most 
1468                 // significant uint32_t is 1).  Locate the MSB so we know how much to 
1469                 // normalize by.  The dividend will be shifted by the same amount so 
1470                 // the quotient is not changed.
1471                 //
1472                 if (divisor[2] == 0)
1473                         utmp = divisor[1];
1474                 else
1475                         utmp = divisor[2];
1476
1477                 cur_scale = 0;
1478                 if (!(utmp & 0xFFFF0000)) {
1479                         cur_scale += 16;
1480                         utmp <<= 16;
1481                 }
1482                 if (!(utmp & 0xFF000000)) {
1483                         cur_scale += 8;
1484                         utmp <<= 8;
1485                 }
1486                 if (!(utmp & 0xF0000000)) {
1487                         cur_scale += 4;
1488                         utmp <<= 4;
1489                 }
1490                 if (!(utmp & 0xC0000000)) {
1491                         cur_scale += 2;
1492                         utmp <<= 2;
1493                 }
1494                 if (!(utmp & 0x80000000)) {
1495                         cur_scale++;
1496                         utmp <<= 1;
1497                 }
1498     
1499                 // Shift both dividend and divisor left by cur_scale.
1500                 // 
1501                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
1502                 rem[0] = sdlTmp.u.Lo;
1503                 rem[1] = sdlTmp.u.Hi;
1504                 sdlTmp.u.Lo = left->v.v.Mid32;
1505                 sdlTmp.u.Hi = left->Hi32;
1506                 sdlTmp.int64 <<= cur_scale;
1507                 rem[2] = sdlTmp.u.Hi;
1508                 rem[3] = (left->Hi32 >> (31 - cur_scale)) >> 1;
1509
1510                 sdlDivisor.u.Lo = divisor[0];
1511                 sdlDivisor.u.Hi = divisor[1];
1512                 sdlDivisor.int64 <<= cur_scale;
1513
1514                 if (divisor[2] == 0) {
1515                         // Have a 64-bit divisor in sdlDivisor.  The remainder
1516                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
1517                         //
1518                         sdlTmp.u.Lo = rem[2];
1519                         sdlTmp.u.Hi = rem[3];
1520
1521                         quo[2] = 0;
1522                         quo[1] = Div96By64(&rem[1], sdlDivisor);
1523                         quo[0] = Div96By64(rem, sdlDivisor);
1524
1525                         for (;;) {
1526                                 if ((rem[0] | rem[1]) == 0) {
1527                                         if (scale < 0) {
1528                                                 cur_scale = min(9, -scale);
1529                                                 goto HaveScale64;
1530                                         }
1531                                         break;
1532                                 }
1533
1534                                 // Remainder is non-zero.  Scale up quotient and remainder by 
1535                                 // powers of 10 so we can compute more significant bits.
1536                                 // 
1537                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1538                                 if (cur_scale == 0) {
1539                                         // No more scaling to be done, but remainder is non-zero.
1540                                         // Round quotient.
1541                                         //
1542                                         sdlTmp.u.Lo = rem[0];
1543                                         sdlTmp.u.Hi = rem[1];
1544                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
1545                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
1546                                                 goto RoundUp;
1547                                         break;
1548                                 }
1549
1550                                 if (cur_scale == -1)
1551                                         return MONO_DECIMAL_OVERFLOW;
1552
1553                         HaveScale64:
1554                                 pwr = power10[cur_scale];
1555                                 scale += cur_scale;
1556
1557                                 if (IncreaseScale(quo, pwr) != 0)
1558                                         return MONO_DECIMAL_OVERFLOW;
1559
1560                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
1561                                 IncreaseScale(rem, pwr);
1562                                 utmp = Div96By64(rem, sdlDivisor);
1563                                 quo[0] += utmp;
1564                                 if (quo[0] < utmp)
1565                                         if (++quo[1] == 0)
1566                                                 quo[2]++;
1567
1568                         } // for (;;)
1569                 }
1570                 else {
1571                         // Have a 96-bit divisor in divisor[].
1572                         //
1573                         // Start by finishing the shift left by cur_scale.
1574                         //
1575                         sdlTmp.u.Lo = divisor[1];
1576                         sdlTmp.u.Hi = divisor[2];
1577                         sdlTmp.int64 <<= cur_scale;
1578                         divisor[0] = sdlDivisor.u.Lo;
1579                         divisor[1] = sdlDivisor.u.Hi;
1580                         divisor[2] = sdlTmp.u.Hi;
1581
1582                         // The remainder (currently 96 bits spread over 4 uint32_ts) 
1583                         // will be < divisor.
1584                         // 
1585                         quo[2] = 0;
1586                         quo[1] = 0;
1587                         quo[0] = Div128By96(rem, divisor);
1588
1589                         for (;;) {
1590                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
1591                                         if (scale < 0) {
1592                                                 cur_scale = min(9, -scale);
1593                                                 goto HaveScale96;
1594                                         }
1595                                         break;
1596                                 }
1597
1598                                 // Remainder is non-zero.  Scale up quotient and remainder by 
1599                                 // powers of 10 so we can compute more significant bits.
1600                                 // 
1601                                 cur_scale = SearchScale(quo[2], quo[1], quo [0], scale);
1602                                 if (cur_scale == 0) {
1603                                         // No more scaling to be done, but remainder is non-zero.
1604                                         // Round quotient.
1605                                         //
1606                                         if (rem[2] >= 0x80000000)
1607                                                 goto RoundUp;
1608
1609                                         utmp = rem[0] > 0x80000000;
1610                                         utmp1 = rem[1] > 0x80000000;
1611                                         rem[0] <<= 1;
1612                                         rem[1] = (rem[1] << 1) + utmp;
1613                                         rem[2] = (rem[2] << 1) + utmp1;
1614
1615                                         if ((rem[2] > divisor[2] || rem[2] == divisor[2]) &&
1616                                             ((rem[1] > divisor[1] || rem[1] == divisor[1]) &&
1617                                              ((rem[0] > divisor[0] || rem[0] == divisor[0]) &&
1618                                               (quo[0] & 1))))
1619                                                 goto RoundUp;
1620                                         break;
1621                                 }
1622
1623                                 if (cur_scale == -1)
1624                                         return MONO_DECIMAL_OVERFLOW;
1625
1626                         HaveScale96:
1627                                 pwr = power10[cur_scale];
1628                                 scale += cur_scale;
1629
1630                                 if (IncreaseScale(quo, pwr) != 0)
1631                                         return MONO_DECIMAL_OVERFLOW;
1632
1633                                 rem[3] = IncreaseScale(rem, pwr);
1634                                 utmp = Div128By96(rem, divisor);
1635                                 quo[0] += utmp;
1636                                 if (quo[0] < utmp)
1637                                         if (++quo[1] == 0)
1638                                                 quo[2]++;
1639
1640                         } // for (;;)
1641                 }
1642         }
1643
1644         // No more remainder.  Try extracting any extra powers of 10 we may have 
1645         // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
1646         // If a division by one of these powers returns a zero remainder, then
1647         // we keep the quotient.  If the remainder is not zero, then we restore
1648         // the previous value.
1649         // 
1650         // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
1651         // we can extract.  We use this as a quick test on whether to try a
1652         // given power.
1653         // 
1654         while ((quo[0] & 0xFF) == 0 && scale >= 8) {
1655                 quoSave[0] = quo[0];
1656                 quoSave[1] = quo[1];
1657                 quoSave[2] = quo[2];
1658
1659                 if (Div96By32(quoSave, 100000000) == 0) {
1660                         quo[0] = quoSave[0];
1661                         quo[1] = quoSave[1];
1662                         quo[2] = quoSave[2];
1663                         scale -= 8;
1664                 }
1665                 else
1666                         break;
1667         }
1668
1669         if ((quo[0] & 0xF) == 0 && scale >= 4) {
1670                 quoSave[0] = quo[0];
1671                 quoSave[1] = quo[1];
1672                 quoSave[2] = quo[2];
1673
1674                 if (Div96By32(quoSave, 10000) == 0) {
1675                         quo[0] = quoSave[0];
1676                         quo[1] = quoSave[1];
1677                         quo[2] = quoSave[2];
1678                         scale -= 4;
1679                 }
1680         }
1681
1682         if ((quo[0] & 3) == 0 && scale >= 2) {
1683                 quoSave[0] = quo[0];
1684                 quoSave[1] = quo[1];
1685                 quoSave[2] = quo[2];
1686
1687                 if (Div96By32(quoSave, 100) == 0) {
1688                         quo[0] = quoSave[0];
1689                         quo[1] = quoSave[1];
1690                         quo[2] = quoSave[2];
1691                         scale -= 2;
1692                 }
1693         }
1694
1695         if ((quo[0] & 1) == 0 && scale >= 1) {
1696                 quoSave[0] = quo[0];
1697                 quoSave[1] = quo[1];
1698                 quoSave[2] = quo[2];
1699
1700                 if (Div96By32(quoSave, 10) == 0) {
1701                         quo[0] = quoSave[0];
1702                         quo[1] = quoSave[1];
1703                         quo[2] = quoSave[2];
1704                         scale -= 1;
1705                 }
1706         }
1707
1708         result->Hi32 = quo[2];
1709         result->v.v.Mid32 = quo[1];
1710         result->v.v.Lo32 = quo[0];
1711         result->u.u.scale = scale;
1712         result->u.u.sign = left->u.u.sign ^ right->u.u.sign;
1713         return MONO_DECIMAL_OK;
1714 }
1715
1716 // VarDecAbs - Decimal Absolute Value
     //
     // Copies *pdecOprd into *result with COPYDEC and then clears the
     // DECIMAL_NEG bit of the result's sign byte, so the stored value is
     // non-negative.  Only *result is written.
1717 static void
1718 VarDecAbs (MonoDecimal *pdecOprd, MonoDecimal *result)
1719 {
1720         COPYDEC(*result, *pdecOprd);
1721         result->u.u.sign &= ~DECIMAL_NEG;
1722         // Microsoft does not set reserved here
1723 }
1724
1725 // VarDecFix - Decimal Fix (chop to integer)
     //
     // Thin wrapper around DecFixInt, which writes *result.  The value returned
     // by DecFixInt is ignored here.
1726 static void
1727 VarDecFix (MonoDecimal *pdecOprd, MonoDecimal *result)
1728 {
1729         DecFixInt(result, pdecOprd);
1730 }
1731
1732
1733 // VarDecInt - Decimal Int (round down to integer)
     //
     // Lets DecFixInt chop the operand into *result.  When DecFixInt reports a
     // non-zero return value and the result is negative, the 96-bit magnitude is
     // incremented by one so that the value moves toward -infinity.
1734 static void
1735 VarDecInt (MonoDecimal *pdecOprd, MonoDecimal *result)
1736 {
1737         if (DecFixInt(result, pdecOprd) != 0 && (result->u.u.sign & DECIMAL_NEG)) {
1738                 // We have chopped off a non-zero amount from a negative value.  Since
1739                 // we round toward -infinity, we must increase the integer result by
1740                 // 1 to make it more negative.  This will never overflow because
1741                 // in order to have a remainder, we must have had a non-zero scale factor.
1742                 // Our scale factor is back to zero now.
1743                 //
1744                 DECIMAL_LO64_SET(*result, DECIMAL_LO64_GET(*result) + 1);
                     // The low 64 bits wrapped to zero: carry into the high word.
1745                 if (DECIMAL_LO64_GET(*result) == 0)
1746                         result->Hi32++;
1747         }
1748 }
1749
1750
1751 // VarDecNeg - Decimal Negate
     //
     // Copies *pdecOprd into *result with COPYDEC and flips the DECIMAL_NEG bit
     // of the result's sign byte.  The magnitude is not inspected, so a zero
     // operand also has its sign bit toggled.
1752 static void
1753 VarDecNeg (MonoDecimal *pdecOprd, MonoDecimal *result)
1754 {
1755         COPYDEC(*result, *pdecOprd);
1756         // Microsoft does not set result->reserved to zero on this case.
1757         result->u.u.sign ^= DECIMAL_NEG;
1758 }
1759
1760 //
     // VarDecRound - round a decimal to cDecimals fractional digits.
     //
     // When the input has more than cDecimals fractional digits, its 96-bit
     // value is divided by powers of ten (at most 10^9 per step) until the
     // excess digits are gone, and the quotient is rounded to nearest with ties
     // going to the even quotient.  Otherwise the input is copied to *result
     // unchanged.
     //
1761 // Returns: MONO_DECIMAL_INVALID_ARGUMENT, MONO_DECIMAL_OK
     //   MONO_DECIMAL_INVALID_ARGUMENT is returned only when cDecimals is
     //   negative; *result is not written in that case.
1762 //
1763 static MonoDecimalStatus
1764 VarDecRound(MonoDecimal *input, int cDecimals, MonoDecimal *result)
1765 {
1766         uint32_t num[3];
1767         uint32_t rem;
1768         uint32_t sticky;
1769         uint32_t pwr;
1770         int scale;
1771
1772         if (cDecimals < 0)
1773                 return MONO_DECIMAL_INVALID_ARGUMENT;
1774
             // Number of fractional digits that have to be removed.
1775         scale = input->u.u.scale - cDecimals;
1776         if (scale > 0) {
1777                 num[0] = input->v.v.Lo32;
1778                 num[1] = input->v.v.Mid32;
1779                 num[2] = input->Hi32;
1780                 result->u.u.sign = input->u.u.sign;
1781                 rem = sticky = 0;
1782
                     // rem holds the remainder of the most recent division; sticky
                     // ORs together the remainders of all earlier divisions.
1783                 do {
1784                         sticky |= rem;
1785                         if (scale > POWER10_MAX)
1786                                 pwr = ten_to_nine;
1787                         else
1788                                 pwr = power10[scale];
1789
1790                         rem = Div96By32(num, pwr);
1791                         scale -= 9;
1792                 }while (scale > 0);
1793
1794                 // Now round.  rem has last remainder, sticky has sticky bits.
1795                 // To do IEEE rounding, we add LSB of result to sticky bits so
1796                 // either causes round up if remainder * 2 == last divisor.
1797                 //
                     // After the two statements below, pwr < rem holds when twice the
                     // last remainder exceeds the last divisor, or equals it while
                     // sticky is non-zero.  The chained pre-increments propagate the
                     // carry from num[0] through num[1] into num[2].
                     //
1798                 sticky |= num[0] & 1;
1799                 rem = (rem << 1) + (sticky != 0);
1800                 if (pwr < rem &&
1801                     ++num[0] == 0 &&
1802                     ++num[1] == 0
1803                         )
1804                         ++num[2];
1805
1806                 result->v.v.Lo32 = num[0];
1807                 result->v.v.Mid32 = num[1];
1808                 result->Hi32 = num[2];
1809                 result->u.u.scale = cDecimals;
1810                 return MONO_DECIMAL_OK;
1811         }
1812
             // Nothing to remove: hand back the input as it is.
1813         COPYDEC(*result, *input);
1814         // Odd, the Microsoft source does not set the result->reserved to zero here.
1815         return MONO_DECIMAL_OK;
1816 }
1817
1818 //
     // VarDecFromR4 - convert a float to a MonoDecimal.
     //
     // The input is scaled by a power of ten so that it becomes an integer of at
     // most 7 significant digits and is rounded to nearest, ties to even.  The
     // integer is then stored either multiplied back up by a power of ten (scale
     // 0) or with trailing decimal zeros stripped and a non-negative scale.
     // Inputs whose unbiased exponent is below -94, or that round to 0, give an
     // all-zero result; DECIMAL_SETZERO clears the sign as well.
     //
1819 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
     //   MONO_DECIMAL_OVERFLOW is returned when the unbiased exponent is above
     //   96 or when the scaled-up integer does not fit in 96 bits.
1820 static MonoDecimalStatus
1821 VarDecFromR4 (float input, MonoDecimal* result)
1822 {
1823         int         exp;    // number of bits to left of binary point
1824         int         power;
1825         uint32_t       mant;
1826         double      dbl;
1827         SPLIT64     sdlLo;
1828         SPLIT64     sdlHi;
1829         int         lmax, cur;  // temps used during scale reduction
1830         
1831         // The most we can scale by is 10^28, which is just slightly more
1832         // than 2^93.  So a float with an exponent of -94 could just
1833         // barely reach 0.5, but smaller exponents will always round to zero.
1834         //
1835         if ((exp = ((SingleStructure *)&input)->exp - SNGBIAS) < -94 ) {
1836                 DECIMAL_SETZERO(*result);
1837                 return MONO_DECIMAL_OK;
1838         }
1839
1840         if (exp > 96)
1841                 return MONO_DECIMAL_OVERFLOW;
1842
1843         // Round the input to a 7-digit integer.  The R4 format has
1844         // only 7 digits of precision, and we want to keep garbage digits
1845         // out of the Decimal we're making.
1846         //
1847         // Calculate max power of 10 input value could have by multiplying 
1848         // the exponent by log10(2).  Using scaled integer multiplication, 
1849         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1850         //
1851         dbl = fabs(input);
1852         power = 6 - ((exp * 19728) >> 16);
1853         
1854         if (power >= 0) {
1855                 // We have less than 7 digits, scale input up.
1856                 //
1857                 if (power > DECMAX)
1858                         power = DECMAX;
1859                 
1860                 dbl = dbl * double_power10[power];
1861         } else {
1862                 if (power != -1 || dbl >= 1E7)
1863                         dbl = dbl / fnDblPower10(-power);
1864                 else 
1865                         power = 0; // didn't scale it
1866         }
1867         
             // The power estimate can leave one digit short; take one more
             // factor of ten if the scale limit allows it.
1868         g_assert (dbl < 1E7);
1869         if (dbl < 1E6 && power < DECMAX) {
1870                 dbl *= 10;
1871                 power++;
1872                 g_assert(dbl >= 1E6);
1873         }
1874         
1875         // Round to integer
1876         //
             // Truncate, then bump when the dropped fraction is above one half, or
             // exactly one half with an odd truncated value (ties to even).
1877         mant = (int32_t)dbl;
1878         dbl -= (double)mant;  // difference between input & integer
1879         if ( dbl > 0.5 || (dbl == 0.5 && (mant & 1)))
1880                 mant++;
1881         
1882         if (mant == 0) {
1883                 DECIMAL_SETZERO(*result);
1884                 return MONO_DECIMAL_OK;
1885         }
1886         
1887         if (power < 0) {
1888                 // Add -power factors of 10, -power <= (29 - 7) = 22.
1889                 //
1890                 power = -power;
1891                 if (power < 10) {
                             // One 32x32 multiply is enough; the product fits in 64 bits.
1892                         sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power]);
1893                         
1894                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1895                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1896                         DECIMAL_HI32(*result) = 0;
1897                 } else {
1898                         // Have a big power of 10.
1899                         //
1900                         if (power > 18) {
                                     // mant * 10^(power-18) first, then times ten_to_eighteen
                                     // as a 64x64 multiply; anything above 96 bits is overflow.
1901                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 18]);
1902                                 sdlLo.int64 = UInt64x64To128(sdlLo, ten_to_eighteen, &sdlHi.int64);
1903                                 
1904                                 if (sdlHi.u.Hi != 0)
1905                                         return MONO_DECIMAL_OVERFLOW;
1906                         }
1907                         else {
                                     // mant * 10^(power-9) gives a 64-bit value; multiply its two
                                     // halves by 10^9 separately and recombine into 96 bits.
1908                                 sdlLo.int64 = UInt32x32To64(mant, (uint32_t)long_power10[power - 9]);
1909                                 sdlHi.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Hi);
1910                                 sdlLo.int64 = UInt32x32To64(ten_to_nine, sdlLo.u.Lo);
1911                                 sdlHi.int64 += sdlLo.u.Hi;
1912                                 sdlLo.u.Hi = sdlHi.u.Lo;
1913                                 sdlHi.u.Lo = sdlHi.u.Hi;
1914                         }
1915                         DECIMAL_LO32(*result) = sdlLo.u.Lo;
1916                         DECIMAL_MID32(*result) = sdlLo.u.Hi;
1917                         DECIMAL_HI32(*result) = sdlHi.u.Lo;
1918                 }
1919                 DECIMAL_SCALE(*result) = 0;
1920         } else {
1921                 // Factor out powers of 10 to reduce the scale, if possible.
1922                 // The maximum number we could factor out would be 6.  This
1923                 // comes from the fact we have a 7-digit number, and the
1924                 // MSD must be non-zero -- but the lower 6 digits could be
1925                 // zero.  Note also the scale factor is never negative, so
1926                 // we can't scale by any more than the power we used to
1927                 // get the integer.
1928                 //
1929                 // DivMod32by32 returns the quotient in Lo, the remainder in Hi.
1930                 //
1931                 lmax = min(power, 6);
1932                 
1933                 // lmax is the largest power of 10 to try, lmax <= 6.
1934                 // We'll try powers 4, 2, and 1 unless they're too big.
1935                 //
1936                 for (cur = 4; cur > 0; cur >>= 1)
1937                 {
1938                         if (cur > lmax)
1939                                 continue;
1940                         
1941                         sdlLo.int64 = DivMod32by32(mant, (uint32_t)long_power10[cur]);
1942                         
                             // Keep the quotient only when the division was exact.
1943                         if (sdlLo.u.Hi == 0) {
1944                                 mant = sdlLo.u.Lo;
1945                                 power -= cur;
1946                                 lmax -= cur;
1947                         }
1948                 }
1949                 DECIMAL_LO32(*result) = mant;
1950                 DECIMAL_MID32(*result) = 0;
1951                 DECIMAL_HI32(*result) = 0;
1952                 DECIMAL_SCALE(*result) = power;
1953         }
1954         
             // Move the float's sign bit into the DECIMAL_NEG position (0x80).
1955         DECIMAL_SIGN(*result) = (char)((SingleStructure *)&input)->sign << 7;
1956         return MONO_DECIMAL_OK;
1957 }
1958
1959 //
     // VarDecFromR8 - convert a double to a MonoDecimal.
     //
     // Same scheme as the float conversion, but with 15 significant digits: the
     // input is scaled by a power of ten to an integer of at most 15 digits and
     // rounded to nearest, ties to even.  It is then stored either multiplied
     // back up by a power of ten (scale 0) or with trailing decimal zeros
     // stripped and a non-negative scale.  Inputs whose unbiased exponent is
     // below -94, or that round to 0, give an all-zero result; DECIMAL_SETZERO
     // clears the sign as well.
     //
1960 // Returns MONO_DECIMAL_OK or MONO_DECIMAL_OVERFLOW
     //   MONO_DECIMAL_OVERFLOW is returned when the unbiased exponent is above
     //   96 or when the scaled-up integer does not fit in 96 bits.
1961 static MonoDecimalStatus
1962 VarDecFromR8 (double input, MonoDecimal *result)
1963 {
1964         int         exp;    // number of bits to left of binary point
1965         int         power;  // power-of-10 scale factor
1966         SPLIT64     sdlMant;
1967         SPLIT64     sdlLo;
1968         double      dbl;
1969         int         lmax, cur;  // temps used during scale reduction
1970         uint32_t       pwr_cur;
1971         uint32_t       quo;
1972         
1973         
1974         // The most we can scale by is 10^28, which is just slightly more
1975         // than 2^93.  So a float with an exponent of -94 could just
1976         // barely reach 0.5, but smaller exponents will always round to zero.
1977         //
1978         if ((exp = ((DoubleStructure *)&input)->u.exp - DBLBIAS) < -94) {
1979                 DECIMAL_SETZERO(*result);
1980                 return MONO_DECIMAL_OK;
1981         }
1982
1983         if (exp > 96)
1984                 return MONO_DECIMAL_OVERFLOW;
1985
1986         // Round the input to a 15-digit integer.  The R8 format has
1987         // only 15 digits of precision, and we want to keep garbage digits
1988         // out of the Decimal we're making.
1989         //
1990         // Calculate max power of 10 input value could have by multiplying 
1991         // the exponent by log10(2).  Using scaled integer multiplication, 
1992         // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1993         //
1994         dbl = fabs(input);
1995         power = 14 - ((exp * 19728) >> 16);
1996         
1997         if (power >= 0) {
1998                 // We have less than 15 digits, scale input up.
1999                 //
2000                 if (power > DECMAX)
2001                         power = DECMAX;
2002
2003                 dbl = dbl * double_power10[power];
2004         } else {
2005                 if (power != -1 || dbl >= 1E15)
2006                         dbl = dbl / fnDblPower10(-power);
2007                 else 
2008                         power = 0; // didn't scale it
2009         }
2010
             // The power estimate can leave one digit short; take one more
             // factor of ten if the scale limit allows it.
2011         g_assert (dbl < 1E15);
2012         if (dbl < 1E14 && power < DECMAX) {
2013                 dbl *= 10;
2014                 power++;
2015                 g_assert(dbl >= 1E14);
2016         }
2017
2018         // Round to int64
2019         //
             // Truncate, then bump when the dropped fraction is above one half, or
             // exactly one half with an odd truncated value (ties to even).
2020         sdlMant.int64 = (int64_t)dbl;
2021         dbl -= (double)(int64_t)sdlMant.int64;  // dif between input & integer
2022         if ( dbl > 0.5 || (dbl == 0.5 && (sdlMant.u.Lo & 1)))
2023                 sdlMant.int64++;
2024
2025         if (sdlMant.int64 == 0) {
2026                 DECIMAL_SETZERO(*result);
2027                 return MONO_DECIMAL_OK;
2028         }
2029
2030         if (power < 0) {
2031                 // Add -power factors of 10, -power <= (29 - 15) = 14.
2032                 //
2033                 power = -power;
2034                 if (power < 10) {
                             // Multiply the two 32-bit halves of the mantissa separately and
                             // recombine: sdlLo ends up with the low 64 bits, sdlMant.u.Lo
                             // with the top 32 bits of the 96-bit product.
2035                         sdlLo.int64 = UInt32x32To64(sdlMant.u.Lo, (uint32_t)long_power10[power]);
2036                         sdlMant.int64 = UInt32x32To64(sdlMant.u.Hi, (uint32_t)long_power10[power]);
2037                         sdlMant.int64 += sdlLo.u.Hi;
2038                         sdlLo.u.Hi = sdlMant.u.Lo;
2039                         sdlMant.u.Lo = sdlMant.u.Hi;
2040                 }
2041                 else {
2042                         // Have a big power of 10.
2043                         //
2044                         g_assert(power <= 14);
2045                         sdlLo.int64 = UInt64x64To128(sdlMant, sdl_power10[power-10], &sdlMant.int64);
2046
                             // Anything above bit 95 cannot be represented.
2047                         if (sdlMant.u.Hi != 0)
2048                                 return MONO_DECIMAL_OVERFLOW;
2049                 }
2050                 DECIMAL_LO32(*result) = sdlLo.u.Lo;
2051                 DECIMAL_MID32(*result) = sdlLo.u.Hi;
2052                 DECIMAL_HI32(*result) = sdlMant.u.Lo;
2053                 DECIMAL_SCALE(*result) = 0;
2054         }
2055         else {
2056                 // Factor out powers of 10 to reduce the scale, if possible.
2057                 // The maximum number we could factor out would be 14.  This
2058                 // comes from the fact we have a 15-digit number, and the 
2059                 // MSD must be non-zero -- but the lower 14 digits could be 
2060                 // zero.  Note also the scale factor is never negative, so
2061                 // we can't scale by any more than the power we used to
2062                 // get the integer.
2063                 //
2064                 // DivMod64by32 returns the quotient in Lo, the remainder in Hi.
2065                 //
2066                 lmax = min(power, 14);
2067
2068                 // lmax is the largest power of 10 to try, lmax <= 14.
2069                 // We'll try powers 8, 4, 2, and 1 unless they're too big.
2070                 //
2071                 for (cur = 8; cur > 0; cur >>= 1)
2072                 {
2073                         if (cur > lmax)
2074                                 continue;
2075
2076                         pwr_cur = (uint32_t)long_power10[cur];
2077
2078                         if (sdlMant.u.Hi >= pwr_cur) {
2079                                 // Overflow if we try to divide in one step.
2080                                 //
                                     // Divide the high word first, then the previous remainder
                                     // (left in sdlLo.u.Hi) together with the low word.
2081                                 sdlLo.int64 = DivMod64by32(sdlMant.u.Hi, pwr_cur);
2082                                 quo = sdlLo.u.Lo;
2083                                 sdlLo.u.Lo = sdlMant.u.Lo;
2084                                 sdlLo.int64 = DivMod64by32(sdlLo.int64, pwr_cur);
2085                         }
2086                         else {
2087                                 quo = 0;
2088                                 sdlLo.int64 = DivMod64by32(sdlMant.int64, pwr_cur);
2089                         }
2090
                             // Keep the quotient only when the division was exact.
2091                         if (sdlLo.u.Hi == 0) {
2092                                 sdlMant.u.Hi = quo;
2093                                 sdlMant.u.Lo = sdlLo.u.Lo;
2094                                 power -= cur;
2095                                 lmax -= cur;
2096                         }
2097                 }
2098
2099                 DECIMAL_HI32(*result) = 0;
2100                 DECIMAL_SCALE(*result) = power;
2101                 DECIMAL_LO32(*result) = sdlMant.u.Lo;
2102                 DECIMAL_MID32(*result) = sdlMant.u.Hi;
2103         }
2104
             // Move the double's sign bit into the DECIMAL_NEG position (0x80).
2105         DECIMAL_SIGN(*result) = (char)((DoubleStructure *)&input)->u.sign << 7;
2106         return MONO_DECIMAL_OK;
2107 }
2108
     // VarR8FromDec - convert a MonoDecimal to a double.
     //
     // The 96-bit magnitude is assembled as a double from its low 64 bits and
     // its high 32 bits, divided by ten to the power of the scale, and negated
     // when the sign byte is non-zero.
     //
2109 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
     //   MONO_DECIMAL_INVALID_ARGUMENT is returned, and *result left unwritten,
     //   when the scale exceeds DECMAX or the sign byte has bits other than
     //   DECIMAL_NEG set.
2110 static MonoDecimalStatus
2111 VarR8FromDec(MonoDecimal *input, double *result)
2112 {
2113         SPLIT64  tmp;
2114         double   dbl;
2115         
2116         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2117                 return MONO_DECIMAL_INVALID_ARGUMENT;
2118         
2119         tmp.u.Lo = DECIMAL_LO32(*input);
2120         tmp.u.Hi = DECIMAL_MID32(*input);
2121         
             // The low 64 bits are converted through int64_t.  When bit 63 is set
             // that conversion comes out negative, so ds2to64.dbl is added back to
             // compensate (presumably 2^64, going by its name and this use).
2122         if ((int32_t)DECIMAL_MID32(*input) < 0)
2123                 dbl = (ds2to64.dbl + (double)(int64_t)tmp.int64 +
2124                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input)) ;
2125         else
2126                 dbl = ((double)(int64_t)tmp.int64 +
2127                        (double)DECIMAL_HI32(*input) * ds2to64.dbl) / fnDblPower10(DECIMAL_SCALE(*input));
2128         
2129         if (DECIMAL_SIGN(*input))
2130                 dbl = -dbl;
2131         
2132         *result = dbl;
2133         return MONO_DECIMAL_OK;
2134 }
2135
     // VarR4FromDec - convert a MonoDecimal to a float.
     //
     // Validates the scale and sign byte, converts to double with VarR8FromDec
     // and narrows the double to float.
     //
2136 // Returns: MONO_DECIMAL_OK, or MONO_DECIMAL_INVALID_ARGUMENT
     //   MONO_DECIMAL_INVALID_ARGUMENT is returned, and *result left unwritten,
     //   when the scale exceeds DECMAX or the sign byte has bits other than
     //   DECIMAL_NEG set.
2137 static MonoDecimalStatus
2138 VarR4FromDec(MonoDecimal *input, float *result)
2139 {
2140         double   dbl;
2141         
2142         if (DECIMAL_SCALE(*input) > DECMAX || (DECIMAL_SIGN(*input) & ~DECIMAL_NEG) != 0)
2143                 return MONO_DECIMAL_INVALID_ARGUMENT;
2144         
2145         // Can't overflow; no errors possible.
2146         //
             // The status of VarR8FromDec is not checked: it performs the same
             // validation that has just passed above.
2147         VarR8FromDec(input, &dbl);
2148         *result = (float)dbl;
2149         return MONO_DECIMAL_OK;
2150 }
2151
     // DecShiftLeft - shift the 96-bit magnitude of *value left by one bit.
     //
     // Bits move from Lo32 into Mid32 and from Mid32 into Hi32.  The bit shifted
     // out of Hi32 is dropped; sign and scale are not touched.
2152 static void
2153 DecShiftLeft(MonoDecimal* value)
2154 {
2155     g_assert(value != NULL);
2156
         // Capture the top bits of the two lower words before they are shifted out.
2157     unsigned int c0 = DECIMAL_LO32(*value) & 0x80000000? 1: 0;
2158     unsigned int c1 = DECIMAL_MID32(*value) & 0x80000000? 1: 0;
2159     DECIMAL_LO32(*value) <<= 1;
2160     DECIMAL_MID32(*value) = DECIMAL_MID32(*value) << 1 | c0;
2161     DECIMAL_HI32(*value) = DECIMAL_HI32(*value) << 1 | c1;
2162 }
2163
     // D32AddCarry - add i to *value with 32-bit wraparound.
     //
     // Stores the wrapped sum back into *value and returns 1 when the addition
     // wrapped (carry out of bit 31), 0 otherwise.
2164 static int
2165 D32AddCarry(uint32_t* value, uint32_t i)
2166 {
2167     uint32_t v = *value;
2168     uint32_t sum = v + i;
2169     *value = sum;
         // An unsigned sum smaller than one of its operands means it wrapped.
2170     return sum < v || sum < i? 1: 0;
2171 }
2172
     // DecAdd - add the 96-bit magnitude of *d into *value.
     //
     // Only Lo32, Mid32 and Hi32 of *value are updated; sign and scale of both
     // operands are ignored.  A carry out of Hi32 is discarded.
2173 static void
2174 DecAdd(MonoDecimal *value, MonoDecimal* d)
2175 {
2176         g_assert(value != NULL && d != NULL);
2177
             // Low words first; ripple a carry through Mid32 into Hi32.
2178         if (D32AddCarry(&DECIMAL_LO32(*value), DECIMAL_LO32(*d))) {
2179                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2180                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2181                 }
2182         }
             // Middle words, with their own carry into Hi32.
2183         if (D32AddCarry(&DECIMAL_MID32(*value), DECIMAL_MID32(*d))) {
2184                 D32AddCarry(&DECIMAL_HI32(*value), 1);
2185         }
2186         D32AddCarry(&DECIMAL_HI32(*value), DECIMAL_HI32(*d));
2187 }
2188
     // DecMul10 - multiply the 96-bit magnitude of *value by ten in place.
     //
     // Computed as ((value << 2) + value) << 1.  Bits that leave the top of
     // Hi32 are dropped by the shift and add helpers.
2189 static void
2190 DecMul10(MonoDecimal* value)
2191 {
2192         g_assert (value != NULL);
2193
2194         MonoDecimal d = *value;
2195         DecShiftLeft(value);
2196         DecShiftLeft(value);
2197         DecAdd(value, &d);
2198         DecShiftLeft(value);
2199 }
2200
     // DecAddInt32 - add an unsigned 32-bit integer to the magnitude of *value.
     //
     // The carry ripples from Lo32 through Mid32 into Hi32; a carry out of Hi32
     // is discarded.
2201 static void
2202 DecAddInt32(MonoDecimal* value, unsigned int i)
2203 {
2204         g_assert(value != NULL);
2205
2206         if (D32AddCarry(&DECIMAL_LO32(*value), i)) {
2207                 if (D32AddCarry(&DECIMAL_MID32(*value), 1)) {
2208                         D32AddCarry(&DECIMAL_HI32(*value), 1);
2209                 }
2210         }
2211 }
2212
     // mono_decimal_compare - three-way comparison of two decimals.
     //
     // Returns MONO_DECIMAL_CMP_EQ, MONO_DECIMAL_CMP_LT or MONO_DECIMAL_CMP_GT
     // for left == right, left < right and left > right respectively.  An
     // operand whose 96-bit magnitude is zero is treated as zero whatever its
     // sign byte holds.  Neither operand is modified.
2213 MonoDecimalCompareResult
2214 mono_decimal_compare (MonoDecimal *left, MonoDecimal *right)
2215 {
2216         MONO_ARCH_SAVE_REGS;
2217
2218         uint32_t   left_sign;
2219         uint32_t   right_sign;
2220
2221         // First check signs and whether either are zero.  If both are
2222         // non-zero and of the same sign, just use subtraction to compare.
2223         //
2224         left_sign = left->v.v.Lo32 | left->v.v.Mid32 | left->Hi32;
2225         right_sign = right->v.v.Lo32 | right->v.v.Mid32 | right->Hi32;
2226         if (left_sign != 0)
2227                 left_sign = (left->u.u.sign & DECIMAL_NEG) | 1;
2228
2229         if (right_sign != 0)
2230                 right_sign = (right->u.u.sign & DECIMAL_NEG) | 1;
2231
2232         // left_sign & right_sign have values 1, 0, or 0x81 depending on if the left/right
2233         // operand is +, 0, or -.
2234         //
2235         if (left_sign == right_sign) {
2236                 if (left_sign == 0)    // both are zero
2237                         return MONO_DECIMAL_CMP_EQ; // return equal
2238
2239                 MonoDecimal result;
2240
                     // Same non-zero sign: compute left - right and look at the outcome.
2241                 DecAddSub(left, right, &result, DECIMAL_NEG);
2242                 if (DECIMAL_LO64_GET(result) == 0 && result.Hi32 == 0)
2243                         return MONO_DECIMAL_CMP_EQ;
2244                 if (result.u.u.sign & DECIMAL_NEG)
2245                         return MONO_DECIMAL_CMP_LT;
2246                 return MONO_DECIMAL_CMP_GT;
2247         }
2248
2249         //
2250         // Signs are different.  Use signed byte compares, so that 0x81 (negative)
2251         // orders below 0 and 1.  The cast must be to 'signed char': plain 'char'
             // is unsigned on some ABIs, where 0x81 would compare greater than 1.
             //
2252         if ((signed char)left_sign > (signed char)right_sign)
2253                 return MONO_DECIMAL_CMP_GT;
2254         return MONO_DECIMAL_CMP_LT;
2255 }
2256
     // mono_decimal_init_single - initialise *_this from a float.
     //
     // Converts with VarDecFromR4.  On MONO_DECIMAL_OVERFLOW an overflow
     // exception is raised through mono_raise_exception (presumably this does
     // not return -- confirm).  Otherwise the reserved field is cleared, which
     // the conversion itself does not do.
2257 void
2258 mono_decimal_init_single (MonoDecimal *_this, float value)
2259 {
2260         MONO_ARCH_SAVE_REGS;
2261         if (VarDecFromR4 (value, _this) == MONO_DECIMAL_OVERFLOW)
2262                 mono_raise_exception (mono_get_exception_overflow ());
2263         _this->reserved = 0;
2264 }
2265
     // mono_decimal_init_double - initialise *_this from a double.
     //
     // Converts with VarDecFromR8.  On MONO_DECIMAL_OVERFLOW an overflow
     // exception is raised through mono_raise_exception (presumably this does
     // not return -- confirm).  Otherwise the reserved field is cleared, which
     // the conversion itself does not do.
2266 void
2267 mono_decimal_init_double (MonoDecimal *_this, double value)
2268 {
2269         MONO_ARCH_SAVE_REGS;
2270         if (VarDecFromR8 (value, _this) == MONO_DECIMAL_OVERFLOW)
2271                 mono_raise_exception (mono_get_exception_overflow ());
2272         _this->reserved = 0;
2273 }
2274
     // mono_decimal_floor - replace *d with its value rounded toward -infinity.
     //
     // The rounding is done by VarDecInt into a temporary, which is then copied
     // back over *d; the reserved field of *d is cleared afterwards.
2275 void
2276 mono_decimal_floor (MonoDecimal *d)
2277 {
2278         MonoDecimal decRes;
2279
2280         MONO_ARCH_SAVE_REGS;
2281
2282         VarDecInt(d, &decRes);
2283         
2284         // copy decRes into d
2285         COPYDEC(*d, decRes);
2286         d->reserved = 0;
2287         FC_GC_POLL ();
2288 }
2289
     // mono_decimal_get_hash_code - hash a decimal through its double value.
     //
     // The decimal is converted to double with VarR8FromDec and the hash is the
     // low 32 bits of the double's bit pattern, with the lowest 4 bits cleared,
     // XORed with the high 32 bits.  Returns 0 when the conversion fails or the
     // value is zero.
     //
     // The bit pattern is read with memcpy into a uint64_t instead of indexing
     // the double through an int pointer.  Which word is "low" then no longer
     // depends on byte order, and the access does not break the aliasing rules.
     // Assumes double is 64 bits wide.
2290 int32_t
2291 mono_decimal_get_hash_code (MonoDecimal *d)
2292 {
2293         double dbl;
2294         uint64_t bits;
2295         MONO_ARCH_SAVE_REGS;
2296         if (VarR8FromDec(d, &dbl) != MONO_DECIMAL_OK)
2297                 return 0;
2298         
2299         if (dbl == 0.0) {
2300                 // Ensure 0 and -0 have the same hash code
2301                 return 0;
2302         }
2303         // conversion to double is lossy and produces rounding errors so we mask off the lowest 4 bits
2304         // 
2305         // For example these two numerically equal decimals with different internal representations produce
2306         // slightly different results when converted to double:
2307         //
2308         // decimal a = new decimal(new int[] { 0x76969696, 0x2fdd49fa, 0x409783ff, 0x00160000 });
2309         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.176470588
2310         // decimal b = new decimal(new int[] { 0x3f0f0f0f, 0x1e62edcc, 0x06758d33, 0x00150000 }); 
2311         //                     => (decimal)1999021.176470588235294117647000000000 => (double)1999021.1764705882
2312         //
2313         memcpy (&bits, &dbl, sizeof (bits));
2314         return (int32_t)(((uint32_t)bits & 0xFFFFFFF0U) ^ (uint32_t)(bits >> 32));
2315 }
2316
     // mono_decimal_multiply - compute *d1 * *d2 and store the product in *d1.
     //
     // The product is formed by VarDecMul in a temporary.  Any status other
     // than MONO_DECIMAL_OK raises an overflow exception through
     // mono_raise_exception (presumably this does not return -- confirm).  On
     // success the product is copied over *d1 and its reserved field is
     // cleared.
2317 void
2318 mono_decimal_multiply (MonoDecimal *d1, MonoDecimal *d2)
2319 {
2320         MonoDecimal decRes;
2321
2322         MONO_ARCH_SAVE_REGS;
2323
2324         MonoDecimalStatus status = VarDecMul(d1, d2, &decRes);
2325         if (status != MONO_DECIMAL_OK)
2326                 mono_raise_exception (mono_get_exception_overflow ());
2327
2328         COPYDEC(*d1, decRes);
2329         d1->reserved = 0;
2330
2331         FC_GC_POLL ();
2332 }
2333
     // mono_decimal_round - round *d in place to 'decimals' fractional digits.
     //
     // 'decimals' must be in the range 0..28; otherwise an
     // argument-out-of-range exception is raised through mono_raise_exception
     // (presumably this does not return -- confirm).  The rounding itself is
     // done by VarDecRound into a temporary that is copied back over *d, after
     // which the reserved field of *d is cleared.
2334 void
2335 mono_decimal_round (MonoDecimal *d, int32_t decimals)
2336 {
2337         MONO_ARCH_SAVE_REGS;
2338
2339         MonoDecimal decRes;
2340         
2341         // GC is only triggered for throwing, no need to protect result 
             // NOTE(review): the name handed to the exception is "d" although the
             // value being range-checked is 'decimals' -- confirm this is intended.
2342         if (decimals < 0 || decimals > 28)
2343                 mono_raise_exception (mono_get_exception_argument_out_of_range ("d"));
2344
             // Status not checked: VarDecRound only fails for a negative digit
             // count, which was rejected above.
2345         VarDecRound(d, decimals, &decRes);
2346
2347         // copy decRes into d
2348         COPYDEC(*d, decRes);
2349         d->reserved = 0;
2350
2351         FC_GC_POLL();
2352 }
2353
2354 void
2355 mono_decimal_tocurrency (MonoDecimal *decimal)
2356 {
2357         // TODO
2358 }
2359
2360 double
2361 mono_decimal_to_double (MonoDecimal d)
2362 {
2363         MONO_ARCH_SAVE_REGS;
2364
2365         double result = 0.0;
2366         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2367         VarR8FromDec(&d, &result);
2368         return result;
2369 }
2370
2371 int32_t
2372 mono_decimal_to_int32 (MonoDecimal d)
2373 {
2374         MONO_ARCH_SAVE_REGS;
2375
2376         MonoDecimal result;
2377         
2378         // The following can not return an error, it only returns INVALID_ARG if the decimals is < 0
2379         VarDecRound(&d, 0, &result);
2380         
2381         if (DECIMAL_SCALE(result) != 0) {
2382                 d = result;
2383                 VarDecFix (&d, &result);
2384         }
2385         
2386         if (DECIMAL_HI32(result) == 0 && DECIMAL_MID32(result) == 0) {
2387                 int32_t i = DECIMAL_LO32(result);
2388                 if ((int16_t)DECIMAL_SIGNSCALE(result) >= 0) {
2389                         if (i >= 0)
2390                                 return i;
2391                 } else {
2392                         i = -i;
2393                         if (i <= 0)
2394                                 return i;
2395                 }
2396         }
2397         
2398         mono_raise_exception (mono_get_exception_overflow ());
2399         // Not reachable
2400         return 0;
2401 }
2402
2403 float
2404 mono_decimal_to_float (MonoDecimal d)
2405 {
2406         MONO_ARCH_SAVE_REGS;
2407
2408         float result = 0.0f;
2409         // Note: this can fail if the input is an invalid decimal, but for compatibility we should return 0
2410         VarR4FromDec(&d, &result);
2411         return result;
2412 }
2413
2414 void
2415 mono_decimal_truncate (MonoDecimal *d)
2416 {
2417         MONO_ARCH_SAVE_REGS;
2418
2419         MonoDecimal decRes;
2420
2421         VarDecFix(d, &decRes);
2422
2423         // copy decRes into d
2424         COPYDEC(*d, decRes);
2425         d->reserved = 0;
2426         FC_GC_POLL();
2427 }
2428
2429 void
2430 mono_decimal_addsub (MonoDecimal *left, MonoDecimal *right, uint8_t sign)
2431 {
2432         MonoDecimal result, decTmp;
2433         MonoDecimal *pdecTmp, *leftOriginal;
2434         uint32_t    num[6], pwr;
2435         int         scale, hi_prod, cur;
2436         SPLIT64     sdlTmp;
2437         
2438         MONO_ARCH_SAVE_REGS;
2439         g_assert(sign == 0 || sign == DECIMAL_NEG);
2440
2441         leftOriginal = left;
2442
2443         sign ^= (DECIMAL_SIGN(*right) ^ DECIMAL_SIGN(*left)) & DECIMAL_NEG;
2444
2445         if (DECIMAL_SCALE(*right) == DECIMAL_SCALE(*left)) {
2446                 // Scale factors are equal, no alignment necessary.
2447                 //
2448                 DECIMAL_SIGNSCALE(result) = DECIMAL_SIGNSCALE(*left);
2449
2450         AlignedAdd:
2451                 if (sign) {
2452                         // Signs differ - subtract
2453                         //
2454                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) - DECIMAL_LO64_GET(*right)));
2455                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) - DECIMAL_HI32(*right);
2456
2457                         // Propagate carry
2458                         //
2459                         if (DECIMAL_LO64_GET(result) > DECIMAL_LO64_GET(*left)) {
2460                                 DECIMAL_HI32(result)--;
2461                                 if (DECIMAL_HI32(result) >= DECIMAL_HI32(*left))
2462                                         goto SignFlip;
2463                         } else if (DECIMAL_HI32(result) > DECIMAL_HI32(*left)) {
2464                                 // Got negative result.  Flip its sign.
2465                                 // 
2466                         SignFlip:
2467                                 DECIMAL_LO64_SET(result, -(int64_t)DECIMAL_LO64_GET(result));
2468                                 DECIMAL_HI32(result) = ~DECIMAL_HI32(result);
2469                                 if (DECIMAL_LO64_GET(result) == 0)
2470                                         DECIMAL_HI32(result)++;
2471                                 DECIMAL_SIGN(result) ^= DECIMAL_NEG;
2472                         }
2473
2474                 } else {
2475                         // Signs are the same - add
2476                         //
2477                         DECIMAL_LO64_SET(result, (DECIMAL_LO64_GET(*left) + DECIMAL_LO64_GET(*right)));
2478                         DECIMAL_HI32(result) = DECIMAL_HI32(*left) + DECIMAL_HI32(*right);
2479
2480                         // Propagate carry
2481                         //
2482                         if (DECIMAL_LO64_GET(result) < DECIMAL_LO64_GET(*left)) {
2483                                 DECIMAL_HI32(result)++;
2484                                 if (DECIMAL_HI32(result) <= DECIMAL_HI32(*left))
2485                                         goto AlignedScale;
2486                         } else if (DECIMAL_HI32(result) < DECIMAL_HI32(*left)) {
2487                         AlignedScale:
2488                                 // The addition carried above 96 bits.  Divide the result by 10,
2489                                 // dropping the scale factor.
2490                                 // 
2491                                 if (DECIMAL_SCALE(result) == 0)
2492                                         mono_raise_exception (mono_get_exception_overflow ());
2493                                 DECIMAL_SCALE(result)--;
2494
2495                                 sdlTmp.u.Lo = DECIMAL_HI32(result);
2496                                 sdlTmp.u.Hi = 1;
2497                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2498                                 DECIMAL_HI32(result) = sdlTmp.u.Lo;
2499
2500                                 sdlTmp.u.Lo = DECIMAL_MID32(result);
2501                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2502                                 DECIMAL_MID32(result) = sdlTmp.u.Lo;
2503
2504                                 sdlTmp.u.Lo = DECIMAL_LO32(result);
2505                                 sdlTmp.int64 = DivMod64by32(sdlTmp.int64, 10);
2506                                 DECIMAL_LO32(result) = sdlTmp.u.Lo;
2507
2508                                 // See if we need to round up.
2509                                 //
2510                                 if (sdlTmp.u.Hi >= 5 && (sdlTmp.u.Hi > 5 || (DECIMAL_LO32(result) & 1))) {
2511                                         DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(result)+1);
2512                                         if (DECIMAL_LO64_GET(result) == 0)
2513                                                 DECIMAL_HI32(result)++;
2514                                 }
2515                         }
2516                 }
2517         } else {
2518                 // Scale factors are not equal.  Assume that a larger scale
2519                 // factor (more decimal places) is likely to mean that number
2520                 // is smaller.  Start by guessing that the right operand has
2521                 // the larger scale factor.  The result will have the larger
2522                 // scale factor.
2523                 //
2524                 DECIMAL_SCALE(result) = DECIMAL_SCALE(*right);  // scale factor of "smaller"
2525                 DECIMAL_SIGN(result) = DECIMAL_SIGN(*left);    // but sign of "larger"
2526                 scale = DECIMAL_SCALE(result)- DECIMAL_SCALE(*left);
2527
2528                 if (scale < 0) {
2529                         // Guessed scale factor wrong. Swap operands.
2530                         //
2531                         scale = -scale;
2532                         DECIMAL_SCALE(result) = DECIMAL_SCALE(*left);
2533                         DECIMAL_SIGN(result) ^= sign;
2534                         pdecTmp = right;
2535                         right = left;
2536                         left = pdecTmp;
2537                 }
2538
2539                 // *left will need to be multiplied by 10^scale so
2540                 // it will have the same scale as *right.  We could be
2541                 // extending it to up to 192 bits of precision.
2542                 //
2543                 if (scale <= POWER10_MAX) {
2544                         // Scaling won't make it larger than 4 uint32_ts
2545                         //
2546                         pwr = power10[scale];
2547                         DECIMAL_LO64_SET(decTmp, UInt32x32To64(DECIMAL_LO32(*left), pwr));
2548                         sdlTmp.int64 = UInt32x32To64(DECIMAL_MID32(*left), pwr);
2549                         sdlTmp.int64 += DECIMAL_MID32(decTmp);
2550                         DECIMAL_MID32(decTmp) = sdlTmp.u.Lo;
2551                         DECIMAL_HI32(decTmp) = sdlTmp.u.Hi;
2552                         sdlTmp.int64 = UInt32x32To64(DECIMAL_HI32(*left), pwr);
2553                         sdlTmp.int64 += DECIMAL_HI32(decTmp);
2554                         if (sdlTmp.u.Hi == 0) {
2555                                 // Result fits in 96 bits.  Use standard aligned add.
2556                                 //
2557                                 DECIMAL_HI32(decTmp) = sdlTmp.u.Lo;
2558                                 left = &decTmp;
2559                                 goto AlignedAdd;
2560                         }
2561                         num[0] = DECIMAL_LO32(decTmp);
2562                         num[1] = DECIMAL_MID32(decTmp);
2563                         num[2] = sdlTmp.u.Lo;
2564                         num[3] = sdlTmp.u.Hi;
2565                         hi_prod = 3;
2566                 } else {
2567                         // Have to scale by a bunch.  Move the number to a buffer
2568                         // where it has room to grow as it's scaled.
2569                         //
2570                         num[0] = DECIMAL_LO32(*left);
2571                         num[1] = DECIMAL_MID32(*left);
2572                         num[2] = DECIMAL_HI32(*left);
2573                         hi_prod = 2;
2574
2575                         // Scan for zeros in the upper words.
2576                         //
2577                         if (num[2] == 0) {
2578                                 hi_prod = 1;
2579                                 if (num[1] == 0) {
2580                                         hi_prod = 0;
2581                                         if (num[0] == 0) {
2582                                                 // Left arg is zero, return right.
2583                                                 //
2584                                                 DECIMAL_LO64_SET(result, DECIMAL_LO64_GET(*right));
2585                                                 DECIMAL_HI32(result) = DECIMAL_HI32(*right);
2586                                                 DECIMAL_SIGN(result) ^= sign;
2587                                                 goto RetDec;
2588                                         }
2589                                 }
2590                         }
2591
2592                         // Scaling loop, up to 10^9 at a time.  hi_prod stays updated
2593                         // with index of highest non-zero uint32_t.
2594                         //
2595                         for (; scale > 0; scale -= POWER10_MAX) {
2596                                 if (scale > POWER10_MAX)
2597                                         pwr = ten_to_nine;
2598                                 else
2599                                         pwr = power10[scale];
2600
2601                                 sdlTmp.u.Hi = 0;
2602                                 for (cur = 0; cur <= hi_prod; cur++) {
2603                                         sdlTmp.int64 = UInt32x32To64(num[cur], pwr) + sdlTmp.u.Hi;
2604                                         num[cur] = sdlTmp.u.Lo;
2605                                 }
2606
2607                                 if (sdlTmp.u.Hi != 0)
2608                                         // We're extending the result by another uint32_t.
2609                                         num[++hi_prod] = sdlTmp.u.Hi;
2610                         }
2611                 }
2612
2613                 // Scaling complete, do the add.  Could be subtract if signs differ.
2614                 //
2615                 sdlTmp.u.Lo = num[0];
2616                 sdlTmp.u.Hi = num[1];
2617
2618                 if (sign) {
2619                         // Signs differ, subtract.
2620                         //
2621                         DECIMAL_LO64_SET(result, (sdlTmp.int64 - DECIMAL_LO64_GET(*right)));
2622                         DECIMAL_HI32(result) = num[2] - DECIMAL_HI32(*right);
2623
2624                         // Propagate carry
2625                         //
2626                         if (DECIMAL_LO64_GET(result) > sdlTmp.int64) {
2627                                 DECIMAL_HI32(result)--;
2628                                 if (DECIMAL_HI32(result) >= num[2])
2629                                         goto LongSub;
2630                         } else if (DECIMAL_HI32(result) > num[2]) {
2631                         LongSub:
2632                                 // If num has more than 96 bits of precision, then we need to 
2633                                 // carry the subtraction into the higher bits.  If it doesn't, 
2634                                 // then we subtracted in the wrong order and have to flip the 
2635                                 // sign of the result.
2636                                 // 
2637                                 if (hi_prod <= 2)
2638                                         goto SignFlip;
2639
2640                                 cur = 3;
2641                                 while(num[cur++]-- == 0);
2642                                 if (num[hi_prod] == 0)
2643                                         hi_prod--;
2644                         }
2645                 } else {
2646                         // Signs the same, add.
2647                         //
2648                         DECIMAL_LO64_SET(result, (sdlTmp.int64 + DECIMAL_LO64_GET(*right)));
2649                         DECIMAL_HI32(result) = num[2] + DECIMAL_HI32(*right);
2650
2651                         // Propagate carry
2652                         //
2653                         if (DECIMAL_LO64_GET(result) < sdlTmp.int64) {
2654                                 DECIMAL_HI32(result)++;
2655                                 if (DECIMAL_HI32(result) <= num[2])
2656                                         goto LongAdd;
2657                         } else if (DECIMAL_HI32(result) < num[2]) {
2658                         LongAdd:
2659                                 // Had a carry above 96 bits.
2660                                 //
2661                                 cur = 3;
2662                                 do {
2663                                         if (hi_prod < cur) {
2664                                                 num[cur] = 1;
2665                                                 hi_prod = cur;
2666                                                 break;
2667                                         }
2668                                 }while (++num[cur++] == 0);
2669                         }
2670                 }
2671
2672                 if (hi_prod > 2) {
2673                         num[0] = DECIMAL_LO32(result);
2674                         num[1] = DECIMAL_MID32(result);
2675                         num[2] = DECIMAL_HI32(result);
2676                         DECIMAL_SCALE(result) = (uint8_t)ScaleResult(num, hi_prod, DECIMAL_SCALE(result));
2677                         if (DECIMAL_SCALE(result) == (uint8_t)-1)
2678                                 mono_raise_exception (mono_get_exception_overflow ());
2679
2680                         DECIMAL_LO32(result) = num[0];
2681                         DECIMAL_MID32(result) = num[1];
2682                         DECIMAL_HI32(result) = num[2];
2683                 }
2684         }
2685
2686 RetDec:
2687         left = leftOriginal;
2688         COPYDEC(*left, result);
2689         left->reserved = 0;
2690 }
2691
2692 void
2693 mono_decimal_divide (MonoDecimal *left, MonoDecimal *right)
2694 {
2695         uint32_t quo[3], quo_save[3],rem[4], divisor[3];
2696         uint32_t pwr, tmp, tmp1;
2697         SPLIT64  sdlTmp, sdlDivisor;
2698         int      scale, cur_scale;
2699         gboolean unscale;
2700
2701         MONO_ARCH_SAVE_REGS;
2702         
2703         scale = DECIMAL_SCALE(*left) - DECIMAL_SCALE(*right);
2704         unscale = FALSE;
2705         divisor[0] = DECIMAL_LO32(*right);
2706         divisor[1] = DECIMAL_MID32(*right);
2707         divisor[2] = DECIMAL_HI32(*right);
2708
2709         if (divisor[1] == 0 && divisor[2] == 0) {
2710                 // Divisor is only 32 bits.  Easy divide.
2711                 //
2712                 if (divisor[0] == 0)
2713                         mono_raise_exception (mono_get_exception_divide_by_zero ());
2714
2715                 quo[0] = DECIMAL_LO32(*left);
2716                 quo[1] = DECIMAL_MID32(*left);
2717                 quo[2] = DECIMAL_HI32(*left);
2718                 rem[0] = Div96By32(quo, divisor[0]);
2719
2720                 for (;;) {
2721                         if (rem[0] == 0) {
2722                                 if (scale < 0) {
2723                                         cur_scale = min(9, -scale);
2724                                         goto HaveScale;
2725                                 }
2726                                 break;
2727                         }
2728                         // We need to unscale if and only if we have a non-zero remainder
2729                         unscale = TRUE;
2730
2731                         // We have computed a quotient based on the natural scale 
2732                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero 
2733                         // remainder, so now we should increase the scale if possible to 
2734                         // include more quotient bits.
2735                         // 
2736                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and 
2737                         // computing more quotient bits as long as the remainder stays 
2738                         // non-zero.  If scaling by that much would cause overflow, we'll 
2739                         // drop out of the loop and scale by as much as we can.
2740                         // 
2741                         // Scaling by 10^9 will overflow if quo[2].quo[1] >= 2^32 / 10^9 
2742                         // = 4.294 967 296.  So the upper limit is quo[2] == 4 and 
2743                         // quo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since 
2744                         // quotient bits in quo[0] could be all 1's, then 1,266,874,888 
2745                         // is the largest value in quo[1] (when quo[2] == 4) that is 
2746                         // assured not to overflow.
2747                         // 
2748                         cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2749                         if (cur_scale == 0) {
2750                                 // No more scaling to be done, but remainder is non-zero.
2751                                 // Round quotient.
2752                                 //
2753                                 tmp = rem[0] << 1;
2754                                 if (tmp < rem[0] || (tmp >= divisor[0] &&
2755                                                            (tmp > divisor[0] || (quo[0] & 1)))) {
2756                                 RoundUp:
2757                                         if (!Add32To96(quo, 1)) {
2758                                                 if (scale == 0) 
2759                                                         mono_raise_exception (mono_get_exception_overflow ());
2760                                                 scale--;
2761                                                 OverflowUnscale(quo, TRUE);
2762                                                 break;
2763                                         }      
2764                                 }
2765                                 break;
2766                         }
2767
2768                         if (cur_scale < 0) 
2769                                 mono_raise_exception (mono_get_exception_overflow ());
2770
2771                 HaveScale:
2772                         pwr = power10[cur_scale];
2773                         scale += cur_scale;
2774
2775                         if (IncreaseScale(quo, pwr) != 0) 
2776                                 mono_raise_exception (mono_get_exception_overflow ());
2777
2778
2779                         sdlTmp.int64 = DivMod64by32(UInt32x32To64(rem[0], pwr), divisor[0]);
2780                         rem[0] = sdlTmp.u.Hi;
2781
2782                         if (!Add32To96(quo, sdlTmp.u.Lo)) {
2783                                 if (scale == 0)
2784                                         mono_raise_exception (mono_get_exception_overflow ());                                  
2785                                 scale--;
2786                                 OverflowUnscale(quo, (rem[0] != 0));
2787                                 break;
2788                         }
2789                 } // for (;;)
2790         } else {
2791                 // Divisor has bits set in the upper 64 bits.
2792                 //
2793                 // Divisor must be fully normalized (shifted so bit 31 of the most 
2794                 // significant uint32_t is 1).  Locate the MSB so we know how much to 
2795                 // normalize by.  The dividend will be shifted by the same amount so 
2796                 // the quotient is not changed.
2797                 //
2798                 if (divisor[2] == 0)
2799                         tmp = divisor[1];
2800                 else
2801                         tmp = divisor[2];
2802
2803                 cur_scale = 0;
2804                 if (!(tmp & 0xFFFF0000)) {
2805                         cur_scale += 16;
2806                         tmp <<= 16;
2807                 }
2808                 if (!(tmp & 0xFF000000)) {
2809                         cur_scale += 8;
2810                         tmp <<= 8;
2811                 }
2812                 if (!(tmp & 0xF0000000)) {
2813                         cur_scale += 4;
2814                         tmp <<= 4;
2815                 }
2816                 if (!(tmp & 0xC0000000)) {
2817                         cur_scale += 2;
2818                         tmp <<= 2;
2819                 }
2820                 if (!(tmp & 0x80000000)) {
2821                         cur_scale++;
2822                         tmp <<= 1;
2823                 }
2824     
2825                 // Shift both dividend and divisor left by cur_scale.
2826                 // 
2827                 sdlTmp.int64 = DECIMAL_LO64_GET(*left) << cur_scale;
2828                 rem[0] = sdlTmp.u.Lo;
2829                 rem[1] = sdlTmp.u.Hi;
2830                 sdlTmp.u.Lo = DECIMAL_MID32(*left);
2831                 sdlTmp.u.Hi = DECIMAL_HI32(*left);
2832                 sdlTmp.int64 <<= cur_scale;
2833                 rem[2] = sdlTmp.u.Hi;
2834                 rem[3] = (DECIMAL_HI32(*left) >> (31 - cur_scale)) >> 1;
2835
2836                 sdlDivisor.u.Lo = divisor[0];
2837                 sdlDivisor.u.Hi = divisor[1];
2838                 sdlDivisor.int64 <<= cur_scale;
2839
2840                 if (divisor[2] == 0) {
2841                         // Have a 64-bit divisor in sdlDivisor.  The remainder 
2842                         // (currently 96 bits spread over 4 uint32_ts) will be < divisor.
2843                         // 
2844                         sdlTmp.u.Lo = rem[2];
2845                         sdlTmp.u.Hi = rem[3];
2846
2847                         quo[2] = 0;
2848                         quo[1] = Div96By64(&rem[1], sdlDivisor);
2849                         quo[0] = Div96By64(rem, sdlDivisor);
2850
2851                         for (;;) {
2852                                 if ((rem[0] | rem[1]) == 0) {
2853                                         if (scale < 0) {
2854                                                 cur_scale = min(9, -scale);
2855                                                 goto HaveScale64;
2856                                         }
2857                                         break;
2858                                 }
2859
2860                                 // We need to unscale if and only if we have a non-zero remainder
2861                                 unscale = TRUE;
2862
2863                                 // Remainder is non-zero.  Scale up quotient and remainder by 
2864                                 // powers of 10 so we can compute more significant bits.
2865                                 // 
2866                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2867                                 if (cur_scale == 0) {
2868                                         // No more scaling to be done, but remainder is non-zero.
2869                                         // Round quotient.
2870                                         //
2871                                         sdlTmp.u.Lo = rem[0];
2872                                         sdlTmp.u.Hi = rem[1];
2873                                         if (sdlTmp.u.Hi >= 0x80000000 || (sdlTmp.int64 <<= 1) > sdlDivisor.int64 ||
2874                                             (sdlTmp.int64 == sdlDivisor.int64 && (quo[0] & 1)))
2875                                                 goto RoundUp;
2876                                         break;
2877                                 }
2878
2879                                 if (cur_scale < 0) 
2880                                         mono_raise_exception (mono_get_exception_overflow ());
2881
2882                         HaveScale64:
2883                                 pwr = power10[cur_scale];
2884                                 scale += cur_scale;
2885
2886                                 if (IncreaseScale(quo, pwr) != 0)
2887                                         mono_raise_exception (mono_get_exception_overflow ());
2888                                 
2889                                 rem[2] = 0;  // rem is 64 bits, IncreaseScale uses 96
2890                                 IncreaseScale(rem, pwr);
2891                                 tmp = Div96By64(rem, sdlDivisor);
2892                                 if (!Add32To96(quo, tmp)) {
2893                                         if (scale == 0) 
2894                                                 mono_raise_exception (mono_get_exception_overflow ());
2895                                         scale--;
2896                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0));
2897                                         break;
2898                                 }      
2899
2900                         } // for (;;)
2901                 } else {
2902                         // Have a 96-bit divisor in divisor[].
2903                         //
2904                         // Start by finishing the shift left by cur_scale.
2905                         //
2906                         sdlTmp.u.Lo = divisor[1];
2907                         sdlTmp.u.Hi = divisor[2];
2908                         sdlTmp.int64 <<= cur_scale;
2909                         divisor[0] = sdlDivisor.u.Lo;
2910                         divisor[1] = sdlDivisor.u.Hi;
2911                         divisor[2] = sdlTmp.u.Hi;
2912
2913                         // The remainder (currently 96 bits spread over 4 uint32_ts) 
2914                         // will be < divisor.
2915                         // 
2916                         quo[2] = 0;
2917                         quo[1] = 0;
2918                         quo[0] = Div128By96(rem, divisor);
2919
2920                         for (;;) {
2921                                 if ((rem[0] | rem[1] | rem[2]) == 0) {
2922                                         if (scale < 0) {
2923                                                 cur_scale = min(9, -scale);
2924                                                 goto HaveScale96;
2925                                         }
2926                                         break;
2927                                 }
2928
2929                                 // We need to unscale if and only if we have a non-zero remainder
2930                                 unscale = TRUE;
2931
2932                                 // Remainder is non-zero.  Scale up quotient and remainder by 
2933                                 // powers of 10 so we can compute more significant bits.
2934                                 // 
2935                                 cur_scale = SearchScale(quo[2], quo[1], quo[0], scale);
2936                                 if (cur_scale == 0) {
2937                                         // No more scaling to be done, but remainder is non-zero.
2938                                         // Round quotient.
2939                                         //
2940                                         if (rem[2] >= 0x80000000)
2941                                                 goto RoundUp;
2942
2943                                         tmp = rem[0] > 0x80000000;
2944                                         tmp1 = rem[1] > 0x80000000;
2945                                         rem[0] <<= 1;
2946                                         rem[1] = (rem[1] << 1) + tmp;
2947                                         rem[2] = (rem[2] << 1) + tmp1;
2948
2949                                         if (rem[2] > divisor[2] || (rem[2] == divisor[2] && (rem[1] > divisor[1] || rem[1] == (divisor[1] && (rem[0] > divisor[0] || (rem[0] == divisor[0] && (quo[0] & 1)))))))
2950                                                 goto RoundUp;
2951                                         break;
2952                                 }
2953
2954                                 if (cur_scale < 0) 
2955                                         mono_raise_exception (mono_get_exception_overflow ());
2956                                 
2957                         HaveScale96:
2958                                 pwr = power10[cur_scale];
2959                                 scale += cur_scale;
2960
2961                                 if (IncreaseScale(quo, pwr) != 0) 
2962                                         mono_raise_exception (mono_get_exception_overflow ());
2963                                 
2964                                 rem[3] = IncreaseScale(rem, pwr);
2965                                 tmp = Div128By96(rem, divisor);
2966                                 if (!Add32To96(quo, tmp)) {
2967                                         if (scale == 0)
2968                                                 mono_raise_exception (mono_get_exception_overflow ());
2969                                         
2970                                         scale--;
2971                                         OverflowUnscale(quo, (rem[0] != 0 || rem[1] != 0 || rem[2] != 0 || rem[3] != 0));
2972                                         break;
2973                                 }      
2974
2975                         } // for (;;)
2976                 }
2977         }
2978
2979         // We need to unscale if and only if we have a non-zero remainder
2980         if (unscale) {
2981                 // Try extracting any extra powers of 10 we may have 
2982                 // added.  We do this by trying to divide out 10^8, 10^4, 10^2, and 10^1.
2983                 // If a division by one of these powers returns a zero remainder, then
2984                 // we keep the quotient.  If the remainder is not zero, then we restore
2985                 // the previous value.
2986                 // 
2987                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10
2988                 // we can extract.  We use this as a quick test on whether to try a
2989                 // given power.
2990                 // 
2991                 while ((quo[0] & 0xFF) == 0 && scale >= 8) {
2992                         quo_save[0] = quo[0];
2993                         quo_save[1] = quo[1];
2994                         quo_save[2] = quo[2];
2995
2996                         if (Div96By32(quo_save, 100000000) == 0) {
2997                                 quo[0] = quo_save[0];
2998                                 quo[1] = quo_save[1];
2999                                 quo[2] = quo_save[2];
3000                                 scale -= 8;
3001                         } else
3002                                 break;
3003                 }
3004
3005                 if ((quo[0] & 0xF) == 0 && scale >= 4) {
3006                         quo_save[0] = quo[0];
3007                         quo_save[1] = quo[1];
3008                         quo_save[2] = quo[2];
3009
3010                         if (Div96By32(quo_save, 10000) == 0) {
3011                                 quo[0] = quo_save[0];
3012                                 quo[1] = quo_save[1];
3013                                 quo[2] = quo_save[2];
3014                                 scale -= 4;
3015                         }
3016                 }
3017
3018                 if ((quo[0] & 3) == 0 && scale >= 2) {
3019                         quo_save[0] = quo[0];
3020                         quo_save[1] = quo[1];
3021                         quo_save[2] = quo[2];
3022
3023                         if (Div96By32(quo_save, 100) == 0) {
3024                                 quo[0] = quo_save[0];
3025                                 quo[1] = quo_save[1];
3026                                 quo[2] = quo_save[2];
3027                                 scale -= 2;
3028                         }
3029                 }
3030
3031                 if ((quo[0] & 1) == 0 && scale >= 1) {
3032                         quo_save[0] = quo[0];
3033                         quo_save[1] = quo[1];
3034                         quo_save[2] = quo[2];
3035
3036                         if (Div96By32(quo_save, 10) == 0) {
3037                                 quo[0] = quo_save[0];
3038                                 quo[1] = quo_save[1];
3039                                 quo[2] = quo_save[2];
3040                                 scale -= 1;
3041                         }
3042                 }
3043         }
3044
3045         DECIMAL_SIGN(*left) = DECIMAL_SIGN(*left) ^ DECIMAL_SIGN(*right);
3046         DECIMAL_HI32(*left) = quo[2];
3047         DECIMAL_MID32(*left) = quo[1];
3048         DECIMAL_LO32(*left) = quo[0];
3049         DECIMAL_SCALE(*left) = (uint8_t)scale;
3050         left->reserved = 0;
3051
3052 }
3053
// Exponent bound used by mono_decimal_from_number: an exponent above this
// makes the conversion fail, and one at or below -DECIMAL_PRECISION collapses
// the result to zero with scale DECIMAL_PRECISION - 1.
#define DECIMAL_PRECISION 29
// Capacity of CLRNumber.digits, not counting the one extra slot the array
// reserves (presumably for the terminating 0 -- confirm against the producer).
#define NUMBER_MAXDIGITS 50
// Number buffer handed to mono_decimal_from_number through a void pointer.
// NOTE(review): the layout presumably has to match the structure built by the
// caller on the managed side -- confirm before reordering or resizing fields.
typedef struct  {
        // Not read by the code visible in this file section; presumably the
        // count of significant digits -- TODO confirm.
        int32_t precision;
        // Decimal exponent: the value is 0.<digits> * 10^scale, i.e. scale is
        // the number of digits that lie before the decimal point.
        int32_t scale;
        // Non-zero means the value is negative.
        int32_t sign;
        // Digit characters '0'..'9' stored as 16-bit units, terminated by 0.
        uint16_t digits[NUMBER_MAXDIGITS + 1];
        // Not referenced by the code visible in this file section.
        uint16_t* allDigits;
} CLRNumber;
3063
3064 int
3065 mono_decimal_from_number (void *from, MonoDecimal *target)
3066 {
3067         MONO_ARCH_SAVE_REGS;
3068         CLRNumber *number = (CLRNumber *) from;
3069         g_assert(number != NULL);
3070         g_assert(target != NULL);
3071
3072         MonoDecimal d;
3073         d.reserved = 0;
3074         DECIMAL_SIGNSCALE(d) = 0;
3075         DECIMAL_HI32(d) = 0;
3076         DECIMAL_LO32(d) = 0;
3077         DECIMAL_MID32(d) = 0;
3078         uint16_t* p = number->digits;
3079         g_assert(p != NULL);
3080         int e = number->scale;
3081         if (!*p) {
3082                 // To avoid risking an app-compat issue with pre 4.5 (where some app was illegally using Reflection to examine the internal scale bits), we'll only force
3083                 // the scale to 0 if the scale was previously positive
3084                 if (e > 0) {
3085                         e = 0;
3086                 }
3087         } else {
3088                 if (e > DECIMAL_PRECISION) return 0;
3089                 while ((e > 0 || (*p && e > -28)) && (DECIMAL_HI32(d) < 0x19999999 || (DECIMAL_HI32(d) == 0x19999999 && (DECIMAL_MID32(d) < 0x99999999 || (DECIMAL_MID32(d) == 0x99999999 && (DECIMAL_LO32(d) < 0x99999999 || (DECIMAL_LO32(d) == 0x99999999 && *p <= '5'))))))) {
3090                         DecMul10(&d);
3091                         if (*p)
3092                                 DecAddInt32(&d, *p++ - '0');
3093                         e--;
3094                 }
3095                 if (*p++ >= '5') {
3096                         gboolean round = TRUE;
3097                         if (*(p-1) == '5' && *(p-2) % 2 == 0) { // Check if previous digit is even, only if the when we are unsure whether hows to do Banker's rounding
3098                                 // For digits > 5 we will be roundinp up anyway.
3099                                 int count = 20; // Look at the next 20 digits to check to round
3100                                 while (*p == '0' && count != 0) {
3101                                         p++;
3102                                         count--;
3103                                 }
3104                                 if (*p == '\0' || count == 0) 
3105                                         round = FALSE;// Do nothing
3106                         }
3107                         
3108                         if (round) {
3109                                 DecAddInt32(&d, 1);
3110                                 if ((DECIMAL_HI32(d) | DECIMAL_MID32(d) | DECIMAL_LO32(d)) == 0) {
3111                                         DECIMAL_HI32(d) = 0x19999999;
3112                                         DECIMAL_MID32(d) = 0x99999999;
3113                                         DECIMAL_LO32(d) = 0x9999999A;
3114                                         e++;
3115                                 }
3116                         }
3117                 }
3118         }
3119         if (e > 0)
3120                 return 0;
3121         if (e <= -DECIMAL_PRECISION) {
3122                 // Parsing a large scale zero can give you more precision than fits in the decimal.
3123                 // This should only happen for actual zeros or very small numbers that round to zero.
3124                 DECIMAL_SIGNSCALE(d) = 0;
3125                 DECIMAL_HI32(d) = 0;
3126                 DECIMAL_LO32(d) = 0;
3127                 DECIMAL_MID32(d) = 0;
3128                 DECIMAL_SCALE(d) = (DECIMAL_PRECISION - 1);
3129         } else {
3130                 DECIMAL_SCALE(d) = (uint8_t)(-e);
3131         }
3132         
3133         DECIMAL_SIGN(d) = number->sign? DECIMAL_NEG: 0;
3134         *target = d;
3135         return 1;
3136 }
3137
3138
3139 #endif