mono/metadata/decimal.c

   1 /*
   2    decimal.c
   3
   4    conversions and numerical operations for the c# type System.Decimal
   5
   6    Author: Martin Weindel (martin.weindel@t-online.de)
   7
   8    (C) 2001 by Martin Weindel
   9 */
  10
  11 /*
  12  * machine dependent configuration for
  13  * CSharp value type System.Decimal
  14  */
  15
  16 #include <mono/metadata/exception.h>
  17 #include <stdio.h>
  18 #include <memory.h>
  19 #include <stdlib.h>
  20 #include <string.h>
  21 #include <math.h>
  22
  23 /* needed for building microsoft dll */
  24 #ifdef __GNUC__
  25 #define DECINLINE __inline
  26 #else
  27 #define DECINLINE
  28 #endif
  29
  30 #define LIT_GUINT32(x) x
  31 #define LIT_GUINT64(x) x##LL
  32
  33
  34 /* we need a UInt64 type => guint64 */
  35 #include <glib.h>
  36
  37 #include "decimal.h"
  38
  39 /*
  40  * Deal with anon union support.
  41  */
  42 #define ss32 u.ss32
  43 #define signscale u.signscale
  44
  45 /* debugging stuff */
  46 #ifdef _DEBUG
  47 #include <assert.h>
  48 #define PRECONDITION(flag)  assert(flag)
  49 #define POSTCONDITION(flag)  assert(flag)
  50 #define TEST(flag)  assert(flag)
  51 #define INVARIANT_TEST(p) assert(p->signscale.scale >= 0 && p->signscale.scale <= DECIMAL_MAX_SCALE \
  52         && p->signscale.reserved1 == 0 && p->signscale.reserved2 == 0);
  53 #else
  54 #define PRECONDITION(flag)
  55 #define POSTCONDITION(flag)
  56 #define TEST(flag)
  57 #define INVARIANT_TEST(p)
  58 #endif /*#ifdef _DEBUG*/
  59
  60 #define DECIMAL_MAX_SCALE 28
  61 #define DECIMAL_MAX_INTFACTORS 9
  62
  63 #define DECIMAL_SUCCESS 0
  64 #define DECIMAL_FINISHED 1
  65 #define DECIMAL_OVERFLOW 2
  66 #define DECIMAL_INVALID_CHARACTER 2
  67 #define DECIMAL_INTERNAL_ERROR 3
  68 #define DECIMAL_INVALID_BITS 4
  69 #define DECIMAL_DIVIDE_BY_ZERO 5
  70 #define DECIMAL_BUFFER_OVERFLOW 6
  71
  72 /* some MACROS */
  73 #define DECINIT(src) memset(src, 0, sizeof(decimal_repr))
  74
  75 #define DECCOPY(dest, src) memcpy(dest, src, sizeof(decimal_repr))
  76
  77 #define DECSWAP(p1, p2, h) \
  78         h = (p1)->ss32; (p1)->ss32 = (p2)->ss32; (p2)->ss32 = h; \
  79         h = (p1)->hi32; (p1)->hi32 = (p2)->hi32; (p2)->hi32 = h; \
  80         h = (p1)->mid32; (p1)->mid32 = (p2)->mid32; (p2)->mid32 = h; \
  81         h = (p1)->lo32; (p1)->lo32 = (p2)->lo32; (p2)->lo32 = h;
  82
  83 #define DECNEGATE(p1) (p1)->signscale.sign = 1 - (p1)->signscale.sign
  84
  85 #define LIT_DEC128(hi, mid, lo) { (((guint64)mid)<<32 | lo), hi }
  86
  87 #define DECTO128(pd, lo, hi) \
  88         lo = (((guint64)(pd)->mid32) << 32) | (pd)->lo32; \
  89     hi = (pd)->hi32;
  90
  91 /* some constants */
  92 #define LIT_GUINT32_HIGHBIT LIT_GUINT32(0x80000000)
  93 #define LIT_GUINT64_HIGHBIT LIT_GUINT64(0x8000000000000000)
  94
  95 #define DECIMAL_LOG_NEGINF -1000
  96
  97 static const guint32 constantsDecadeInt32Factors[DECIMAL_MAX_INTFACTORS+1] = {
  98     LIT_GUINT32(1), LIT_GUINT32(10), LIT_GUINT32(100), LIT_GUINT32(1000),
  99     LIT_GUINT32(10000), LIT_GUINT32(100000), LIT_GUINT32(1000000),
 100     LIT_GUINT32(10000000), LIT_GUINT32(100000000), LIT_GUINT32(1000000000)
 101 };
 102
 103 typedef struct {
 104     guint64 lo;
 105     guint64 hi;
 106 } dec128_repr;
 107
 108 static const dec128_repr dec128decadeFactors[DECIMAL_MAX_SCALE+1] = {
 109     LIT_DEC128( 0, 0, 1u), /* == 1 */
 110     LIT_DEC128( 0, 0, 10u), /* == 10 */
 111     LIT_DEC128( 0, 0, 100u), /* == 100 */
 112     LIT_DEC128( 0, 0, 1000u), /* == 1e3m */
 113     LIT_DEC128( 0, 0, 10000u), /* == 1e4m */
 114     LIT_DEC128( 0, 0, 100000u), /* == 1e5m */
 115     LIT_DEC128( 0, 0, 1000000u), /* == 1e6m */
 116     LIT_DEC128( 0, 0, 10000000u), /* == 1e7m */
 117     LIT_DEC128( 0, 0, 100000000u), /* == 1e8m */
 118     LIT_DEC128( 0, 0, 1000000000u), /* == 1e9m */
 119     LIT_DEC128( 0, 2u, 1410065408u), /* == 1e10m */
 120     LIT_DEC128( 0, 23u, 1215752192u), /* == 1e11m */
 121     LIT_DEC128( 0, 232u, 3567587328u), /* == 1e12m */
 122     LIT_DEC128( 0, 2328u, 1316134912u), /* == 1e13m */
 123     LIT_DEC128( 0, 23283u, 276447232u), /* == 1e14m */
 124     LIT_DEC128( 0, 232830u, 2764472320u), /* == 1e15m */
 125     LIT_DEC128( 0, 2328306u, 1874919424u), /* == 1e16m */
 126     LIT_DEC128( 0, 23283064u, 1569325056u), /* == 1e17m */
 127     LIT_DEC128( 0, 232830643u, 2808348672u), /* == 1e18m */
 128     LIT_DEC128( 0, 2328306436u, 2313682944u), /* == 1e19m */
 129     LIT_DEC128( 5u, 1808227885u, 1661992960u), /* == 1e20m */
 130     LIT_DEC128( 54u, 902409669u, 3735027712u), /* == 1e21m */
 131     LIT_DEC128( 542u, 434162106u, 2990538752u), /* == 1e22m */
 132     LIT_DEC128( 5421u, 46653770u, 4135583744u), /* == 1e23m */
 133     LIT_DEC128( 54210u, 466537709u, 2701131776u), /* == 1e24m */
 134     LIT_DEC128( 542101u, 370409800u, 1241513984u), /* == 1e25m */
 135     LIT_DEC128( 5421010u, 3704098002u, 3825205248u), /* == 1e26m */
 136     LIT_DEC128( 54210108u, 2681241660u, 3892314112u), /* == 1e27m */
 137     LIT_DEC128( 542101086u, 1042612833u, 268435456u), /* == 1e28m */
 138 };
 139
 140 /* 192 bit addition: c = a + b
 141    addition is modulo 2**128, any carry is lost */
 142 DECINLINE static void add128(guint64 alo, guint64 ahi,
 143                              guint64 blo, guint64 bhi,
 144                              guint64* pclo, guint64* pchi)
 145 {
 146     alo += blo;
 147     if (alo < blo) ahi++; /* carry */
 148     ahi += bhi;
 149
 150     *pclo = alo;
 151     *pchi = ahi;
 152 }
 153
 154 /* 128 bit subtraction: c = a - b
 155    subtraction is modulo 2**128, any carry is lost */
 156 DECINLINE static void sub128(guint64 alo, guint64 ahi,
 157                              guint64 blo, guint64 bhi,
 158                              guint64* pclo, guint64* pchi)
 159 {
 160     guint64 clo, chi;
 161
 162     clo = alo - blo;
 163     chi = ahi - bhi;
 164     if (alo < blo) chi--; /* borrow */
 165
 166     *pclo = clo;
 167     *pchi = chi;
 168 }
 169
 170 /* 192 bit addition: c = a + b
 171    addition is modulo 2**192, any carry is lost */
 172 DECINLINE static void add192(guint64 alo, guint64 ami, guint64 ahi,
 173                              guint64 blo, guint64 bmi, guint64 bhi,
 174                              guint64* pclo, guint64* pcmi, guint64* pchi)
 175 {
 176     alo += blo;
 177     if (alo < blo) { /* carry low */
 178         ami++;
 179         if (ami == 0) ahi++; /* carry mid */
 180     }
 181     ami += bmi;
 182     if (ami < bmi) ahi++; /* carry mid */
 183     ahi += bhi;
 184     *pclo = alo;
 185     *pcmi = ami;
 186     *pchi = ahi;
 187 }
 188
 189 /* 192 bit subtraction: c = a - b
 190    subtraction is modulo 2**192, any carry is lost */
 191 DECINLINE static void sub192(guint64 alo, guint64 ami, guint64 ahi,
 192                              guint64 blo, guint64 bmi, guint64 bhi,
 193                              guint64* pclo, guint64* pcmi, guint64* pchi)
 194 {
 195     guint64 clo, cmi, chi;
 196
 197     clo = alo - blo;
 198     cmi = ami - bmi;
 199     chi = ahi - bhi;
 200     if (alo < blo) {
 201         if (cmi == 0) chi--; /* borrow mid */
 202         cmi--; /* borrow low */
 203     }
 204     if (ami < bmi) chi--; /* borrow mid */
 205     *pclo = clo;
 206     *pcmi = cmi;
 207     *pchi = chi;
 208 }
 209
 210 /* multiplication c(192bit) = a(96bit) * b(96bit) */
 211 DECINLINE static void mult96by96to192(guint32 alo, guint32 ami, guint32 ahi,
 212                                       guint32 blo, guint32 bmi, guint32 bhi,
 213                                       guint64* pclo, guint64* pcmi, guint64* pchi)
 214 {
 215     guint64 a, b, c, d;
 216     guint32 h0, h1, h2, h3, h4, h5;
 217     int carry0, carry1;
 218
 219     a = ((guint64)alo) * blo;
 220     h0 = (guint32) a;
 221
 222     a >>= 32; carry0 = 0;
 223     b = ((guint64)alo) * bmi;
 224     c = ((guint64)ami) * blo;
 225     a += b; if (a < b) carry0++;
 226     a += c; if (a < c) carry0++;
 227     h1 = (guint32) a;
 228
 229     a >>= 32; carry1 = 0;
 230     b = ((guint64)alo) * bhi;
 231     c = ((guint64)ami) * bmi;
 232     d = ((guint64)ahi) * blo;
 233     a += b; if (a < b) carry1++;
 234     a += c; if (a < c) carry1++;
 235     a += d; if (a < d) carry1++;
 236     h2 = (guint32) a;
 237
 238     a >>= 32; a += carry0; carry0 = 0;
 239     b = ((guint64)ami) * bhi;
 240     c = ((guint64)ahi) * bmi;
 241     a += b; if (a < b) carry0++;
 242     a += c; if (a < c) carry0++;
 243     h3 = (guint32) a;
 244
 245     a >>= 32; a += carry1;
 246     b = ((guint64)ahi) * bhi;
 247     a += b;
 248     h4 = (guint32) a;
 249
 250     a >>= 32; a += carry0;
 251     h5 = (guint32) a;
 252
 253     *pclo = ((guint64)h1) << 32 | h0;
 254     *pcmi = ((guint64)h3) << 32 | h2;
 255     *pchi = ((guint64)h5) << 32 | h4;
 256 }
 257
 258 /* multiplication c(128bit) = a(96bit) * b(32bit) */
 259 DECINLINE static void mult96by32to128(guint32 alo, guint32 ami, guint32 ahi,
 260                                       guint32 factor,
 261                                       guint64* pclo, guint64* pchi)
 262 {
 263     guint64 a;
 264     guint32 h0, h1;
 265
 266     a = ((guint64)alo) * factor;
 267     h0 = (guint32) a;
 268
 269     a >>= 32;
 270     a += ((guint64)ami) * factor;
 271     h1 = (guint32) a;
 272
 273     a >>= 32;
 274     a += ((guint64)ahi) * factor;
 275
 276     *pclo = ((guint64)h1) << 32 | h0;
 277     *pchi = a;
 278 }
 279
 280 /* multiplication c(128bit) *= b(32bit) */
 281 DECINLINE static int mult128by32(guint64* pclo, guint64* pchi, guint32 factor, int roundBit)
 282 {
 283     guint64 a;
 284     guint32 h0, h1;
 285
 286     a = ((guint64)(guint32)(*pclo)) * factor;
 287     if (roundBit) a += factor / 2;
 288     h0 = (guint32) a;
 289
 290     a >>= 32;
 291     a += (*pclo >> 32) * factor;
 292     h1 = (guint32) a;
 293
 294     *pclo = ((guint64)h1) << 32 | h0;
 295
 296     a >>= 32;
 297     a += ((guint64)(guint32)(*pchi)) * factor;
 298     h0 = (guint32) a;
 299
 300     a >>= 32;
 301     a += (*pchi >> 32) * factor;
 302     h1 = (guint32) a;
 303
 304     *pchi = ((guint64)h1) << 32 | h0;
 305
 306     return ((a >> 32) == 0) ? DECIMAL_SUCCESS : DECIMAL_OVERFLOW;
 307 }
 308
 309 DECINLINE static int mult128DecadeFactor(guint64* pclo, guint64* pchi, int powerOfTen)
 310 {
 311     int idx, rc;
 312
 313     while (powerOfTen > 0) {
 314         idx = (powerOfTen >= DECIMAL_MAX_INTFACTORS) ? DECIMAL_MAX_INTFACTORS : powerOfTen;
 315         powerOfTen -= idx;
 316         rc = mult128by32(pclo, pchi, constantsDecadeInt32Factors[idx], 0);
 317         if (rc != DECIMAL_SUCCESS) return rc;
 318     }
 319     return DECIMAL_SUCCESS;
 320 }
 321
 322 /* division: x(128bit) /= factor(32bit)
 323    returns roundBit */
 324 DECINLINE static int div128by32(guint64* plo, guint64* phi, guint32 factor, guint32* pRest)
 325 {
 326     guint64 a, b, c, h;
 327
 328     h = *phi;
 329     a = (guint32)(h >> 32);
 330     b = a / factor;
 331     a -= b * factor;
 332     a <<= 32;
 333     a |= (guint32) h;
 334     c = a / factor;
 335     a -= c * factor;
 336     a <<= 32;
 337     *phi = b << 32 | (guint32)c;
 338
 339     h = *plo;
 340     a |= (guint32)(h >> 32);
 341     b = a / factor;
 342     a -= b * factor;
 343     a <<= 32;
 344     a |= (guint32) h;
 345     c = a / factor;
 346     a -= c * factor;
 347     *plo = b << 32 | (guint32)c;
 348
 349     if (pRest) *pRest = (guint32) a;
 350
 351     a <<= 1;
 352     return (a >= factor || (a == factor && (c & 1) == 1)) ? 1 : 0;
 353 }
 354
 355 /* division: x(192bit) /= factor(32bit)
 356    no rest and no rounding*/
 357 DECINLINE static void div192by32(guint64* plo, guint64* pmi, guint64* phi,
 358                                  guint32 factor)
 359 {
 360     guint64 a, b, c, h;
 361
 362     h = *phi;
 363     a = (guint32)(h >> 32);
 364     b = a / factor;
 365     a -= b * factor;
 366     a <<= 32;
 367     a |= (guint32) h;
 368     c = a / factor;
 369     a -= c * factor;
 370     a <<= 32;
 371     *phi = b << 32 | (guint32)c;
 372
 373     h = *pmi;
 374     a |= (guint32)(h >> 32);
 375     b = a / factor;
 376     a -= b * factor;
 377     a <<= 32;
 378     a |= (guint32) h;
 379     c = a / factor;
 380     a -= c * factor;
 381     a <<= 32;
 382     *pmi = b << 32 | (guint32)c;
 383
 384     h = *plo;
 385     a |= (guint32)(h >> 32);
 386     b = a / factor;
 387     a -= b * factor;
 388     a <<= 32;
 389     a |= (guint32) h;
 390     c = a / factor;
 391     a -= c * factor;
 392     a <<= 32;
 393     *plo = b << 32 | (guint32)c;
 394 }
 395
 396 /* returns upper 32bit for a(192bit) /= b(32bit)
 397    a will contain remainder */
 398 static guint32 div192by96to32withRest(guint64* palo, guint64* pami, guint64* pahi,
 399                                       guint32 blo, guint32 bmi, guint32 bhi)
 400 {
 401     guint64 rlo, rmi, rhi; /* remainder */
 402     guint64 tlo, thi; /* term */
 403     guint32 c;
 404
 405     rlo = *palo; rmi = *pami; rhi = *pahi;
 406     if (rhi >= (((guint64)bhi) << 32)) {
 407         c = LIT_GUINT32(0xFFFFFFFF);
 408     } else {
 409         c = (guint32) (rhi / bhi);
 410     }
 411     mult96by32to128(blo, bmi, bhi, c, &tlo, &thi);
 412     sub192(rlo, rmi, rhi, 0, tlo, thi, &rlo, &rmi, &rhi);
 413     while (((gint64)rhi) < 0) {
 414         c--;
 415         add192(rlo, rmi, rhi, 0, (((guint64)bmi)<<32) | blo, bhi, &rlo, &rmi, &rhi);
 416     }
 417     *palo = rlo ; *pami = rmi ; *pahi = rhi;
 418
 419     POSTCONDITION(rhi >> 32 == 0);
 420
 421     return c;
 422 }
 423
 424 /* c(128bit) = a(192bit) / b(96bit)
 425    b must be >= 2^95 */
 426 static void div192by96to128(guint64 alo, guint64 ami, guint64 ahi,
 427                             guint32 blo, guint32 bmi, guint32 bhi,
 428                             guint64* pclo, guint64* pchi)
 429 {
 430     guint64 rlo, rmi, rhi; /* remainder */
 431     guint32 h, c;
 432
 433     PRECONDITION(ahi < (((guint64)bhi) << 32 | bmi)
 434         || (ahi == (((guint64)bhi) << 32 | bmi) && (ami >> 32) > blo));
 435
 436     /* high 32 bit*/
 437     rlo = alo; rmi = ami; rhi = ahi;
 438     h = div192by96to32withRest(&rlo, &rmi, &rhi, blo, bmi, bhi);
 439
 440     /* mid 32 bit*/
 441     rhi = (rhi << 32) | (rmi >> 32); rmi = (rmi << 32) | (rlo >> 32); rlo <<= 32;
 442     *pchi = (((guint64)h) << 32) | div192by96to32withRest(&rlo, &rmi, &rhi, blo, bmi, bhi);
 443
 444     /* low 32 bit */
 445     rhi = (rhi << 32) | (rmi >> 32); rmi = (rmi << 32) | (rlo >> 32); rlo <<= 32;
 446     h = div192by96to32withRest(&rlo, &rmi, &rhi, blo, bmi, bhi);
 447
 448     /* estimate lowest 32 bit (two last bits may be wrong) */
 449     if (rhi >= bhi) {
 450         c = LIT_GUINT32(0xFFFFFFFF);
 451     } else {
 452         rhi <<= 32;
 453         c = (guint32) (rhi / bhi);
 454     }
 455     *pclo = (((guint64)h) << 32) | c;
 456 }
 457
 458 DECINLINE static void roundUp128(guint64* pclo, guint64* pchi) {
 459     if (++(*pclo) == 0) ++(*pchi);
 460 }
 461
 462 static int normalize128(guint64* pclo, guint64* pchi, int* pScale,
 463                         int roundFlag, int roundBit)
 464 {
 465     guint32 overhang = (guint32)(*pchi >> 32);
 466     int scale = *pScale;
 467     int deltaScale;
 468
 469     while (overhang != 0) {
 470         for (deltaScale = 1; deltaScale < DECIMAL_MAX_INTFACTORS; deltaScale++)
 471         {
 472             if (overhang < constantsDecadeInt32Factors[deltaScale]) break;
 473         }
 474
 475         scale -= deltaScale;
 476         if (scale < 0) return DECIMAL_OVERFLOW;
 477
 478         roundBit = div128by32(pclo, pchi, constantsDecadeInt32Factors[deltaScale], 0);
 479
 480         overhang = (guint32)(*pchi >> 32);
 481         if (roundFlag && roundBit && *pclo == (guint64)-1 && (gint32)*pchi == (gint32)-1) {
 482             overhang = 1;
 483         }
 484     }
 485
 486     *pScale = scale;
 487
 488     if (roundFlag && roundBit) {
 489         roundUp128(pclo, pchi);
 490         TEST((*pchi >> 32) == 0);
 491     }
 492
 493     return DECIMAL_SUCCESS;
 494 }
 495
 496 DECINLINE static int maxLeftShift(/*[In, Out]*/decimal_repr* pA)
 497 {
 498     guint64 lo64 = (((guint64)(pA->mid32)) << 32) | pA->lo32;
 499     guint32 hi32 = pA->hi32;
 500     int shift;
 501
 502     for (shift = 0; ((gint32)hi32) >= 0 && shift < 96; shift++) {
 503         hi32 <<= 1;
 504         if (((gint64)lo64) < 0) hi32++;
 505         lo64 <<= 1;
 506     }
 507
 508     pA->lo32 = (guint32) lo64;
 509     pA->mid32 = (guint32)(lo64>>32);
 510     pA->hi32 = hi32;
 511
 512     return shift;
 513 }
 514
 515 DECINLINE static void rshift128(guint64* pclo, guint64* pchi)
 516 {
 517     *pclo >>= 1;
 518     if (*pchi & 1) *pclo |= LIT_GUINT64_HIGHBIT;
 519     *pchi >>= 1;
 520 }
 521
 522 DECINLINE static void lshift96(guint32* pclo, guint32* pcmid, guint32* pchi)
 523 {
 524     *pchi <<= 1;
 525     if (*pcmid & LIT_GUINT32_HIGHBIT) (*pchi)++;
 526     *pcmid <<= 1;
 527     if (*pclo & LIT_GUINT32_HIGHBIT) (*pcmid)++;
 528     *pclo <<= 1;
 529 }
 530
 531 DECINLINE static void lshift128(guint64* pclo, guint64* pchi)
 532 {
 533     *pchi <<= 1;
 534     if (*pclo & LIT_GUINT64_HIGHBIT) (*pchi)++;
 535     *pclo <<= 1;
 536 }
 537
 538 DECINLINE static void rshift192(guint64* pclo, guint64* pcmi, guint64* pchi)
 539 {
 540     *pclo >>= 1;
 541     if (*pcmi & 1) *pclo |= LIT_GUINT64_HIGHBIT;
 542     *pcmi >>= 1;
 543     if (*pchi & 1) *pcmi |= LIT_GUINT64_HIGHBIT;
 544     *pchi >>= 1;
 545 }
 546
 547 /* returns log2(a) or DECIMAL_LOG_NEGINF for a = 0 */
 548 DECINLINE static int log2_32(guint32 a)
 549 {
 550     int tlog2 = 0;
 551
 552     if (a == 0) return DECIMAL_LOG_NEGINF;
 553
 554     if ((a >> 16) != 0) {
 555         a >>= 16;
 556         tlog2 += 16;
 557     }
 558     if ((a >> 8) != 0) {
 559         a >>= 8;
 560         tlog2 += 8;
 561     }
 562     if ((a >> 4) != 0) {
 563         a >>= 4;
 564         tlog2 += 4;
 565     }
 566     if ((a >> 2) != 0) {
 567         a >>= 2;
 568         tlog2 += 2;
 569     }
 570     if ((a >> 1) != 0) {
 571         a >>= 1;
 572         tlog2 += 1;
 573     }
 574     tlog2 += (int) a;
 575
 576     return tlog2;
 577 }
 578
 579 /* returns log2(a) or DECIMAL_LOG_NEGINF for a = 0 */
 580 DECINLINE static int log2_64(guint64 a)
 581 {
 582     int tlog2 = 0;
 583
 584     if (a == 0) return DECIMAL_LOG_NEGINF;
 585
 586     if ((a >> 32) != 0) {
 587         a >>= 32;
 588         tlog2 += 32;
 589     }
 590     if ((a >> 16) != 0) {
 591         a >>= 16;
 592         tlog2 += 16;
 593     }
 594     if ((a >> 8) != 0) {
 595         a >>= 8;
 596         tlog2 += 8;
 597     }
 598     if ((a >> 4) != 0) {
 599         a >>= 4;
 600         tlog2 += 4;
 601     }
 602     if ((a >> 2) != 0) {
 603         a >>= 2;
 604         tlog2 += 2;
 605     }
 606     if ((a >> 1) != 0) {
 607         a >>= 1;
 608         tlog2 += 1;
 609     }
 610     tlog2 += (int) a;
 611
 612     return tlog2;
 613 }
 614
 615 /* returns log2(a) or DECIMAL_LOG_NEGINF for a = 0 */
 616 DECINLINE static int log2_128(guint64 alo, guint64 ahi)
 617 {
 618     if (ahi == 0) return log2_64(alo);
 619     else return log2_64(ahi) + 64;
 620 }
 621
 622 /* returns a upper limit for log2(a) considering scale */
 623 DECINLINE static int log2withScale_128(guint64 alo, guint64 ahi, int scale)
 624 {
 625     int tlog2 = log2_128(alo, ahi);
 626     if (tlog2 < 0) tlog2 = 0;
 627     return tlog2 - (scale * 33219) / 10000;
 628 }
 629
 630 DECINLINE static int pack128toDecimal(/*[Out]*/decimal_repr* pA, guint64 alo, guint64 ahi,
 631                                       int scale, int sign)
 632 {
 633     PRECONDITION((ahi >> 32) == 0);
 634     PRECONDITION(sign == 0 || sign == 1);
 635     PRECONDITION(scale >= 0 && scale <= DECIMAL_MAX_SCALE);
 636
 637     if (scale < 0 || scale > DECIMAL_MAX_SCALE || (ahi >> 32) != 0) {
 638         return DECIMAL_OVERFLOW;
 639     }
 640
 641     pA->lo32 = (guint32) alo;
 642     pA->mid32 = (guint32) (alo >> 32);
 643     pA->hi32 = (guint32) ahi;
 644     pA->signscale.sign = sign;
 645     pA->signscale.scale = scale;
 646
 647     return DECIMAL_SUCCESS;
 648 }
 649
 650 DECINLINE static int adjustScale128(guint64* palo, guint64* pahi, int deltaScale)
 651 {
 652     int idx, rc;
 653
 654     if (deltaScale < 0) {
 655         deltaScale *= -1;
 656         if (deltaScale > DECIMAL_MAX_SCALE) return DECIMAL_INTERNAL_ERROR;
 657         while (deltaScale > 0) {
 658             idx = (deltaScale > DECIMAL_MAX_INTFACTORS) ? DECIMAL_MAX_INTFACTORS : deltaScale;
 659             deltaScale -= idx;
 660             div128by32(palo, pahi, constantsDecadeInt32Factors[idx], 0);
 661         }
 662     } else if (deltaScale > 0) {
 663         if (deltaScale > DECIMAL_MAX_SCALE) return DECIMAL_INTERNAL_ERROR;
 664         while (deltaScale > 0) {
 665             idx = (deltaScale > DECIMAL_MAX_INTFACTORS) ? DECIMAL_MAX_INTFACTORS : deltaScale;
 666             deltaScale -= idx;
 667             rc = mult128by32(palo, pahi, constantsDecadeInt32Factors[idx], 0);
 668             if (rc != DECIMAL_SUCCESS) return rc;
 669         }
 670     }
 671
 672     return DECIMAL_SUCCESS;
 673 }
 674
 675 /* input: c * 10^-(*pScale) * 2^-exp
 676    output: c * 10^-(*pScale) with
 677    minScale <= *pScale <= maxScale and (chi >> 32) == 0 */
 678 DECINLINE static int rescale128(guint64* pclo, guint64* pchi, int* pScale, int texp,
 679                                 int minScale, int maxScale, int roundFlag)
 680 {
 681     guint32 factor, overhang;
 682     int scale, i, rc, roundBit = 0;
 683
 684     PRECONDITION(texp >= 0);
 685
 686     scale = *pScale;
 687
 688     if (texp > 0) {
 689         /* reduce exp */
 690         while (texp > 0 && scale <= maxScale) {
 691             overhang = (guint32)(*pchi >> 32);
 692             while (texp > 0 && ((*pclo & 1) == 0 || overhang > (2<<DECIMAL_MAX_INTFACTORS))) {
 693                 if (--texp == 0) roundBit = (int)(*pclo & 1);
 694                 rshift128(pclo, pchi);
 695                 overhang = (guint32)(*pchi >> 32);
 696             }
 697
 698             if (texp > DECIMAL_MAX_INTFACTORS) i = DECIMAL_MAX_INTFACTORS;
 699             else i = texp;
 700             if (scale + i > maxScale) i = maxScale - scale;
 701             if (i == 0) break;
 702             texp -= i;
 703             scale += i;
 704             factor = constantsDecadeInt32Factors[i] >> i; /* 10^i/2^i=5^i */
 705             mult128by32(pclo, pchi, factor, 0);
 706     /*printf("3: %.17e\n", (((double)chi) * pow(2,64) + clo) * pow(10, -scale) * pow(2, -texp));*/
 707         }
 708
 709         while (texp > 0) {
 710             if (--texp == 0) roundBit = (int)(*pclo & 1);
 711             rshift128(pclo, pchi);
 712         }
 713     }
 714
 715     TEST(texp == 0);
 716
 717     while (scale > maxScale) {
 718         i = scale - maxScale;
 719         if (i > DECIMAL_MAX_INTFACTORS) i = DECIMAL_MAX_INTFACTORS;
 720         scale -= i;
 721         roundBit = div128by32(pclo, pchi, constantsDecadeInt32Factors[i], 0);
 722     }
 723
 724     while (scale < minScale) {
 725         if (!roundFlag) roundBit = 0;
 726         i = minScale - scale;
 727         if (i > DECIMAL_MAX_INTFACTORS) i = DECIMAL_MAX_INTFACTORS;
 728         scale += i;
 729         rc = mult128by32(pclo, pchi, constantsDecadeInt32Factors[i], roundBit);
 730         if (rc != DECIMAL_SUCCESS) return rc;
 731         roundBit = 0;
 732     }
 733
 734     TEST(scale >= 0 && scale <= DECIMAL_MAX_SCALE);
 735
 736     *pScale = scale;
 737
 738     return normalize128(pclo, pchi, pScale, roundFlag, roundBit);
 739 }
 740
 741 /* performs a += b */
 742 gint32 mono_decimalIncr(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
 743 {
 744     guint64 alo, ahi, blo, bhi;
 745     int log2A, log2B, log2Result, log10Result, rc;
 746     int subFlag, sign, scaleA, scaleB;
 747
 748     MONO_ARCH_SAVE_REGS;
 749
 750     DECTO128(pA, alo, ahi);
 751     DECTO128(pB, blo, bhi);
 752
 753     sign = pA->signscale.sign;
 754     subFlag = sign - (int)pB->signscale.sign;
 755     scaleA = pA->signscale.scale;
 756     scaleB = pB->signscale.scale;
 757     if (scaleA == scaleB) {
 758         /* same scale, that's easy */
 759         if (subFlag) {
 760             sub128(alo, ahi, blo, bhi, &alo, &ahi);
 761             if (ahi & LIT_GUINT64_HIGHBIT) {
 762                 alo--;
 763                 alo = ~alo;
 764                 if (alo == 0) ahi--;
 765                 ahi = ~ahi;
 766                 sign = !sign;
 767             }
 768         } else {
 769             add128(alo, ahi, blo, bhi, &alo, &ahi);
 770         }
 771         rc = normalize128(&alo, &ahi, &scaleA, 1, 0);
 772     } else {
 773         /* scales must be adjusted */
 774         /* Estimate log10 and scale of result for adjusting scales */
 775         log2A = log2withScale_128(alo, ahi, scaleA);
 776         log2B = log2withScale_128(blo, bhi, scaleB);
 777         log2Result = (log2A >= log2B) ? log2A : log2B;
 778         if (!subFlag) log2Result++; /* result can have one bit more */
 779         log10Result = (log2Result * 1000) / 3322 + 1;
 780         /* we will calculate in 128bit, so we may need to adjust scale */
 781         if (scaleB > scaleA) scaleA = scaleB;
 782         if (scaleA + log10Result > DECIMAL_MAX_SCALE + 7) {
 783             /* this may not fit in 128bit, so limit it */
 784             scaleA = DECIMAL_MAX_SCALE + 7 - log10Result;
 785         }
 786
 787         rc = adjustScale128(&alo, &ahi, scaleA - (int)pA->signscale.scale);
 788         if (rc != DECIMAL_SUCCESS) return rc;
 789         rc = adjustScale128(&blo, &bhi, scaleA - scaleB);
 790         if (rc != DECIMAL_SUCCESS) return rc;
 791
 792         if (subFlag) {
 793             sub128(alo, ahi, blo, bhi, &alo, &ahi);
 794             if (ahi & LIT_GUINT64_HIGHBIT) {
 795                 alo--;
 796                 alo = ~alo;
 797                 if (alo == 0) ahi--;
 798                 ahi = ~ahi;
 799                 sign = !sign;
 800             }
 801         } else {
 802             add128(alo, ahi, blo, bhi, &alo, &ahi);
 803         }
 804
 805         if (rc != DECIMAL_SUCCESS) return rc;
 806
 807         rc = rescale128(&alo, &ahi,&scaleA, 0, 0, DECIMAL_MAX_SCALE, 1);
 808     }
 809
 810     if (rc != DECIMAL_SUCCESS) return rc;
 811
 812     return pack128toDecimal(pA, alo, ahi, scaleA, sign);
 813 }
 814
 815 /* performs a += factor * constants[idx] */
 816 static int incMultConstant128(guint64* palo, guint64* pahi, int idx, int factor)
 817 {
 818     guint64 blo, bhi, h;
 819
 820     PRECONDITION(idx >= 0 && idx <= DECIMAL_MAX_SCALE);
 821     PRECONDITION(factor > 0 && factor <= 9);
 822
 823     blo = dec128decadeFactors[idx].lo;
 824     h = bhi = dec128decadeFactors[idx].hi;
 825     if (factor != 1) {
 826         mult128by32(&blo, &bhi, factor, 0);
 827         if (h > bhi) return DECIMAL_OVERFLOW;
 828     }
 829     h = *pahi;
 830     add128(*palo, *pahi, blo, bhi, palo, pahi);
 831     if (h > *pahi) return DECIMAL_OVERFLOW;
 832     return DECIMAL_SUCCESS;
 833 }
 834
 835 DECINLINE static void div128DecadeFactor(guint64* palo, guint64* pahi, int powerOfTen)
 836 {
 837     int idx, roundBit = 0;
 838
 839     while (powerOfTen > 0) {
 840         idx = (powerOfTen > DECIMAL_MAX_INTFACTORS) ? DECIMAL_MAX_INTFACTORS : powerOfTen;
 841         powerOfTen -= idx;
 842         roundBit = div128by32(palo, pahi, constantsDecadeInt32Factors[idx], 0);
 843     }
 844
 845     if (roundBit) roundUp128(palo, pahi);
 846 }
 847
 848 /* calc significant digits of mantisse */
 849 DECINLINE static int calcDigits(guint64 alo, guint64 ahi)
 850 {
 851     int tlog2 = 0;
 852     int tlog10;
 853
 854     if (ahi == 0) {
 855         if (alo == 0) {
 856             return 0; /* zero has no signficant digits */
 857         } else {
 858             tlog2 = log2_64(alo);
 859         }
 860     } else {
 861         tlog2 = 64 + log2_64(ahi);
 862     }
 863
 864     tlog10 = (tlog2 * 1000) / 3322;
 865     /* we need an exact floor value of log10(a) */
 866     if (dec128decadeFactors[tlog10].hi > ahi
 867             || (dec128decadeFactors[tlog10].hi == ahi
 868                     && dec128decadeFactors[tlog10].lo > alo)) {
 869         --tlog10;
 870     }
 871     return tlog10+1;
 872 }
 873
 874 gint32 mono_double2decimal(/*[Out]*/decimal_repr* pA, double val, gint32 digits)
 875 {
 876     guint64 alo, ahi;
 877     guint64* p = (guint64*)(&val);
 878     int sigDigits, sign, texp, rc, scale;
 879     guint16 k;
 880
 881     PRECONDITION(digits <= 15);
 882
 883     sign = ((*p & LIT_GUINT64_HIGHBIT) != 0) ? 1 : 0;
 884     k = ((guint16)((*p) >> 52)) & 0x7FF;
 885     alo = (*p & LIT_GUINT64(0xFFFFFFFFFFFFF)) | LIT_GUINT64(0x10000000000000);
 886     ahi = 0;
 887
 888     texp = (k & 0x7FF) - 0x3FF;
 889     if (k == 0x7FF || texp >= 96) return DECIMAL_OVERFLOW; /* NaNs, SNaNs, Infinities or >= 2^96 */
 890     if (k == 0 || texp <= -94) { /* Subnormals, Zeros or < 2^-94 */
 891         DECINIT(pA); /* return zero */
 892         return DECIMAL_SUCCESS;
 893     }
 894
 895     texp -= 52;
 896     if (texp > 0) {
 897         for (; texp > 0; texp--) {
 898             lshift128(&alo, &ahi);
 899         }
 900     }
 901
 902     scale = 0;
 903     rc = rescale128(&alo, &ahi, &scale, -texp, 0, DECIMAL_MAX_SCALE, 0);
 904     if (rc != DECIMAL_SUCCESS) return rc;
 905
 906     sigDigits = calcDigits(alo, ahi);
 907     /* too much digits, then round */
 908     if (sigDigits > digits) {
 909         div128DecadeFactor(&alo, &ahi, sigDigits - digits);
 910         scale -= sigDigits - digits;
 911         /* check value, may be 10^(digits+1) caused by rounding */
 912         if (ahi == dec128decadeFactors[digits].hi
 913             && alo == dec128decadeFactors[digits].lo) {
 914             div128by32(&alo, &ahi, 10, 0);
 915             scale--;
 916         }
 917         if (scale < 0) {
 918             rc = mult128DecadeFactor(&alo, &ahi, -scale);
 919             if (rc != DECIMAL_SUCCESS) return rc;
 920             scale = 0;
 921         }
 922     }
 923
 924     return pack128toDecimal(pA, alo, ahi, scale, sign);
 925 }
 926
 927 /**
 928  * mono_string2decimal:
 929  * @decimal_repr:
 930  * @str:
 931  * @decrDecimal:
 932  * @sign:
 933  *
 934  * converts a digit string to decimal
 935  * The significant digits must be passed as an integer in buf !
 936  *
 937  * 1. Example:
 938  *   if you want to convert the number 123.456789012345678901234 to decimal
 939  *     buf := "123456789012345678901234"
 940  *     decrDecimal := 3
 941  *     sign := 0
 942  *
 943  * 2. Example:
 944  *   you want to convert -79228162514264337593543950335 to decimal
 945  *     buf := "79228162514264337593543950335"
 946  *     decrDecimal := 29
 947  *     sign := 1
 948  *
 949  * 3. Example:
 950  *   you want to convert -7922816251426433759354395033.250000000000001 to decimal
 951  *     buf := "7922816251426433759354395033250000000000001"
 952  *     decrDecimal := 29
 953  *     sign := 1
 954  *     returns (decimal)-7922816251426433759354395033.3
 955  *
 956  * 4. Example:
 957  *   you want to convert -7922816251426433759354395033.250000000000000 to decimal
 958  *     buf := "7922816251426433759354395033250000000000000"
 959  *     decrDecimal := 29
 960  *     sign := 1
 961  *     returns (decimal)-7922816251426433759354395033.2
 962  *
 963  * 5. Example:
 964  *   you want to convert -7922816251426433759354395033.150000000000000 to decimal
 965  *     buf := "7922816251426433759354395033150000000000000"
 966  *     decrDecimal := 29
 967  *     sign := 1
 968  *     returns (decimal)-7922816251426433759354395033.2
 969  *
 970  * Uses banker's rule for rounding if there are more digits than can be
 971  * represented by the significant
 972  */
 973 gint32 mono_string2decimal(/*[Out]*/decimal_repr* pA, MonoString* str, gint32 decrDecimal, gint32 sign)
 974 {
 975     gushort *buf = mono_string_chars(str);
 976     gushort *p;
 977     guint64 alo, ahi;
 978     int n, rc, i, len, sigLen = -1, firstNonZero;
 979     int scale, roundBit = 0;
 980
 981     alo = ahi = 0;
 982     DECINIT(pA);
 983
 984     for (p = buf, len = 0; *p != 0; len++, p++) { }
 985
 986     for (p = buf, i = 0; *p != 0; i++, p++) {
 987         n = *p - '0';
 988         if (n < 0 || n > 9) {
 989             return DECIMAL_INVALID_CHARACTER;
 990         }
 991         if (n) {
 992             if (sigLen < 0) {
 993                 firstNonZero = i;
 994                 sigLen = (len - firstNonZero > DECIMAL_MAX_SCALE+1)
 995                     ? DECIMAL_MAX_SCALE+1+firstNonZero : len;
 996                 if (decrDecimal > sigLen+1) return DECIMAL_OVERFLOW;
 997             }
 998             if (i >= sigLen) break;
 999             rc = incMultConstant128(&alo, &ahi, sigLen - 1 - i, n);
1000             if (rc != DECIMAL_SUCCESS) {
1001                 return rc;
1002             }
1003         }
1004     }
1005
1006     scale = sigLen - decrDecimal;
1007
1008     if (i < len) { /* too much digits, we must round */
1009         n = buf[i] - '0';
1010         if (n < 0 || n > 9) {
1011             return DECIMAL_INVALID_CHARACTER;
1012         }
1013         if (n > 5) roundBit = 1;
1014         else if (n == 5) { /* we must take a nearer look */
1015             n = buf[i-1] - '0';
1016             for (++i; i < len; ++i) {
1017                 if (buf[i] != '0') break; /* we are greater than .5 */
1018             }
1019             if (i < len /* greater than exactly .5 */
1020                 || n % 2 == 1) { /* exactly .5, use banker's rule for rounding */
1021                 roundBit = 1;
1022             }
1023         }
1024     }
1025
1026     if (ahi != 0) {
1027         rc = normalize128(&alo, &ahi, &scale, 1, roundBit);
1028         if (rc != DECIMAL_SUCCESS) return rc;
1029     }
1030
1031     if (alo == 0 && ahi == 0) {
1032         DECINIT(pA);
1033         return DECIMAL_SUCCESS;
1034     } else {
1035         return pack128toDecimal(pA, alo, ahi, sigLen - decrDecimal, sign);
1036     }
1037 }
1038
1039 /**
1040  * mono_decimal2string:
1041  * @
1042  * returns minimal number of digit string to represent decimal
1043  * No leading or trailing zeros !
1044  * Examples:
1045  * *pA == 0            =>   buf = "", *pDecPos = 1, *pSign = 0
1046  * *pA == 12.34        =>   buf = "1234", *pDecPos = 2, *pSign = 0
1047  * *pA == -1000.0000   =>   buf = "1", *pDecPos = 4, *pSign = 1
1048  * *pA == -0.00000076  =>   buf = "76", *pDecPos = -6, *pSign = 0
1049  *
1050  * Parameters:
1051  *    pA         decimal instance to convert
1052  *    digits     < 0: use decimals instead
1053  *               = 0: gets mantisse as integer
1054  *               > 0: gets at most <digits> digits, rounded according to banker's rule if necessary
1055  *    decimals   only used if digits < 0
1056  *               >= 0: number of decimal places
1057  *    buf        pointer to result buffer
1058  *    bufSize    size of buffer
1059  *    pDecPos    receives insert position of decimal point relative to start of buffer
1060  *    pSign      receives sign
1061  */
1062 gint32 mono_decimal2string(/*[In]*/decimal_repr* pA, gint32 digits, gint32 decimals,
1063                                    MonoArray* pArray, gint32 bufSize, gint32* pDecPos, gint32* pSign)
1064 {
1065     guint16 tmp[41];
1066     guint16 *buf = (guint16*) mono_array_addr(pArray, guint16, 0);
1067     guint16 *q, *p = tmp;
1068     decimal_repr aa;
1069     guint64 alo, ahi;
1070     guint32 rest;
1071     gint32 sigDigits, d;
1072     int i, scale, len;
1073
1074     MONO_ARCH_SAVE_REGS;
1075
1076     scale = pA->signscale.scale;
1077     DECTO128(pA, alo, ahi);
1078     sigDigits = calcDigits(alo, ahi); /* significant digits */
1079
1080     /* calc needed digits (without leading or trailing zeros) */
1081     d = (digits == 0) ? sigDigits : digits;
1082     if (d < 0) { /* use decimals ? */
1083         if (0 <= decimals && decimals < scale) {
1084             d = sigDigits - scale + decimals;
1085         } else {
1086             d = sigDigits; /* use all you can get */
1087         }
1088     }
1089
1090     if (sigDigits > d) { /* we need to round decimal number */
1091         DECCOPY(&aa, pA);
1092         aa.signscale.scale = DECIMAL_MAX_SCALE;
1093         mono_decimalRound(&aa, DECIMAL_MAX_SCALE - sigDigits + d);
1094         DECTO128(&aa, alo, ahi);
1095         sigDigits += calcDigits(alo, ahi) - d;
1096     }
1097
1098     len = 0;
1099     if (d > 0) {
1100         /* get digits starting from the tail */
1101         for (; (alo != 0 || ahi != 0) && len < 40; len++) {
1102             div128by32(&alo, &ahi, 10, &rest);
1103             *p++ = '0' + (char) rest;
1104         }
1105     }
1106     *p = 0;
1107
1108     if (len >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
1109
1110     /* now we have the minimal count of digits,
1111        extend to wished count of digits or decimals */
1112     q = buf;
1113     if (digits >= 0) { /* count digits */
1114         if (digits >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
1115         if (len == 0) {
1116             /* zero or rounded to zero */
1117             *pDecPos = 1;
1118         } else {
1119             /* copy significant digits */
1120             for (i = 0; i < len; i++) {
1121                 *q++ = *(--p);
1122             }
1123             *pDecPos = sigDigits - scale;
1124         }
1125         /* add trailing zeros */
1126         for (i = len; i < digits; i++) {
1127             *q++ = '0';
1128         }
1129     } else { /* count decimals */
1130         if (scale >= sigDigits) { /* add leading zeros */
1131             if (decimals+2 >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
1132             *pDecPos = 1;
1133             for (i = 0; i <= scale - sigDigits; i++) {
1134                 *q++ = '0';
1135             }
1136         } else {
1137             if (sigDigits - scale + decimals+1 >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
1138             *pDecPos = sigDigits - scale;
1139         }
1140         /* copy significant digits */
1141         for (i = 0; i < len; i++) {
1142             *q++ = *(--p);
1143         }
1144         /* add trailing zeros */
1145         for (i = scale; i < decimals; i++) {
1146             *q++ = '0';
1147         }
1148     }
1149     *q = 0;
1150
1151     *pSign = (sigDigits > 0) ? pA->signscale.sign : 0; /* zero has positive sign */
1152
1153     return DECIMAL_SUCCESS;
1154 }
1155
1156 /**
1157  * mono_decimal2UInt64:
1158  * @pA
1159  * @pResult
1160  * converts a decimal to an UInt64 without rounding
1161  */
1162 gint32 mono_decimal2UInt64(/*[In]*/decimal_repr* pA, guint64* pResult)
1163 {
1164     guint64 alo, ahi;
1165     int scale;
1166
1167     MONO_ARCH_SAVE_REGS;
1168
1169     DECTO128(pA, alo, ahi);
1170     scale = pA->signscale.scale;
1171     if (scale > 0) {
1172         div128DecadeFactor(&alo, &ahi, scale);
1173     }
1174
1175     /* overflow if integer too large or < 0 */
1176     if (ahi != 0 || (alo != 0 && pA->signscale.sign)) return DECIMAL_OVERFLOW;
1177
1178     *pResult = alo;
1179     return DECIMAL_SUCCESS;
1180 }
1181
1182 /**
1183  * mono_decimal2Int64:
1184  * @pA:
1185  * pResult:
1186  * converts a decimal to an Int64 without rounding
1187  */
1188 gint32 mono_decimal2Int64(/*[In]*/decimal_repr* pA, gint64* pResult)
1189 {
1190     guint64 alo, ahi;
1191     int sign, scale;
1192
1193     MONO_ARCH_SAVE_REGS;
1194
1195     DECTO128(pA, alo, ahi);
1196     scale = pA->signscale.scale;
1197     if (scale > 0) {
1198         div128DecadeFactor(&alo, &ahi, scale);
1199     }
1200
1201     if (ahi != 0) return DECIMAL_OVERFLOW;
1202
1203     sign = pA->signscale.sign;
1204     if (sign && alo != 0) {
1205         if (alo > LIT_GUINT64_HIGHBIT) return DECIMAL_OVERFLOW;
1206         *pResult = (gint64) ~(alo-1);
1207     } else {
1208         if (alo & LIT_GUINT64_HIGHBIT) return DECIMAL_OVERFLOW;
1209         *pResult = (gint64) alo;
1210     }
1211
1212     return DECIMAL_SUCCESS;
1213 }
1214
1215 void mono_decimalFloorAndTrunc(/*[In, Out]*/decimal_repr* pA, gint32 floorFlag)
1216 {
1217     guint64 alo, ahi;
1218     guint32 factor, rest;
1219     int scale, sign, idx;
1220     int hasRest = 0;
1221
1222     MONO_ARCH_SAVE_REGS;
1223
1224     scale = pA->signscale.scale;
1225     if (scale == 0) return; /* nothing to do */
1226
1227     DECTO128(pA, alo, ahi);
1228     sign = pA->signscale.sign;
1229
1230     while (scale > 0) {
1231         idx = (scale > DECIMAL_MAX_INTFACTORS) ? DECIMAL_MAX_INTFACTORS : scale;
1232         factor = constantsDecadeInt32Factors[idx];
1233         scale -= idx;
1234         div128by32(&alo, &ahi, factor, &rest);
1235         hasRest = hasRest || (rest != 0);
1236     }
1237
1238     if (floorFlag && hasRest && sign) { /* floor: if negative, we must round up */
1239         roundUp128(&alo, &ahi);
1240     }
1241
1242     pack128toDecimal(pA, alo, ahi, 0, sign);
1243 }
1244
1245 void mono_decimalRound(/*[In, Out]*/decimal_repr* pA, gint32 decimals)
1246 {
1247     guint64 alo, ahi;
1248     int scale, sign;
1249
1250     MONO_ARCH_SAVE_REGS;
1251
1252     DECTO128(pA, alo, ahi);
1253     scale = pA->signscale.scale;
1254     sign = pA->signscale.sign;
1255     if (scale > decimals) {
1256         div128DecadeFactor(&alo, &ahi, scale - decimals);
1257         scale = decimals;
1258     }
1259
1260     pack128toDecimal(pA, alo, ahi, scale, sign);
1261 }
1262
1263 gint32 mono_decimalMult(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
1264 {
1265     guint64 low, mid, high;
1266     guint32 factor;
1267     int scale, sign, rc;
1268
1269     MONO_ARCH_SAVE_REGS;
1270
1271     mult96by96to192(pA->lo32, pA->mid32, pA->hi32, pB->lo32, pB->mid32, pB->hi32,
1272         &low, &mid, &high);
1273
1274     /* adjust scale and sign */
1275     scale = (int)pA->signscale.scale + (int)pB->signscale.scale;
1276     sign = pA->signscale.sign ^ pB->signscale.sign;
1277
1278     /* first scaling step */
1279     factor = constantsDecadeInt32Factors[DECIMAL_MAX_INTFACTORS];
1280     while (high != 0 || (mid>>32) >= factor) {
1281         if (high < 100) {
1282             factor /= 1000; /* we need some digits for final rounding */
1283             scale -= DECIMAL_MAX_INTFACTORS - 3;
1284         } else {
1285             scale -= DECIMAL_MAX_INTFACTORS;
1286         }
1287
1288         div192by32(&low, &mid, &high, factor);
1289     }
1290
1291     /* second and final scaling */
1292     rc = rescale128(&low, &mid, &scale, 0, 0, DECIMAL_MAX_SCALE, 1);
1293     if (rc != DECIMAL_SUCCESS) return rc;
1294
1295     return pack128toDecimal(pA, low, mid, scale, sign);
1296 }
1297
1298 static int decimalDivSub(/*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB,
1299                          guint64* pclo, guint64* pchi, int* pExp)
1300 {
1301     guint64 alo, ami, ahi;
1302     guint64 tlo, tmi, thi;
1303     guint32 blo, bmi, bhi;
1304     int ashift, bshift, extraBit, texp;
1305
1306     ahi = (((guint64)(pA->hi32)) << 32) | pA->mid32;
1307     ami = ((guint64)(pA->lo32)) << 32;
1308     alo = 0;
1309     blo = pB->lo32;
1310     bmi = pB->mid32;
1311     bhi = pB->hi32;
1312
1313     if (blo == 0 && bmi == 0 && bhi == 0) {
1314         return DECIMAL_DIVIDE_BY_ZERO;
1315     }
1316
1317     if (ami == 0 && ahi == 0) {
1318         *pclo = *pchi = 0;
1319         return DECIMAL_FINISHED;
1320     }
1321
1322     /* enlarge dividend to get maximal precision */
1323     for (ashift = 0; (ahi & LIT_GUINT64_HIGHBIT) == 0; ++ashift) {
1324         lshift128(&ami, &ahi);
1325     }
1326
1327     /* ensure that divisor is at least 2^95 */
1328     for (bshift = 0; (bhi & LIT_GUINT32_HIGHBIT) == 0; ++bshift) {
1329         lshift96(&blo, &bmi, &bhi);
1330     }
1331
1332     thi = ((guint64)bhi)<<32 | bmi;
1333     tmi = ((guint64)blo)<<32;
1334     tlo = 0;
1335     if (ahi > thi || (ahi == thi && ami >= tmi)) {
1336         sub192(alo, ami, ahi, tlo, tmi, thi, &alo, &ami, &ahi);
1337         extraBit = 1;
1338     } else {
1339         extraBit = 0;
1340     }
1341
1342     div192by96to128(alo, ami, ahi, blo, bmi, bhi, pclo, pchi);
1343     texp = 128 + ashift - bshift;
1344
1345     if (extraBit) {
1346         rshift128(pclo, pchi);
1347         *pchi += LIT_GUINT64_HIGHBIT;
1348         texp--;
1349     }
1350
1351     /* try loss free right shift */
1352     while (texp > 0 && (*pclo & 1) == 0) {
1353         /* right shift */
1354         rshift128(pclo, pchi);
1355         texp--;
1356     }
1357
1358     *pExp = texp;
1359
1360     return DECIMAL_SUCCESS;
1361 }
1362
1363 gint32 mono_decimalDiv(/*[Out]*/decimal_repr* pC, /*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
1364 {
1365     guint64 clo, chi; /* result */
1366     int scale, texp, rc;
1367
1368     MONO_ARCH_SAVE_REGS;
1369
1370     rc = decimalDivSub(pA, pB, &clo, &chi, &texp);
1371     if (rc != DECIMAL_SUCCESS) {
1372         if (rc == DECIMAL_FINISHED) rc = DECIMAL_SUCCESS;
1373         return rc;
1374     }
1375
1376     /* adjust scale and sign */
1377     scale = (int)pA->signscale.scale - (int)pB->signscale.scale;
1378
1379     /*test: printf("0: %.17e\n", (((double)chi) * pow(2,64) + clo) * pow(10, -scale) * pow(2, -exp));*/
1380     rc = rescale128(&clo, &chi, &scale, texp, 0, DECIMAL_MAX_SCALE, 1);
1381     if (rc != DECIMAL_SUCCESS) return rc;
1382
1383     return pack128toDecimal(pC, clo, chi, scale, pA->signscale.sign ^ pB->signscale.sign);
1384 }
1385
1386 gint32 mono_decimalIntDiv(/*[Out]*/decimal_repr* pC, /*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
1387 {
1388     guint64 clo, chi; /* result */
1389     int scale, texp, rc;
1390
1391     MONO_ARCH_SAVE_REGS;
1392
1393     rc = decimalDivSub(pA, pB, &clo, &chi, &texp);
1394     if (rc != DECIMAL_SUCCESS) {
1395         if (rc == DECIMAL_FINISHED) rc = DECIMAL_SUCCESS;
1396         return rc;
1397     }
1398
1399     /* calc scale  */
1400     scale = (int)pA->signscale.scale - (int)pB->signscale.scale;
1401
1402     /* truncate result to integer */
1403     rc = rescale128(&clo, &chi, &scale, texp, 0, 0, 0);
1404     if (rc != DECIMAL_SUCCESS) return rc;
1405
1406     return pack128toDecimal(pC, clo, chi, scale, pA->signscale.sign);
1407 }
1408
1409 /* approximation for log2 of a
1410    If q is the exact value for log2(a), then q <= decimalLog2(a) <= q+1 */
1411 DECINLINE static int decimalLog2(/*[In]*/decimal_repr* pA)
1412 {
1413     int tlog2;
1414     int scale = pA->signscale.scale;
1415
1416     if (pA->hi32 != 0) tlog2 = 64 + log2_32(pA->hi32);
1417     else if (pA->mid32 != 0) tlog2 = 32 + log2_32(pA->mid32);
1418     else tlog2 = log2_32(pA->lo32);
1419
1420     if (tlog2 != DECIMAL_LOG_NEGINF) {
1421         tlog2 -= (scale * 33219) / 10000;
1422     }
1423
1424     return tlog2;
1425 }
1426
1427 DECINLINE static int decimalIsZero(/*[In]*/decimal_repr* pA)
1428 {
1429     return (pA->lo32 == 0 && pA->mid32 == 0 && pA->hi32 == 0);
1430 }
1431
1432 gint32 mono_decimalCompare(/*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
1433 {
1434     int log2a, log2b, delta, sign;
1435     decimal_repr aa;
1436
1437     MONO_ARCH_SAVE_REGS;
1438
1439     sign = (pA->signscale.sign) ? -1 : 1;
1440
1441     if (pA->signscale.sign ^ pB->signscale.sign) {
1442         return (decimalIsZero(pA) && decimalIsZero(pB)) ? 0 : sign;
1443     }
1444
1445     /* try fast comparison via log2 */
1446     log2a = decimalLog2(pA);
1447     log2b = decimalLog2(pB);
1448     delta = log2a - log2b;
1449      /* decimalLog2 is not exact, so we can say nothing
1450         if abs(delta) <= 1 */
1451     if (delta < -1) return -sign;
1452     if (delta > 1) return sign;
1453
1454     DECCOPY(&aa, pA);
1455     DECNEGATE(&aa);
1456     mono_decimalIncr(&aa, pB);
1457
1458     if (decimalIsZero(&aa)) return 0;
1459
1460     return (aa.signscale.sign) ? 1 : -1;
1461 }
1462
1463 /* d=(-1)^sign * n * 2^(k-52) with sign (1bit), k(11bit), n-2^52(52bit) */
1464 DECINLINE static void buildIEEE754Double(double* pd, int sign, int texp, guint64 mantisse)
1465 {
1466     guint64* p = (guint64*) pd;
1467
1468     PRECONDITION(sign == 0 || sign == 1);
1469     *p = (((guint64)sign) << 63) | (((guint64)((1023+texp)&0x7ff)) << 52) | mantisse;
1470 }
1471
1472 double mono_decimal2double(/*[In]*/decimal_repr* pA)
1473 {
1474     double d;
1475     guint64 alo, ahi, mantisse;
1476     guint32 overhang, factor, roundBits;
1477     int scale, texp, log5, i;
1478
1479     MONO_ARCH_SAVE_REGS;
1480
1481     ahi = (((guint64)(pA->hi32)) << 32) | pA->mid32;
1482     alo = ((guint64)(pA->lo32)) << 32;
1483
1484     /* special case zero */
1485     if (ahi == 0 && alo == 0) return 0.0;
1486
1487     texp = 0;
1488     scale = pA->signscale.scale;
1489
1490     /* transform n * 10^-scale and exp = 0 => m * 2^-exp and scale = 0 */
1491     while (scale > 0) {
1492         while ((ahi & LIT_GUINT64_HIGHBIT) == 0) {
1493             lshift128(&alo, &ahi);
1494             texp++;
1495         }
1496
1497         overhang = (guint32) (ahi >> 32);
1498         if (overhang >= 5) {
1499             /* estimate log5 */
1500             log5 = (log2_32(overhang) * 1000) / 2322; /* ln(5)/ln(2) = 2.3219... */
1501             if (log5 < DECIMAL_MAX_INTFACTORS) {
1502                 /* get maximal factor=5^i, so that overhang / factor >= 1 */
1503                 factor = constantsDecadeInt32Factors[log5] >> log5; /* 5^n = 10^n/2^n */
1504                 i = log5 + overhang / factor;
1505             } else {
1506                 i = DECIMAL_MAX_INTFACTORS; /* we have only constants up to 10^DECIMAL_MAX_INTFACTORS */
1507             }
1508             if (i > scale) i = scale;
1509             factor = constantsDecadeInt32Factors[i] >> i; /* 5^n = 10^n/2^n */
1510             /* n * 10^-scale * 2^-exp => m * 10^-(scale-i) * 2^-(exp+i) with m = n * 5^-i */
1511             div128by32(&alo, &ahi, factor, 0);
1512             scale -= i;
1513             texp += i;
1514         }
1515     }
1516
1517     /* normalize significand (highest bit should be 1) */
1518     while ((ahi & LIT_GUINT64_HIGHBIT) == 0) {
1519         lshift128(&alo, &ahi);
1520         texp++;
1521     }
1522
1523     /* round to nearest even */
1524     roundBits = (guint32)ahi & 0x7ff;
1525     ahi += 0x400;
1526     if ((ahi & LIT_GUINT64_HIGHBIT) == 0) { /* overflow ? */
1527         ahi >>= 1;
1528         texp--;
1529     } else if ((roundBits & 0x400) == 0) ahi &= ~1;
1530
1531     /* 96 bit => 1 implizit bit and 52 explicit bits */
1532     mantisse = (ahi & ~LIT_GUINT64_HIGHBIT) >> 11;
1533
1534     buildIEEE754Double(&d, pA->signscale.sign, -texp+95, mantisse);
1535
1536     return d;
1537 }
1538
1539 /* a *= 10^exp */
1540 gint32 mono_decimalSetExponent(/*[In, Out]*/decimal_repr* pA, gint32 texp)
1541 {
1542     guint64 alo, ahi;
1543     int rc;
1544     int scale = pA->signscale.scale;
1545
1546     MONO_ARCH_SAVE_REGS;
1547
1548     scale -= texp;
1549
1550     if (scale < 0 || scale > DECIMAL_MAX_SCALE) {
1551         DECTO128(pA, alo, ahi);
1552         rc = rescale128(&alo, &ahi, &scale, 0, 0, DECIMAL_MAX_SCALE, 1);
1553         if (rc != DECIMAL_SUCCESS) return rc;
1554         return pack128toDecimal(pA, alo, ahi, scale, pA->signscale.sign);
1555     } else {
1556         pA->signscale.scale = scale;
1557         return DECIMAL_SUCCESS;
1558     }
1559 }