Merge pull request #601 from knocte/sock_improvements

[mono.git] / mono / metadata / decimal.c
diff --git a/mono/metadata/decimal.c b/mono/metadata/decimal.c

index a2bd4b28fd5eae38809cf8315c0add10f8ae632b..0e6a7b3df1cacdbbbefbd369506d49a3295dde22 100644 (file)
--- a/mono/metadata/decimal.c
+++ b/mono/metadata/decimal.c
@@ -1,29 +1,42 @@
  /* 
-   decimal.c
-
-   conversions and numerical operations for the c# type System.Decimal
-
-   Author: Martin Weindel (martin.weindel@t-online.de)
-
-   (C) 2001 by Martin Weindel
-*/
+ * decimal.c
+ *
+ * conversions and numerical operations for the c# type System.Decimal
+ *
+ * Author: Martin Weindel (martin.weindel@t-online.de)
+ *
+ * (C) 2001 by Martin Weindel
+ */
  
  /*
   * machine dependent configuration for 
   * CSharp value type System.Decimal
   */
  
+#include "config.h"
+#include <mono/metadata/exception.h>
  #include <stdio.h>
-#include <memory.h>
  #include <stdlib.h>
  #include <string.h>
  #include <math.h>
+#ifdef HAVE_MEMORY_H
+#include <memory.h>
+#endif
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#ifndef DISABLE_DECIMAL
  
  /* needed for building microsoft dll */
+#ifdef __GNUC__
  #define DECINLINE __inline
+#else
+#define DECINLINE
+#endif
  
  #define LIT_GUINT32(x) x
-#define LIT_GUINT64(x) x##L
+#define LIT_GUINT64(x) x##LL
  
  
  /* we need a UInt64 type => guint64 */
@@ -89,7 +102,7 @@
  
  #define DECIMAL_LOG_NEGINF -1000
  
-static guint32 constantsDecadeInt32Factors[DECIMAL_MAX_INTFACTORS+1] = {
+static const guint32 constantsDecadeInt32Factors[DECIMAL_MAX_INTFACTORS+1] = {
      LIT_GUINT32(1), LIT_GUINT32(10), LIT_GUINT32(100), LIT_GUINT32(1000), 
      LIT_GUINT32(10000), LIT_GUINT32(100000), LIT_GUINT32(1000000), 
      LIT_GUINT32(10000000), LIT_GUINT32(100000000), LIT_GUINT32(1000000000)
@@ -100,7 +113,7 @@ typedef struct {
      guint64 hi;
  } dec128_repr;
  
-static dec128_repr dec128decadeFactors[DECIMAL_MAX_SCALE+1] = {
+static const dec128_repr dec128decadeFactors[DECIMAL_MAX_SCALE+1] = {
      LIT_DEC128( 0, 0, 1u), /* == 1 */
      LIT_DEC128( 0, 0, 10u), /* == 10 */
      LIT_DEC128( 0, 0, 100u), /* == 100 */
@@ -344,7 +357,7 @@ DECINLINE static int div128by32(guint64* plo, guint64* phi, guint32 factor, guin
      if (pRest) *pRest = (guint32) a;
  
      a <<= 1;
-    return (a > factor || (a == factor && (c & 1) == 1)) ? 1 : 0;
+    return (a >= factor || (a == factor && (c & 1) == 1)) ? 1 : 0;
  }
  
  /* division: x(192bit) /= factor(32bit) 
@@ -390,8 +403,8 @@ DECINLINE static void div192by32(guint64* plo, guint64* pmi, guint64* phi,
  
  /* returns upper 32bit for a(192bit) /= b(32bit)
     a will contain remainder */
-static guint32 div192by96to32withRest(guint64* palo, guint64* pami, guint64* pahi, 
-                                      guint32 blo, guint32 bmi, guint32 bhi)
+DECINLINE static guint32 div192by96to32withRest(guint64* palo, guint64* pami, guint64* pahi, 
+                                                                                               guint32 blo, guint32 bmi, guint32 bhi)
  {
      guint64 rlo, rmi, rhi; /* remainder */
      guint64 tlo, thi; /* term */
@@ -418,9 +431,9 @@ static guint32 div192by96to32withRest(guint64* palo, guint64* pami, guint64* pah
  
  /* c(128bit) = a(192bit) / b(96bit) 
     b must be >= 2^95 */
-static void div192by96to128(guint64 alo, guint64 ami, guint64 ahi,
-                            guint32 blo, guint32 bmi, guint32 bhi,
-                            guint64* pclo, guint64* pchi)
+DECINLINE static void div192by96to128(guint64 alo, guint64 ami, guint64 ahi,
+                                                                         guint32 blo, guint32 bmi, guint32 bhi,
+                                                                         guint64* pclo, guint64* pchi)
  {
      guint64 rlo, rmi, rhi; /* remainder */
      guint32 h, c;
@@ -454,8 +467,8 @@ DECINLINE static void roundUp128(guint64* pclo, guint64* pchi) {
      if (++(*pclo) == 0) ++(*pchi);
  }
  
-static int normalize128(guint64* pclo, guint64* pchi, int* pScale, 
-                        int roundFlag, int roundBit)
+DECINLINE static int normalize128(guint64* pclo, guint64* pchi, int* pScale, 
+                                                                 int roundFlag, int roundBit)
  {
      guint32 overhang = (guint32)(*pchi >> 32);
      int scale = *pScale;
@@ -510,101 +523,108 @@ DECINLINE static int maxLeftShift(/*[In, Out]*/decimal_repr* pA)
  /* Shifts the 128-bit value stored as (*pchi, *pclo) right by one bit. */
  DECINLINE static void rshift128(guint64* pclo, guint64* pchi)
  {
      *pclo >>= 1;
-    if (*pchi & 1) *pclo |= LIT_GUINT64_HIGHBIT;
+       *pclo |= (*pchi & 1) << 63; /* bit 0 of *pchi moves into bit 63 of *pclo */
      *pchi >>= 1;
  }
  
  /* Shifts the 96-bit value stored as (*pchi, *pcmid, *pclo) left by one bit.
     LIT_GUINT32_HIGHBIT is defined outside this view; presumably it is the
     bit-31 mask, which is what the ">> 31" below relies on. */
  DECINLINE static void lshift96(guint32* pclo, guint32* pcmid, guint32* pchi)
  {
      *pchi <<= 1;
-    if (*pcmid & LIT_GUINT32_HIGHBIT) (*pchi)++;
+       *pchi |= (*pcmid & LIT_GUINT32_HIGHBIT) >> 31; /* carry from *pcmid into bit 0 of *pchi */
      *pcmid <<= 1;
-    if (*pclo & LIT_GUINT32_HIGHBIT) (*pcmid)++;
+       *pcmid |= (*pclo & LIT_GUINT32_HIGHBIT) >> 31; /* carry from *pclo into bit 0 of *pcmid */
      *pclo <<= 1;
  }
  
  /* Shifts the 128-bit value stored as (*pchi, *pclo) left by one bit.
     LIT_GUINT64_HIGHBIT is defined outside this view; presumably it is the
     bit-63 mask, which is what the ">> 63" below relies on. */
  DECINLINE static void lshift128(guint64* pclo, guint64* pchi)
  {
      *pchi <<= 1;
-    if (*pclo & LIT_GUINT64_HIGHBIT) (*pchi)++;
+       *pchi |= (*pclo & LIT_GUINT64_HIGHBIT) >> 63; /* carry from *pclo into bit 0 of *pchi */
      *pclo <<= 1;
  }
  
  /* Shifts the 192-bit value stored as (*pchi, *pcmi, *pclo) right by one bit. */
  DECINLINE static void rshift192(guint64* pclo, guint64* pcmi, guint64* pchi)
  {
      *pclo >>= 1;
-    if (*pcmi & 1) *pclo |= LIT_GUINT64_HIGHBIT;
+       *pclo |= (*pcmi & 1) << 63; /* bit 0 of *pcmi moves into bit 63 of *pclo */
      *pcmi >>= 1;
-    if (*pchi & 1) *pcmi |= LIT_GUINT64_HIGHBIT;
+       *pcmi |= (*pchi & 1) << 63; /* bit 0 of *pchi moves into bit 63 of *pcmi */
      *pchi >>= 1;
  }
  
+#if defined(__native_client__) && (defined(__i386__) || defined(__x86_64))
+#define USE_X86_32BIT_INSTRUCTIONS 1
+#endif
+
+/*
+ * my_g_bit_nth_msf:
+ * @mask: value to scan; expected to be != 0
+ *
+ * Returns the zero-based index of the most significant set bit of @mask.
+ * The intrinsic and portable paths return -1 for @mask == 0; the BSR based
+ * inline-assembly paths must not be given a zero mask (BSR leaves its
+ * destination undefined for a zero source).
+ */
+DECINLINE static gint
+my_g_bit_nth_msf (gsize mask)
+{
+#if (defined(__i386__) && defined(__GNUC__)) || defined(USE_X86_32BIT_INSTRUCTIONS)
+       int r;
+
+       __asm__("bsrl %1,%0\n\t"
+                       : "=r" (r) : "rm" (mask));
+       return r;
+#elif defined(__x86_64) && defined(__GNUC__)
+       guint64 r;
+
+       __asm__("bsrq %1,%0\n\t"
+                       : "=r" (r) : "rm" (mask));
+       return r;
+#elif defined(__i386__) && defined(_MSC_VER)
+       unsigned long bIndex = 0;
+       if (_BitScanReverse (&bIndex, mask))
+               return bIndex;
+       return -1;
+#elif defined(__x86_64__) && defined(_MSC_VER)
+       unsigned long bIndex = 0;
+       if (_BitScanReverse64 (&bIndex, mask))
+               return bIndex;
+       return -1;
+#elif defined(__s390x__) && defined(__NOT_YET)
+       /* NOTE(review): this path is disabled via __NOT_YET and has no
+        * return statement; it needs finishing before it can be enabled. */
+       guint64 r;
+
+       __asm__("\tlrvgr\t%1,%1\n"
+               "\tflogr\t%0,%1\n"
+               "\tjz\t0f\n"
+               "\tlghi\t%0,-1\n"
+               "0:\n"
+               : "=r" (r) : "r" (mask) : "cc");
+#else
+       int i;
+
+       /* Scan from the top bit down.  The probe bit is built as a gsize:
+        * 1UL may be narrower than gsize, and shifting it by the upper bit
+        * positions would then be undefined. */
+       for (i = (int) (sizeof (gsize) * 8) - 1; i >= 0; i--) {
+               if (mask & ((gsize) 1 << i))
+                       return i;
+       }
+       return -1;
+#endif
+}
+
  /* returns the index of the most significant set bit of a, plus one
     (i.e. floor(log2(a)) + 1), or DECIMAL_LOG_NEGINF for a = 0 */
  DECINLINE static int log2_32(guint32 a)
  {
-    int tlog2 = 0;
-
      if (a == 0) return DECIMAL_LOG_NEGINF;
  
-    if ((a >> 16) != 0) {
-        a >>= 16;
-        tlog2 += 16;
-    }
-    if ((a >> 8) != 0) {
-        a >>= 8;
-        tlog2 += 8;
-    }
-    if ((a >> 4) != 0) {
-        a >>= 4;
-        tlog2 += 4;
-    }
-    if ((a >> 2) != 0) {
-        a >>= 2;
-        tlog2 += 2;
-    }
-    if ((a >> 1) != 0) {
-        a >>= 1;
-        tlog2 += 1;
-    }
-    tlog2 += (int) a;
-
-    return tlog2;
+       return my_g_bit_nth_msf (a) + 1; /* a != 0 here, so the scan always finds a bit */
  }
  
  /* returns the index of the most significant set bit of a, plus one
     (i.e. floor(log2(a)) + 1), or DECIMAL_LOG_NEGINF for a = 0 */
  DECINLINE static int log2_64(guint64 a)
  {
-    int tlog2 = 0;
-
      if (a == 0) return DECIMAL_LOG_NEGINF;
  
-    if ((a >> 32) != 0) {
-        a >>= 32;
-        tlog2 += 32;
-    }
-    if ((a >> 16) != 0) {
-        a >>= 16;
-        tlog2 += 16;
-    }
-    if ((a >> 8) != 0) {
-        a >>= 8;
-        tlog2 += 8;
-    }
-    if ((a >> 4) != 0) {
-        a >>= 4;
-        tlog2 += 4;
-    }
-    if ((a >> 2) != 0) {
-        a >>= 2;
-        tlog2 += 2;
-    }
-    if ((a >> 1) != 0) {
-        a >>= 1;
-        tlog2 += 1;
-    }
-    tlog2 += (int) a;
-
-    return tlog2;
+#if SIZEOF_VOID_P == 8
+       return my_g_bit_nth_msf (a) + 1; /* presumably gsize is 64 bits wide here, so a is scanned whole */
+#else
+       if ((a >> 32) == 0)
+               return my_g_bit_nth_msf ((guint32)a) + 1; /* upper half is zero: scan the low 32 bits */
+       else
+               return my_g_bit_nth_msf ((guint32)(a >> 32)) + 1 + 32; /* scan the upper half, offset by 32 */
+#endif
  }
  
  /* returns log2(a) or DECIMAL_LOG_NEGINF for a = 0 */
@@ -684,11 +704,38 @@ DECINLINE static int rescale128(guint64* pclo, guint64* pchi, int* pScale, int t
          /* reduce exp */
          while (texp > 0 && scale <= maxScale) {
              overhang = (guint32)(*pchi >> 32);
-            while (texp > 0 && ((*pclo & 1) == 0 || overhang > (2<<DECIMAL_MAX_INTFACTORS))) {
-                if (--texp == 0) roundBit = (int)(*pclo & 1);
+
+                       /* The original loop was this: */
+                       /*
+            while (texp > 0 && (overhang > (2<<DECIMAL_MAX_INTFACTORS) || (*pclo & 1) == 0)) {
+                               if (--texp == 0)
+                                       roundBit = (int)(*pclo & 1);
                  rshift128(pclo, pchi);
                  overhang = (guint32)(*pchi >> 32);
              }
+                       */
+                       if (overhang > 0) {
+                               int msf = my_g_bit_nth_msf (overhang);
+                               int shift = msf - (DECIMAL_MAX_INTFACTORS + 2);
+
+                               if (shift >= texp)
+                                       shift = texp - 1;
+
+                               if (shift > 0) {
+                                       texp -= shift;
+                                       *pclo = (*pclo >> shift) | ((*pchi & ((1 << shift) - 1)) << (64 - shift));
+                                       *pchi >>= shift;
+                                       overhang >>= shift;
+
+                                       g_assert (texp > 0);
+                                       g_assert (overhang > (2 << DECIMAL_MAX_INTFACTORS));
+                               }
+                       }
+            while (texp > 0 && (overhang > (2<<DECIMAL_MAX_INTFACTORS) || (*pclo & 1) == 0)) {
+                               if (--texp == 0) roundBit = (int)(*pclo & 1);
+                rshift128(pclo, pchi);
+                overhang >>= 1;
+            }
  
              if (texp > DECIMAL_MAX_INTFACTORS) i = DECIMAL_MAX_INTFACTORS;
              else i = texp;
@@ -733,6 +780,34 @@ DECINLINE static int rescale128(guint64* pclo, guint64* pchi, int* pScale, int t
      return normalize128(pclo, pchi, pScale, roundFlag, roundBit);
  }
  
+/*
+ * trimExcessScale:
+ * @pclo: low 64 bits of the 128-bit value (in/out)
+ * @pchi: high 64 bits of the 128-bit value (in/out)
+ * @pScale: decimal scale that goes with the value (in/out)
+ *
+ * Strips trailing decimal zeros: the value is divided by 10 for as long as
+ * the division leaves no remainder, lowering the scale by one per exact
+ * division.  Once a division leaves a remainder, the value and scale from
+ * just before that division are stored back.  Nothing is stored when the
+ * very first division already leaves a remainder.
+ *
+ * NOTE(review): when every division down to scale 0 is exact, the loop ends
+ * without storing anything, so the outputs keep their original value and
+ * scale.  This is the pre-existing behaviour; confirm whether it is intended.
+ */
+static void trimExcessScale(guint64* pclo, guint64* pchi, int* pScale)
+{
+       guint64 ilo = *pclo, lastlo;
+       guint64 ihi = *pchi, lasthi;
+       int scale = *pScale;
+       int i = 0;
+       /* Remainder of the current division; kept local so the helper does
+        * not share state through a file-scope variable. */
+       guint32 rest;
+
+       while (scale > 0) {
+               scale--;
+               i++;
+               lastlo = ilo;
+               lasthi = ihi;
+
+               /* Only the remainder is needed; the int that div128by32
+                * returns was never read here and is ignored. */
+               div128by32(&ilo, &ihi, 10, &rest);
+               if (rest != 0){
+                       i--;
+                       if (i == 0)
+                               return;
+
+                       *pclo = lastlo;
+                       *pchi = lasthi;
+                       *pScale = scale+1;
+                       return;
+               }
+       }
+}
+
  /* performs a += b */
  gint32 mono_decimalIncr(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
  {
@@ -740,6 +815,8 @@ gint32 mono_decimalIncr(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
      int log2A, log2B, log2Result, log10Result, rc;
      int subFlag, sign, scaleA, scaleB;
  
+    MONO_ARCH_SAVE_REGS;
+
      DECTO128(pA, alo, ahi);
      DECTO128(pB, blo, bhi);
  
@@ -767,7 +844,7 @@ gint32 mono_decimalIncr(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
          /* Estimate log10 and scale of result for adjusting scales */
          log2A = log2withScale_128(alo, ahi, scaleA);
          log2B = log2withScale_128(blo, bhi, scaleB);
-        log2Result = (log2A >= log2B) ? log2A : log2B;
+        log2Result = MAX (log2A, log2B);
          if (!subFlag) log2Result++; /* result can have one bit more */
          log10Result = (log2Result * 1000) / 3322 + 1;
          /* we will calculate in 128bit, so we may need to adjust scale */
@@ -795,8 +872,6 @@ gint32 mono_decimalIncr(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
              add128(alo, ahi, blo, bhi, &alo, &ahi);
          }
  
-        if (rc != DECIMAL_SUCCESS) return rc;
-
          rc = rescale128(&alo, &ahi,&scaleA, 0, 0, DECIMAL_MAX_SCALE, 1);
      }
  
@@ -874,7 +949,11 @@ gint32 mono_double2decimal(/*[Out]*/decimal_repr* pA, double val, gint32 digits)
      PRECONDITION(digits <= 15);
  
      sign = ((*p & LIT_GUINT64_HIGHBIT) != 0) ? 1 : 0;
+
+    // Exponent
      k = ((guint16)((*p) >> 52)) & 0x7FF;
+
+    // 1-bit followed by the fraction component from the float
      alo = (*p & LIT_GUINT64(0xFFFFFFFFFFFFF)) | LIT_GUINT64(0x10000000000000);
      ahi = 0;
  
@@ -893,7 +972,7 @@ gint32 mono_double2decimal(/*[Out]*/decimal_repr* pA, double val, gint32 digits)
      }
  
      scale = 0;
-    rc = rescale128(&alo, &ahi, &scale, -texp, 0, DECIMAL_MAX_SCALE, 0);
+    rc = rescale128(&alo, &ahi, &scale, -texp, 0, DECIMAL_MAX_SCALE, 1);
      if (rc != DECIMAL_SUCCESS) return rc;
  
      sigDigits = calcDigits(alo, ahi);
@@ -914,10 +993,24 @@ gint32 mono_double2decimal(/*[Out]*/decimal_repr* pA, double val, gint32 digits)
          }
      }
  
+    //
+    // Turn the double 0.6 which at this point is:
+    // 0.6000000000000000
+    // into:
+    // 0.6
+    //
+    trimExcessScale (&alo, &ahi, &scale);
+    
      return pack128toDecimal(pA, alo, ahi, scale, sign);
  }
  
-/** 
+/**
+ * mono_string2decimal:
+ * @decimal_repr:
+ * @str:
+ * @decrDecimal:
+ * @sign:
+ *
   * converts a digit string to decimal
   * The significant digits must be passed as an integer in buf !
   *
@@ -990,6 +1083,10 @@ gint32 mono_string2decimal(/*[Out]*/decimal_repr* pA, MonoString* str, gint32 de
          }
      }
  
+    // Set correct scale for zeros decimal (000 input is 0.00)
+    if (sigLen < 0 && len > decrDecimal)
+        sigLen = len;
+
      scale = sigLen - decrDecimal;
  
      if (i < len) { /* too much digits, we must round */
@@ -1015,8 +1112,7 @@ gint32 mono_string2decimal(/*[Out]*/decimal_repr* pA, MonoString* str, gint32 de
          if (rc != DECIMAL_SUCCESS) return rc;
      }
  
-    if (alo == 0 && ahi == 0) {
-        DECINIT(pA);
+    if (alo == 0 && ahi == 0 && scale <= 0) {
          return DECIMAL_SUCCESS;
      } else {
          return pack128toDecimal(pA, alo, ahi, sigLen - decrDecimal, sign);
@@ -1024,120 +1120,9 @@ gint32 mono_string2decimal(/*[Out]*/decimal_repr* pA, MonoString* str, gint32 de
  }
  
  /**
- * returns minimal number of digit string to represent decimal
- * No leading or trailing zeros !
- * Examples:
- * *pA == 0            =>   buf = "", *pDecPos = 1, *pSign = 0
- * *pA == 12.34        =>   buf = "1234", *pDecPos = 2, *pSign = 0
- * *pA == -1000.0000   =>   buf = "1", *pDecPos = 4, *pSign = 1
- * *pA == -0.00000076  =>   buf = "76", *pDecPos = -6, *pSign = 0
- * 
- * Parameters:
- *    pA         decimal instance to convert     
- *    digits     < 0: use decimals instead
- *               = 0: gets mantisse as integer
- *               > 0: gets at most <digits> digits, rounded according to banker's rule if necessary
- *    decimals   only used if digits < 0
- *               >= 0: number of decimal places
- *    buf        pointer to result buffer
- *    bufSize    size of buffer
- *    pDecPos    receives insert position of decimal point relative to start of buffer
- *    pSign      receives sign
- */
-
-gint32 mono_decimal2string(/*[In]*/decimal_repr* pA, gint32 digits, gint32 decimals,
-                                   MonoArray* pArray, gint32 bufSize, gint32* pDecPos, gint32* pSign)
-{
-    guint16 tmp[41];
-    guint16 *buf = (guint16*) mono_array_addr(pArray, guint16, 0);
-    guint16 *q, *p = tmp;
-    decimal_repr aa;
-    guint64 alo, ahi;
-    guint32 rest;
-    gint32 sigDigits, d;
-    int i, scale, len;
-
-    scale = pA->signscale.scale;
-    DECTO128(pA, alo, ahi);
-    sigDigits = calcDigits(alo, ahi); /* significant digits */
-
-    /* calc needed digits (without leading or trailing zeros) */
-    d = (digits == 0) ? sigDigits : digits;
-    if (d < 0) { /* use decimals ? */
-        if (0 <= decimals && decimals < scale) {
-            d = sigDigits - scale + decimals;
-        } else {
-            d = sigDigits; /* use all you can get */
-        }
-    } 
-
-    if (sigDigits > d) { /* we need to round decimal number */
-        DECCOPY(&aa, pA);
-        aa.signscale.scale = DECIMAL_MAX_SCALE;
-        mono_decimalRound(&aa, DECIMAL_MAX_SCALE - sigDigits + d);
-        DECTO128(&aa, alo, ahi);
-        sigDigits += calcDigits(alo, ahi) - d;
-    }
-
-    len = 0;
-    if (d > 0) {
-        /* get digits starting from the tail */
-        for (; (alo != 0 || ahi != 0) && len < 40; len++) {
-            div128by32(&alo, &ahi, 10, &rest);
-            *p++ = '0' + (char) rest;
-        }
-    }
-    *p = 0;
-
-    if (len >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
-
-    /* now we have the minimal count of digits, 
-       extend to wished count of digits or decimals */
-    q = buf;
-    if (digits >= 0) { /* count digits */
-        if (digits >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
-        if (len == 0) {
-            /* zero or rounded to zero */
-            *pDecPos = 1;
-        } else {
-            /* copy significant digits */
-            for (i = 0; i < len; i++) {
-                *q++ = *(--p);
-            }
-            *pDecPos = sigDigits - scale;
-        }
-        /* add trailing zeros */
-        for (i = len; i < digits; i++) {
-            *q++ = '0';
-        }
-    } else { /* count decimals */
-        if (scale >= sigDigits) { /* add leading zeros */
-            if (decimals+2 >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
-            *pDecPos = 1;
-            for (i = 0; i <= scale - sigDigits; i++) {
-                *q++ = '0';
-            }
-        } else {
-            if (sigDigits - scale + decimals+1 >= bufSize) return DECIMAL_BUFFER_OVERFLOW;
-            *pDecPos = sigDigits - scale;
-        }
-        /* copy significant digits */
-        for (i = 0; i < len; i++) {
-            *q++ = *(--p);
-        }
-        /* add trailing zeros */
-        for (i = scale; i < decimals; i++) {
-            *q++ = '0';
-        }
-    }
-    *q = 0;
-
-    *pSign = (sigDigits > 0) ? pA->signscale.sign : 0; /* zero has positive sign */
-
-    return DECIMAL_SUCCESS;
-}
-
-/**
+ * mono_decimal2UInt64:
+ * @pA:
+ * @pResult:
   * converts a decimal to an UInt64 without rounding
   */
  gint32 mono_decimal2UInt64(/*[In]*/decimal_repr* pA, guint64* pResult)
@@ -1145,6 +1130,8 @@ gint32 mono_decimal2UInt64(/*[In]*/decimal_repr* pA, guint64* pResult)
      guint64 alo, ahi;
      int scale;
  
+    MONO_ARCH_SAVE_REGS;
+
      DECTO128(pA, alo, ahi);
      scale = pA->signscale.scale;
      if (scale > 0) {
@@ -1159,6 +1146,9 @@ gint32 mono_decimal2UInt64(/*[In]*/decimal_repr* pA, guint64* pResult)
  }
  
  /**
+ * mono_decimal2Int64:
+ * @pA:
+ * @pResult:
   * converts a decimal to an Int64 without rounding
   */
  gint32 mono_decimal2Int64(/*[In]*/decimal_repr* pA, gint64* pResult)
@@ -1166,6 +1156,8 @@ gint32 mono_decimal2Int64(/*[In]*/decimal_repr* pA, gint64* pResult)
      guint64 alo, ahi;
      int sign, scale;
  
+    MONO_ARCH_SAVE_REGS;
+
      DECTO128(pA, alo, ahi);
      scale = pA->signscale.scale;
      if (scale > 0) {
@@ -1193,6 +1185,8 @@ void mono_decimalFloorAndTrunc(/*[In, Out]*/decimal_repr* pA, gint32 floorFlag)
      int scale, sign, idx;
      int hasRest = 0;
  
+    MONO_ARCH_SAVE_REGS;
+
      scale = pA->signscale.scale;
      if (scale == 0) return; /* nothing to do */
  
@@ -1219,6 +1213,8 @@ void mono_decimalRound(/*[In, Out]*/decimal_repr* pA, gint32 decimals)
      guint64 alo, ahi;
      int scale, sign;
  
+    MONO_ARCH_SAVE_REGS;
+
      DECTO128(pA, alo, ahi);
      scale = pA->signscale.scale;
      sign = pA->signscale.sign;
@@ -1236,6 +1232,8 @@ gint32 mono_decimalMult(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
      guint32 factor;
      int scale, sign, rc;
  
+    MONO_ARCH_SAVE_REGS;
+
      mult96by96to192(pA->lo32, pA->mid32, pA->hi32, pB->lo32, pB->mid32, pB->hi32,
          &low, &mid, &high);
  
@@ -1263,8 +1261,8 @@ gint32 mono_decimalMult(/*[In, Out]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
      return pack128toDecimal(pA, low, mid, scale, sign);
  }
  
-static int decimalDivSub(/*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB,
-                         guint64* pclo, guint64* pchi, int* pExp)
+static DECINLINE int decimalDivSub(/*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB,
+                                                                  guint64* pclo, guint64* pchi, int* pExp)
  {
      guint64 alo, ami, ahi;
      guint64 tlo, tmi, thi;
@@ -1288,14 +1286,52 @@ static int decimalDivSub(/*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB,
      }
  
      /* enlarge dividend to get maximal precision */
-    for (ashift = 0; (ahi & LIT_GUINT64_HIGHBIT) == 0; ++ashift) {
-        lshift128(&ami, &ahi);
-    }
+       if (ahi == 0) {
+               ahi = ami;
+               ami = 0;
+               for (ashift = 64; (ahi & LIT_GUINT64_HIGHBIT) == 0; ++ashift) {
+                       ahi <<= 1;
+               }
+       } else {
+               for (ashift = 0; (ahi & LIT_GUINT64_HIGHBIT) == 0; ++ashift) {
+                       lshift128(&ami, &ahi);
+               }
+       }
  
      /* ensure that divisor is at least 2^95 */
-    for (bshift = 0; (bhi & LIT_GUINT32_HIGHBIT) == 0; ++bshift) {
-        lshift96(&blo, &bmi, &bhi);
-    }
+       if (bhi == 0) {
+
+               if (bmi == 0) {
+                       guint32 hi_shift;
+                       bhi = blo;
+                       bmi = 0;
+                       blo = 0;
+
+                       //g_assert (g_bit_nth_msf (bhi, 32) == my_g_bit_nth_msf (bhi));
+
+                       hi_shift = 31 - my_g_bit_nth_msf (bhi);
+                       bhi <<= hi_shift;
+                       bshift = 64 + hi_shift;
+               } else {
+                       bhi = bmi;
+                       bmi = blo;
+                       blo = 0;
+
+                       for (bshift = 32; (bhi & LIT_GUINT32_HIGHBIT) == 0; ++bshift) {
+                               bhi <<= 1;
+                               bhi |= (bmi & LIT_GUINT32_HIGHBIT) >> 31;
+                               bmi <<= 1;
+                       }
+               }
+       } else {
+               for (bshift = 0; (bhi & LIT_GUINT32_HIGHBIT) == 0; ++bshift) {
+                       bhi <<= 1;
+                       bhi |= (bmi & LIT_GUINT32_HIGHBIT) >> 31;
+                       bmi <<= 1;
+                       bmi |= (blo & LIT_GUINT32_HIGHBIT) >> 31;
+                       blo <<= 1;
+               }
+       }
  
      thi = ((guint64)bhi)<<32 | bmi;
      tmi = ((guint64)blo)<<32;
@@ -1333,6 +1369,18 @@ gint32 mono_decimalDiv(/*[Out]*/decimal_repr* pC, /*[In]*/decimal_repr* pA, /*[I
      guint64 clo, chi; /* result */
      int scale, texp, rc;
  
+    MONO_ARCH_SAVE_REGS;
+
+       /* Check for common cases */
+       if (mono_decimalCompare (pA, pB) == 0)
+               /* One */
+               return pack128toDecimal (pC, 1, 0, 0, 0);
+       pA->signscale.sign = pA->signscale.sign ? 0 : 1;
+       if (mono_decimalCompare (pA, pB) == 0)
+               /* Minus one */
+               return pack128toDecimal (pC, 1, 0, 0, 1);
+       pA->signscale.sign = pA->signscale.sign ? 0 : 1;
+
      rc = decimalDivSub(pA, pB, &clo, &chi, &texp);
      if (rc != DECIMAL_SUCCESS) {
          if (rc == DECIMAL_FINISHED) rc = DECIMAL_SUCCESS;
@@ -1354,6 +1402,8 @@ gint32 mono_decimalIntDiv(/*[Out]*/decimal_repr* pC, /*[In]*/decimal_repr* pA, /
      guint64 clo, chi; /* result */
      int scale, texp, rc;
  
+    MONO_ARCH_SAVE_REGS;
+
      rc = decimalDivSub(pA, pB, &clo, &chi, &texp);
      if (rc != DECIMAL_SUCCESS) {
          if (rc == DECIMAL_FINISHED) rc = DECIMAL_SUCCESS;
@@ -1398,6 +1448,8 @@ gint32 mono_decimalCompare(/*[In]*/decimal_repr* pA, /*[In]*/decimal_repr* pB)
      int log2a, log2b, delta, sign;
      decimal_repr aa;
  
+    MONO_ARCH_SAVE_REGS;
+
      sign = (pA->signscale.sign) ? -1 : 1;
  
      if (pA->signscale.sign ^ pB->signscale.sign) {
@@ -1429,6 +1481,17 @@ DECINLINE static void buildIEEE754Double(double* pd, int sign, int texp, guint64
  
      PRECONDITION(sign == 0 || sign == 1);
      *p = (((guint64)sign) << 63) | (((guint64)((1023+texp)&0x7ff)) << 52) | mantisse;
+#ifdef ARM_FPU_FPA
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+    {
+           guint32 temp;
+           guint32 *t = (guint32*)p;
+           temp = t [0];
+           t [0] = t [1];
+           t [1] = temp;
+    }
+#endif
+#endif
  }
  
  double mono_decimal2double(/*[In]*/decimal_repr* pA)
@@ -1438,6 +1501,8 @@ double mono_decimal2double(/*[In]*/decimal_repr* pA)
      guint32 overhang, factor, roundBits;
      int scale, texp, log5, i;
  
+    MONO_ARCH_SAVE_REGS;
+
      ahi = (((guint64)(pA->hi32)) << 32) | pA->mid32;
      alo = ((guint64)(pA->lo32)) << 32;
  
@@ -1485,7 +1550,7 @@ double mono_decimal2double(/*[In]*/decimal_repr* pA)
      ahi += 0x400;
      if ((ahi & LIT_GUINT64_HIGHBIT) == 0) { /* overflow ? */
          ahi >>= 1;
-        texp++;
+       texp--;
      } else if ((roundBits & 0x400) == 0) ahi &= ~1;
  
      /* 96 bit => 1 implicit bit and 52 explicit bits */
@@ -1503,6 +1568,8 @@ gint32 mono_decimalSetExponent(/*[In, Out]*/decimal_repr* pA, gint32 texp)
      int rc;
      int scale = pA->signscale.scale;
  
+    MONO_ARCH_SAVE_REGS;
+
      scale -= texp;
  
      if (scale < 0 || scale > DECIMAL_MAX_SCALE) {
@@ -1515,3 +1582,5 @@ gint32 mono_decimalSetExponent(/*[In, Out]*/decimal_repr* pA, gint32 texp)
          return DECIMAL_SUCCESS;
      }
  }
+
+#endif /* DISABLE_DECIMAL */