[sgen] Separate step for starting marking in workers.
[mono.git] / mono / io-layer / atomic.h
index 72067efd30676a02989a9dd32a5656602003d4bd..e45cfcf99b023116e656004b1454caa2fbc5f791 100644 (file)
 #ifndef _WAPI_ATOMIC_H_
 #define _WAPI_ATOMIC_H_
 
+#if defined(__NetBSD__)
+#include <sys/param.h>
+
+#if __NetBSD_Version__ > 499004000
+#include <sys/atomic.h>
+#define HAVE_ATOMIC_OPS
+#endif
+
+#endif
+
 #include <glib.h>
 
 #include "mono/io-layer/wapi.h"
 
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__NetBSD__) && defined(HAVE_ATOMIC_OPS)
+
+#define WAPI_ATOMIC_ASM
+static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
+       gint32 exch, gint32 comp)
+{      /* NetBSD: 32-bit compare-and-swap delegated to atomic_cas_32() */
+       return atomic_cas_32((uint32_t*)dest, comp, exch); /* comparand is passed first, then the new value; the call's result is returned as-is */
+}
+
+static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
+{      /* NetBSD: pointer-sized compare-and-swap delegated to atomic_cas_ptr() */
+       return atomic_cas_ptr(dest, comp, exch); /* comparand is passed first, then the new value; the call's result is returned as-is */
+}
+
+static inline gint32 InterlockedIncrement(volatile gint32 *val)
+{      /* NetBSD: delegates to atomic_inc_32_nv() and returns its result (presumably the incremented value -- confirm against atomic_inc(3)) */
+       return atomic_inc_32_nv((uint32_t*)val);
+}
+
+static inline gint32 InterlockedDecrement(volatile gint32 *val)
+{      /* NetBSD: delegates to atomic_dec_32_nv() and returns its result (presumably the decremented value -- confirm against atomic_dec(3)) */
+       return atomic_dec_32_nv((uint32_t*)val);
+}
+
+static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
+{      /* NetBSD: stores new_val into *val via atomic_swap_32() and returns that call's result (presumably the previous value -- confirm against atomic_swap(3)) */
+       return atomic_swap_32((uint32_t*)val, new_val);
+}
+
+static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
+               gpointer new_val)
+{      /* NetBSD: pointer-sized exchange via atomic_swap_ptr(); the call's result is returned as-is */
+       return atomic_swap_ptr(val, new_val);
+}
+
+static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
+{      /* NetBSD: adds 'add' to *val via atomic_add_32_nv() */
+       return atomic_add_32_nv((uint32_t*)val, add) - add; /* 'add' is subtracted again from the call's result (presumably the post-add value), yielding the pre-add value */
+}
+
+#elif defined(__i386__) || defined(__x86_64__)
 #define WAPI_ATOMIC_ASM
 
 /*
@@ -42,7 +92,7 @@ static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest
        gpointer old;
 
        __asm__ __volatile__ ("lock; "
-#ifdef __x86_64__
+#if defined(__x86_64__)  && !defined(__native_client__)
                              "cmpxchgq"
 #else
                              "cmpxchgl"
@@ -104,7 +154,7 @@ static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
        gpointer ret;
        
        __asm__ __volatile__ ("1:; lock; "
-#ifdef __x86_64__
+#if defined(__x86_64__)  && !defined(__native_client__)
                              "cmpxchgq"
 #else
                              "cmpxchgl"
@@ -127,152 +177,165 @@ static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
        return(ret);
 }
 
-#elif defined(sparc) || defined (__sparc__)
+#elif (defined(sparc) || defined (__sparc__)) && defined(__GNUC__)
 #define WAPI_ATOMIC_ASM
 
-#ifdef __GNUC__
-#define BEGIN_SPIN(tmp,lock) \
-__asm__ __volatile__("1:        ldstub [%1],%0\n\t"  \
-                             "          cmp %0, 0\n\t" \
-                             "          bne 1b\n\t" \
-                             "          nop" \
-                             : "=&r" (tmp) \
-                             : "r" (&lock) \
-                             : "memory"); 
-
-#define END_SPIN(lock) \
-__asm__ __volatile__("stb      %%g0, [%0]"  \
-                      : /* no outputs */ \
-                      : "r" (&lock)\
-                      : "memory");
-#else
-static inline void begin_spin(volatile unsigned char *lock)
-{
-       asm("1: ldstub [%i0], %l0");
-       asm("cmp %l0,0");
-       asm("bne 1b");
-       asm("nop");
-}
-#define BEGIN_SPIN(tmp,lock) begin_spin(&lock);
-#define END_SPIN(lock) ((lock) = 0);
-#endif
-
-extern volatile unsigned char _wapi_sparc_lock;
-
 G_GNUC_UNUSED 
-static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
+static inline gint32 InterlockedCompareExchange(volatile gint32 *_dest, gint32 _exch, gint32 _comp) /* SPARC: 32-bit compare-and-swap on *_dest done with a single hand-encoded cas */
 {
-       int tmp;
-       gint32 old;
-
-       BEGIN_SPIN(tmp,_wapi_sparc_lock)
-
-       old = *dest;
-       if (old==comp) {
-               *dest=exch;
-       }
-
-       END_SPIN(_wapi_sparc_lock)
-
-       return(old);
+       register volatile gint32 *dest asm("g1") = _dest; /* operands are pinned to g1/o4/o5 because the .word below hard-codes those registers */
+       register gint32 comp asm("o4") = _comp;
+       register gint32 exch asm("o5") = _exch;
+
+       __asm__ __volatile__(
+               /* cas [%%g1], %%o4, %%o5 -- emitted as a raw instruction word */
+               ".word 0xdbe0500c"
+               : "=r" (exch) /* exch (o5) is both an input and the output of the instruction */
+               : "0" (exch), "r" (dest), "r" (comp)
+               : "memory");
+
+       return exch; /* whatever the instruction left in o5 (per SPARC V9 cas semantics, the previous *dest) */
 }
 
 G_GNUC_UNUSED 
-static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
+static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *_dest, gpointer _exch, gpointer _comp) /* SPARC: pointer-sized compare-and-swap on *_dest with a hand-encoded casx (SPARCV9) or cas */
 {
-        int tmp;
-        gpointer old;
-
-        BEGIN_SPIN(tmp,_wapi_sparc_lock)
-
-        old = *dest;
-        if (old==comp) {
-                *dest=exch;
-        }
-
-        END_SPIN(_wapi_sparc_lock)
+       register volatile gpointer *dest asm("g1") = _dest; /* operands are pinned to g1/o4/o5 because the .word values below hard-code those registers */
+       register gpointer comp asm("o4") = _comp;
+       register gpointer exch asm("o5") = _exch;
+
+       __asm__ __volatile__(
+#ifdef SPARCV9
+               /* casx [%%g1], %%o4, %%o5 -- variant selected when SPARCV9 is defined */
+               ".word 0xdbf0500c"
+#else
+               /* cas [%%g1], %%o4, %%o5 -- variant used otherwise */
+               ".word 0xdbe0500c"
+#endif
+               : "=r" (exch) /* exch (o5) is both an input and the output of the instruction */
+               : "0" (exch), "r" (dest), "r" (comp)
+               : "memory");
 
-        return(old);
+       return exch; /* whatever the instruction left in o5 (per SPARC V9 cas/casx semantics, the previous *dest) */
 }
 
 G_GNUC_UNUSED 
-static inline gint32 InterlockedIncrement(volatile gint32 *dest)
+static inline gint32 InterlockedIncrement(volatile gint32 *_dest) /* SPARC: atomically add 1 to *_dest with a load/cas retry loop */
 {
-        int tmp;
-        gint32 ret;
-
-        BEGIN_SPIN(tmp,_wapi_sparc_lock)
-
-        (*dest)++;
-        ret = *dest;
-
-        END_SPIN(_wapi_sparc_lock)
-
-        return(ret);
+       register volatile gint32 *dest asm("g1") = _dest; /* g1/o4/o5 are pinned because the .word below hard-codes them */
+       register gint32 tmp asm("o4");
+       register gint32 ret asm("o5");
+
+       __asm__ __volatile__(
+               "1:     ld      [%%g1], %%o4\n\t" /* tmp = current *dest */
+               "       add     %%o4, 1, %%o5\n\t" /* ret = tmp + 1, the candidate new value */
+               /*      cas     [%%g1], %%o4, %%o5 */
+               "       .word   0xdbe0500c\n\t"
+               "       cmp     %%o4, %%o5\n\t" /* compare what was loaded with what the cas left in o5 */
+               "       bne     1b\n\t" /* mismatch: retry from the load */
+               "        add    %%o5, 1, %%o5" /* extra-indented: sits in the branch delay slot; adds 1 to o5 again after the cas */
+               : "=&r" (tmp), "=&r" (ret)
+               : "r" (dest)
+               : "memory", "cc");
+
+        return ret; /* o5 after the final add */
 }
 
 G_GNUC_UNUSED 
-static inline gint32 InterlockedDecrement(volatile gint32 *dest)
+static inline gint32 InterlockedDecrement(volatile gint32 *_dest) /* SPARC: atomically subtract 1 from *_dest with a load/cas retry loop */
 {
-        int tmp;
-        gint32 ret;
-
-        BEGIN_SPIN(tmp,_wapi_sparc_lock)
-
-       (*dest)--;
-        ret = *dest;
-
-        END_SPIN(_wapi_sparc_lock)
-
-        return(ret);
+       register volatile gint32 *dest asm("g1") = _dest; /* g1/o4/o5 are pinned because the .word below hard-codes them */
+       register gint32 tmp asm("o4");
+       register gint32 ret asm("o5");
+
+       __asm__ __volatile__(
+               "1:     ld      [%%g1], %%o4\n\t" /* tmp = current *dest */
+               "       sub     %%o4, 1, %%o5\n\t" /* ret = tmp - 1, the candidate new value */
+               /*      cas     [%%g1], %%o4, %%o5 */
+               "       .word   0xdbe0500c\n\t"
+               "       cmp     %%o4, %%o5\n\t" /* compare what was loaded with what the cas left in o5 */
+               "       bne     1b\n\t" /* mismatch: retry from the load */
+               "        sub    %%o5, 1, %%o5" /* extra-indented: sits in the branch delay slot; subtracts 1 from o5 again after the cas */
+               : "=&r" (tmp), "=&r" (ret)
+               : "r" (dest)
+               : "memory", "cc");
+
+        return ret; /* o5 after the final sub */
 }
 
 G_GNUC_UNUSED
-static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
+static inline gint32 InterlockedExchange(volatile gint32 *_dest, gint32 exch) /* SPARC: atomically store exch into *_dest with a load/cas retry loop */
 {
-        int tmp;
-        gint32 ret;
-
-        BEGIN_SPIN(tmp,_wapi_sparc_lock)
-
-        ret = *dest;
-        *dest = exch;
-
-        END_SPIN(_wapi_sparc_lock)
-
-        return(ret);
+       register volatile gint32 *dest asm("g1") = _dest; /* g1/o4/o5 are pinned because the .word below hard-codes them */
+       register gint32 tmp asm("o4");
+       register gint32 ret asm("o5");
+
+       __asm__ __volatile__(
+               "1:     ld      [%%g1], %%o4\n\t" /* tmp = current *dest */
+               "       mov     %3, %%o5\n\t" /* o5 = exch, the value to store */
+               /*      cas     [%%g1], %%o4, %%o5 */
+               "       .word   0xdbe0500c\n\t"
+               "       cmp     %%o4, %%o5\n\t" /* compare what was loaded with what the cas left in o5 */
+               "       bne     1b\n\t" /* mismatch: retry from the load */
+               "        nop" /* branch delay slot left empty, so o5 is returned untouched */
+               : "=&r" (tmp), "=&r" (ret)
+               : "r" (dest), "r" (exch)
+               : "memory", "cc");
+
+        return ret; /* o5 as left by the cas (per SPARC V9 cas semantics, the previous *dest) */
 }
 
 G_GNUC_UNUSED
-static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
+static inline gpointer InterlockedExchangePointer(volatile gpointer *_dest, gpointer exch) /* SPARC: atomically store exch into *_dest with a load/cas(x) retry loop */
 {
-        int tmp;
-        gpointer ret;
-
-        BEGIN_SPIN(tmp,_wapi_sparc_lock)
-
-        ret = *dest;
-        *dest = exch;
+       register volatile gpointer *dest asm("g1") = _dest; /* g1/o4/o5 are pinned because the .word values below hard-code them */
+       register gpointer tmp asm("o4");
+       register gpointer ret asm("o5");
 
-        END_SPIN(_wapi_sparc_lock)
-
-        return(ret);
+       __asm__ __volatile__(
+#ifdef SPARCV9
+               "1:     ldx     [%%g1], %%o4\n\t" /* SPARCV9 build: ldx paired with casx below */
+#else
+               "1:     ld      [%%g1], %%o4\n\t" /* otherwise: ld paired with cas below */
+#endif
+               "       mov     %3, %%o5\n\t" /* o5 = exch, the value to store */
+#ifdef SPARCV9
+               /*      casx    [%%g1], %%o4, %%o5 */
+               "       .word   0xdbf0500c\n\t"
+#else
+               /*      cas     [%%g1], %%o4, %%o5 */
+               "       .word   0xdbe0500c\n\t"
+#endif
+               "       cmp     %%o4, %%o5\n\t" /* compare what was loaded with what the cas left in o5 */
+               "       bne     1b\n\t" /* mismatch: retry. NOTE(review): after casx this plain bne may test only the 32-bit condition codes -- confirm whether %xcc is needed */
+               "        nop" /* branch delay slot left empty, so o5 is returned untouched */
+               : "=&r" (tmp), "=&r" (ret)
+               : "r" (dest), "r" (exch)
+               : "memory", "cc");
+
+        return ret; /* o5 as left by the cas/casx (per SPARC V9 semantics, the previous *dest) */
 }
 
 G_GNUC_UNUSED
-static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
+static inline gint32 InterlockedExchangeAdd(volatile gint32 *_dest, gint32 add)
 {
-        int tmp;
-        gint32 ret;
-
-        BEGIN_SPIN(tmp,_wapi_sparc_lock)
-
-        ret = *dest;
-        *dest += add;
-
-        END_SPIN(_wapi_sparc_lock)
-
-        return(ret);
+       register volatile gint32 *dest asm("g1") = _dest;
+       register gint32 tmp asm("o4");
+       register gint32 ret asm("o5");
+
+       __asm__ __volatile__(
+               "1:     ld      [%%g1], %%o4\n\t"
+               "       add     %%o4, %3, %%o5\n\t"
+               /*      cas     [%%g1], %%o4, %%o5 */
+               "       .word   0xdbe0500c\n\t"
+               "       cmp     %%o4, %%o5\n\t"
+               "       bne     1b\n\t"
+               "        add    %%o5, %3, %%o5"
+               : "=&r" (tmp), "=&r" (ret)
+               : "r" (dest), "r" (add)
+               : "memory", "cc");
+
+        return ret;
 }
 
 #elif __s390__
@@ -286,13 +349,9 @@ InterlockedCompareExchange(volatile gint32 *dest,
        gint32 old;
 
        __asm__ __volatile__ ("\tLA\t1,%0\n"
-                             "0:\tL\t%1,%0\n"
-                             "\tCR\t%1,%3\n"
-                             "\tJNE\t1f\n"
+                             "\tLR\t%1,%3\n"
                              "\tCS\t%1,%2,0(1)\n"
-                             "\tJNZ\t0b\n"
-                             "1:\n"
-                             : "+m" (*dest), "=r" (old)
+                             : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");     
        return(old);
@@ -306,13 +365,9 @@ InterlockedCompareExchangePointer(volatile gpointer *dest,
        gpointer old;
 
        __asm__ __volatile__ ("\tLA\t1,%0\n"
-                             "0:\tL\t%1,%0\n"
-                             "\tCR\t%1,%3\n"
-                             "\tJNE\t1f\n"
+                             "\tLR\t%1,%3\n"
                              "\tCS\t%1,%2,0(1)\n"
-                             "\tJNZ\t0b\n"
-                             "1:\n"
-                             : "+m" (*dest), "=r" (old)
+                             : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");     
        return(old);
@@ -326,13 +381,9 @@ InterlockedCompareExchangePointer(volatile gpointer *dest,
        gpointer old;
 
        __asm__ __volatile__ ("\tLA\t1,%0\n"
-                             "0:\tLG\t%1,%0\n"
-                             "\tCGR\t%1,%3\n"
-                             "\tJNE\t1f\n"
+                             "\tLGR\t%1,%3\n"
                              "\tCSG\t%1,%2,0(1)\n"
-                             "\tJNZ\t0b\n"
-                             "1:\n"
-                             : "+m" (*dest), "=r" (old)
+                             : "+m" (*dest), "=&r" (old)
                              : "r" (exch), "r" (comp)
                              : "1", "cc");
 
@@ -340,7 +391,7 @@ InterlockedCompareExchangePointer(volatile gpointer *dest,
 }
 # endif
 
-
+# ifndef __s390x__
 static inline gint32 
 InterlockedIncrement(volatile gint32 *val)
 {
@@ -358,7 +409,27 @@ InterlockedIncrement(volatile gint32 *val)
 
        return(tmp);
 }
+# else
+static inline gint32 
+InterlockedIncrement(volatile gint32 *val)
+{      /* s390x: atomically add 1 to *val with a CS retry loop; returns the incremented value */
+       gint32 tmp;
+       
+       __asm__ __volatile__ ("\tLA\t2,%1\n" /* r2 = address of *val */
+                             "0:\tLGF\t%0,%1\n" /* tmp = current *val */
+                             "\tLGFR\t1,%0\n"
+                             "\tAGHI\t1,1\n" /* r1 = tmp + 1 */
+                             "\tCS\t%0,1,0(2)\n" /* store r1 only if *val still equals tmp */
+                             "\tJNZ\t0b\n" /* swap did not happen: reload and retry */
+                             "\tLGFR\t%0,1" /* result is the value that was stored */
+                             : "=&r" (tmp), "+m" (*val) /* early-clobber: tmp is written while %1 is still needed by the retry path */
+                             : : "1", "2", "cc");
+
+       return(tmp);
+}
+# endif
 
+# ifndef __s390x__
 static inline gint32 
 InterlockedDecrement(volatile gint32 *val)
 {
@@ -376,7 +447,25 @@ InterlockedDecrement(volatile gint32 *val)
 
        return(tmp);
 }
+# else
+static inline gint32 
+InterlockedDecrement(volatile gint32 *val)
+{      /* s390x: atomically subtract 1 from *val with a CS retry loop; returns the decremented value */
+       gint32 tmp;
+       
+       __asm__ __volatile__ ("\tLA\t2,%1\n" /* r2 = address of *val */
+                             "0:\tLGF\t%0,%1\n" /* tmp = current *val */
+                             "\tLGFR\t1,%0\n"
+                             "\tAGHI\t1,-1\n" /* r1 = tmp - 1 */
+                             "\tCS\t%0,1,0(2)\n" /* store r1 only if *val still equals tmp */
+                             "\tJNZ\t0b\n" /* swap did not happen: reload and retry */
+                             "\tLGFR\t%0,1" /* result is the value that was stored */
+                             : "=&r" (tmp), "+m" (*val) /* early-clobber: tmp is written while %1 is still needed by the retry path */
+                             : : "1", "2", "cc");
 
+       return(tmp);
+}
+# endif
 
 static inline gint32 
 InterlockedExchange(volatile gint32 *val, gint32 new_val)
@@ -387,7 +476,7 @@ InterlockedExchange(volatile gint32 *val, gint32 new_val)
                              "0:\tL\t%1,%0\n"
                              "\tCS\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
-                             : "+m" (*val), "=r" (ret)
+                             : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");
 
@@ -404,7 +493,7 @@ InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
                              "0:\tL\t%1,%0\n"
                              "\tCS\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
-                             : "+m" (*val), "=r" (ret)
+                             : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");
 
@@ -420,7 +509,7 @@ InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
                              "0:\tLG\t%1,%0\n"
                              "\tCSG\t%1,%2,0(1)\n"
                              "\tJNZ\t0b"
-                             : "+m" (*val), "=r" (ret)
+                             : "+m" (*val), "=&r" (ret)
                              : "r" (new_val)
                              : "1", "cc");
 
@@ -428,6 +517,7 @@ InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
 }
 # endif
 
+# ifndef __s390x__
 static inline gint32 
 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
 {
@@ -439,16 +529,116 @@ InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
                              "\tAR\t1,%2\n"
                              "\tCS\t%0,1,0(2)\n"
                              "\tJNZ\t0b"
-                             : "=r" (ret), "+m" (*val)
+                             : "=&r" (ret), "+m" (*val)
                              : "r" (add) 
                              : "1", "2", "cc");
        
        return(ret);
 }
+# else
+static inline gint32 
+InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
+{      /* s390x: atomically *val += add with a CS retry loop; ret is not rewritten after the CS, so the loaded (pre-add) value is returned */
+       gint32 ret;
+
+       __asm__ __volatile__ ("\tLA\t2,%1\n" /* r2 = address of *val */
+                             "0:\tLGF\t%0,%1\n" /* ret = current *val */
+                             "\tLGFR\t1,%0\n"
+                             "\tAGR\t1,%2\n" /* r1 = ret + add */
+                             "\tCS\t%0,1,0(2)\n" /* store r1 only if *val still equals ret */
+                             "\tJNZ\t0b" /* swap did not happen: reload and retry */
+                             : "=&r" (ret), "+m" (*val)
+                             : "r" (add) 
+                             : "1", "2", "cc");
+       
+       return(ret);
+}
+# endif
 
-#elif defined(__ppc__) || defined (__powerpc__)
+#elif defined(__mono_ppc__)
 #define WAPI_ATOMIC_ASM
 
+#ifdef G_COMPILER_CODEWARRIOR
+static inline gint32 InterlockedIncrement(volatile register gint32 *val)
+{      /* CodeWarrior/PPC: atomically add 1 to *val with a lwarx/stwcx. retry loop; returns the value that was stored */
+       /* result and tmp are named directly as operands by the asm block below; each is declared exactly once */
+       register gint32 result = 0;
+       register gint32 tmp;
+
+       asm
+       {
+               @1:
+                       lwarx   tmp, 0, val
+                       addi    result, tmp, 1
+                       stwcx.  result, 0, val
+                       bne-    @1
+       }
+       return result;
+}
+
+static inline gint32 InterlockedDecrement(register volatile gint32 *val)
+{      /* CodeWarrior/PPC: atomically add -1 to *val with a lwarx/stwcx. retry loop; returns the value that was stored */
+       register gint32 result = 0; /* named directly as an operand by the asm block below */
+       register gint32 tmp; /* receives the value loaded by lwarx */
+
+       asm
+       {
+               @1:
+                       lwarx   tmp, 0, val
+                       addi    result, tmp, -1
+                       stwcx.  result, 0, val
+                       bne-    @1
+       }
+
+       return result;
+}
+#define InterlockedCompareExchangePointer(dest,exch,comp) (void*)InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp))
+
+static inline gint32 InterlockedCompareExchange(volatile register gint32 *dest, register gint32 exch, register gint32 comp)
+{      /* CodeWarrior/PPC: store exch into *dest only when the loaded value equals comp; the loaded value is returned either way */
+       register gint32 tmp = 0; /* receives the value loaded by lwarx */
+
+       asm
+       {
+               @1:
+                       lwarx   tmp, 0, dest
+                       cmpw    tmp, comp
+                       bne-    @2
+                       stwcx.  exch, 0, dest
+                       bne-    @1
+               @2:
+       }
+
+       return tmp;
+}
+static inline gint32 InterlockedExchange(register volatile gint32 *dest, register gint32 exch)
+{      /* CodeWarrior/PPC: store exch into *dest with a lwarx/stwcx. retry loop; returns the value loaded before the store */
+       register gint32 tmp = 0; /* receives the value loaded by lwarx */
+
+       asm
+       {
+               @1:
+                       lwarx   tmp, 0, dest
+                       stwcx.  exch, 0, dest
+                       bne-    @1
+       }
+
+       return tmp;
+}
+#define InterlockedExchangePointer(dest,exch) (void*)InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch))
+#else
+
+#if defined(__mono_ppc64__) && !defined(__mono_ilp32__)
+#define LDREGX "ldarx"
+#define STREGCXD "stdcx."
+#define CMPREG "cmpd"
+#else
+#define LDREGX "lwarx"
+#define STREGCXD "stwcx."
+#define CMPREG "cmpw"
+#endif
+
 static inline gint32 InterlockedIncrement(volatile gint32 *val)
 {
        gint32 result = 0, tmp;
@@ -475,7 +665,22 @@ static inline gint32 InterlockedDecrement(volatile gint32 *val)
        return result - 1;
 }
 
-#define InterlockedCompareExchangePointer(dest,exch,comp) InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp))
+static inline gpointer InterlockedCompareExchangePointer (volatile gpointer *dest,
+                                               gpointer exch, gpointer comp)
+{      /* PPC: pointer-sized compare-and-swap; LDREGX/STREGCXD/CMPREG expand to the 64-bit or 32-bit mnemonics chosen by the #if just above */
+       gpointer tmp = NULL;
+
+       __asm__ __volatile__ ("\n1:\n\t"
+                            LDREGX " %0, 0, %1\n\t" /* tmp = *dest (load-reserve form) */
+                            CMPREG " %0, %2\n\t" 
+                            "bne-    2f\n\t" /* loaded value differs from comp: leave without storing */
+                            STREGCXD " %3, 0, %1\n\t" /* conditional store of exch */
+                            "bne-    1b\n" /* conditional store failed: retry from the load */
+                            "2:"
+                            : "=&r" (tmp)
+                            : "b" (dest), "r" (comp), "r" (exch): "cc", "memory");
+       return(tmp); /* the value that was loaded from *dest */
+}
 
 static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
                                                gint32 exch, gint32 comp) {
@@ -504,7 +709,18 @@ static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
                              : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
        return(tmp);
 }
-#define InterlockedExchangePointer(dest,exch) InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch))
+
+static inline gpointer InterlockedExchangePointer (volatile gpointer *dest, gpointer exch)
+{      /* PPC: pointer-sized exchange; LDREGX/STREGCXD expand to the 64-bit or 32-bit mnemonics chosen by the #if earlier in this section */
+       gpointer tmp = NULL;
+
+       __asm__ __volatile__ ("\n1:\n\t"
+                             LDREGX " %0, 0, %2\n\t" /* tmp = *dest (load-reserve form) */
+                             STREGCXD " %3, 0, %2\n\t" /* conditional store of exch */
+                             "bne    1b" /* conditional store failed: retry from the load */
+                             : "=r" (tmp) : "0" (tmp), "b" (dest), "r" (exch): "cc", "memory");
+       return(tmp); /* the value that was loaded from *dest */
+}
 
 static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
 {
@@ -519,16 +735,39 @@ static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
         return(result);
 }
 
+#undef LDREGX
+#undef STREGCXD
+#undef CMPREG
+
+#endif /* !G_COMPILER_CODEWARRIOR */
+
 #elif defined(__arm__)
 #define WAPI_ATOMIC_ASM
 
 static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
 {
-       int a, b;
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+       gint32 ret, tmp;
+       __asm__ __volatile__ (  "1:\n"
+                               "mov    %0, #0\n"
+                               "ldrex %1, [%2]\n"
+                               "teq    %1, %3\n"
+                               "it eq\n"
+                               "strexeq %0, %4, [%2]\n"
+                               "teq %0, #0\n"
+                               "bne 1b\n"
+                               : "=&r" (tmp), "=&r" (ret)
+                               : "r" (dest), "r" (comp), "r" (exch)
+                               : "memory", "cc");
+
+       return ret;
+#else
+       gint32 a, b;
 
        __asm__ __volatile__ (    "0:\n\t"
                                  "ldr %1, [%2]\n\t"
                                  "cmp %1, %4\n\t"
+                                 "mov %0, %1\n\t"
                                  "bne 1f\n\t"
                                  "swp %0, %3, [%2]\n\t"
                                  "cmp %0, %1\n\t"
@@ -540,15 +779,33 @@ static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 ex
                                  : "cc", "memory");
 
        return a;
+#endif
 }
 
 static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
 {
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+       gpointer ret, tmp;
+       __asm__ __volatile__ (  "1:\n"
+                               "mov    %0, #0\n"
+                               "ldrex %1, [%2]\n"
+                               "teq    %1, %3\n"
+                               "it eq\n"
+                               "strexeq %0, %4, [%2]\n"
+                               "teq %0, #0\n"
+                               "bne 1b\n"
+                               : "=&r" (tmp), "=&r" (ret)
+                               : "r" (dest), "r" (comp), "r" (exch)
+                               : "memory", "cc");
+
+       return ret;
+#else
        gpointer a, b;
 
        __asm__ __volatile__ (    "0:\n\t"
                                  "ldr %1, [%2]\n\t"
                                  "cmp %1, %4\n\t"
+                                 "mov %0, %1\n\t"
                                  "bne 1f\n\t"
                                  "swpeq %0, %3, [%2]\n\t"
                                  "cmp %0, %1\n\t"
@@ -560,11 +817,26 @@ static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest
                                  : "cc", "memory");
 
        return a;
+#endif
 }
 
 static inline gint32 InterlockedIncrement(volatile gint32 *dest)
 {
-       int a, b, c;
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+       gint32 ret, flag;
+       __asm__ __volatile__ (  "1:\n"
+                               "ldrex %0, [%2]\n"
+                               "add %0, %0, %3\n"
+                               "strex %1, %0, [%2]\n"
+                               "teq %1, #0\n"
+                               "bne 1b\n"
+                               : "=&r" (ret), "=&r" (flag)
+                               : "r" (dest), "r" (1)
+                               : "memory", "cc");
+
+       return ret;
+#else
+       gint32 a, b, c;
 
        __asm__ __volatile__ (  "0:\n\t"
                                "ldr %0, [%3]\n\t"
@@ -578,11 +850,26 @@ static inline gint32 InterlockedIncrement(volatile gint32 *dest)
                                : "cc", "memory");
 
        return b;
+#endif
 }
 
 static inline gint32 InterlockedDecrement(volatile gint32 *dest)
 {
-       int a, b, c;
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+       gint32 ret, flag;
+       __asm__ __volatile__ (  "1:\n"
+                               "ldrex %0, [%2]\n"
+                               "sub %0, %0, %3\n"
+                               "strex %1, %0, [%2]\n"
+                               "teq %1, #0\n"
+                               "bne 1b\n"
+                               : "=&r" (ret), "=&r" (flag)
+                               : "r" (dest), "r" (1)
+                               : "memory", "cc");
+
+       return ret;
+#else
+       gint32 a, b, c;
 
        __asm__ __volatile__ (  "0:\n\t"
                                "ldr %0, [%3]\n\t"
@@ -596,21 +883,49 @@ static inline gint32 InterlockedDecrement(volatile gint32 *dest)
                                : "cc", "memory");
 
        return b;
+#endif
 }
 
 static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
 {
-       int a;
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+       gint32 ret, flag; /* ret: value loaded from *dest; flag: status written by strex */
+       __asm__ __volatile__ (
+                             "1:\n"
+                             "ldrex %0, [%3]\n" /* ret = *dest (exclusive load) */
+                             "strex %1, %2, [%3]\n" /* try to store exch; flag receives the status */
+                             "teq %1, #0\n"
+                             "bne 1b\n" /* non-zero status: retry from the load */
+                             : "=&r" (ret), "=&r" (flag)
+                             : "r" (exch), "r" (dest)
+                             : "memory", "cc");
+       return ret; /* the value loaded before the store */
+#else
+       gint32 a; /* receives the result of the single swp instruction below */
 
        __asm__ __volatile__ (  "swp %0, %2, [%1]"
                                : "=&r" (a)
                                : "r" (dest), "r" (exch));
 
        return a;
+#endif
 }
 
 static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch)
 {
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+       gpointer ret, flag;
+       __asm__ __volatile__ (
+                             "1:\n"
+                             "ldrex %0, [%3]\n"
+                             "strex %1, %2, [%3]\n"
+                             "teq %1, #0\n"
+                             "bne 1b\n"
+                             : "=&r" (ret), "=&r" (flag)
+                             : "r" (exch), "r" (dest)
+                             : "memory", "cc");
+       return ret;
+#else
        gpointer a;
 
        __asm__ __volatile__ (  "swp %0, %2, [%1]"
@@ -618,10 +933,25 @@ static inline gpointer InterlockedExchangePointer(volatile gpointer *dest, gpoin
                                : "r" (dest), "r" (exch));
 
        return a;
+#endif
 }
 
 static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
 {
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+       gint32 ret, tmp, flag;
+       __asm__ __volatile__ (  "1:\n"
+                               "ldrex %0, [%3]\n"
+                               "add %1, %0, %4\n"
+                               "strex %2, %1, [%3]\n"
+                               "teq %2, #0\n"
+                               "bne 1b\n"
+                               : "=&r" (ret), "=&r" (tmp), "=&r" (flag)
+                               : "r" (dest), "r" (add)
+                               : "memory", "cc");
+
+       return ret;
+#else
        int a, b, c;
 
        __asm__ __volatile__ (  "0:\n\t"
@@ -636,6 +966,7 @@ static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
                                : "cc", "memory");
 
        return a;
+#endif
 }
 
 #elif defined(__ia64__)
@@ -649,13 +980,16 @@ static inline gint32 InterlockedCompareExchange(gint32 volatile *dest,
                                                gint32 exch, gint32 comp)
 {
        gint32 old;
+       guint64 real_comp;
 
 #ifdef __INTEL_COMPILER
        old = _InterlockedCompareExchange (dest, exch, comp);
 #else
+       /* cmpxchg4 zero extends the value read from memory */
+       real_comp = (guint64)(guint32)comp;
        asm volatile ("mov ar.ccv = %2 ;;\n\t"
                                  "cmpxchg4.acq %0 = [%1], %3, ar.ccv\n\t"
-                                 : "=r" (old) : "r" (dest), "r" (comp), "r" (exch));
+                                 : "=r" (old) : "r" (dest), "r" (real_comp), "r" (exch));
 #endif
 
        return(old);
@@ -752,6 +1086,199 @@ static inline gint32 InterlockedExchangeAdd(gint32 volatile *val, gint32 add)
 #endif
 }
 
+#elif defined(__alpha__)
+#define WAPI_ATOMIC_ASM
+/*
+ * Alpha: 32-bit compare-and-swap built from a load-locked /
+ * store-conditional retry loop.  When the value read from *dest equals
+ * comp, exch is stored; otherwise the value just read is stored back.
+ * Returns the value that was read from *dest.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * mb instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
+                                               gint32 exch, gint32 comp)
+{
+       gint32 old, temp, temp2;
+       long compq = comp, exchq = exch;        /* long copies used as asm inputs %5 and %4 */
+
+       __asm__ __volatile__ (
+               "1:     ldl_l %2, %0\n"         /* temp = *dest (load-locked) */
+               "       mov %2, %1\n"           /* old = temp */
+               "       cmpeq %2, %5, %3\n"     /* temp2 = (temp == compq) */
+               "       cmovne %3, %4, %2\n"    /* if temp2 != 0: temp = exchq */
+               "       stl_c %2, %0\n"         /* conditional store of temp; temp receives the status */
+               "       beq %2, 1b\n"           /* status 0: store failed, retry */
+               : "=m" (*dest), "=&r" (old), "=&r" (temp), "=&r" (temp2)
+               : "r" (exchq), "r" (compq), "m" (*dest));
+       return(old);
+}
+/*
+ * Alpha: pointer compare-and-swap.  Same retry loop as the 32-bit variant
+ * but using the quadword forms ldq_l / stq_c.  When the value read from
+ * *dest equals comp, exch is stored; otherwise the value just read is
+ * stored back.  Returns the value that was read from *dest.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * mb instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
+{
+       gpointer old, temp, temp2;
+
+       __asm__ __volatile__ (
+               "1:     ldq_l %2, %0\n"         /* temp = *dest (load-locked) */
+               "       mov %2, %1\n"           /* old = temp */
+               "       cmpeq %2, %5, %3\n"     /* temp2 = (temp == comp) */
+               "       cmovne %3, %4, %2\n"    /* if temp2 != 0: temp = exch */
+               "       stq_c %2, %0\n"         /* conditional store of temp; temp receives the status */
+               "       beq %2, 1b\n"           /* status 0: store failed, retry */
+               : "=m" (*dest), "=&r" (old), "=&r" (temp), "=&r" (temp2)
+               : "r" (exch), "r" (comp), "m" (*dest));
+       return(old);
+}
+
+/*
+ * Alpha: atomically add 1 to *val with a load-locked / store-conditional
+ * retry loop.  Returns the incremented value.
+ *
+ * cur is an earlyclobber output ("=&r"): the mov writes it before stl_c
+ * (and, on a retry, ldl_l) use the address of *val, so it must not be
+ * allocated to the register that holds that address.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * mb instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedIncrement(volatile gint32 *val)
+{
+       gint32 temp, cur;
+
+       __asm__ __volatile__ (
+               "1:     ldl_l %0, %1\n"         /* temp = *val (load-locked) */
+               "       addl %0, %3, %0\n"      /* temp = temp + 1 */
+               "       mov %0, %2\n"           /* cur = new value, saved before stl_c overwrites temp */
+               "       stl_c %0, %1\n"         /* conditional store of temp; temp receives the status */
+               "       beq %0, 1b\n"           /* status 0: store failed, retry */
+               : "=&r" (temp), "=m" (*val), "=&r" (cur)
+               : "Ir" (1), "m" (*val));
+       return(cur);
+}
+
+/*
+ * Alpha: atomically subtract 1 from *val with a load-locked /
+ * store-conditional retry loop.  Returns the decremented value.
+ *
+ * cur is an earlyclobber output ("=&r"): the mov writes it before stl_c
+ * (and, on a retry, ldl_l) use the address of *val, so it must not be
+ * allocated to the register that holds that address.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * mb instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedDecrement(volatile gint32 *val)
+{
+       gint32 temp, cur;
+
+       __asm__ __volatile__ (
+               "1:     ldl_l %0, %1\n"         /* temp = *val (load-locked) */
+               "       subl %0, %3, %0\n"      /* temp = temp - 1 */
+               "       mov %0, %2\n"           /* cur = new value, saved before stl_c overwrites temp */
+               "       stl_c %0, %1\n"         /* conditional store of temp; temp receives the status */
+               "       beq %0, 1b\n"           /* status 0: store failed, retry */
+               : "=&r" (temp), "=m" (*val), "=&r" (cur)
+               : "Ir" (1), "m" (*val));
+       return(cur);
+}
+/*
+ * Alpha: atomically replace the 32-bit value at *val with new_val using a
+ * load-locked / store-conditional retry loop.  Returns the value read
+ * from *val before the store.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * mb instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
+{
+       gint32 ret, temp;
+
+       __asm__ __volatile__ (
+               "1:     ldl_l %1, %0\n"         /* ret = *val (load-locked) */
+               "       mov %3, %2\n"           /* temp = new_val */
+               "       stl_c %2, %0\n"         /* conditional store of temp; temp receives the status */
+               "       beq %2, 1b\n"           /* status 0: store failed, retry */
+               : "=m" (*val), "=&r" (ret), "=&r" (temp)
+               : "r" (new_val), "m" (*val));
+       return(ret);
+}
+/*
+ * Alpha: atomically replace the pointer at *val with new_val using the
+ * quadword load-locked / store-conditional forms (ldq_l / stq_c).
+ * Returns the pointer read from *val before the store.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * mb instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gpointer InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
+{
+       gpointer ret, temp;
+
+       __asm__ __volatile__ (
+               "1:     ldq_l %1, %0\n"         /* ret = *val (load-locked) */
+               "       mov %3, %2\n"           /* temp = new_val */
+               "       stq_c %2, %0\n"         /* conditional store of temp; temp receives the status */
+               "       beq %2, 1b\n"           /* status 0: store failed, retry */
+               : "=m" (*val), "=&r" (ret), "=&r" (temp)
+               : "r" (new_val), "m" (*val));
+       return(ret);
+}
+/*
+ * Alpha: atomically add `add` to the 32-bit value at *val using a
+ * load-locked / store-conditional retry loop.  Returns the value read
+ * from *val before the addition.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * mb instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
+{
+       gint32 ret, temp;
+       
+       __asm__ __volatile__ (
+               "1:     ldl_l   %2, %0\n"       /* temp = *val (load-locked) */
+               "       mov     %2, %1\n"       /* ret = temp (value before the add) */
+               "       addl    %2, %3, %2\n"   /* temp = temp + add */
+               "       stl_c   %2, %0\n"       /* conditional store of temp; temp receives the status */
+               "       beq     %2, 1b\n"       /* status 0: store failed, retry */
+               : "=m" (*val), "=&r" (ret), "=&r" (temp)
+               : "r" (add), "m" (*val));
+       
+       return(ret);
+}
+
+#elif defined(__mips__)
+#define WAPI_ATOMIC_ASM
+/*
+ * MIPS: atomically add 1 to *val with an ll / sc retry loop and return the
+ * incremented value.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * sync instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedIncrement(volatile gint32 *val)
+{
+       gint32 tmp, result = 0;
+
+       __asm__ __volatile__ ("    .set    mips32\n"            /* assemble the loop at the mips32 ISA level */
+                             "1:  ll      %0, %2\n"            /* result = *val (load-linked) */
+                             "    addu    %1, %0, 1\n"         /* tmp = result + 1 */
+                              "    sc      %1, %2\n"           /* conditional store of tmp; tmp receives the status */
+                             "    beqz    %1, 1b\n"            /* status 0: store failed, retry */
+                             "    .set    mips0\n"             /* back to the assembler's original ISA level */
+                             : "=&r" (result), "=&r" (tmp), "=m" (*val)
+                             : "m" (*val));
+       return result + 1;      /* result holds the value loaded before the add */
+}
+/*
+ * MIPS: atomically subtract 1 from *val with an ll / sc retry loop and
+ * return the decremented value.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * sync instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedDecrement(volatile gint32 *val)
+{
+       gint32 tmp, result = 0;
+
+       __asm__ __volatile__ ("    .set    mips32\n"            /* assemble the loop at the mips32 ISA level */
+                             "1:  ll      %0, %2\n"            /* result = *val (load-linked) */
+                             "    subu    %1, %0, 1\n"         /* tmp = result - 1 */
+                              "    sc      %1, %2\n"           /* conditional store of tmp; tmp receives the status */
+                             "    beqz    %1, 1b\n"            /* status 0: store failed, retry */
+                             "    .set    mips0\n"             /* back to the assembler's original ISA level */
+                             : "=&r" (result), "=&r" (tmp), "=m" (*val)
+                             : "m" (*val));
+       return result - 1;      /* result holds the value loaded before the subtract */
+}
+
+/*
+ * MIPS: pointer compare-and-swap expressed through the 32-bit
+ * InterlockedCompareExchange.  The operands are narrowed to gint32 and the
+ * result is converted back with GINT_TO_POINTER so the expression has
+ * gpointer type, like the function of the same name on other architectures.
+ * NOTE(review): only correct where a pointer fits in 32 bits -- confirm
+ * that 64-bit MIPS ABIs are not expected to take this path.
+ */
+#define InterlockedCompareExchangePointer(dest,exch,comp) (GINT_TO_POINTER(InterlockedCompareExchange((volatile gint32 *)(dest), (gint32)(exch), (gint32)(comp))))
+/*
+ * MIPS: 32-bit compare-and-swap built from an ll / sc retry loop.  When
+ * the value read from *dest differs from comp the store is skipped;
+ * otherwise exch is stored.  Returns the value that was read from *dest.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * sync instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
+                                               gint32 exch, gint32 comp) {
+       gint32 old, tmp;
+
+       __asm__ __volatile__ ("    .set    mips32\n"            /* assemble the loop at the mips32 ISA level */
+                             "1:  ll      %0, %2\n"            /* old = *dest (load-linked) */
+                             "    bne     %0, %5, 2f\n"        /* old != comp: leave without storing */
+                             "    move    %1, %4\n"            /* tmp = exch */
+                              "    sc      %1, %2\n"           /* conditional store of tmp; tmp receives the status */
+                             "    beqz    %1, 1b\n"            /* status 0: store failed, retry */
+                             "2:  .set    mips0\n"             /* exit label; back to the original ISA level */
+                             : "=&r" (old), "=&r" (tmp), "=m" (*dest)
+                             : "m" (*dest), "r" (exch), "r" (comp));
+       return(old);
+}
+/*
+ * MIPS: atomically replace the 32-bit value at *dest with exch using an
+ * ll / sc retry loop.  Returns the value read from *dest before the store.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * sync instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch)
+{
+       gint32 result, tmp;
+
+       __asm__ __volatile__ ("    .set    mips32\n"            /* assemble the loop at the mips32 ISA level */
+                             "1:  ll      %0, %2\n"            /* result = *dest (load-linked) */
+                             "    move    %1, %4\n"            /* tmp = exch */
+                              "    sc      %1, %2\n"           /* conditional store of tmp; tmp receives the status */
+                             "    beqz    %1, 1b\n"            /* status 0: store failed, retry */
+                             "    .set    mips0\n"             /* back to the assembler's original ISA level */
+                             : "=&r" (result), "=&r" (tmp), "=m" (*dest)
+                             : "m" (*dest), "r" (exch));
+       return(result);
+}
+/*
+ * MIPS: pointer exchange expressed through the 32-bit InterlockedExchange.
+ * The new value is narrowed to gint32 and the result is converted back with
+ * GINT_TO_POINTER so the expression has gpointer type, matching the
+ * InterlockedExchangePointer prototype used on other architectures.
+ * NOTE(review): only correct where a pointer fits in 32 bits -- confirm
+ * that 64-bit MIPS ABIs are not expected to take this path.
+ */
+#define InterlockedExchangePointer(dest,exch) (GINT_TO_POINTER(InterlockedExchange((volatile gint32 *)(dest), (gint32)(exch))))
+/*
+ * MIPS: atomically add `add` to the 32-bit value at *dest using an ll / sc
+ * retry loop.  Returns the value read from *dest before the addition.
+ * NOTE(review): the asm declares no clobbers (no "memory") and contains no
+ * sync instruction; confirm callers do not rely on it as a barrier.
+ */
+static inline gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add)
+{
+        gint32 result, tmp;
+
+       __asm__ __volatile__ ("    .set    mips32\n"            /* assemble the loop at the mips32 ISA level */
+                             "1:  ll      %0, %2\n"            /* result = *dest (load-linked) */
+                             "    addu    %1, %0, %4\n"        /* tmp = result + add */
+                              "    sc      %1, %2\n"           /* conditional store of tmp; tmp receives the status */
+                             "    beqz    %1, 1b\n"            /* status 0: store failed, retry */
+                             "    .set    mips0\n"             /* back to the assembler's original ISA level */
+                             : "=&r" (result), "=&r" (tmp), "=m" (*dest)
+                             : "m" (*dest), "r" (add));
+        return result;
+}
+
 #else
 
 extern gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp);
@@ -762,7 +1289,7 @@ extern gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch);
 extern gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch);
 extern gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add);
 
-#if defined(__hpux) && !defined(__GNUC__)
+#if defined(__hppa__)
 #define WAPI_ATOMIC_ASM
 #endif