X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Futils%2Fatomic.h;h=dca6170a1a1e72cfe2f03cc77a5ce9a2895e1d6d;hb=28ec46d6e215ee8710301cddfd9186ab4562e5be;hp=c8c70007bc751d0a55d612c230a28450c79e968f;hpb=b57fcaa28b7108c701b378a27f772917c225d033;p=mono.git

diff --git a/mono/utils/atomic.h b/mono/utils/atomic.h
index c8c70007bc7..dca6170a1a1 100755
--- a/mono/utils/atomic.h
+++ b/mono/utils/atomic.h
@@ -6,6 +6,7 @@
  *
  * (C) 2002 Ximian, Inc.
  * Copyright 2012 Xamarin Inc
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
  */
 
 #ifndef _WAPI_ATOMIC_H_
@@ -13,10 +14,14 @@
 
 #include "config.h"
 #include <glib.h>
+#include <mono/utils/mono-membar.h>
 
-#ifdef ENABLE_EXTENSION_MODULE
-#include "../../../mono-extensions/mono/utils/atomic.h"
-#endif
+/*
+The current Nexus 7 arm-v7a fails with:
+F/MonoDroid( 1568): shared runtime initialization error: Cannot load library: reloc_library[1285]:    37 cannot locate '__sync_val_compare_and_swap_8'
+
+Apple targets have historically been problematic; Xcode 4.6 would miscompile the intrinsic.
+*/
 
 /* On Windows, we always use the functions provided by the Windows API. */
 #if defined(__WIN32__) || defined(_WIN32)
@@ -25,7 +30,6 @@
 #define WIN32_LEAN_AND_MEAN
 #endif
 #include <windows.h>
-#include <mono/utils/mono-membar.h>
 
 /* mingw is missing InterlockedCompareExchange64 () from winbase.h */
 #if HAVE_DECL_INTERLOCKEDCOMPAREEXCHANGE64==0
@@ -108,13 +112,19 @@ static inline gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add)
 #endif
 #endif
 
+#ifdef HOST_WIN32
+#define TO_INTERLOCKED_ARGP(ptr) ((volatile LONG*)(ptr))
+#else
+#define TO_INTERLOCKED_ARGP(ptr) (ptr)
+#endif
+
 /* And now for some dirty hacks... The Windows API doesn't
  * provide any useful primitives for this (other than getting
  * into architecture-specific madness), so use CAS. */
 
 static inline gint32 InterlockedRead(volatile gint32 *src)
 {
-	return InterlockedCompareExchange (src, 0, 0);
+	return InterlockedCompareExchange (TO_INTERLOCKED_ARGP (src), 0, 0);
 }
 
 static inline gint64 InterlockedRead64(volatile gint64 *src)
@@ -129,7 +139,7 @@ static inline gpointer InterlockedReadPointer(volatile gpointer *src)
 
 static inline void InterlockedWrite(volatile gint32 *dst, gint32 val)
 {
-	InterlockedExchange (dst, val);
+	InterlockedExchange (TO_INTERLOCKED_ARGP (dst), val);
 }
 
 static inline void InterlockedWrite64(volatile gint64 *dst, gint64 val)
@@ -167,33 +177,66 @@ static inline void InterlockedWrite16(volatile gint16 *dst, gint16 val)
 	mono_memory_barrier ();
 }
 
-/* Prefer GCC atomic ops if the target supports it (see configure.in). */
+/* Prefer GCC atomic ops if the target supports it (see configure.ac). */
 #elif defined(USE_GCC_ATOMIC_OPS)
 
+/*
+ * As of this comment (August 2016), all current Clang versions get atomic
+ * intrinsics on ARM64 wrong. All GCC versions prior to 5.3.0 do, too. The bug
+ * is the same: The compiler developers thought that the acq + rel barriers
+ * that ARM64 load/store instructions can impose are sufficient to provide
+ * sequential consistency semantics. This is not the case:
+ *
+ *     http://lists.infradead.org/pipermail/linux-arm-kernel/2014-February/229588.html
+ *
+ * We work around this bug by inserting full barriers around each atomic
+ * intrinsic if we detect that we're built with a buggy compiler.
+ */
+
+#if defined (HOST_ARM64) && (defined (__clang__) || MONO_GNUC_VERSION < 50300)
+#define WRAP_ATOMIC_INTRINSIC(INTRIN) \
+	({ \
+		mono_memory_barrier (); \
+		__typeof__ (INTRIN) atomic_ret__ = (INTRIN); \
+		mono_memory_barrier (); \
+		atomic_ret__; \
+	})
+
+#define gcc_sync_val_compare_and_swap(a, b, c) WRAP_ATOMIC_INTRINSIC (__sync_val_compare_and_swap (a, b, c))
+#define gcc_sync_add_and_fetch(a, b) WRAP_ATOMIC_INTRINSIC (__sync_add_and_fetch (a, b))
+#define gcc_sync_sub_and_fetch(a, b) WRAP_ATOMIC_INTRINSIC (__sync_sub_and_fetch (a, b))
+#define gcc_sync_fetch_and_add(a, b) WRAP_ATOMIC_INTRINSIC (__sync_fetch_and_add (a, b))
+#else
+#define gcc_sync_val_compare_and_swap(a, b, c) __sync_val_compare_and_swap (a, b, c)
+#define gcc_sync_add_and_fetch(a, b) __sync_add_and_fetch (a, b)
+#define gcc_sync_sub_and_fetch(a, b) __sync_sub_and_fetch (a, b)
+#define gcc_sync_fetch_and_add(a, b) __sync_fetch_and_add (a, b)
+#endif
+
 static inline gint32 InterlockedCompareExchange(volatile gint32 *dest,
 						gint32 exch, gint32 comp)
 {
-	return __sync_val_compare_and_swap (dest, comp, exch);
+	return gcc_sync_val_compare_and_swap (dest, comp, exch);
 }
 
 static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
 {
-	return __sync_val_compare_and_swap (dest, comp, exch);
+	return gcc_sync_val_compare_and_swap (dest, comp, exch);
 }
 
 static inline gint32 InterlockedAdd(volatile gint32 *dest, gint32 add)
 {
-	return __sync_add_and_fetch (dest, add);
+	return gcc_sync_add_and_fetch (dest, add);
 }
 
 static inline gint32 InterlockedIncrement(volatile gint32 *val)
 {
-	return __sync_add_and_fetch (val, 1);
+	return gcc_sync_add_and_fetch (val, 1);
 }
 
 static inline gint32 InterlockedDecrement(volatile gint32 *val)
 {
-	return __sync_sub_and_fetch (val, 1);
+	return gcc_sync_sub_and_fetch (val, 1);
 }
 
 static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
@@ -201,7 +244,7 @@ static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
 	gint32 old_val;
 	do {
 		old_val = *val;
-	} while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
+	} while (gcc_sync_val_compare_and_swap (val, old_val, new_val) != old_val);
 	return old_val;
 }
 
@@ -211,30 +254,30 @@ static inline gpointer InterlockedExchangePointer(volatile gpointer *val,
 	gpointer old_val;
 	do {
 		old_val = *val;
-	} while (__sync_val_compare_and_swap (val, old_val, new_val) != old_val);
+	} while (gcc_sync_val_compare_and_swap (val, old_val, new_val) != old_val);
 	return old_val;
 }
 
 static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
 {
-	return __sync_fetch_and_add (val, add);
+	return gcc_sync_fetch_and_add (val, add);
 }
 
 static inline gint8 InterlockedRead8(volatile gint8 *src)
 {
 	/* Kind of a hack, but GCC doesn't give us anything better, and it's
 	 * certainly not as bad as using a CAS loop. */
-	return __sync_fetch_and_add (src, 0);
+	return gcc_sync_fetch_and_add (src, 0);
 }
 
 static inline gint16 InterlockedRead16(volatile gint16 *src)
 {
-	return __sync_fetch_and_add (src, 0);
+	return gcc_sync_fetch_and_add (src, 0);
 }
 
 static inline gint32 InterlockedRead(volatile gint32 *src)
 {
-	return __sync_fetch_and_add (src, 0);
+	return gcc_sync_fetch_and_add (src, 0);
 }
 
 static inline void InterlockedWrite8(volatile gint8 *dst, gint8 val)
@@ -243,7 +286,7 @@ static inline void InterlockedWrite8(volatile gint8 *dst, gint8 val)
 	gint8 old_val;
 	do {
 		old_val = *dst;
-	} while (__sync_val_compare_and_swap (dst, old_val, val) != old_val);
+	} while (gcc_sync_val_compare_and_swap (dst, old_val, val) != old_val);
 }
 
 static inline void InterlockedWrite16(volatile gint16 *dst, gint16 val)
@@ -251,7 +294,7 @@ static inline void InterlockedWrite16(volatile gint16 *dst, gint16 val)
 	gint16 old_val;
 	do {
 		old_val = *dst;
-	} while (__sync_val_compare_and_swap (dst, old_val, val) != old_val);
+	} while (gcc_sync_val_compare_and_swap (dst, old_val, val) != old_val);
 }
 
 static inline void InterlockedWrite(volatile gint32 *dst, gint32 val)
@@ -260,7 +303,7 @@ static inline void InterlockedWrite(volatile gint32 *dst, gint32 val)
 	gint32 old_val;
 	do {
 		old_val = *dst;
-	} while (__sync_val_compare_and_swap (dst, old_val, val) != old_val);
+	} while (gcc_sync_val_compare_and_swap (dst, old_val, val) != old_val);
 }
 
 #if defined (TARGET_OSX) || defined (__arm__) || (defined (__mips__) && !defined (__mips64)) || (defined (__powerpc__) && !defined (__powerpc64__)) || (defined (__sparc__) && !defined (__arch64__))
@@ -271,33 +314,33 @@ static inline void InterlockedWrite(volatile gint32 *dst, gint32 val)
 
 static inline gint64 InterlockedCompareExchange64(volatile gint64 *dest, gint64 exch, gint64 comp)
 {
-	return __sync_val_compare_and_swap (dest, comp, exch);
+	return gcc_sync_val_compare_and_swap (dest, comp, exch);
 }
 
 static inline gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add)
 {
-	return __sync_add_and_fetch (dest, add);
+	return gcc_sync_add_and_fetch (dest, add);
 }
 
 static inline gint64 InterlockedIncrement64(volatile gint64 *val)
 {
-	return __sync_add_and_fetch (val, 1);
+	return gcc_sync_add_and_fetch (val, 1);
 }
 
 static inline gint64 InterlockedDecrement64(volatile gint64 *val)
 {
-	return __sync_sub_and_fetch (val, 1);
+	return gcc_sync_sub_and_fetch (val, 1);
 }
 
 static inline gint64 InterlockedExchangeAdd64(volatile gint64 *val, gint64 add)
 {
-	return __sync_fetch_and_add (val, add);
+	return gcc_sync_fetch_and_add (val, add);
 }
 
 static inline gint64 InterlockedRead64(volatile gint64 *src)
 {
 	/* Kind of a hack, but GCC doesn't give us anything better. */
-	return __sync_fetch_and_add (src, 0);
+	return gcc_sync_fetch_and_add (src, 0);
 }
 
 #else
@@ -383,122 +426,6 @@ static inline void InterlockedWrite64(volatile gint64 *dst, gint64 val)
 	InterlockedExchange64 (dst, val);
 }
 
-#elif defined(__ia64__)
-
-#ifdef __INTEL_COMPILER
-#include <ia64intrin.h>
-#endif
-
-static inline gint32 InterlockedCompareExchange(gint32 volatile *dest,
-						gint32 exch, gint32 comp)
-{
-	gint32 old;
-	guint64 real_comp;
-
-#ifdef __INTEL_COMPILER
-	old = _InterlockedCompareExchange (dest, exch, comp);
-#else
-	/* cmpxchg4 zero extends the value read from memory */
-	real_comp = (guint64)(guint32)comp;
-	asm volatile ("mov ar.ccv = %2 ;;\n\t"
-				  "cmpxchg4.acq %0 = [%1], %3, ar.ccv\n\t"
-				  : "=r" (old) : "r" (dest), "r" (real_comp), "r" (exch));
-#endif
-
-	return(old);
-}
-
-static inline gpointer InterlockedCompareExchangePointer(gpointer volatile *dest,
-						gpointer exch, gpointer comp)
-{
-	gpointer old;
-
-#ifdef __INTEL_COMPILER
-	old = _InterlockedCompareExchangePointer (dest, exch, comp);
-#else
-	asm volatile ("mov ar.ccv = %2 ;;\n\t"
-				  "cmpxchg8.acq %0 = [%1], %3, ar.ccv\n\t"
-				  : "=r" (old) : "r" (dest), "r" (comp), "r" (exch));
-#endif
-
-	return(old);
-}
-
-static inline gint32 InterlockedIncrement(gint32 volatile *val)
-{
-#ifdef __INTEL_COMPILER
-	return _InterlockedIncrement (val);
-#else
-	gint32 old;
-
-	do {
-		old = *val;
-	} while (InterlockedCompareExchange (val, old + 1, old) != old);
-
-	return old + 1;
-#endif
-}
-
-static inline gint32 InterlockedDecrement(gint32 volatile *val)
-{
-#ifdef __INTEL_COMPILER
-	return _InterlockedDecrement (val);
-#else
-	gint32 old;
-
-	do {
-		old = *val;
-	} while (InterlockedCompareExchange (val, old - 1, old) != old);
-
-	return old - 1;
-#endif
-}
-
-static inline gint32 InterlockedExchange(gint32 volatile *dest, gint32 new_val)
-{
-#ifdef __INTEL_COMPILER
-	return _InterlockedExchange (dest, new_val);
-#else
-	gint32 res;
-
-	do {
-		res = *dest;
-	} while (InterlockedCompareExchange (dest, new_val, res) != res);
-
-	return res;
-#endif
-}
-
-static inline gpointer InterlockedExchangePointer(gpointer volatile *dest, gpointer new_val)
-{
-#ifdef __INTEL_COMPILER
-	return (gpointer)_InterlockedExchange64 ((gint64*)dest, (gint64)new_val);
-#else
-	gpointer res;
-
-	do {
-		res = *dest;
-	} while (InterlockedCompareExchangePointer (dest, new_val, res) != res);
-
-	return res;
-#endif
-}
-
-static inline gint32 InterlockedExchangeAdd(gint32 volatile *val, gint32 add)
-{
-	gint32 old;
-
-#ifdef __INTEL_COMPILER
-	old = _InterlockedExchangeAdd (val, add);
-#else
-	do {
-		old = *val;
-	} while (InterlockedCompareExchange (val, old + add, old) != old);
-
-	return old;
-#endif
-}
-
 #else
 
 #define WAPI_NO_ATOMIC_ASM