/**
 * \file
 * Atomic operations
 *
 * Author:
 *	Dick Porter (dick@ximian.com)
 *
 * (C) 2002 Ximian, Inc.
 * Copyright 2012 Xamarin Inc
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */

#ifndef _WAPI_ATOMIC_H_
#define _WAPI_ATOMIC_H_

#include "config.h"
#include <glib.h>
#include <mono/utils/mono-membar.h>

/*
 * The current Nexus 7 arm-v7a fails with:
 * F/MonoDroid( 1568): shared runtime initialization error: Cannot load library: reloc_library[1285]: 37 cannot locate '__sync_val_compare_and_swap_8'
 *
 * Apple targets have historically been problematic; xcode 4.6 would miscompile the intrinsic.
 */

/* On Windows, we always use the functions provided by the Windows API. */
#if defined(__WIN32__) || defined(_WIN32)

#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>

/* mingw is missing InterlockedCompareExchange64 () from winbase.h */
#if HAVE_DECL_INTERLOCKEDCOMPAREEXCHANGE64==0
static inline gint64 InterlockedCompareExchange64(volatile gint64 *dest, gint64 exch, gint64 comp)
{
	return __sync_val_compare_and_swap (dest, comp, exch);
}
#endif

/* mingw is missing InterlockedExchange64 () from winbase.h */
#if HAVE_DECL_INTERLOCKEDEXCHANGE64==0
static inline gint64 InterlockedExchange64(volatile gint64 *val, gint64 new_val)
{
	gint64 old_val;
	do {
		old_val = *val;
	} while (InterlockedCompareExchange64 (val, new_val, old_val) != old_val);
	return old_val;
}
#endif

/* mingw is missing InterlockedIncrement64 () from winbase.h */
#if HAVE_DECL_INTERLOCKEDINCREMENT64==0
static inline gint64 InterlockedIncrement64(volatile gint64 *val)
{
	return __sync_add_and_fetch (val, 1);
}
#endif

/* mingw is missing InterlockedDecrement64 () from winbase.h */
#if HAVE_DECL_INTERLOCKEDDECREMENT64==0
static inline gint64 InterlockedDecrement64(volatile gint64 *val)
{
	return __sync_sub_and_fetch (val, 1);
}
#endif

/* mingw is missing InterlockedAdd () from winbase.h */
#if HAVE_DECL_INTERLOCKEDADD==0
static inline gint32 InterlockedAdd(volatile gint32 *dest, gint32 add)
{
	return __sync_add_and_fetch (dest, add);
}
#endif

/* mingw is missing InterlockedAdd64 () from winbase.h */
#if HAVE_DECL_INTERLOCKEDADD64==0
static inline gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add)
{
	return __sync_add_and_fetch (dest, add);
}
#endif

#if defined(_MSC_VER) && !defined(InterlockedAdd)
/* MSVC before 2013 only defines InterlockedAdd* for the Itanium architecture */
static inline gint32 InterlockedAdd(volatile gint32 *dest, gint32 add)
{
	return InterlockedExchangeAdd (dest, add) + add;
}
#endif

#if defined(_MSC_VER) && !defined(InterlockedAdd64)
#if defined(InterlockedExchangeAdd64)
/* This may be defined only on amd64 */
static inline gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add)
{
	return InterlockedExchangeAdd64 (dest, add) + add;
}
#else
static inline gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add)
{
	gint64 prev_value;
	do {
		prev_value = *dest;
	} while (prev_value != InterlockedCompareExchange64 (dest, prev_value + add, prev_value));
	return prev_value + add;
}
#endif
#endif

#ifdef HOST_WIN32
#define TO_INTERLOCKED_ARGP(ptr) ((volatile LONG*)(ptr))
#else
#define TO_INTERLOCKED_ARGP(ptr) (ptr)
#endif
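/* For illustration, the relationship the MSVC fallbacks above rely on:
 * InterlockedExchangeAdd returns the value from *before* the addition, while
 * InterlockedAdd must return the value from *after* it, hence the `+ add`.
 * A hypothetical sketch, assuming a counter that starts at 5:
 *
 *	volatile gint32 counter = 5;
 *	gint32 before = InterlockedExchangeAdd (&counter, 3);	// before == 5, counter == 8
 *	gint32 after = InterlockedAdd (&counter, 3);		// after == 11, counter == 11
 */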
/* And now for some dirty hacks... The Windows API doesn't
 * provide any useful primitives for this (other than getting
 * into architecture-specific madness), so use CAS. */

static inline gint32 InterlockedRead(volatile gint32 *src)
{
	return InterlockedCompareExchange (TO_INTERLOCKED_ARGP (src), 0, 0);
}

static inline gint64 InterlockedRead64(volatile gint64 *src)
{
	return InterlockedCompareExchange64 (src, 0, 0);
}

static inline gpointer InterlockedReadPointer(volatile gpointer *src)
{
	return InterlockedCompareExchangePointer (src, NULL, NULL);
}

static inline void InterlockedWrite(volatile gint32 *dst, gint32 val)
{
	InterlockedExchange (TO_INTERLOCKED_ARGP (dst), val);
}

static inline void InterlockedWrite64(volatile gint64 *dst, gint64 val)
{
	InterlockedExchange64 (dst, val);
}

static inline void InterlockedWritePointer(volatile gpointer *dst, gpointer val)
{
	InterlockedExchangePointer (dst, val);
}

/* We can't even use CAS for these, so write them out
 * explicitly according to x86(_64) semantics... */

static inline gint8 InterlockedRead8(volatile gint8 *src)
{
	return *src;
}

static inline gint16 InterlockedRead16(volatile gint16 *src)
{
	return *src;
}

static inline void InterlockedWrite8(volatile gint8 *dst, gint8 val)
{
	*dst = val;
	mono_memory_barrier ();
}

static inline void InterlockedWrite16(volatile gint16 *dst, gint16 val)
{
	*dst = val;
	mono_memory_barrier ();
}
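/* For illustration: the pair above is sequentially consistent on x86(_64)
 * because aligned 1- and 2-byte accesses are already atomic there, and the
 * only reordering the architecture allows (a store passing a later load) is
 * ruled out by the trailing mono_memory_barrier (). A hypothetical
 * message-passing sketch, where `flag` and `data` are not part of this
 * header:
 *
 *	data = 42;			// writer: plain store
 *	InterlockedWrite8 (&flag, 1);	// writer: store, then full barrier
 *
 *	if (InterlockedRead8 (&flag))	// a reader that observes flag == 1
 *		g_assert (data == 42);	// also observes the earlier store
 */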
/* Prefer GCC atomic ops if the target supports it (see configure.ac). */
#elif defined(USE_GCC_ATOMIC_OPS)

/*
 * As of this comment (August 2016), all current Clang versions get atomic
 * intrinsics on ARM64 wrong. All GCC versions prior to 5.3.0 do, too. The bug
 * is the same: The compiler developers thought that the acq + rel barriers
 * that ARM64 load/store instructions can impose are sufficient to provide
 * sequential consistency semantics. This is not the case:
 *
 * http://lists.infradead.org/pipermail/linux-arm-kernel/2014-February/229588.html
 *
 * We work around this bug by inserting full barriers around each atomic
 * intrinsic if we detect that we're built with a buggy compiler.
 */

#if defined (HOST_ARM64) && (defined (__clang__) || MONO_GNUC_VERSION < 50300)
#define WRAP_ATOMIC_INTRINSIC(INTRIN) \
	({ \
		mono_memory_barrier (); \
		__typeof__ (INTRIN) atomic_ret__ = (INTRIN); \
		mono_memory_barrier (); \
		atomic_ret__; \
	})

#define gcc_sync_val_compare_and_swap(a, b, c) WRAP_ATOMIC_INTRINSIC (__sync_val_compare_and_swap (a, b, c))
#define gcc_sync_add_and_fetch(a, b) WRAP_ATOMIC_INTRINSIC (__sync_add_and_fetch (a, b))
#define gcc_sync_sub_and_fetch(a, b) WRAP_ATOMIC_INTRINSIC (__sync_sub_and_fetch (a, b))
#define gcc_sync_fetch_and_add(a, b) WRAP_ATOMIC_INTRINSIC (__sync_fetch_and_add (a, b))
#else
#define gcc_sync_val_compare_and_swap(a, b, c) __sync_val_compare_and_swap (a, b, c)
#define gcc_sync_add_and_fetch(a, b) __sync_add_and_fetch (a, b)
#define gcc_sync_sub_and_fetch(a, b) __sync_sub_and_fetch (a, b)
#define gcc_sync_fetch_and_add(a, b) __sync_fetch_and_add (a, b)
#endif

static inline gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp)
{
	return gcc_sync_val_compare_and_swap (dest, comp, exch);
}

static inline gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp)
{
	return gcc_sync_val_compare_and_swap (dest, comp, exch);
}

static inline gint32 InterlockedAdd(volatile gint32 *dest, gint32 add)
{
	return gcc_sync_add_and_fetch (dest, add);
}

static inline gint32 InterlockedIncrement(volatile gint32 *val)
{
	return gcc_sync_add_and_fetch (val, 1);
}

static inline gint32 InterlockedDecrement(volatile gint32 *val)
{
	return gcc_sync_sub_and_fetch (val, 1);
}

static inline gint32 InterlockedExchange(volatile gint32 *val, gint32 new_val)
{
	gint32 old_val;
	do {
		old_val = *val;
	} while (gcc_sync_val_compare_and_swap (val, old_val, new_val) != old_val);
	return old_val;
}

static inline gpointer InterlockedExchangePointer(volatile gpointer *val, gpointer new_val)
{
	gpointer old_val;
	do {
		old_val = *val;
	} while (gcc_sync_val_compare_and_swap (val, old_val, new_val) != old_val);
	return old_val;
}

static inline gint32 InterlockedExchangeAdd(volatile gint32 *val, gint32 add)
{
	return gcc_sync_fetch_and_add (val, add);
}

static inline gint8 InterlockedRead8(volatile gint8 *src)
{
	/* Kind of a hack, but GCC doesn't give us anything better, and it's
	 * certainly not as bad as using a CAS loop. */
	return gcc_sync_fetch_and_add (src, 0);
}

static inline gint16 InterlockedRead16(volatile gint16 *src)
{
	return gcc_sync_fetch_and_add (src, 0);
}

static inline gint32 InterlockedRead(volatile gint32 *src)
{
	return gcc_sync_fetch_and_add (src, 0);
}
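/* For illustration: `fetch_and_add (src, 0)` is an atomic read-modify-write
 * that leaves the value unchanged, which is why it can stand in for a
 * sequentially consistent load above. On compilers with the newer __atomic
 * builtins (GCC >= 4.7, recent Clang), a plain atomic load would express the
 * same thing, e.g.:
 *
 *	gint32 v = __atomic_load_n (src, __ATOMIC_SEQ_CST);
 *
 * but this header sticks to the older __sync family throughout.
 */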
static inline void InterlockedWrite8(volatile gint8 *dst, gint8 val)
{
	/* Nothing useful from GCC at all, so fall back to CAS. */
	gint8 old_val;
	do {
		old_val = *dst;
	} while (gcc_sync_val_compare_and_swap (dst, old_val, val) != old_val);
}

static inline void InterlockedWrite16(volatile gint16 *dst, gint16 val)
{
	gint16 old_val;
	do {
		old_val = *dst;
	} while (gcc_sync_val_compare_and_swap (dst, old_val, val) != old_val);
}

static inline void InterlockedWrite(volatile gint32 *dst, gint32 val)
{
	/* Nothing useful from GCC at all, so fall back to CAS. */
	gint32 old_val;
	do {
		old_val = *dst;
	} while (gcc_sync_val_compare_and_swap (dst, old_val, val) != old_val);
}

#if defined (TARGET_OSX) || defined (__arm__) || (defined (__mips__) && !defined (__mips64)) || (defined (__powerpc__) && !defined (__powerpc64__)) || (defined (__sparc__) && !defined (__arch64__))
#define BROKEN_64BIT_ATOMICS_INTRINSIC 1
#endif

#if !defined (BROKEN_64BIT_ATOMICS_INTRINSIC)

static inline gint64 InterlockedCompareExchange64(volatile gint64 *dest, gint64 exch, gint64 comp)
{
	return gcc_sync_val_compare_and_swap (dest, comp, exch);
}

static inline gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add)
{
	return gcc_sync_add_and_fetch (dest, add);
}

static inline gint64 InterlockedIncrement64(volatile gint64 *val)
{
	return gcc_sync_add_and_fetch (val, 1);
}

static inline gint64 InterlockedDecrement64(volatile gint64 *val)
{
	return gcc_sync_sub_and_fetch (val, 1);
}

static inline gint64 InterlockedExchangeAdd64(volatile gint64 *val, gint64 add)
{
	return gcc_sync_fetch_and_add (val, add);
}

static inline gint64 InterlockedRead64(volatile gint64 *src)
{
	/* Kind of a hack, but GCC doesn't give us anything better. */
	return gcc_sync_fetch_and_add (src, 0);
}

#else

/* Implement 64-bit cmpxchg by hand or emulate it. */
extern gint64 InterlockedCompareExchange64(volatile gint64 *dest, gint64 exch, gint64 comp);

/* Implement all other 64-bit atomics in terms of a specialized CAS
 * in this case, since chances are that the other 64-bit atomic
 * intrinsics are broken too. */

static inline gint64 InterlockedExchangeAdd64(volatile gint64 *dest, gint64 add)
{
	gint64 old_val;
	do {
		old_val = *dest;
	} while (InterlockedCompareExchange64 (dest, old_val + add, old_val) != old_val);
	return old_val;
}

static inline gint64 InterlockedIncrement64(volatile gint64 *val)
{
	gint64 get, set;
	do {
		get = *val;
		set = get + 1;
	} while (InterlockedCompareExchange64 (val, set, get) != get);
	return set;
}

static inline gint64 InterlockedDecrement64(volatile gint64 *val)
{
	gint64 get, set;
	do {
		get = *val;
		set = get - 1;
	} while (InterlockedCompareExchange64 (val, set, get) != get);
	return set;
}

static inline gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add)
{
	gint64 get, set;
	do {
		get = *dest;
		set = get + add;
	} while (InterlockedCompareExchange64 (dest, set, get) != get);
	return set;
}

static inline gint64 InterlockedRead64(volatile gint64 *src)
{
	return InterlockedCompareExchange64 (src, 0, 0);
}

#endif

static inline gpointer InterlockedReadPointer(volatile gpointer *src)
{
	return InterlockedCompareExchangePointer (src, NULL, NULL);
}

static inline void InterlockedWritePointer(volatile gpointer *dst, gpointer val)
{
	InterlockedExchangePointer (dst, val);
}

/* We always implement this in terms of a 64-bit cmpxchg since
 * GCC doesn't have an intrinsic to model it anyway. */
static inline gint64 InterlockedExchange64(volatile gint64 *val, gint64 new_val)
{
	gint64 old_val;
	do {
		old_val = *val;
	} while (InterlockedCompareExchange64 (val, new_val, old_val) != old_val);
	return old_val;
}

static inline void InterlockedWrite64(volatile gint64 *dst, gint64 val)
{
	/* Nothing useful from GCC at all, so fall back to CAS. */
	InterlockedExchange64 (dst, val);
}
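/* For illustration: the loops above all follow one pattern that can build any
 * 64-bit read-modify-write operation out of InterlockedCompareExchange64
 * alone. A hypothetical atomic-max sketch (example_atomic_max64 is not part
 * of this header):
 *
 *	static inline gint64
 *	example_atomic_max64 (volatile gint64 *dest, gint64 val)
 *	{
 *		gint64 get, set;
 *		do {
 *			get = *dest;
 *			set = get > val ? get : val;
 *		} while (InterlockedCompareExchange64 (dest, set, get) != get);
 *		return set;
 *	}
 */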
#else

#define WAPI_NO_ATOMIC_ASM

extern gint32 InterlockedCompareExchange(volatile gint32 *dest, gint32 exch, gint32 comp);
extern gint64 InterlockedCompareExchange64(volatile gint64 *dest, gint64 exch, gint64 comp);
extern gpointer InterlockedCompareExchangePointer(volatile gpointer *dest, gpointer exch, gpointer comp);
extern gint32 InterlockedAdd(volatile gint32 *dest, gint32 add);
extern gint64 InterlockedAdd64(volatile gint64 *dest, gint64 add);
extern gint32 InterlockedIncrement(volatile gint32 *dest);
extern gint64 InterlockedIncrement64(volatile gint64 *dest);
extern gint32 InterlockedDecrement(volatile gint32 *dest);
extern gint64 InterlockedDecrement64(volatile gint64 *dest);
extern gint32 InterlockedExchange(volatile gint32 *dest, gint32 exch);
extern gint64 InterlockedExchange64(volatile gint64 *dest, gint64 exch);
extern gpointer InterlockedExchangePointer(volatile gpointer *dest, gpointer exch);
extern gint32 InterlockedExchangeAdd(volatile gint32 *dest, gint32 add);
extern gint64 InterlockedExchangeAdd64(volatile gint64 *dest, gint64 add);
extern gint8 InterlockedRead8(volatile gint8 *src);
extern gint16 InterlockedRead16(volatile gint16 *src);
extern gint32 InterlockedRead(volatile gint32 *src);
extern gint64 InterlockedRead64(volatile gint64 *src);
extern gpointer InterlockedReadPointer(volatile gpointer *src);
extern void InterlockedWrite8(volatile gint8 *dst, gint8 val);
extern void InterlockedWrite16(volatile gint16 *dst, gint16 val);
extern void InterlockedWrite(volatile gint32 *dst, gint32 val);
extern void InterlockedWrite64(volatile gint64 *dst, gint64 val);
extern void InterlockedWritePointer(volatile gpointer *dst, gpointer val);

#endif

#if SIZEOF_VOID_P == 4
#define InterlockedAddP(p,add) InterlockedAdd ((volatile gint32*)p, (gint32)add)
#else
#define InterlockedAddP(p,add) InterlockedAdd64 ((volatile gint64*)p, (gint64)add)
#endif

/* The following functions are not provided by any platform's native API,
 * so they can be defined here without further existence checks. */

static inline void
InterlockedWriteBool (volatile gboolean *dest, gboolean val)
{
	/* both gboolean and gint32 are int32_t; the casts just make that explicit */
	InterlockedWrite ((volatile gint32 *)dest, (gint32)val);
}

#endif /* _WAPI_ATOMIC_H_ */
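/* For illustration, typical use of the pointer-sized helper above; the names
 * are hypothetical and not part of this header:
 *
 *	static volatile gsize bytes_allocated;
 *	...
 *	InterlockedAddP (&bytes_allocated, size);	// gint32 add on 32-bit, gint64 add on 64-bit
 */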