--- /dev/null
+/*
+ * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
+ * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
+ *
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose, provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ *
+ */
+
+/* Memory model documented at http://www-106.ibm.com/developerworks/ */
+/* eserver/articles/archguide.html and (clearer) */
+/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
+/* There appears to be no implicit ordering between any kind of */
+/* independent memory references. */
+/* The architecture enforces some ordering based on control           */
+/* dependence, but it is unclear whether that could help here.        */
+/* Data-dependent loads are always ordered. */
+/* Based on the above references, eieio is intended for use on */
+/* uncached memory, which we don't support. It does not order loads */
+/* from cached memory. */
+/* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to */
+/* track some of this down and correcting my misunderstandings. -HB */
+/* Earl Chew subsequently contributed further fixes & additions. */
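+/* In short, the mapping used below: sync for full barriers (the     */
+/* only way to get StoreLoad ordering here), lwsync for all weaker   */
+/* barriers, and a data-dependent branch plus isync for acquire      */
+/* loads.                                                            */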
+
+#include "../all_aligned_atomic_load_store.h"
+
+#include "../test_and_set_t_is_ao_t.h"
+ /* There seems to be no byte equivalent of lwarx, so this */
+ /* may really be what we want, at least in the 32-bit case. */
+
+AO_INLINE void
+AO_nop_full(void)
+{
+ __asm__ __volatile__("sync" : : : "memory");
+}
+#define AO_HAVE_nop_full
+
+/* lwsync apparently works for everything but a StoreLoad barrier. */
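+/* (Gcc predefines __NO_LWSYNC__ for targets on which lwsync is not  */
+/* implemented; we fall back to the full sync there.)                */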
+AO_INLINE void
+AO_lwsync(void)
+{
+#ifdef __NO_LWSYNC__
+ __asm__ __volatile__("sync" : : : "memory");
+#else
+ __asm__ __volatile__("lwsync" : : : "memory");
+#endif
+}
+
+#define AO_nop_write() AO_lwsync()
+#define AO_HAVE_nop_write
+
+#define AO_nop_read() AO_lwsync()
+#define AO_HAVE_nop_read
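+
+/* A compiled-out sketch (names are hypothetical) of the pattern     */
+/* these cover: AO_nop_write orders a payload store before the flag  */
+/* store, and AO_nop_read orders the flag load before the payload    */
+/* load.                                                             */
+#if 0
+AO_t payload;         /* hypothetical data                */
+volatile AO_t ready;  /* hypothetical flag, initially 0   */
+
+void publish(AO_t v)
+{
+  payload = v;
+  AO_nop_write();     /* StoreStore: payload becomes visible first */
+  ready = 1;
+}
+
+AO_t consume(void)
+{
+  while (!ready) {}
+  AO_nop_read();      /* LoadLoad: ready is read before payload */
+  return payload;
+}
+#endif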
+
+/* We explicitly specify load_acquire, since it is important and can */
+/* be implemented relatively cheaply.  It could be implemented with  */
+/* an ordinary load followed by an lwsync, but the general wisdom    */
+/* seems to be that a data-dependent branch followed by an isync is  */
+/* cheaper.  The documentation is fairly explicit that this sequence */
+/* also has acquire semantics.                                       */
+/* Note that ppc64 uses ld rather than lwz.                          */
+AO_INLINE AO_t
+AO_load_acquire(const volatile AO_t *addr)
+{
+ AO_t result;
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+ __asm__ __volatile__ (
+ "ld%U1%X1 %0,%1\n"
+ "cmpw %0,%0\n"
+ "bne- 1f\n"
+ "1: isync\n"
+ : "=r" (result)
+ : "m"(*addr) : "memory", "cr0");
+#else
+ /* FIXME: We should get gcc to allocate one of the condition */
+ /* registers. I always got "impossible constraint" when I */
+ /* tried the "y" constraint. */
+ __asm__ __volatile__ (
+ "lwz%U1%X1 %0,%1\n"
+ "cmpw %0,%0\n"
+ "bne- 1f\n"
+ "1: isync\n"
+ : "=r" (result)
+ : "m"(*addr) : "memory", "cc");
+#endif
+ return result;
+}
+#define AO_HAVE_load_acquire
+
+/* We explicitly specify store_release, since it relies */
+/* on the fact that lwsync is also a LoadStore barrier. */
+AO_INLINE void
+AO_store_release(volatile AO_t *addr, AO_t value)
+{
+ AO_lwsync();
+ *addr = value;
+}
+#define AO_HAVE_store_release
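+
+/* A compiled-out sketch of the intended pairing (names are          */
+/* hypothetical): store_release publishes an initialized node, and   */
+/* load_acquire guarantees its fields are seen initialized once the  */
+/* pointer is.                                                       */
+#if 0
+struct node { AO_t value; };
+volatile AO_t head;  /* hypothetical; holds a struct node * as AO_t */
+
+void publish_node(struct node *n)
+{
+  n->value = 42;
+  AO_store_release(&head, (AO_t)n);  /* lwsync orders the init first */
+}
+
+AO_t read_node(void)
+{
+  struct node *n;
+  while ((n = (struct node *)AO_load_acquire(&head)) == 0) {}
+  return n->value;  /* guaranteed to see 42 */
+}
+#endif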
+
+/* This is similar to the code in the garbage collector. Deleting */
+/* this and having it synthesized from compare_and_swap would probably */
+/* only cost us a load immediate instruction. */
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set(volatile AO_TS_t *addr) {
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+  /* Completely untested. And we should be using smaller objects anyway. */
+ unsigned long oldval;
+ unsigned long temp = 1; /* locked value */
+
+ __asm__ __volatile__(
+ "1:ldarx %0,0,%1\n" /* load and reserve */
+ "cmpdi %0, 0\n" /* if load is */
+ "bne 2f\n" /* non-zero, return already set */
+ "stdcx. %2,0,%1\n" /* else store conditional */
+ "bne- 1b\n" /* retry if lost reservation */
+ "2:\n" /* oldval is zero if we set */
+ : "=&r"(oldval)
+ : "r"(addr), "r"(temp)
+ : "memory", "cr0");
+#else
+ int oldval;
+ int temp = 1; /* locked value */
+
+ __asm__ __volatile__(
+ "1:lwarx %0,0,%1\n" /* load and reserve */
+ "cmpwi %0, 0\n" /* if load is */
+ "bne 2f\n" /* non-zero, return already set */
+ "stwcx. %2,0,%1\n" /* else store conditional */
+ "bne- 1b\n" /* retry if lost reservation */
+ "2:\n" /* oldval is zero if we set */
+ : "=&r"(oldval)
+ : "r"(addr), "r"(temp)
+ : "memory", "cr0");
+#endif
+ return (AO_TS_VAL_t)oldval;
+}
+#define AO_HAVE_test_and_set
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_acquire(volatile AO_TS_t *addr) {
+ AO_TS_VAL_t result = AO_test_and_set(addr);
+ AO_lwsync();
+ return result;
+}
+#define AO_HAVE_test_and_set_acquire
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_release(volatile AO_TS_t *addr) {
+ AO_lwsync();
+ return AO_test_and_set(addr);
+}
+#define AO_HAVE_test_and_set_release
+
+AO_INLINE AO_TS_VAL_t
+AO_test_and_set_full(volatile AO_TS_t *addr) {
+ AO_TS_VAL_t result;
+ AO_lwsync();
+ result = AO_test_and_set(addr);
+ AO_lwsync();
+ return result;
+}
+#define AO_HAVE_test_and_set_full
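+
+/* A compiled-out usage sketch: the classic test-and-set spin lock.  */
+/* AO_TS_INITIALIZER, AO_TS_SET and AO_CLEAR (a release store of     */
+/* AO_TS_CLEAR) are declared elsewhere in atomic_ops; the lock word  */
+/* name is hypothetical.                                             */
+#if 0
+static volatile AO_TS_t my_lock = AO_TS_INITIALIZER;
+
+static void my_lock_enter(void)
+{
+  while (AO_test_and_set_acquire(&my_lock) == AO_TS_SET) {
+    /* spin; acquire keeps the critical section from floating */
+    /* above the successful test-and-set                      */
+  }
+}
+
+static void my_lock_exit(void)
+{
+  AO_CLEAR(&my_lock);
+}
+#endif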
+
+AO_INLINE int
+AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
+ AO_t oldval;
+ int result = 0;
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+  /* FIXME: Completely untested. */
+ __asm__ __volatile__(
+ "1:ldarx %0,0,%2\n" /* load and reserve */
+ "cmpd %0, %4\n" /* if load is not equal to */
+ "bne 2f\n" /* old, fail */
+ "stdcx. %3,0,%2\n" /* else store conditional */
+ "bne- 1b\n" /* retry if lost reservation */
+ "li %1,1\n" /* result = 1; */
+ "2:\n"
+ : "=&r"(oldval), "=&r"(result)
+ : "r"(addr), "r"(new_val), "r"(old), "1"(result)
+ : "memory", "cr0");
+#else
+ __asm__ __volatile__(
+ "1:lwarx %0,0,%2\n" /* load and reserve */
+ "cmpw %0, %4\n" /* if load is not equal to */
+ "bne 2f\n" /* old, fail */
+ "stwcx. %3,0,%2\n" /* else store conditional */
+ "bne- 1b\n" /* retry if lost reservation */
+ "li %1,1\n" /* result = 1; */
+ "2:\n"
+ : "=&r"(oldval), "=&r"(result)
+ : "r"(addr), "r"(new_val), "r"(old), "1"(result)
+ : "memory", "cr0");
+#endif
+ return result;
+}
+#define AO_HAVE_compare_and_swap
+
+AO_INLINE int
+AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) {
+ int result = AO_compare_and_swap(addr, old, new_val);
+ AO_lwsync();
+ return result;
+}
+#define AO_HAVE_compare_and_swap_acquire
+
+AO_INLINE int
+AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) {
+ AO_lwsync();
+ return AO_compare_and_swap(addr, old, new_val);
+}
+#define AO_HAVE_compare_and_swap_release
+
+AO_INLINE int
+AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) {
+ int result;
+ AO_lwsync();
+ result = AO_compare_and_swap(addr, old, new_val);
+ AO_lwsync();
+ return result;
+}
+#define AO_HAVE_compare_and_swap_full
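+
+/* A compiled-out sketch of the usual retry loop around              */
+/* compare_and_swap: an atomic bitwise-or (the helper name is        */
+/* hypothetical).                                                    */
+#if 0
+AO_t my_fetch_and_or_full(volatile AO_t *p, AO_t mask)
+{
+  AO_t old;
+  do {
+    old = *p;
+  } while (!AO_compare_and_swap_full(p, old, old | mask));
+  return old;  /* value observed before the successful swap */
+}
+#endif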
+
+AO_INLINE AO_t
+AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
+ AO_t oldval;
+ AO_t newval;
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
+  /* FIXME: Completely untested. */
+ __asm__ __volatile__(
+ "1:ldarx %0,0,%2\n" /* load and reserve */
+ "add %1,%0,%3\n" /* increment */
+ "stdcx. %1,0,%2\n" /* store conditional */
+ "bne- 1b\n" /* retry if lost reservation */
+ : "=&r"(oldval), "=&r"(newval)
+ : "r"(addr), "r"(incr)
+ : "memory", "cr0");
+#else
+ __asm__ __volatile__(
+ "1:lwarx %0,0,%2\n" /* load and reserve */
+ "add %1,%0,%3\n" /* increment */
+ "stwcx. %1,0,%2\n" /* store conditional */
+ "bne- 1b\n" /* retry if lost reservation */
+ : "=&r"(oldval), "=&r"(newval)
+ : "r"(addr), "r"(incr)
+ : "memory", "cr0");
+#endif
+ return oldval;
+}
+#define AO_HAVE_fetch_and_add
+
+AO_INLINE AO_t
+AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
+ AO_t result = AO_fetch_and_add(addr, incr);
+ AO_lwsync();
+ return result;
+}
+#define AO_HAVE_fetch_and_add_acquire
+
+AO_INLINE AO_t
+AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
+ AO_lwsync();
+ return AO_fetch_and_add(addr, incr);
+}
+#define AO_HAVE_fetch_and_add_release
+
+AO_INLINE AO_t
+AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
+ AO_t result;
+ AO_lwsync();
+ result = AO_fetch_and_add(addr, incr);
+ AO_lwsync();
+ return result;
+}
+#define AO_HAVE_fetch_and_add_full
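+
+/* A compiled-out sketch (names are hypothetical): a ticket counter; */
+/* the full variant also orders the increment with surrounding       */
+/* accesses.                                                         */
+#if 0
+volatile AO_t ticket_counter;  /* hypothetical, initially 0 */
+
+AO_t take_ticket(void)
+{
+  return AO_fetch_and_add_full(&ticket_counter, 1);  /* old value */
+}
+#endif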
+
+#if !(defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__))
+# include "../ao_t_is_int.h"
+#endif