/*
 * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
 * Copyright (c) 1999-2004 Hewlett-Packard Development Company, L.P.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */
/* Memory model documented at http://www-106.ibm.com/developerworks/   */
/* eserver/articles/archguide.html and (clearer)                       */
/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html.*/
/* There appears to be no implicit ordering between any kind of        */
/* independent memory references.                                      */
/* Architecture enforces some ordering based on control dependence.    */
/* I don't know if that could help.                                    */
/* Data-dependent loads are always ordered.                            */
/* Based on the above references, eieio is intended for use on         */
/* uncached memory, which we don't support.  It does not order loads   */
/* from cached memory.                                                 */
/* Thanks to Maged Michael, Doug Lea, and Roger Hoover for helping to  */
/* track some of this down and correcting my misunderstandings. -HB    */
/* Earl Chew subsequently contributed further fixes & additions.       */
#include "../all_aligned_atomic_load_store.h"

#include "../test_and_set_t_is_ao_t.h"
        /* There seems to be no byte equivalent of lwarx, so this      */
        /* may really be what we want, at least in the 32-bit case.    */
AO_INLINE void
AO_nop_full(void)
{
  __asm__ __volatile__("sync" : : : "memory");
}

#define AO_HAVE_nop_full
/* lwsync apparently works for everything but a StoreLoad barrier.     */
AO_INLINE void
AO_lwsync(void)
{
#ifdef __NO_LWSYNC__
  __asm__ __volatile__("sync" : : : "memory");
#else
  __asm__ __volatile__("lwsync" : : : "memory");
#endif
}

#define AO_nop_write() AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read() AO_lwsync()
#define AO_HAVE_nop_read
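
/* Why the StoreLoad restriction matters (illustrative sketch only;    */
/* the flags below are hypothetical, not part of this interface): a    */
/* Dekker-style handshake must order a store before a later load, so   */
/* it needs AO_nop_full (sync); the lwsync-based AO_nop_write and      */
/* AO_nop_read would not be enough.                                    */
#if 0 /* usage sketch, not compiled */
static volatile AO_t AO_ex_flag0, AO_ex_flag1;

static int AO_ex_try_enter0(void)
{
  AO_ex_flag0 = 1;
  AO_nop_full();             /* StoreLoad: the flag0 store must be     */
                             /* visible before flag1 is read           */
  return AO_ex_flag1 == 0;   /* enter only if the peer has not entered */
}
#endif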
/* We explicitly specify load_acquire, since it is important, and can  */
/* be implemented relatively cheaply.  It could be implemented         */
/* with an ordinary load followed by a lwsync.  But the general wisdom */
/* seems to be that a data dependent branch followed by an isync is    */
/* cheaper.  And the documentation is fairly explicit that this also   */
/* has acquire semantics.                                              */
/* ppc64 uses ld, not lwz.                                             */
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;

  __asm__ __volatile__ (
    "ld%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- 1f\n"
    "1: isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", "cr0");
  return result;
}
#else
AO_INLINE AO_t
AO_load_acquire(const volatile AO_t *addr)
{
  AO_t result;

  /* FIXME: We should get gcc to allocate one of the condition */
  /* registers.  I always got "impossible constraint" when I   */
  /* tried the "y" constraint.                                 */
  __asm__ __volatile__ (
    "lwz%U1%X1 %0,%1\n"
    "cmpw %0,%0\n"
    "bne- 1f\n"
    "1: isync\n"
    : "=r" (result)
    : "m"(*addr) : "memory", "cc");
  return result;
}
#endif
#define AO_HAVE_load_acquire
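
/* Usage sketch (illustrative only; "AO_ex_ready" and "AO_ex_payload"  */
/* are hypothetical variables, not part of this interface): the        */
/* consumer half of a hand-off.  The matching producer follows the     */
/* store_release definition below.                                     */
#if 0 /* usage sketch, not compiled */
static AO_t AO_ex_payload;
static volatile AO_t AO_ex_ready;

static AO_t AO_ex_consume(void)
{
  while (!AO_load_acquire(&AO_ex_ready)) {
    /* spin: the payload read below cannot be reordered       */
    /* above this acquire load                                */
  }
  return AO_ex_payload;
}
#endif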
/* We explicitly specify store_release, since it relies        */
/* on the fact that lwsync is also a LoadStore barrier.        */
AO_INLINE void
AO_store_release(volatile AO_t *addr, AO_t value)
{
  AO_lwsync();
  *addr = value;
}

#define AO_HAVE_store_release
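
/* The matching producer half of the sketch above (same hypothetical  */
/* names): the lwsync issued by the release store keeps the payload   */
/* store from sinking below the flag store.                           */
#if 0 /* usage sketch, not compiled */
static void AO_ex_produce(AO_t v)
{
  AO_ex_payload = v;                  /* ordinary store                */
  AO_store_release(&AO_ex_ready, 1);  /* StoreStore + LoadStore order  */
}
#endif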
/* This is similar to the code in the garbage collector.  Deleting     */
/* this and having it synthesized from compare_and_swap would probably */
/* only cost us a load immediate instruction.                          */
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* Completely untested.  And we should be using smaller objects anyway. */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
  unsigned long oldval;
  unsigned long temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:ldarx %0,0,%1\n"   /* load and reserve               */
               "cmpdi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stdcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");

  return (AO_TS_VAL_t)oldval;
}

#else
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr) {
  int oldval;
  int temp = 1; /* locked value */

  __asm__ __volatile__(
               "1:lwarx %0,0,%1\n"   /* load and reserve               */
               "cmpwi %0, 0\n"       /* if load is                     */
               "bne 2f\n"            /*   non-zero, return already set */
               "stwcx. %2,0,%1\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "2:\n"                /* oldval is zero if we set       */
              : "=&r"(oldval)
              : "r"(addr), "r"(temp)
              : "memory", "cr0");

  return (AO_TS_VAL_t)oldval;
}

#endif
#define AO_HAVE_test_and_set
AO_INLINE AO_TS_VAL_t
AO_test_and_set_acquire(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}

#define AO_HAVE_test_and_set_acquire
AO_INLINE AO_TS_VAL_t
AO_test_and_set_release(volatile AO_TS_t *addr) {
  AO_lwsync();
  return AO_test_and_set(addr);
}

#define AO_HAVE_test_and_set_release
AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr) {
  AO_TS_VAL_t result;
  AO_lwsync();
  result = AO_test_and_set(addr);
  AO_lwsync();
  return result;
}

#define AO_HAVE_test_and_set_full
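
/* Usage sketch (illustrative): a minimal spin lock on top of these    */
/* primitives.  AO_TS_INITIALIZER, AO_TS_SET, and AO_CLEAR come from   */
/* the main atomic_ops headers.                                        */
#if 0 /* usage sketch, not compiled */
static AO_TS_t AO_ex_lock = AO_TS_INITIALIZER;

static void AO_ex_lock_acquire(void)
{
  while (AO_test_and_set_acquire(&AO_ex_lock) == AO_TS_SET) {
    /* spin until the lock is observed clear */
  }
}

static void AO_ex_lock_release(void)
{
  AO_CLEAR(&AO_ex_lock);  /* release-store AO_TS_CLEAR */
}
#endif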
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* FIXME: Completely untested. */
AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
  AO_t oldval;
  int result = 0;

  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve               */
               "cmpd %0, %4\n"       /* if load is not equal to        */
               "bne 2f\n"            /*   old, fail                    */
               "stdcx. %3,0,%2\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "li %1,1\n"           /* result = 1;                    */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");

  return result;
}

#else
AO_INLINE int
AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val) {
  AO_t oldval;
  int result = 0;

  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve               */
               "cmpw %0, %4\n"       /* if load is not equal to        */
               "bne 2f\n"            /*   old, fail                    */
               "stwcx. %3,0,%2\n"    /* else store conditional         */
               "bne- 1b\n"           /* retry if lost reservation      */
               "li %1,1\n"           /* result = 1;                    */
               "2:\n"
              : "=&r"(oldval), "=&r"(result)
              : "r"(addr), "r"(new_val), "r"(old), "1"(result)
              : "memory", "cr0");

  return result;
}

#endif

#define AO_HAVE_compare_and_swap
AO_INLINE int
AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val) {
  int result = AO_compare_and_swap(addr, old, new_val);
  AO_lwsync();
  return result;
}

#define AO_HAVE_compare_and_swap_acquire
AO_INLINE int
AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val) {
  AO_lwsync();
  return AO_compare_and_swap(addr, old, new_val);
}

#define AO_HAVE_compare_and_swap_release
AO_INLINE int
AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val) {
  int result;
  AO_lwsync();
  result = AO_compare_and_swap(addr, old, new_val);
  AO_lwsync();
  return result;
}

#define AO_HAVE_compare_and_swap_full
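
/* Usage sketch (illustrative; "AO_ex_flags" is hypothetical): the     */
/* usual compare-and-swap retry loop, here setting bits atomically.    */
#if 0 /* usage sketch, not compiled */
static volatile AO_t AO_ex_flags;

static void AO_ex_set_bits(AO_t mask)
{
  AO_t old;
  do {
    old = AO_ex_flags;                /* snapshot the current value    */
  } while (!AO_compare_and_swap(&AO_ex_flags, old, old | mask));
                                      /* retry if another thread won   */
}
#endif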
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
/* FIXME: Completely untested. */
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;

  __asm__ __volatile__(
               "1:ldarx %0,0,%2\n"   /* load and reserve               */
               "add %1,%0,%3\n"      /* increment                      */
               "stdcx. %1,0,%2\n"    /* store conditional              */
               "bne- 1b\n"           /* retry if lost reservation      */
              : "=&r"(oldval), "=&r"(newval)
              : "r"(addr), "r"(incr)
              : "memory", "cr0");

  return oldval;
}

#define AO_HAVE_fetch_and_add

#else
AO_INLINE AO_t
AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
  AO_t oldval;
  AO_t newval;

  __asm__ __volatile__(
               "1:lwarx %0,0,%2\n"   /* load and reserve               */
               "add %1,%0,%3\n"      /* increment                      */
               "stwcx. %1,0,%2\n"    /* store conditional              */
               "bne- 1b\n"           /* retry if lost reservation      */
              : "=&r"(oldval), "=&r"(newval)
              : "r"(addr), "r"(incr)
              : "memory", "cr0");

  return oldval;
}

#define AO_HAVE_fetch_and_add

#endif
AO_INLINE AO_t
AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
  AO_t result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}

#define AO_HAVE_fetch_and_add_acquire
AO_INLINE AO_t
AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
  AO_lwsync();
  return AO_fetch_and_add(addr, incr);
}

#define AO_HAVE_fetch_and_add_release
AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
  AO_t result;
  AO_lwsync();
  result = AO_fetch_and_add(addr, incr);
  AO_lwsync();
  return result;
}

#define AO_HAVE_fetch_and_add_full
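
/* Usage sketch (illustrative; "AO_ex_next_id" is hypothetical):       */
/* handing out unique ids; the plain variant suffices when only        */
/* atomicity, not ordering, is required.                               */
#if 0 /* usage sketch, not compiled */
static volatile AO_t AO_ex_next_id;

static AO_t AO_ex_new_id(void)
{
  return AO_fetch_and_add(&AO_ex_next_id, 1); /* returns the old value */
}
#endif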
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  /* Nothing extra: AO_t is not int-sized on 64-bit targets.   */
#else
# include "../ao_t_is_int.h"
#endif