/*
 * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
 * Original Author: Hans Boehm
 *
 * This file may be redistributed and/or modified under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2, or (at your option) any later version.
 *
 * It is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License in the
 * file doc/COPYING for more details.
 */
15 #if defined(HAVE_CONFIG_H)
22 #define AO_REQUIRE_CAS
23 #include "atomic_ops_stack.h"
25 #if defined(_MSC_VER) \
26 || defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__CYGWIN__)
27 /* AO_pause not defined elsewhere */
28 /* FIXME: At least AO_spin should be factored out. */
/* NOTE(review): this chunk is an elided extraction -- the numeric       */
/* prefixes are the original file's own line numbers, and several source */
/* lines (function braces, the spin-loop body, the Sleep() call) are     */
/* missing.  Code lines below are preserved byte-for-byte; only comments */
/* were added.                                                           */
33 /* Spin for 2**n units. */
34 static void AO_spin(int n)
/* Loads a shared dummy value (defined outside this view) into j.        */
37 AO_T j = AO_load(&dummy);
/* Busy-waits for 2**(n+1) iterations: exponential back-off in n.        */
39 for (i = 0; i < (2 << n); ++i)
/* AO_pause(n) tail (header elided): for larger n, sleep instead of spin. */
55 /* Short async-signal-safe sleep. */
/* Sleep 2**(n-12) ms, capped at 100 ms once n exceeds 18.               */
56 msecs = (n > 18? 100 : (1 << (n - 12)));
/* Non-Windows builds obtain AO_pause from elsewhere in the library.     */
63 /* AO_pause is available elsewhere */
65 extern void AO_pause(int);
69 #ifdef AO_USE_ALMOST_LOCK_FREE
71 /* LIFO linked lists based on compare-and-swap. We need to avoid */
72 /* the case of a node deletion and reinsertion while I'm deleting */
73 * it, since that may cause my CAS to succeed even though the next
74 /* pointer is now wrong. Our solution is not fully lock-free, but it */
75 /* is good enough for signal handlers, provided we have a suitably low */
76 /* bound on the number of recursive signal handler reentries. */
77 /* A list consists of a first pointer and a blacklist */
78 /* of pointer values that are currently being removed. No list element */
79 /* on the blacklist may be inserted. If we would otherwise do so, we */
80 /* are allowed to insert a variant that differs only in the least */
81 /* significant, ignored, bits. If the list is full, we wait. */
83 /* Crucial observation: A particular padded pointer x (i.e. pointer */
84 /* plus arbitrary low order bits) can never be newly inserted into */
85 /* a list while it's in the corresponding auxiliary data structure. */
87 /* The second argument is a pointer to the link field of the element */
89 /* Both list headers and link fields contain "perturbed" pointers, i.e. */
90 /* pointers with extra bits "or"ed into the low order bits. */
/* Push element x onto *list, consulting auxiliary blacklist structure a */
/* to avoid ABA races with in-progress pops (see the block comment       */
/* earlier in this file).  NOTE(review): the extraction has elided       */
/* several lines here (declarations, braces, the retry path); code lines */
/* are preserved byte-for-byte, only comments were added.                */
92 AO_stack_push_explicit_aux_release(volatile AO_t *list, AO_t *x,
95 AO_t x_bits = (AO_t)x;
98 /* No deletions of x can start here, since x is not currently in the */
/* Fast path: with a two-entry blacklist, check both slots explicitly.   */
103 /* Start all loads as close to concurrently as possible. */
104 AO_t entry1 = AO_load(a -> AO_stack_bl);
105 AO_t entry2 = AO_load(a -> AO_stack_bl + 1);
106 if (entry1 == x_bits || entry2 == x_bits)
108 /* Entry is currently being removed. Change it a little. */
/* Perturb the ignored low-order bits (per the header comment, inserts   */
/* may differ in those bits); a result of zero means the per-pointer     */
/* version count wrapped.                                                */
110 if ((x_bits & AO_BIT_MASK) == 0)
111 /* Version count overflowed; */
112 /* EXTREMELY unlikely, but possible. */
/* General path: scan all AO_BL_SIZE blacklist slots for x_bits.         */
120 for (i = 0; i < AO_BL_SIZE; ++i)
122 if (AO_load(a -> AO_stack_bl + i) == x_bits)
124 /* Entry is currently being removed. Change it a little. */
126 if ((x_bits & AO_BIT_MASK) == 0)
127 /* Version count overflowed; */
128 /* EXTREMELY unlikely, but possible. */
135 /* x_bits is not currently being deleted */
/* Standard CAS push loop: link to the current head, then swing it.      */
138 next = AO_load(list);
141 while(!AO_compare_and_swap_release(list, next, x_bits));
145 * I concluded experimentally that checking a value first before
146 * performing a compare-and-swap is usually beneficial on X86, but
147 * slows things down appreciably with contention on Itanium.
148 * Since the Itanium behavior makes more sense to me (more cache line
149 * movement unless we're mostly reading, but back-off should guard
150 * against that), we take Itanium as the default. Measurements on
151 * other multiprocessor architectures would be useful. (On a uniprocessor,
152 * the initial check is almost certainly a very small loss.) - HB
155 # define PRECHECK(a) (a) == 0 &&
/* Pop the first element from *list, using blacklist structure a to      */
/* protect the detached node against ABA reinsertion while the pop runs. */
/* NOTE(review): the extraction has elided several lines (declarations,  */
/* the slot-scan loop header, back-off/retry jumps); code lines are      */
/* preserved byte-for-byte, only comments were added.                    */
161 AO_stack_pop_explicit_aux_acquire(volatile AO_t *list, AO_stack_aux * a)
170 first = AO_load(list);
/* Empty stack. */
171 if (0 == first) return 0;
172 /* Insert first into aux black list. */
173 /* This may spin if more than AO_BL_SIZE removals using auxiliary */
174 /* structure a are currently in progress. */
/* Claim a free blacklist slot with CAS; PRECHECK may skip busy slots.   */
177 if (PRECHECK(a -> AO_stack_bl[i])
178 AO_compare_and_swap_acquire(a->AO_stack_bl+i, 0, first))
/* All slots in use: (elided) back off and rescan.                       */
181 if ( i >= AO_BL_SIZE )
187 assert(i >= 0 && i < AO_BL_SIZE);
188 assert(a -> AO_stack_bl[i] == first);
189 /* First is on the auxiliary black list. It may be removed by */
190 /* another thread before we get to it, but a new insertion of x */
191 /* cannot be started here. */
192 /* Only we can remove it from the black list. */
193 /* We need to make sure that first is still the first entry on the */
194 /* list. Otherwise it's possible that a reinsertion of it was */
195 /* already started before we added the black list entry. */
/* Head changed under us: release the slot and (elided) retry.           */
196 if (first != AO_load(list)) {
197 AO_store_release(a->AO_stack_bl+i, 0);
/* Detach: read first's next link, then swing the list head to it.       */
200 first_ptr = AO_REAL_NEXT_PTR(first);
201 next = AO_load(first_ptr);
/* CAS lost: release the slot and (elided) retry from the top.           */
202 if (!AO_compare_and_swap_release(list, first, next)) {
203 AO_store_release(a->AO_stack_bl+i, 0);
206 assert(*list != first);
207 /* Since we never insert an entry on the black list, this cannot have */
208 /* succeeded unless first remained on the list while we were running. */
209 /* Thus its next link cannot have changed out from under us, and we */
210 /* removed exactly one entry and preserved the rest of the list. */
211 /* Note that it is quite possible that an additional entry was */
212 /* inserted and removed while we were running; this is OK since the */
213 /* part of the list following first must have remained unchanged, and */
214 /* first must again have been at the head of the list when the */
215 /* compare_and_swap succeeded. */
/* Success: release our blacklist slot.                                  */
216 AO_store_release(a->AO_stack_bl+i, 0);
220 #else /* ! USE_ALMOST_LOCK_FREE */
222 /* Better names for fields in AO_stack_t */
224 #define version AO_val1
226 #if defined(AO_HAVE_compare_double_and_swap_double)
/* Push element onto the (pointer, version) stack variant.               */
/* NOTE(review): the extraction has elided the do-loop braces and the    */
/* store of next into *element; code lines kept byte-for-byte.           */
228 void AO_stack_push_release(AO_stack_t *list, AO_t *element)
/* Link to the current head, then CAS only the pointer word.             */
233 next = AO_load(&(list -> ptr));
235 } while (!AO_compare_and_swap_release
236 ( &(list -> ptr), next, (AO_t) element));
237 /* This uses a narrow CAS here, an old optimization suggested */
238 /* by Treiber. Pop is still safe, since we run into the ABA */
239 /* problem only if there were both intervening "pop"s and "push"es. */
240 /* In that case we still see a change in the version number. */
/* Pop from the (pointer, version) stack; returns 0 when empty.          */
/* NOTE(review): the extraction has elided declarations, the do-loop     */
/* braces and the load of next from *cptr; code kept byte-for-byte.      */
243 AO_t *AO_stack_pop_acquire(AO_stack_t *list)
247 /* Use volatile to workaround a bug in */
248 /* clang-1.1/x86 causing test_stack failure. */
/* The double-wide CAS bumps the version, defeating ABA on reinsertion.  */
256 /* Version must be loaded first. */
257 cversion = AO_load_acquire(&(list -> version));
258 cptr = (AO_t *)AO_load(&(list -> ptr));
259 if (cptr == 0) return 0;
261 } while (!AO_compare_double_and_swap_double_release
262 (list, cversion, (AO_t) cptr, cversion+1, (AO_t) next));
267 #elif defined(AO_HAVE_compare_and_swap_double)
269 /* Needed for future IA64 processors. No current clients? */
271 #error Untested! Probably doesnt work.
273 /* We have a wide CAS, but only does an AO_t-wide comparison. */
274 /* We can't use the Treiber optimization, since we only check */
275 /* for an unchanged version number, not an unchanged pointer. */
/* Push variant for targets whose wide CAS compares only one AO_t        */
/* (branch is behind the "#error Untested" above).  NOTE(review): the    */
/* extraction has elided the loop braces and the CAS's first arguments;  */
/* code lines kept byte-for-byte.                                        */
276 void AO_stack_push_release(AO_stack_t *list, AO_t *element)
282 /* Again version must be loaded first, for different reason. */
283 version = AO_load_acquire(&(list -> version));
284 next_ptr = AO_load(&(list -> ptr));
286 } while (!AO_compare_and_swap_double_release(
288 version+1, (AO_t) element));
/* Pop for the wide-CAS variant.  NOTE(review): this branch is guarded   */
/* by AO_HAVE_compare_and_swap_double yet calls                          */
/* AO_compare_double_and_swap_double_release -- plausibly part of why    */
/* the path carries "#error Untested" above; confirm against upstream.   */
/* Extraction elided declarations and loop braces; code byte-for-byte.   */
291 AO_t *AO_stack_pop_acquire(AO_stack_t *list)
298 cversion = AO_load_acquire(&(list -> version));
299 cptr = (AO_t *)AO_load(&(list -> ptr));
300 if (cptr == 0) return 0;
302 } while (!AO_compare_double_and_swap_double_release
303 (list, cversion, (AO_t) cptr, cversion+1, next));
308 #endif /* AO_HAVE_compare_and_swap_double */
310 #endif /* ! USE_ALMOST_LOCK_FREE */