/*
 * (C) Copyright 2011 Novell, Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 *
 * This is a simplified version of the lock-free allocator described in
 *
 * Scalable Lock-Free Dynamic Memory Allocation
 * Maged M. Michael, PLDI 2004
 *
 * I could not get Michael's allocator working bug-free under heavy
 * stress tests. The paper doesn't provide a correctness proof, and
 * after failing to formalize the ownership of descriptors I devised
 * this simplified version.
 *
 * Allocation within superblocks proceeds exactly like in Michael's
 * allocator. The simplification is that a thread has to "acquire" a
 * descriptor before it can allocate from its superblock. While it owns
 * the descriptor, no other thread can acquire it, and hence no other
 * thread can allocate from it. A consequence of this is that the ABA
 * problem cannot occur, so we don't need the tag field and don't have
 * to use a 64-bit CAS.
 *
 * Descriptors are stored in two locations: the partial queue and the
 * active field. A descriptor can be in at most one of those at any one
 * time. If a thread wants to allocate, it needs to get a descriptor,
 * as sketched below. It tries the active descriptor first, CASing it
 * to NULL. If that doesn't work, it dequeues a descriptor from the
 * partial queue. Once it has the descriptor it owns it, because it is
 * not referenced anymore. It allocates a slot and then gives the
 * descriptor back (unless it is FULL).
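 *
 * In pseudo-C, the acquire path is roughly this (a sketch of
 * alloc_from_active_or_partial () below, with retries elided; CAS here
 * means CAS (location, expected, new)):
 *
 *   desc = heap->active;
 *   if (!desc || !CAS (&heap->active, desc, NULL))
 *           desc = dequeue (&heap->sc->partial);
 *   if (!desc)
 *           return NULL;    // caller will allocate a new superblock
 *   ... pop one slot off desc's free list with a CAS on the anchor ...
 *   if (desc is still PARTIAL)
 *           CAS (&heap->active, NULL, desc) or enqueue it on the partial queue;
 *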
 * Note that it is still possible for a slot to be freed while an
 * allocation is in progress from the same superblock. Ownership in
 * this case is not complicated, though. If the block was FULL and the
 * free set it to PARTIAL, the free now owns the block (because FULL
 * blocks are referenced from neither partial nor active) and has to
 * give it back. If the block was PARTIAL, then the free doesn't own
 * the block (because it's either still referenced, or an alloc owns
 * it). A special case of this is that the block has changed from
 * PARTIAL to EMPTY and now needs to be retired. Technically, the free
 * wouldn't have to do anything in this case because the first thing an
 * alloc does when it gets ownership of a descriptor is to check
 * whether it is EMPTY and retire it if that is the case. As an
 * optimization, our free does try to acquire the descriptor (by CASing
 * the active field, which, if it is lucky, points to that descriptor)
 * and, if it can do so, retire it. If it can't, it tries to retire
 * other descriptors from the partial queue, so that we can be sure
 * that even if no more allocations happen, descriptors are still
 * retired. This is analogous to what Michael's allocator does.
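 *
 * The resulting decision in the free path, in pseudo-C (a sketch of
 * mono_lock_free_free () below):
 *
 *   push the slot onto the block's free list with a CAS on the anchor;
 *   if (the block went PARTIAL -> EMPTY)
 *           try to acquire it via the active field and retire it,
 *           otherwise retire EMPTY descriptors from the partial queue;
 *   else if (the block went FULL -> PARTIAL)   // we own it now
 *           give it back via the active field or the partial queue;
 *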
 * Another difference from Michael's allocator is not related to
 * concurrency, however: We don't point from slots to descriptors.
 * Instead, we allocate superblocks aligned to their (power-of-two)
 * size and point from the start of the superblock to the descriptor,
 * so we only need one word of metadata per superblock.
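 *
 * For example (with made-up numbers), in a 16384-byte superblock any
 * slot address addr maps back to the superblock header, and from there
 * to the descriptor, with a single mask and load:
 *
 *   sb_header = (size_t)addr & ~(16384 - 1);
 *   desc = *(Descriptor **)sb_header;
 *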
 * FIXME: Having more than one allocator per size class is probably
 * buggy because it was never tested.
 */

#include <mono/utils/atomic.h>

#ifdef SGEN_WITHOUT_MONO
#include <mono/sgen/sgen-gc.h>
#include <mono/sgen/sgen-client.h>
#else
#include <mono/utils/mono-mmap.h>
#endif

#include <mono/utils/mono-membar.h>
#include <mono/utils/hazard-pointer.h>
#include <mono/utils/lock-free-queue.h>

#include <mono/utils/lock-free-alloc.h>

//#define DESC_AVAIL_DUMMY
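
/*
 * A superblock is in one of three states. Its anchor packs the index
 * of the first available slot (avail), the number of available slots
 * (count) and the state into a single 32-bit word, so that set_anchor ()
 * can update all three with one CAS. This layout is a reconstruction;
 * the exact bit widths are an assumption, chosen so the three fields
 * fit the 32-bit anchor.
 */
enum {
	STATE_FULL,
	STATE_PARTIAL,
	STATE_EMPTY
};

typedef union {
	gint32 value;
	struct {
		guint32 avail : 15;
		guint32 count : 15;
		guint32 state : 2;
	} data;
} Anchor;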

typedef struct _MonoLockFreeAllocDescriptor Descriptor;
struct _MonoLockFreeAllocDescriptor {
	MonoLockFreeQueueNode node;
	MonoLockFreeAllocator *heap;
	volatile Anchor anchor;
	unsigned int slot_size;
	unsigned int block_size;
	unsigned int max_count;
	gpointer sb;
#ifndef DESC_AVAIL_DUMMY
	Descriptor * volatile next;
#endif
	gboolean in_use;	/* used for debugging only */
};

#define NUM_DESC_BATCH	64

static MONO_ALWAYS_INLINE gpointer
sb_header_for_addr (gpointer addr, size_t block_size)
{
	return (gpointer)(((size_t)addr) & (~(block_size - 1)));
}
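
/*
 * For illustration: recovering the descriptor for any address inside a
 * superblock is the mask above plus a single load, which is what
 * mono_lock_free_free () does:
 *
 *   Descriptor *desc = *(Descriptor **) sb_header_for_addr (addr, block_size);
 */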

/* Taken from SGen */

static unsigned long
prot_flags_for_activate (int activate)
{
	unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
	return prot_flags | MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
}

static gpointer
alloc_sb (Descriptor *desc)
{
	static int pagesize = -1;

	gpointer sb_header;

	if (pagesize == -1)
		pagesize = mono_pagesize ();

	sb_header = desc->block_size == pagesize ?
		mono_valloc (NULL, desc->block_size, prot_flags_for_activate (TRUE), desc->heap->account_type) :
		mono_valloc_aligned (desc->block_size, desc->block_size, prot_flags_for_activate (TRUE), desc->heap->account_type);

	g_assert (sb_header == sb_header_for_addr (sb_header, desc->block_size));

	/* The first word of the superblock points back to its descriptor. */
	*(Descriptor**)sb_header = desc;
	//g_print ("sb %p for %p\n", sb_header, desc);

	return (char*)sb_header + LOCK_FREE_ALLOC_SB_HEADER_SIZE;
}

static void
free_sb (gpointer sb, size_t block_size, MonoMemAccountType type)
{
	gpointer sb_header = sb_header_for_addr (sb, block_size);
	g_assert ((char*)sb_header + LOCK_FREE_ALLOC_SB_HEADER_SIZE == sb);
	mono_vfree (sb_header, block_size, type);
	//g_print ("free sb %p\n", sb_header);
}

#ifndef DESC_AVAIL_DUMMY
static Descriptor * volatile desc_avail;

static Descriptor*
desc_alloc (MonoMemAccountType type)
{
	MonoThreadHazardPointers *hp = mono_hazard_pointer_get ();
	Descriptor *desc;

	for (;;) {
		gboolean success;

		desc = (Descriptor *) mono_get_hazardous_pointer ((gpointer * volatile)&desc_avail, hp, 1);
		if (desc) {
			Descriptor *next = desc->next;
			success = (InterlockedCompareExchangePointer ((gpointer * volatile)&desc_avail, next, desc) == desc);
		} else {
			size_t desc_size = sizeof (Descriptor);
			Descriptor *d;
			int i;

			desc = (Descriptor *) mono_valloc (NULL, desc_size * NUM_DESC_BATCH, prot_flags_for_activate (TRUE), type);

			/* Organize into linked list. */
			d = desc;
			for (i = 0; i < NUM_DESC_BATCH; ++i) {
				Descriptor *next = (i == (NUM_DESC_BATCH - 1)) ? NULL : (Descriptor*)((char*)desc + ((i + 1) * desc_size));
				d->next = next;
				mono_lock_free_queue_node_init (&d->node, TRUE);
				d = next;
			}
			mono_memory_write_barrier ();

			/* Keep the first descriptor for ourselves and publish the rest. */
			success = (InterlockedCompareExchangePointer ((gpointer * volatile)&desc_avail, desc->next, NULL) == NULL);
			if (!success)
				mono_vfree (desc, desc_size * NUM_DESC_BATCH, type);
		}
		mono_hazard_pointer_clear (hp, 1);
		if (success)
			break;
	}

	g_assert (!desc->in_use);
	desc->in_use = TRUE;

	return desc;
}

static void
desc_enqueue_avail (gpointer _desc)
{
	Descriptor *desc = (Descriptor *) _desc;
	Descriptor *old_head;

	g_assert (desc->anchor.data.state == STATE_EMPTY);
	g_assert (!desc->in_use);

	do {
		old_head = desc_avail;
		desc->next = old_head;
		mono_memory_write_barrier ();
	} while (InterlockedCompareExchangePointer ((gpointer * volatile)&desc_avail, desc, old_head) != old_head);
}

static void
desc_retire (Descriptor *desc)
{
	g_assert (desc->anchor.data.state == STATE_EMPTY);
	g_assert (desc->in_use);
	desc->in_use = FALSE;
	free_sb (desc->sb, desc->block_size, desc->heap->account_type);
	mono_thread_hazardous_try_free (desc, desc_enqueue_avail);
}
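
/*
 * Note that desc_retire () cannot push the descriptor straight back onto
 * the available list: another thread may have read it from desc_avail in
 * desc_alloc () and still hold a hazard pointer to it.
 * mono_thread_hazardous_try_free () runs desc_enqueue_avail () only once
 * no thread does.
 */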
#else
MonoLockFreeQueue available_descs;

static Descriptor*
desc_alloc (MonoMemAccountType type)
{
	Descriptor *desc = (Descriptor*)mono_lock_free_queue_dequeue (&available_descs);

	if (desc)
		return desc;

	return g_calloc (1, sizeof (Descriptor));
}

static void
desc_retire (Descriptor *desc)
{
	free_sb (desc->sb, desc->block_size, desc->heap->account_type);
	mono_lock_free_queue_enqueue (&available_descs, &desc->node);
}
#endif

static Descriptor*
list_get_partial (MonoLockFreeAllocSizeClass *sc)
{
	for (;;) {
		Descriptor *desc = (Descriptor*) mono_lock_free_queue_dequeue (&sc->partial);
		if (!desc)
			return NULL;
		if (desc->anchor.data.state != STATE_EMPTY)
			return desc;
		/* Retire EMPTY descriptors we dequeue and keep looking. */
		desc_retire (desc);
	}
}

static void
desc_put_partial (gpointer _desc)
{
	Descriptor *desc = (Descriptor *) _desc;

	g_assert (desc->anchor.data.state != STATE_FULL);

	mono_lock_free_queue_node_unpoison (&desc->node);
	mono_lock_free_queue_enqueue (&desc->heap->sc->partial, &desc->node);
}

static void
list_put_partial (Descriptor *desc)
{
	g_assert (desc->anchor.data.state != STATE_FULL);
	mono_thread_hazardous_try_free (desc, desc_put_partial);
}

static void
list_remove_empty_desc (MonoLockFreeAllocSizeClass *sc)
{
	int num_non_empty = 0;

	for (;;) {
		Descriptor *desc = (Descriptor*) mono_lock_free_queue_dequeue (&sc->partial);
		if (!desc)
			return;
		/*
		 * We don't need to read atomically because we're the
		 * only thread that references this descriptor.
		 */
		if (desc->anchor.data.state == STATE_EMPTY) {
			desc_retire (desc);
		} else {
			g_assert (desc->heap->sc == sc);
			mono_thread_hazardous_try_free (desc, desc_put_partial);
			if (++num_non_empty >= 2)
				return;
		}
	}
}

static Descriptor*
heap_get_partial (MonoLockFreeAllocator *heap)
{
	return list_get_partial (heap->sc);
}

static void
heap_put_partial (Descriptor *desc)
{
	list_put_partial (desc);
}

static gboolean
set_anchor (Descriptor *desc, Anchor old_anchor, Anchor new_anchor)
{
	if (old_anchor.data.state == STATE_EMPTY)
		g_assert (new_anchor.data.state == STATE_EMPTY);

	return InterlockedCompareExchange (&desc->anchor.value, new_anchor.value, old_anchor.value) == old_anchor.value;
}

static gpointer
alloc_from_active_or_partial (MonoLockFreeAllocator *heap)
{
	Descriptor *desc;
	Anchor old_anchor, new_anchor;
	gpointer addr;

 retry:
	desc = heap->active;
	if (desc) {
		if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, NULL, desc) != desc)
			goto retry;
	} else {
		desc = heap_get_partial (heap);
		if (!desc)
			return NULL;
	}

	/* Now we own the desc. */

	do {
		unsigned int next;

		new_anchor = old_anchor = *(volatile Anchor*)&desc->anchor.value;
		if (old_anchor.data.state == STATE_EMPTY) {
			/* We must free it because we own it. */
			desc_retire (desc);
			goto retry;
		}
		g_assert (old_anchor.data.state == STATE_PARTIAL);
		g_assert (old_anchor.data.count > 0);

		addr = (char*)desc->sb + old_anchor.data.avail * desc->slot_size;

		mono_memory_read_barrier ();

		next = *(unsigned int*)addr;
		g_assert (next < LOCK_FREE_ALLOC_SB_USABLE_SIZE (desc->block_size) / desc->slot_size);

		new_anchor.data.avail = next;
		--new_anchor.data.count;

		if (new_anchor.data.count == 0)
			new_anchor.data.state = STATE_FULL;
	} while (!set_anchor (desc, old_anchor, new_anchor));

	/* If the desc is partial we have to give it back. */
	if (new_anchor.data.state == STATE_PARTIAL) {
		if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, desc, NULL) != NULL)
			heap_put_partial (desc);
	}

	return addr;
}

static gpointer
alloc_from_new_sb (MonoLockFreeAllocator *heap)
{
	unsigned int slot_size, block_size, count, i;
	Descriptor *desc = desc_alloc (heap->account_type);

	slot_size = desc->slot_size = heap->sc->slot_size;
	block_size = desc->block_size = heap->sc->block_size;
	count = LOCK_FREE_ALLOC_SB_USABLE_SIZE (block_size) / slot_size;

	/*
	 * Setting avail to 1 because 0 is the block we're allocating
	 * right away.
	 */
	desc->anchor.data.avail = 1;
	desc->max_count = count;

	desc->anchor.data.count = desc->max_count - 1;
	desc->anchor.data.state = STATE_PARTIAL;

	desc->sb = alloc_sb (desc);

	/* Organize blocks into linked list. */
	for (i = 1; i < count - 1; ++i)
		*(unsigned int*)((char*)desc->sb + i * slot_size) = i + 1;

	mono_memory_write_barrier ();

	/* Make it active or free it again. */
	if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, desc, NULL) == NULL) {
		return desc->sb;
	} else {
		desc->anchor.data.state = STATE_EMPTY;
		desc_retire (desc);
		return NULL;
	}
}

gpointer
mono_lock_free_alloc (MonoLockFreeAllocator *heap)
{
	gpointer addr;

	for (;;) {
		addr = alloc_from_active_or_partial (heap);
		if (addr)
			break;

		addr = alloc_from_new_sb (heap);
		if (addr)
			break;
	}

	return addr;
}

void
mono_lock_free_free (gpointer ptr, size_t block_size)
{
	Anchor old_anchor, new_anchor;
	Descriptor *desc;
	gpointer sb;
	MonoLockFreeAllocator *heap = NULL;

	desc = *(Descriptor**) sb_header_for_addr (ptr, block_size);
	g_assert (block_size == desc->block_size);

	sb = desc->sb;

	do {
		new_anchor = old_anchor = *(volatile Anchor*)&desc->anchor.value;
		*(unsigned int*)ptr = old_anchor.data.avail;
		new_anchor.data.avail = ((char*)ptr - (char*)sb) / desc->slot_size;
		g_assert (new_anchor.data.avail < LOCK_FREE_ALLOC_SB_USABLE_SIZE (block_size) / desc->slot_size);

		if (old_anchor.data.state == STATE_FULL)
			new_anchor.data.state = STATE_PARTIAL;

		if (++new_anchor.data.count == desc->max_count) {
			heap = desc->heap;
			new_anchor.data.state = STATE_EMPTY;
		}
	} while (!set_anchor (desc, old_anchor, new_anchor));

	if (new_anchor.data.state == STATE_EMPTY) {
		g_assert (old_anchor.data.state != STATE_EMPTY);

		if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, NULL, desc) == desc) {
			/*
			 * We own desc, check if it's still empty, in which case we retire it.
			 * If it's partial we need to put it back either on the active slot or
			 * on the partial list.
			 */
			if (desc->anchor.data.state == STATE_EMPTY) {
				desc_retire (desc);
			} else if (desc->anchor.data.state == STATE_PARTIAL) {
				if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, desc, NULL) != NULL)
					heap_put_partial (desc);
			}
		} else {
			/*
			 * Somebody else must free it, so we do some
			 * freeing for others.
			 */
			list_remove_empty_desc (heap->sc);
		}
	} else if (old_anchor.data.state == STATE_FULL) {
		/*
		 * Nobody owned it, now we do, so we need to give it
		 * back.
		 */
		g_assert (new_anchor.data.state == STATE_PARTIAL);

		if (InterlockedCompareExchangePointer ((gpointer * volatile)&desc->heap->active, desc, NULL) != NULL)
			heap_put_partial (desc);
	}
}

/* Asserts `c' or, if the caller's local `print' variable is set, prints instead. */
#define g_assert_OR_PRINT(c, format, ...)	do {			\
		if (!(c)) {						\
			if (print)					\
				g_print ((format), ## __VA_ARGS__);	\
			else						\
				g_assert (FALSE);			\
		}							\
	} while (0)

static gboolean
descriptor_check_consistency (Descriptor *desc, gboolean print)
{
	int count = desc->anchor.data.count;
	int max_count = LOCK_FREE_ALLOC_SB_USABLE_SIZE (desc->block_size) / desc->slot_size;
#ifdef _MSC_VER
	gboolean* linked = alloca(max_count*sizeof(gboolean));
#else
	gboolean linked [max_count];
#endif
	int i, last;
	int index;

#ifndef DESC_AVAIL_DUMMY
	Descriptor *avail;

	for (avail = desc_avail; avail; avail = avail->next)
		g_assert_OR_PRINT (desc != avail, "descriptor is in the available list\n");
#endif

	g_assert_OR_PRINT (desc->slot_size == desc->heap->sc->slot_size, "slot size doesn't match size class\n");

	if (print)
		g_print ("descriptor %p is ", desc);

	switch (desc->anchor.data.state) {
	case STATE_FULL:
		if (print)
			g_print ("full\n");
		g_assert_OR_PRINT (count == 0, "count is not zero: %d\n", count);
		break;
	case STATE_PARTIAL:
		if (print)
			g_print ("partial\n");
		g_assert_OR_PRINT (count < max_count, "count too high: is %d but must be below %d\n", count, max_count);
		break;
	case STATE_EMPTY:
		if (print)
			g_print ("empty\n");
		g_assert_OR_PRINT (count == max_count, "count is wrong: is %d but should be %d\n", count, max_count);
		break;
	default:
		g_assert_OR_PRINT (FALSE, "invalid state\n");
	}

	for (i = 0; i < max_count; ++i)
		linked [i] = FALSE;

	/* Walk the free list and check that it links each available slot exactly once. */
	last = -1;
	index = desc->anchor.data.avail;
	for (i = 0; i < count; ++i) {
		gpointer addr = (char*)desc->sb + index * desc->slot_size;
		g_assert_OR_PRINT (index >= 0 && index < max_count,
				"index %d for %dth available slot, linked from %d, not in range [0 .. %d)\n",
				index, i, last, max_count);
		g_assert_OR_PRINT (!linked [index], "%dth available slot %d linked twice\n", i, index);
		linked [index] = TRUE;
		last = index;
		index = *(unsigned int*)addr;
	}

	return TRUE;
}

gboolean
mono_lock_free_allocator_check_consistency (MonoLockFreeAllocator *heap)
{
	Descriptor *active = heap->active;
	Descriptor *desc;

	if (active) {
		g_assert (active->anchor.data.state == STATE_PARTIAL);
		descriptor_check_consistency (active, FALSE);
	}

	while ((desc = (Descriptor*)mono_lock_free_queue_dequeue (&heap->sc->partial))) {
		g_assert (desc->anchor.data.state == STATE_PARTIAL || desc->anchor.data.state == STATE_EMPTY);
		descriptor_check_consistency (desc, FALSE);
	}

	return TRUE;
}

void
mono_lock_free_allocator_init_size_class (MonoLockFreeAllocSizeClass *sc, unsigned int slot_size, unsigned int block_size)
{
	g_assert (block_size > 0);
	g_assert ((block_size & (block_size - 1)) == 0); /* check if power of 2 */
	g_assert (slot_size * 2 <= LOCK_FREE_ALLOC_SB_USABLE_SIZE (block_size));

	mono_lock_free_queue_init (&sc->partial);

	sc->slot_size = slot_size;
	sc->block_size = block_size;
}

void
mono_lock_free_allocator_init_allocator (MonoLockFreeAllocator *heap, MonoLockFreeAllocSizeClass *sc, MonoMemAccountType account_type)
{
	heap->sc = sc;
	heap->active = NULL;
	heap->account_type = account_type;
}
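
/*
 * A minimal usage sketch, compiled out by default. The slot and block
 * sizes and the MONO_MEM_ACCOUNT_OTHER account are illustrative choices,
 * not requirements.
 */
#if 0
static MonoLockFreeAllocSizeClass test_sc;
static MonoLockFreeAllocator test_heap;

static void
test_alloc_free (void)
{
	gpointer p;

	/* One size class: 32-byte slots carved out of 16 KB aligned superblocks. */
	mono_lock_free_allocator_init_size_class (&test_sc, 32, 16384);
	mono_lock_free_allocator_init_allocator (&test_heap, &test_sc, MONO_MEM_ACCOUNT_OTHER);

	p = mono_lock_free_alloc (&test_heap);
	/* ... use the 32-byte block ... */
	mono_lock_free_free (p, 16384);
}
#endif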