/*
 * lock-free-alloc.c: Lock free allocator.
 *
 * (C) Copyright 2011 Novell, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/*
 * This is a simplified version of the lock-free allocator described in
 *
 * Scalable Lock-Free Dynamic Memory Allocation
 * Maged M. Michael, PLDI 2004
 *
 * I could not get Michael's allocator working bug-free under heavy
 * stress tests.  The paper doesn't provide a correctness proof, and
 * after failing to formalize the ownership of descriptors I devised
 * this simplified, and I hope correct, version.
 *
 * Allocation within superblocks proceeds exactly like in Michael's
 * allocator.  The simplification is that a thread has to "acquire" a
 * descriptor before it can allocate from its superblock.  While it owns
 * the descriptor, no other thread can acquire it and hence allocate
 * from it.  A consequence of this is that the ABA problem cannot occur,
 * so we don't need the tag field and don't have to use 64-bit CAS.
 *
 * Descriptors are stored in two locations: the partial queue and the
 * active field.  They can be in at most one of those at any one time.
 * If a thread wants to allocate, it needs to get a descriptor.  It
 * tries the active descriptor first, CASing it to NULL.  If that
 * doesn't work, it gets a descriptor out of the partial queue.  Once it
 * has the descriptor, it owns it, because it is not referenced anymore.
 * It allocates a slot and then gives the descriptor back (unless it is
 * FULL).
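 *
 * As a sketch (this mirrors what alloc_from_active_or_partial() below
 * actually does; it is illustration, not additional API):
 *
 *	desc = heap->active;
 *	if (desc) {
 *		if (CAS (&heap->active, NULL, desc) != desc)
 *			retry;		-- somebody else got it first
 *	} else {
 *		desc = heap_get_partial (heap);
 *	}
 *	-- if desc is non-NULL, this thread now owns it exclusively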
 *
 * Note that it is still possible for a slot to be freed while an
 * allocation from the same superblock is in progress.  Ownership in
 * this case is not complicated, though.  If the block was FULL and the
 * free set it to PARTIAL, the free now owns the block (because FULL
 * blocks are referenced from neither partial nor active) and has to
 * give it back.  If the block was PARTIAL then the free doesn't own the
 * block (because it's either still referenced, or an alloc owns it).  A
 * special case of this is that the block has changed from PARTIAL to
 * EMPTY and now needs to be retired.  Technically, the free wouldn't
 * have to do anything in this case, because the first thing an alloc
 * does when it gets ownership of a descriptor is to check whether it is
 * EMPTY and retire it if that is the case.  As an optimization, our
 * free does try to acquire the descriptor (by CASing the active field,
 * which, if it is lucky, points to that descriptor) and, if it can do
 * so, retires it.  If it can't, it tries to retire other descriptors
 * from the partial queue, so that we can be sure that even if no more
 * allocations happen, descriptors are still retired.  This is analogous
 * to what Michael's allocator does.
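 *
 * Sketched out, those free-side ownership rules (mirroring what
 * mono_lock_free_free() below does; illustration only) are:
 *
 *	if (the free made the block EMPTY)
 *		try to CAS heap->active from desc to NULL;
 *		if that worked, we own desc: retire it;
 *		otherwise retire empty descriptors from the partial queue
 *	else if (the block was FULL, so the free made it PARTIAL)
 *		we own desc: make it active again or put it on the
 *		partial queue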
 *
 * Another difference from Michael's allocator is not related to
 * concurrency, however: we don't point from slots to descriptors.
 * Instead we allocate superblocks aligned, and point from the start of
 * the superblock to the descriptor, so we only need one word of
 * metadata per superblock.
 *
 * FIXME: Having more than one allocator per size class is probably
 * buggy, because it was never tested.
 */

#include <glib.h>
#include <stdlib.h>

#include <mono/utils/atomic.h>
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-membar.h>
#include <mono/utils/hazard-pointer.h>
#include <mono/utils/lock-free-queue.h>

#include <mono/utils/lock-free-alloc.h>
//#define DESC_AVAIL_DUMMY

enum {
	STATE_FULL,
	STATE_PARTIAL,
	STATE_EMPTY
};

typedef union {
	gint32 value;
	struct {
		guint32 avail : 15;
		guint32 count : 15;
		guint32 state : 2;
	} data;
} Anchor;

typedef struct _MonoLockFreeAllocDescriptor Descriptor;
struct _MonoLockFreeAllocDescriptor {
	MonoLockFreeQueueNode node;
	MonoLockFreeAllocator *heap;
	volatile Anchor anchor;
	unsigned int slot_size;
	unsigned int max_count;
	gpointer sb;
#ifndef DESC_AVAIL_DUMMY
	Descriptor * volatile next;
#endif
	gboolean in_use;	/* used for debugging only */
};

#define NUM_DESC_BATCH	64
#define SB_SIZE		16384
#define SB_HEADER_SIZE	16
#define SB_USABLE_SIZE	(SB_SIZE - SB_HEADER_SIZE)

#define SB_HEADER_FOR_ADDR(a)	((gpointer)((gulong)(a) & ~(gulong)(SB_SIZE-1)))
#define DESCRIPTOR_FOR_ADDR(a)	(*(Descriptor**)SB_HEADER_FOR_ADDR (a))
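
/*
 * A worked example of the macros above (the address is illustrative):
 * with SB_SIZE = 16384, SB_USABLE_SIZE is 16384 - 16 = 16368 bytes.  A
 * slot at address 0x7f0000c2a0 lies in the superblock whose header is
 * at 0x7f0000c2a0 & ~(gulong)16383 = 0x7f0000c000, and the first word
 * of that header is the Descriptor pointer.
 */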
/* Taken from SGen */

static unsigned long
prot_flags_for_activate (int activate)
{
	unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
	return prot_flags | MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
}

static void*
mono_sgen_alloc_os_memory (size_t size, int activate)
{
	return mono_valloc (0, size, prot_flags_for_activate (activate));
}

static void
mono_sgen_free_os_memory (void *addr, size_t size)
{
	mono_vfree (addr, size);
}

/* size must be a power of 2 */
static void*
mono_sgen_alloc_os_memory_aligned (size_t size, size_t alignment, gboolean activate)
{
	return mono_valloc_aligned (size, alignment, prot_flags_for_activate (activate));
}
static gpointer
alloc_sb (Descriptor *desc)
{
	gpointer sb_header = mono_sgen_alloc_os_memory_aligned (SB_SIZE, SB_SIZE, TRUE);
	g_assert (sb_header == SB_HEADER_FOR_ADDR (sb_header));
	DESCRIPTOR_FOR_ADDR (sb_header) = desc;
	//g_print ("sb %p for %p\n", sb_header, desc);
	return (char*)sb_header + SB_HEADER_SIZE;
}

static void
free_sb (gpointer sb)
{
	gpointer sb_header = SB_HEADER_FOR_ADDR (sb);
	g_assert ((char*)sb_header + SB_HEADER_SIZE == sb);
	mono_sgen_free_os_memory (sb_header, SB_SIZE);
	//g_print ("free sb %p\n", sb_header);
}
#ifndef DESC_AVAIL_DUMMY
static Descriptor * volatile desc_avail;

static Descriptor*
desc_alloc (void)
{
	MonoThreadHazardPointers *hp = mono_hazard_pointer_get ();
	Descriptor *desc;

	for (;;) {
		gboolean success;

		desc = get_hazardous_pointer ((gpointer * volatile)&desc_avail, hp, 1);
		if (desc) {
			Descriptor *next = desc->next;
			success = (InterlockedCompareExchangePointer ((gpointer * volatile)&desc_avail, next, desc) == desc);
		} else {
			size_t desc_size = sizeof (Descriptor);
			int i;

			desc = mono_sgen_alloc_os_memory (desc_size * NUM_DESC_BATCH, TRUE);

			/* Organize into linked list. */
			for (i = 0; i < NUM_DESC_BATCH; ++i) {
				Descriptor *d = (Descriptor*)((char*)desc + i * desc_size);
				Descriptor *next = (i == (NUM_DESC_BATCH - 1)) ? NULL : (Descriptor*)((char*)desc + ((i + 1) * desc_size));
				d->next = next;
				mono_lock_free_queue_node_init (&d->node, TRUE);
			}

			mono_memory_write_barrier ();

			success = (InterlockedCompareExchangePointer ((gpointer * volatile)&desc_avail, desc->next, NULL) == NULL);

			if (!success)
				mono_sgen_free_os_memory (desc, desc_size * NUM_DESC_BATCH);
		}

		mono_hazard_pointer_clear (hp, 1);

		if (success)
			break;
	}

	g_assert (!desc->in_use);
	desc->in_use = TRUE;

	return desc;
}
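
/*
 * Note on the hazard pointer used above: desc_retire() hands
 * descriptors back to desc_avail via mono_thread_hazardous_free_or_queue(),
 * so holding desc in hazard pointer slot 1 keeps it from being
 * re-enqueued while we read desc->next.  Without it, the pop CAS could
 * succeed against a reused head with a stale next pointer (the classic
 * ABA problem).
 */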
static void
desc_enqueue_avail (gpointer _desc)
{
	Descriptor *desc = _desc;
	Descriptor *old_head;

	g_assert (desc->anchor.data.state == STATE_EMPTY);
	g_assert (!desc->in_use);

	do {
		old_head = desc_avail;
		desc->next = old_head;
		mono_memory_write_barrier ();
	} while (InterlockedCompareExchangePointer ((gpointer * volatile)&desc_avail, desc, old_head) != old_head);
}
static void
desc_retire (Descriptor *desc)
{
	g_assert (desc->anchor.data.state == STATE_EMPTY);
	g_assert (desc->in_use);
	desc->in_use = FALSE;
	free_sb (desc->sb);
	mono_thread_hazardous_free_or_queue (desc, desc_enqueue_avail, FALSE, TRUE);
}
#else
MonoLockFreeQueue available_descs;

static Descriptor*
desc_alloc (void)
{
	Descriptor *desc = (Descriptor*)mono_lock_free_queue_dequeue (&available_descs);

	if (desc)
		return desc;

	return calloc (1, sizeof (Descriptor));
}

static void
desc_retire (Descriptor *desc)
{
	free_sb (desc->sb);
	mono_lock_free_queue_enqueue (&available_descs, &desc->node);
}
#endif
static Descriptor*
list_get_partial (MonoLockFreeAllocSizeClass *sc)
{
	for (;;) {
		Descriptor *desc = (Descriptor*) mono_lock_free_queue_dequeue (&sc->partial);
		if (!desc)
			return NULL;
		if (desc->anchor.data.state != STATE_EMPTY)
			return desc;
		desc_retire (desc);
	}
}
static void
desc_put_partial (gpointer _desc)
{
	Descriptor *desc = _desc;

	g_assert (desc->anchor.data.state != STATE_FULL);

	mono_lock_free_queue_node_free (&desc->node);
	mono_lock_free_queue_enqueue (&desc->heap->sc->partial, &desc->node);
}
static void
list_put_partial (Descriptor *desc)
{
	g_assert (desc->anchor.data.state != STATE_FULL);
	mono_thread_hazardous_free_or_queue (desc, desc_put_partial, FALSE, TRUE);
}
static void
list_remove_empty_desc (MonoLockFreeAllocSizeClass *sc)
{
	int num_non_empty = 0;
	for (;;) {
		Descriptor *desc = (Descriptor*) mono_lock_free_queue_dequeue (&sc->partial);
		if (!desc)
			return;
		/*
		 * We don't need to read atomically because we're the
		 * only thread that references this descriptor.
		 */
		if (desc->anchor.data.state == STATE_EMPTY) {
			desc_retire (desc);
		} else {
			g_assert (desc->heap->sc == sc);
			mono_thread_hazardous_free_or_queue (desc, desc_put_partial, FALSE, TRUE);
			if (++num_non_empty >= 2)
				return;
		}
	}
}
static Descriptor*
heap_get_partial (MonoLockFreeAllocator *heap)
{
	return list_get_partial (heap->sc);
}

static void
heap_put_partial (Descriptor *desc)
{
	list_put_partial (desc);
}
static gboolean
set_anchor (Descriptor *desc, Anchor old_anchor, Anchor new_anchor)
{
	if (old_anchor.data.state == STATE_EMPTY)
		g_assert (new_anchor.data.state == STATE_EMPTY);

	return InterlockedCompareExchange (&desc->anchor.value, new_anchor.value, old_anchor.value) == old_anchor.value;
}
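
/*
 * Because avail, count, and state are bitfields packed into one 32-bit
 * word (the Anchor union above), this single 32-bit CAS atomically
 * updates the free-list head, the free count, and the state together.
 */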
static gpointer
alloc_from_active_or_partial (MonoLockFreeAllocator *heap)
{
	Descriptor *desc;
	Anchor old_anchor, new_anchor;
	gpointer addr;

 retry:
	desc = heap->active;
	if (desc) {
		if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, NULL, desc) != desc)
			goto retry;
	} else {
		desc = heap_get_partial (heap);
		if (!desc)
			return NULL;
	}

	/* Now we own the desc. */

	do {
		unsigned int next;

		new_anchor = old_anchor = *(volatile Anchor*)&desc->anchor.value;
		if (old_anchor.data.state == STATE_EMPTY) {
			/* We must free it because we own it. */
			desc_retire (desc);
			goto retry;
		}
		g_assert (old_anchor.data.state == STATE_PARTIAL);
		g_assert (old_anchor.data.count > 0);

		addr = (char*)desc->sb + old_anchor.data.avail * desc->slot_size;

		mono_memory_read_barrier ();

		next = *(unsigned int*)addr;
		g_assert (next < SB_USABLE_SIZE / desc->slot_size);

		new_anchor.data.avail = next;
		--new_anchor.data.count;

		if (new_anchor.data.count == 0)
			new_anchor.data.state = STATE_FULL;
	} while (!set_anchor (desc, old_anchor, new_anchor));

	/* If the desc is partial we have to give it back. */
	if (new_anchor.data.state == STATE_PARTIAL) {
		if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, desc, NULL) != NULL)
			heap_put_partial (desc);
	}

	return addr;
}
static gpointer
alloc_from_new_sb (MonoLockFreeAllocator *heap)
{
	unsigned int slot_size, count, i;
	Descriptor *desc = desc_alloc ();

	desc->sb = alloc_sb (desc);

	slot_size = desc->slot_size = heap->sc->slot_size;
	count = SB_USABLE_SIZE / slot_size;

	/* Organize blocks into linked list. */
	for (i = 1; i < count - 1; ++i)
		*(unsigned int*)((char*)desc->sb + i * slot_size) = i + 1;

	desc->heap = heap;
	/*
	 * Setting avail to 1 because 0 is the block we're allocating
	 * right away.
	 */
	desc->anchor.data.avail = 1;
	desc->slot_size = heap->sc->slot_size;
	desc->max_count = count;

	desc->anchor.data.count = desc->max_count - 1;
	desc->anchor.data.state = STATE_PARTIAL;

	mono_memory_write_barrier ();

	/* Make it active or free it again. */
	if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, desc, NULL) == NULL) {
		return desc->sb;
	} else {
		desc->anchor.data.state = STATE_EMPTY;
		desc_retire (desc);
		return NULL;
	}
}
gpointer
mono_lock_free_alloc (MonoLockFreeAllocator *heap)
{
	gpointer addr;

	for (;;) {
		addr = alloc_from_active_or_partial (heap);
		if (addr)
			break;

		addr = alloc_from_new_sb (heap);
		if (addr)
			break;
	}

	return addr;
}
void
mono_lock_free_free (gpointer ptr)
{
	Anchor old_anchor, new_anchor;
	Descriptor *desc;
	gpointer sb;
	MonoLockFreeAllocator *heap = NULL;

	desc = DESCRIPTOR_FOR_ADDR (ptr);
	sb = desc->sb;
	g_assert (SB_HEADER_FOR_ADDR (ptr) == SB_HEADER_FOR_ADDR (sb));

	do {
		new_anchor = old_anchor = *(volatile Anchor*)&desc->anchor.value;
		*(unsigned int*)ptr = old_anchor.data.avail;
		new_anchor.data.avail = ((char*)ptr - (char*)sb) / desc->slot_size;
		g_assert (new_anchor.data.avail < SB_USABLE_SIZE / desc->slot_size);

		if (old_anchor.data.state == STATE_FULL)
			new_anchor.data.state = STATE_PARTIAL;

		if (++new_anchor.data.count == desc->max_count) {
			heap = desc->heap;
			new_anchor.data.state = STATE_EMPTY;
		}
	} while (!set_anchor (desc, old_anchor, new_anchor));

	if (new_anchor.data.state == STATE_EMPTY) {
		g_assert (old_anchor.data.state != STATE_EMPTY);

		if (InterlockedCompareExchangePointer ((gpointer * volatile)&heap->active, NULL, desc) == desc) {
			/* We own it, so we free it. */
			desc_retire (desc);
		} else {
			/*
			 * Somebody else must free it, so we do some
			 * freeing for others.
			 */
			list_remove_empty_desc (heap->sc);
		}
	} else if (old_anchor.data.state == STATE_FULL) {
		/*
		 * Nobody owned it, now we do, so we need to give it
		 * back.
		 */
		g_assert (new_anchor.data.state == STATE_PARTIAL);

		if (InterlockedCompareExchangePointer ((gpointer * volatile)&desc->heap->active, desc, NULL) != NULL)
			heap_put_partial (desc);
	}
}
#define g_assert_OR_PRINT(c, format, ...)	do {			\
		if (print) {						\
			if (!(c))					\
				g_print ((format), ## __VA_ARGS__);	\
		} else {						\
			g_assert ((c));					\
		}							\
	} while (0)
static void
descriptor_check_consistency (Descriptor *desc, gboolean print)
{
	int count = desc->anchor.data.count;
	int max_count = SB_USABLE_SIZE / desc->slot_size;
#ifdef _MSC_VER
	gboolean* linked = alloca (max_count * sizeof (gboolean));
#else
	gboolean linked [max_count];
#endif
	int i, last = -1, index;

#ifndef DESC_AVAIL_DUMMY
	Descriptor *avail;

	for (avail = desc_avail; avail; avail = avail->next)
		g_assert_OR_PRINT (desc != avail, "descriptor is in the available list\n");
#endif

	g_assert_OR_PRINT (desc->slot_size == desc->heap->sc->slot_size, "slot size doesn't match size class\n");

	if (print)
		g_print ("descriptor %p is ", desc);

	switch (desc->anchor.data.state) {
	case STATE_FULL:
		if (print)
			g_print ("full\n");
		g_assert_OR_PRINT (count == 0, "count is not zero: %d\n", count);
		break;
	case STATE_PARTIAL:
		if (print)
			g_print ("partial\n");
		g_assert_OR_PRINT (count < max_count, "count too high: is %d but must be below %d\n", count, max_count);
		break;
	case STATE_EMPTY:
		if (print)
			g_print ("empty\n");
		g_assert_OR_PRINT (count == max_count, "count is wrong: is %d but should be %d\n", count, max_count);
		break;
	default:
		g_assert_OR_PRINT (FALSE, "invalid state\n");
	}

	for (i = 0; i < max_count; ++i)
		linked [i] = FALSE;

	index = desc->anchor.data.avail;
	for (i = 0; i < count; ++i) {
		gpointer addr = (char*)desc->sb + index * desc->slot_size;
		g_assert_OR_PRINT (index >= 0 && index < max_count,
				"index %d for %dth available slot, linked from %d, not in range [0 .. %d)\n",
				index, i, last, max_count);
		g_assert_OR_PRINT (!linked [index], "%dth available slot %d linked twice\n", i, index);
		if (linked [index])
			break;
		linked [index] = TRUE;
		last = index;
		index = *(unsigned int*)addr;
	}
}
gboolean
mono_lock_free_allocator_check_consistency (MonoLockFreeAllocator *heap)
{
	Descriptor *active = heap->active;
	Descriptor *desc;

	if (active) {
		g_assert (active->anchor.data.state == STATE_PARTIAL);
		descriptor_check_consistency (active, FALSE);
	}

	while ((desc = (Descriptor*)mono_lock_free_queue_dequeue (&heap->sc->partial))) {
		g_assert (desc->anchor.data.state == STATE_PARTIAL || desc->anchor.data.state == STATE_EMPTY);
		descriptor_check_consistency (desc, FALSE);
	}

	return TRUE;
}
void
mono_lock_free_allocator_init_size_class (MonoLockFreeAllocSizeClass *sc, unsigned int slot_size)
{
	g_assert (slot_size <= SB_USABLE_SIZE / 2);

	mono_lock_free_queue_init (&sc->partial);
	sc->slot_size = slot_size;
}

void
mono_lock_free_allocator_init_allocator (MonoLockFreeAllocator *heap, MonoLockFreeAllocSizeClass *sc)
{
	heap->sc = sc;
	heap->active = NULL;
}
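
/*
 * Usage sketch (the functions named are the public API defined in this
 * file; the surrounding code is a made-up example):
 *
 *	static MonoLockFreeAllocSizeClass size_class;
 *	static MonoLockFreeAllocator allocator;
 *
 *	mono_lock_free_allocator_init_size_class (&size_class, 32);
 *	mono_lock_free_allocator_init_allocator (&allocator, &size_class);
 *
 *	gpointer slot = mono_lock_free_alloc (&allocator);
 *	-- ... use the 32-byte slot; any thread may free it ...
 *	mono_lock_free_free (slot);
 */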