2 * sgen-nursery-allocator.c: Nursery allocation code.
5 * Copyright 2009-2010 Novell, Inc.
8 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11 * Permission is hereby granted, free of charge, to any person obtaining
12 * a copy of this software and associated documentation files (the
13 * "Software"), to deal in the Software without restriction, including
14 * without limitation the rights to use, copy, modify, merge, publish,
15 * distribute, sublicense, and/or sell copies of the Software, and to
16 * permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
19 * The above copyright notice and this permission notice shall be
20 * included in all copies or substantial portions of the Software.
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 * The young generation is divided into fragments. This is because
33 * we can hand one fragment to a thread for lock-less fast alloc and
34 * because the young generation ends up fragmented anyway by pinned objects.
35 * Once a collection is done, a list of fragments is created. When doing
36 * thread local alloc we use smallish nurseries so we allow new threads to
37 * allocate memory from gen0 without triggering a collection. Threads that
38 * are found to allocate lots of memory are given bigger fragments. This
39 * should make the finalizer thread use little nursery memory after a while.
40 * We should start assigning threads very small fragments: if there are many
41 * threads the nursery will be full of reserved space that the threads may not
42 * use at all, slowing down allocation speed.
43 * Thread local allocation is done from areas of memory Hotspot calls Thread Local
44 * Allocation Buffers (TLABs).
55 #ifdef HAVE_SEMAPHORE_H
56 #include <semaphore.h>
72 cleanup the code that readies the nursery for pinning/collection
73 this means removing all remaining memsetting and using phony objects
74 all around. Have a separate fragments head so we can process all
77 #include "metadata/sgen-gc.h"
78 #include "metadata/metadata-internals.h"
79 #include "metadata/class-internals.h"
80 #include "metadata/gc-internal.h"
81 #include "metadata/object-internals.h"
82 #include "metadata/threads.h"
83 #include "metadata/sgen-cardtable.h"
84 #include "metadata/sgen-protocol.h"
85 #include "metadata/sgen-archdep.h"
86 #include "metadata/sgen-bridge.h"
87 #include "metadata/mono-gc.h"
88 #include "metadata/method-builder.h"
89 #include "metadata/profiler-private.h"
90 #include "metadata/monitor.h"
91 #include "metadata/threadpool-internals.h"
92 #include "metadata/mempool-internals.h"
93 #include "metadata/marshal.h"
94 #include "utils/mono-mmap.h"
95 #include "utils/mono-time.h"
96 #include "utils/mono-semaphore.h"
97 #include "utils/mono-counters.h"
98 #include "utils/mono-proclib.h"
99 #include "utils/mono-threads.h"
/* Descriptor for one free region of the nursery handed out for allocation. */
102 typedef struct _Fragment Fragment;
/* NOTE(review): the struct bodies are partially missing from this chunk;
 * the first three fields below belong to struct _Fragment, the last two to
 * the FragmentAllocator struct — confirm against the full file. */
106 char *fragment_start;
107 char *fragment_next; /* the current soft limit for allocation */
109 Fragment *next_in_order; /* We use a different entry for all active fragments so we can avoid SMR. */
113 Fragment *alloc_head; /* List head to be used when allocating memory. Walk with fragment_next. */
114 Fragment *region_head; /* List head of the region used by this allocator. Walk with next_in_order. */
117 /* Enable it so nursery allocation diagnostic data is collected */
118 //#define NALLOC_DEBUG 1
120 /* fragments that are free and ready to be used for allocation */
121 static FragmentAllocator nursery_allocator;
123 /* freeelist of fragment structures */
124 static Fragment *fragment_freelist = NULL;
126 /* Allocator cursors */
127 static char *nursery_last_pinned_end = NULL;
/* Bounds of the nursery; set in mono_sgen_nursery_allocator_set_nursery_bounds (). */
129 char *mono_sgen_nursery_start;
130 char *mono_sgen_nursery_end;
/* Default nursery size: 4MB (1 << 22); the bits value mirrors it for aligned nurseries. */
133 int mono_sgen_nursery_size = (1 << 22);
134 #ifdef SGEN_ALIGN_NURSERY
135 int mono_sgen_nursery_bits = 22;
140 static void mono_sgen_clear_range (char *start, char *end);
/* Diagnostic counters, registered with mono_counters in
 * mono_sgen_nursery_allocator_init_heavy_stats () below. */
142 #ifdef HEAVY_STATISTICS
144 static gint32 stat_wasted_bytes_trailer = 0;
145 static gint32 stat_wasted_bytes_small_areas = 0;
146 static gint32 stat_wasted_bytes_discarded_fragments = 0;
147 static gint32 stat_nursery_alloc_requests = 0;
148 static gint32 stat_alloc_iterations = 0;
149 static gint32 stat_alloc_retries = 0;
151 static gint32 stat_nursery_alloc_range_requests = 0;
152 static gint32 stat_alloc_range_iterations = 0;
153 static gint32 stat_alloc_range_retries = 0;
157 /************************************Nursery allocation debugging *********************************************/
/* NOTE(review): the AllocRecord struct definition is partially missing from
 * this chunk; later code also reads .address, .size, .reason and .seq. */
174 MonoNativeThreadId tid;
177 #define ALLOC_RECORD_COUNT 128000
/* Flight-record array and cursors used only under NALLOC_DEBUG. */
180 static AllocRecord *alloc_records;
181 static volatile int next_record;
182 static volatile int alloc_count;
/*
 * Map an AllocRecord reason code to a short human-readable name used by the
 * NALLOC_DEBUG dump/verify routines below.
 */
186 get_reason_name (AllocRecord *rec)
188 switch (rec->reason) {
189 case FIXED_ALLOC: return "fixed-alloc";
190 case RANGE_ALLOC: return "range-alloc";
191 case PINNING: return "pinning";
192 case BLOCK_ZEROING: return "block-zeroing";
193 case CLEAR_NURSERY_FRAGS: return "clear-nursery-frag";
194 default: return "invalid";
/* Reset the NALLOC_DEBUG flight record (body missing from this chunk). */
199 reset_alloc_records (void)
/*
 * Append one allocation event to the flight record.  The slot index is
 * claimed atomically so concurrent allocators do not clobber each other.
 * NOTE(review): no visible bounds check against ALLOC_RECORD_COUNT here —
 * confirm the full file guards idx before indexing, otherwise this can
 * write past alloc_records.
 */
206 add_alloc_record (char *addr, size_t size, int reason)
208 int idx = InterlockedIncrement (&next_record) - 1;
209 alloc_records [idx].address = addr;
210 alloc_records [idx].size = size;
211 alloc_records [idx].reason = reason;
212 alloc_records [idx].seq = idx;
213 alloc_records [idx].tid = mono_native_thread_id_get ();
/*
 * qsort comparator: order records by address, breaking ties by sequence
 * number so the dump is stable and overlaps are adjacent.
 */
217 comp_alloc_record (const void *_a, const void *_b)
219 const AllocRecord *a = _a;
220 const AllocRecord *b = _b;
221 if (a->address == b->address)
222 return a->seq - b->seq;
223 return a->address - b->address;
/* One past the last byte covered by a record. */
226 #define rec_end(REC) ((REC)->address + (REC)->size)
/* Print every recorded allocation, sorted by address, for offline debugging. */
229 dump_alloc_records (void)
232 qsort (alloc_records, next_record, sizeof (AllocRecord), comp_alloc_record);
234 printf ("------------------------------------DUMP RECORDS----------------------------\n");
235 for (i = 0; i < next_record; ++i) {
236 AllocRecord *rec = alloc_records + i;
237 printf ("obj [%p, %p] size %zd reason %s seq %d tid %zx\n", rec->address, rec_end (rec), rec->size, get_reason_name (rec), rec->seq, (size_t)rec->tid);
/*
 * Sanity-check the flight record: sort by address, then report any two
 * records that overlap and track the size of holes (>= 8 bytes) between
 * consecutive records.  Diagnostic output only; does not abort.
 */
242 verify_alloc_records (void)
248 AllocRecord *prev = NULL;
250 qsort (alloc_records, next_record, sizeof (AllocRecord), comp_alloc_record);
251 printf ("------------------------------------DUMP RECORDS- %d %d---------------------------\n", next_record, alloc_count);
252 for (i = 0; i < next_record; ++i) {
253 AllocRecord *rec = alloc_records + i;
/* Overlap: the previous record extends past this record's start. */
257 if (rec_end (prev) > rec->address)
258 printf ("WE GOT OVERLAPPING objects %p and %p\n", prev->address, rec->address);
259 if ((rec->address - rec_end (prev)) >= 8)
261 hole_size = rec->address - rec_end (prev);
262 max_hole = MAX (max_hole, hole_size);
264 printf ("obj [%p, %p] size %zd hole to prev %d reason %s seq %d tid %zx\n", rec->address, rec_end (rec), rec->size, hole_size, get_reason_name (rec), rec->seq, (size_t)rec->tid);
267 printf ("SUMMARY total alloc'd %d holes %d max_hole %d\n", total, holes, max_hole);
272 /*********************************************************************************/
/* Tag a pointer with the given low bit(s); used to mark list nodes for the
 * lock-free (Michael-style) removal below. */
275 static inline gpointer
276 mask (gpointer n, uintptr_t bit)
278 return (gpointer)(((uintptr_t)n) | bit);
/* Strip the two low tag bits to recover the real pointer.
 * NOTE(review): the name line of this helper is missing from this chunk;
 * callers refer to it as unmask (). */
281 static inline gpointer
284 return (gpointer)((uintptr_t)p & ~(uintptr_t)0x3);
/* Read the deletion mark (bit 0) of a list link. */
287 static inline uintptr_t
288 get_mark (gpointer n)
290 return (uintptr_t)n & 0x1;
293 /*MUST be called with world stopped*/
/*
 * Pop a Fragment descriptor from the freelist, or allocate a fresh one from
 * SGen internal memory when the freelist is empty.  Both link fields are
 * cleared before the fragment is returned.
 */
295 alloc_fragment (void)
297 Fragment *frag = fragment_freelist;
299 fragment_freelist = frag->next_in_order;
300 frag->next = frag->next_in_order = NULL;
303 frag = mono_sgen_alloc_internal (INTERNAL_MEM_FRAGMENT);
304 frag->next = frag->next_in_order = NULL;
/*
 * Prepend a new fragment covering [start, end) to both of the allocator's
 * lists (alloc order and insertion order).  fragment_next starts at start:
 * the whole range is available.
 */
309 add_fragment (FragmentAllocator *allocator, char *start, char *end)
313 fragment = alloc_fragment ();
314 fragment->fragment_start = start;
315 fragment->fragment_next = start;
316 fragment->fragment_end = end;
317 fragment->next_in_order = fragment->next = unmask (allocator->region_head);
319 allocator->region_head = allocator->alloc_head = fragment;
/*
 * Return all fragment descriptors of this allocator to the global freelist
 * and empty both list heads.  Walks next_in_order, which still links every
 * fragment including ones removed from the alloc list.
 */
323 release_fragment_list (FragmentAllocator *allocator)
325 Fragment *last = allocator->region_head;
329 /* Find the last fragment in insert order */
330 for (; last->next_in_order; last = last->next_in_order) ;
/* Splice the whole region list onto the front of the freelist. */
332 last->next_in_order = fragment_freelist;
333 fragment_freelist = allocator->region_head;
334 allocator->alloc_head = allocator->region_head = NULL;
/*
 * Lock-free search for the link slot (Fragment** ) that currently points at
 * frag in the alloc list, in the style of Michael's lock-free linked lists:
 * marked (logically deleted) nodes encountered along the way are physically
 * unlinked with a CAS, and the whole scan restarts when a CAS loses a race.
 * NOTE(review): several lines of this routine are missing from this chunk;
 * do not modify without consulting the full file.
 */
338 find_previous_pointer_fragment (FragmentAllocator *allocator, Fragment *frag)
341 Fragment *cur, *next;
347 prev = &allocator->alloc_head;
350 printf ("retry count for fppf is %d\n", count);
353 cur = unmask (*prev);
361 * We need to make sure that we dereference prev below
362 * after reading cur->next above, so we need a read
365 mono_memory_read_barrier ();
/* Unmarked node: keep walking (or return when cur == frag). */
370 if (!get_mark (next)) {
/* Marked node: try to physically unlink it; restart on CAS failure. */
375 next = unmask (next);
376 if (InterlockedCompareExchangePointer ((volatile gpointer*)prev, next, cur) != cur)
378 /*we must make sure that the next from cur->next happens after*/
379 mono_memory_write_barrier ();
382 cur = mono_lls_pointer_unmask (next);
/*
 * Try to atomically claim everything left in frag after alloc_end by bumping
 * fragment_next to fragment_end.  Returns whether the CAS won; a loss means
 * another thread allocated from the fragment concurrently.
 */
388 claim_remaining_size (Fragment *frag, char *alloc_end)
390 /* All space used, nothing to claim. */
391 if (frag->fragment_end <= alloc_end)
394 /* Try to alloc all the remaining space. */
395 return InterlockedCompareExchangePointer ((volatile gpointer*)&frag->fragment_next, frag->fragment_end, alloc_end) == alloc_end;
/*
 * Lock-free bump allocation of `size` bytes from one fragment: CAS
 * fragment_next from p to p+size.  On success, if the tail left in the
 * fragment is below SGEN_MAX_NURSERY_WASTE the fragment is retired from the
 * alloc list using the two-CAS (mark, then unlink) removal protocol.
 * NOTE(review): parts of this routine are missing from this chunk; the
 * ordering of the barriers and CASes is load-bearing — do not reorder.
 */
399 par_alloc_from_fragment (FragmentAllocator *allocator, Fragment *frag, size_t size)
401 char *p = frag->fragment_next;
402 char *end = p + size;
404 if (end > frag->fragment_end)
407 /* p = frag->fragment_next must happen before */
408 mono_memory_barrier ();
/* The allocation itself: lose the CAS and the caller retries. */
410 if (InterlockedCompareExchangePointer ((volatile gpointer*)&frag->fragment_next, end, p) != p)
413 if (frag->fragment_end - end < SGEN_MAX_NURSERY_WASTE) {
414 Fragment *next, **prev_ptr;
417 * Before we clean the remaining nursery, we must claim the remaining space
418 * as it could end up been used by the range allocator since it can end up
419 * allocating from this dying fragment as it doesn't respect SGEN_MAX_NURSERY_WASTE
420 * when doing second chance allocation.
422 if (mono_sgen_get_nursery_clear_policy () == CLEAR_AT_TLAB_CREATION && claim_remaining_size (frag, end)) {
423 mono_sgen_clear_range (end, frag->fragment_end);
424 HEAVY_STAT (InterlockedExchangeAdd (&stat_wasted_bytes_trailer, frag->fragment_end - end));
426 add_alloc_record (end, frag->fragment_end - end, BLOCK_ZEROING);
430 prev_ptr = find_previous_pointer_fragment (allocator, frag);
432 /*Use Michaels linked list remove*/
434 /*prev_ptr will be null if the fragment was removed concurrently */
/* First CAS: set the mark bit on frag->next (logical deletion). */
439 if (!get_mark (next)) {
440 /*frag->next read must happen before the first CAS*/
441 mono_memory_write_barrier ();
443 /*Fail if the next done is removed concurrently and its CAS wins */
444 if (InterlockedCompareExchangePointer ((volatile gpointer*)&frag->next, mask (next, 1), next) != next) {
449 /* The second CAS must happen after the first CAS or frag->next. */
450 mono_memory_write_barrier ();
452 /* Fail if the previous node was deleted and its CAS wins */
453 if (InterlockedCompareExchangePointer ((volatile gpointer*)prev_ptr, next, frag) != frag) {
454 prev_ptr = find_previous_pointer_fragment (allocator, frag);
/*
 * Single-threaded counterpart of par_alloc_from_fragment: bump
 * fragment_next without CAS.  When the remaining tail is below
 * SGEN_MAX_NURSERY_WASTE the fragment is unlinked via *previous and the
 * tail is zeroed so nursery pinning can still parse it.
 */
465 serial_alloc_from_fragment (Fragment **previous, Fragment *frag, size_t size)
467 char *p = frag->fragment_next;
468 char *end = p + size;
470 if (end > frag->fragment_end)
473 frag->fragment_next = end;
475 if (frag->fragment_end - end < SGEN_MAX_NURSERY_WASTE) {
476 *previous = frag->next;
478 /* Clear the remaining space, pinning depends on this. FIXME move this to use phony arrays */
479 memset (end, 0, frag->fragment_end - end);
481 *previous = frag->next;
/*
 * Parallel fixed-size allocation: walk the (lock-free) alloc list and take
 * the first fragment with enough room.  A NULL from
 * par_alloc_from_fragment means a lost race, counted as a retry.
 */
488 par_alloc (FragmentAllocator *allocator, size_t size)
493 InterlockedIncrement (&alloc_count);
497 for (frag = unmask (allocator->alloc_head); unmask (frag); frag = unmask (frag->next)) {
498 HEAVY_STAT (InterlockedIncrement (&stat_alloc_iterations));
500 if (size <= (frag->fragment_end - frag->fragment_next)) {
501 void *p = par_alloc_from_fragment (allocator, frag, size);
503 HEAVY_STAT (InterlockedIncrement (&stat_alloc_retries));
507 add_alloc_record (p, size, FIXED_ALLOC);
/*
 * Serial fixed-size allocation: first-fit walk of the alloc list using a
 * Fragment** cursor so serial_alloc_from_fragment can unlink a dying
 * fragment in place.
 */
516 serial_alloc (FragmentAllocator *allocator, size_t size)
521 InterlockedIncrement (&alloc_count);
524 previous = &allocator->alloc_head;
526 for (frag = *previous; frag; frag = *previous) {
527 HEAVY_STAT (InterlockedIncrement (&stat_alloc_iterations));
528 char *p = serial_alloc_from_fragment (previous, frag, size);
531 add_alloc_record (p, size, FIXED_ALLOC);
/* Only advance when the fragment was not unlinked by the helper. */
535 previous = &frag->next;
/*
 * Parallel range allocation: first try to find a fragment that fits
 * desired_size; failing that, fall back (second chance) to the smallest
 * acceptable fragment of at least minimum_size and take whatever it holds.
 * The granted size is returned through out_alloc_size.
 * NOTE(review): lines are missing from this chunk (e.g. the min_frag
 * selection); consult the full file before editing.
 */
541 par_range_alloc (FragmentAllocator *allocator, size_t desired_size, size_t minimum_size, int *out_alloc_size)
543 Fragment *frag, *min_frag;
548 InterlockedIncrement (&alloc_count);
551 for (frag = unmask (allocator->alloc_head); frag; frag = unmask (frag->next)) {
552 int frag_size = frag->fragment_end - frag->fragment_next;
554 HEAVY_STAT (InterlockedIncrement (&stat_alloc_range_iterations));
556 if (desired_size <= frag_size) {
558 *out_alloc_size = desired_size;
560 p = par_alloc_from_fragment (allocator, frag, desired_size);
562 HEAVY_STAT (InterlockedIncrement (&stat_alloc_range_retries));
566 add_alloc_record (p, desired_size, RANGE_ALLOC);
/* Remember candidates for the second-chance path below. */
570 if (minimum_size <= frag_size)
574 /* The second fragment_next read should be ordered in respect to the first code block */
575 mono_memory_barrier ();
/* Second chance: re-check min_frag, it may have shrunk concurrently. */
581 frag_size = min_frag->fragment_end - min_frag->fragment_next;
582 if (frag_size < minimum_size)
585 *out_alloc_size = frag_size;
587 mono_memory_barrier ();
588 p = par_alloc_from_fragment (allocator, min_frag, frag_size);
590 /*XXX restarting here is quite dubious given this is already second chance allocation. */
592 HEAVY_STAT (InterlockedIncrement (&stat_alloc_retries));
596 add_alloc_record (p, frag_size, RANGE_ALLOC);
/*
 * Zero (or fill with phony arrays, see mono_sgen_clear_range) the unused
 * part of every fragment still on the alloc list.
 */
605 clear_allocator_fragments (FragmentAllocator *allocator)
609 for (frag = unmask (allocator->alloc_head); frag; frag = unmask (frag->next)) {
610 DEBUG (4, fprintf (gc_debug_file, "Clear nursery frag %p-%p\n", frag->fragment_next, frag->fragment_end));
611 mono_sgen_clear_range (frag->fragment_next, frag->fragment_end);
613 add_alloc_record (frag->fragment_next, frag->fragment_end - frag->fragment_next, CLEAR_NURSERY_FRAGS);
618 /* Clear all remaining nursery fragments */
/* Public entry point; only clears when the policy defers zeroing to TLAB
 * creation time (otherwise memory was already zeroed at GC time). */
620 mono_sgen_clear_nursery_fragments (void)
622 if (mono_sgen_get_nursery_clear_policy () == CLEAR_AT_TLAB_CREATION) {
623 clear_allocator_fragments (&nursery_allocator);
/*
 * Make [start, end) parseable by the collector: ranges smaller than a
 * MonoArray header are plain-zeroed; larger ranges are overwritten with a
 * phony byte-array fill object so pinning/scan code can walk over them.
 */
628 mono_sgen_clear_range (char *start, char *end)
631 size_t size = end - start;
633 if (size < sizeof (MonoArray)) {
634 memset (start, 0, size);
638 o = (MonoArray*)start;
639 o->obj.vtable = mono_sgen_get_array_fill_vtable ();
640 /* Mark this as not a real object */
641 o->obj.synchronisation = GINT_TO_POINTER (-1);
643 o->max_length = size - sizeof (MonoArray);
644 mono_sgen_set_nursery_scan_start (start);
/* The fill object must cover the range exactly. */
645 g_assert (start + mono_sgen_safe_object_get_size ((MonoObject*)o) == end);
/* Pinning walks the nursery linearly, so unused fragment tails must be
 * cleared/filled first. */
649 mono_sgen_nursery_allocator_prepare_for_pinning (void)
651 clear_allocator_fragments (&nursery_allocator);
/* Running total of fragment bytes made available in the current build pass. */
654 static mword fragment_total = 0;
656 * We found a fragment of free memory in the nursery: memzero it and if
657 * it is big enough, add it to the list of fragments that can be used for
661 add_nursery_frag (FragmentAllocator *allocator, size_t frag_size, char* frag_start, char* frag_end)
663 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
664 binary_protocol_empty (frag_start, frag_size);
665 /* Not worth dealing with smaller fragments: need to tune */
666 if (frag_size >= SGEN_MAX_NURSERY_WASTE) {
667 /* memsetting just the first chunk start is bound to provide better cache locality */
668 if (mono_sgen_get_nursery_clear_policy () == CLEAR_AT_GC)
669 memset (frag_start, 0, frag_size);
672 /* XXX convert this into a flight record entry
673 printf ("\tfragment [%p %p] size %zd\n", frag_start, frag_end, frag_size);
676 add_fragment (allocator, frag_start, frag_end);
677 fragment_total += frag_size;
679 /* Clear unused fragments, pinning depends on this */
/* Too small to allocate from: just fill it and count it as waste. */
680 mono_sgen_clear_range (frag_start, frag_end);
681 HEAVY_STAT (InterlockedExchangeAdd (&stat_wasted_bytes_small_areas, frag_size));
/*
 * After a collection: rebuild the fragment list from the sorted array of
 * pinned-object addresses (`start`, num_entries long).  The gaps between
 * consecutive pinned objects become fragments; returns the total bytes made
 * available.  Must run with the world stopped.
 */
686 mono_sgen_build_nursery_fragments (GCMemSection *nursery_section, void **start, int num_entries)
688 char *frag_start, *frag_end;
693 reset_alloc_records ();
696 release_fragment_list (&nursery_allocator);
698 frag_start = mono_sgen_nursery_start;
700 /* clear scan starts */
701 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
702 for (i = 0; i < num_entries; ++i) {
703 frag_end = start [i];
704 /* remove the pin bit from pinned objects */
705 SGEN_UNPIN_OBJECT (frag_end);
706 mono_sgen_set_nursery_scan_start (frag_end);
707 frag_size = frag_end - frag_start;
709 add_nursery_frag (&nursery_allocator, frag_size, frag_start, frag_end);
/* Next gap starts just past the pinned object itself. */
710 frag_size = SGEN_ALIGN_UP (mono_sgen_safe_object_get_size ((MonoObject*)start [i]));
712 add_alloc_record (start [i], frag_size, PINNING);
714 frag_start = (char*)start [i] + frag_size;
/* Trailing gap between the last pinned object and the nursery end. */
716 nursery_last_pinned_end = frag_start;
717 frag_end = mono_sgen_nursery_end;
718 frag_size = frag_end - frag_start;
720 add_nursery_frag (&nursery_allocator, frag_size, frag_start, frag_end);
721 if (!unmask (nursery_allocator.alloc_head)) {
722 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", num_entries));
723 for (i = 0; i < num_entries; ++i) {
724 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", start [i], mono_sgen_safe_name (start [i]), mono_sgen_safe_object_get_size (start [i])));
727 return fragment_total;
/*
 * Highest address the allocator may have handed out: the max of every
 * fragment's current allocation cursor and the end of the pinned area.
 */
731 mono_sgen_nursery_alloc_get_upper_alloc_bound (void)
736 for (frag = unmask (nursery_allocator.alloc_head); frag; frag = unmask (frag->next))
737 p = MAX (p, frag->fragment_next);
739 return MAX (p, nursery_last_pinned_end);
742 /*** Nursery memory allocation ***/
/* Account a discarded region as wasted bytes (statistics only here). */
744 mono_sgen_nursery_retire_region (void *address, ptrdiff_t size)
746 HEAVY_STAT (InterlockedExchangeAdd (&stat_wasted_bytes_discarded_fragments, size));
/* Non-allocating probe: does any fragment currently have room for `size`
 * (after alignment)? */
750 mono_sgen_can_alloc_size (size_t size)
753 size = SGEN_ALIGN_UP (size);
755 for (frag = unmask (nursery_allocator.alloc_head); frag; frag = unmask (frag->next)) {
756 if ((frag->fragment_end - frag->fragment_next) >= size)
/* Public fixed-size nursery allocation entry point; aligns the request and
 * delegates to the parallel allocator. */
763 mono_sgen_nursery_alloc (size_t size)
765 DEBUG (4, fprintf (gc_debug_file, "Searching nursery for size: %zd\n", size));
766 size = SGEN_ALIGN_UP (size);
768 HEAVY_STAT (InterlockedIncrement (&stat_nursery_alloc_requests));
770 return par_alloc (&nursery_allocator, size);
/* Public range allocation entry point (used for TLABs): ask for
 * desired_size but accept anything down to minimum_size. */
774 mono_sgen_nursery_alloc_range (size_t desired_size, size_t minimum_size, int *out_alloc_size)
776 DEBUG (4, fprintf (gc_debug_file, "Searching for byte range desired size: %zd minimum size %zd\n", desired_size, minimum_size));
778 HEAVY_STAT (InterlockedIncrement (&stat_nursery_alloc_range_requests));
780 return par_range_alloc (&nursery_allocator, desired_size, minimum_size, out_alloc_size);
/*** Initialization ***/

#ifdef HEAVY_STATISTICS

/*
 * Register the nursery-allocator diagnostic counters with the runtime
 * counters subsystem so they appear in counter dumps.  Compiled only in
 * HEAVY_STATISTICS builds.
 */
static void
mono_sgen_nursery_allocator_init_heavy_stats (void)
{
	mono_counters_register ("bytes wasted trailer fragments", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_wasted_bytes_trailer);
	mono_counters_register ("bytes wasted small areas", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_wasted_bytes_small_areas);
	mono_counters_register ("bytes wasted discarded fragments", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_wasted_bytes_discarded_fragments);

	mono_counters_register ("# nursery alloc requests", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_nursery_alloc_requests);
	mono_counters_register ("# nursery alloc iterations", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_alloc_iterations);
	mono_counters_register ("# nursery alloc retries", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_alloc_retries);

	mono_counters_register ("# nursery alloc range requests", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_nursery_alloc_range_requests);
	mono_counters_register ("# nursery alloc range iterations", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_alloc_range_iterations);
	/* Fixed counter-name typo: was "restries". */
	mono_counters_register ("# nursery alloc range retries", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_alloc_range_retries);
}

#endif
/*
 * One-time setup: register the Fragment descriptor size with SGen's
 * fixed-size internal allocator, and (under NALLOC_DEBUG) reserve the
 * flight-record array from OS memory.
 */
806 mono_sgen_init_nursery_allocator (void)
808 mono_sgen_register_fixed_internal_mem_type (INTERNAL_MEM_FRAGMENT, sizeof (Fragment));
810 alloc_records = mono_sgen_alloc_os_memory (sizeof (AllocRecord) * ALLOC_RECORD_COUNT, TRUE);
/*
 * Record the nursery [start, end) bounds and seed the allocator with a
 * single fragment covering the whole nursery.
 */
815 mono_sgen_nursery_allocator_set_nursery_bounds (char *start, char *end)
817 /* Setup the single first large fragment */
818 add_fragment (&nursery_allocator, start, end);
819 mono_sgen_nursery_start = start;
820 mono_sgen_nursery_end = end;