X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmetadata%2Fsgen-gc.c;h=8c2cfe6250fbeef494653cacb4ebb032e9ebf706;hb=dac383857be2fdd09b7b6bbede60e4d1073f47f7;hp=fc108ae858250adbee97f03343af27f92ca9e5d5;hpb=5fd0ee87b8bdeefd1f49e7ef013311f41518a822;p=mono.git diff --git a/mono/metadata/sgen-gc.c b/mono/metadata/sgen-gc.c index fc108ae8582..8c2cfe6250f 100644 --- a/mono/metadata/sgen-gc.c +++ b/mono/metadata/sgen-gc.c @@ -3,6 +3,7 @@ * * Author: * Paolo Molaro (lupus@ximian.com) + * Rodrigo Kumpera (kumpera@gmail.com) * * Copyright 2005-2010 Novell, Inc (http://www.novell.com) * @@ -24,6 +25,7 @@ * * Copyright 2001-2003 Ximian, Inc * Copyright 2003-2010 Novell, Inc. + * Copyright 2011 Xamarin, Inc. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the @@ -174,7 +176,9 @@ A good place to start is add_nursery_frag. The tricky thing here is placing those objects atomically outside of a collection. - + *) Allocation should use asymmetric Dekker synchronization: + http://blogs.oracle.com/dave/resource/Asymmetric-Dekker-Synchronization.txt + This should help weak consistency archs. 
*/ #include "config.h" #ifdef HAVE_SGEN_GC @@ -211,11 +215,13 @@ #include "metadata/threadpool-internals.h" #include "metadata/mempool-internals.h" #include "metadata/marshal.h" +#include "metadata/runtime.h" #include "utils/mono-mmap.h" #include "utils/mono-time.h" #include "utils/mono-semaphore.h" #include "utils/mono-counters.h" #include "utils/mono-proclib.h" +#include "utils/mono-memory-model.h" #include @@ -258,6 +264,7 @@ static gboolean conservative_stack_mark = FALSE; /* If set, do a plausibility check on the scan_starts before and after each collection */ static gboolean do_scan_starts_check = FALSE; +static gboolean nursery_collection_is_parallel = FALSE; static gboolean disable_minor_collections = FALSE; static gboolean disable_major_collections = FALSE; @@ -290,9 +297,6 @@ static long long stat_global_remsets_readded = 0; static long long stat_global_remsets_processed = 0; static long long stat_global_remsets_discarded = 0; -static long long stat_wasted_fragments_used = 0; -static long long stat_wasted_fragments_bytes = 0; - static int stat_wbarrier_set_field = 0; static int stat_wbarrier_set_arrayref = 0; static int stat_wbarrier_arrayref_copy = 0; @@ -333,6 +337,7 @@ static long long time_major_fragment_creation = 0; int gc_debug_level = 0; FILE* gc_debug_file; +static gboolean debug_print_allowance = FALSE; /* void @@ -356,42 +361,8 @@ mono_gc_flush_info (void) #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1)) -/* The method used to clear the nursery */ -/* Clearing at nursery collections is the safest, but has bad interactions with caches. - * Clearing at TLAB creation is much faster, but more complex and it might expose hard - * to find bugs. - */ -typedef enum { - CLEAR_AT_GC, - CLEAR_AT_TLAB_CREATION -} NurseryClearPolicy; - static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION; -/* - * The young generation is divided into fragments. 
This is because - * we can hand one fragments to a thread for lock-less fast alloc and - * because the young generation ends up fragmented anyway by pinned objects. - * Once a collection is done, a list of fragments is created. When doing - * thread local alloc we use smallish nurseries so we allow new threads to - * allocate memory from gen0 without triggering a collection. Threads that - * are found to allocate lots of memory are given bigger fragments. This - * should make the finalizer thread use little nursery memory after a while. - * We should start assigning threads very small fragments: if there are many - * threads the nursery will be full of reserved space that the threads may not - * use at all, slowing down allocation speed. - * Thread local allocation is done from areas of memory Hotspot calls Thread Local - * Allocation Buffers (TLABs). - */ -typedef struct _Fragment Fragment; - -struct _Fragment { - Fragment *next; - char *fragment_start; - char *fragment_limit; /* the current soft limit for allocation */ - char *fragment_end; -}; - /* the runtime can register areas of memory as roots: we keep two lists of roots, * a pinned root set for conservatively scanned roots and a normal one for * precisely scanned roots (currently implemented as a single list). 
@@ -424,7 +395,7 @@ enum { REMSET_LOCATION, /* just a pointer to the exact location */ REMSET_RANGE, /* range of pointer fields */ REMSET_OBJECT, /* mark all the object for scanning */ - REMSET_VTYPE, /* a valuetype array described by a gc descriptor and a count */ + REMSET_VTYPE, /* a valuetype array described by a gc descriptor, a count and a size */ REMSET_TYPE_MASK = 0x3 }; @@ -509,9 +480,6 @@ static int default_nursery_bits = 22; #define SCAN_START_SIZE SGEN_SCAN_START_SIZE -/* the minimum size of a fragment that we consider useful for allocation */ -#define FRAGMENT_MIN_SIZE (512) - static mword pagesize = 4096; static mword nursery_size; static int degraded_mode = 0; @@ -587,19 +555,10 @@ int current_collection_generation = -1; #define DISLINK_OBJECT(d) (REVEAL_POINTER (*(d)->link)) #define DISLINK_TRACK(d) ((~(gulong)(*(d)->link)) & 1) -/* - * The finalizable hash has the object as the key, the - * disappearing_link hash, has the link address as key. - */ -static FinalizeEntryHashTable minor_finalizable_hash; -static FinalizeEntryHashTable major_finalizable_hash; /* objects that are ready to be finalized */ static FinalizeEntry *fin_ready_list = NULL; static FinalizeEntry *critical_fin_list = NULL; -static DisappearingLinkHashTable minor_disappearing_link_hash; -static DisappearingLinkHashTable major_disappearing_link_hash; - static EphemeronLinkNode *ephemeron_list; static int num_ready_finalizers = 0; @@ -667,10 +626,8 @@ add_profile_gc_root (GCRootReport *report, void *object, int rtype, uintptr_t ex * MAX(nursery_last_pinned_end, nursery_frag_real_end) */ static char *nursery_start = NULL; -static char *nursery_next = NULL; -static char *nursery_frag_real_end = NULL; static char *nursery_end = NULL; -static char *nursery_last_pinned_end = NULL; +static char *nursery_alloc_bound = NULL; #ifdef HAVE_KW_THREAD #define TLAB_ACCESS_INIT @@ -695,9 +652,18 @@ static pthread_key_t thread_info_key; #define IN_CRITICAL_REGION (__thread_info__->in_critical_region) 
#endif -/* we use the memory barrier only to prevent compiler reordering (a memory constraint may be enough) */ -#define ENTER_CRITICAL_REGION do {IN_CRITICAL_REGION = 1;mono_memory_barrier ();} while (0) -#define EXIT_CRITICAL_REGION do {IN_CRITICAL_REGION = 0;mono_memory_barrier ();} while (0) +#ifndef DISABLE_CRITICAL_REGION + +/* Enter must be visible before anything is done in the critical region. */ +#define ENTER_CRITICAL_REGION do { mono_atomic_store_release (&IN_CRITICAL_REGION, 1); } while (0) + +/* Exit must make sure all critical regions stores are visible before it signal the end of the region. + * We don't need to emit a full barrier since we + */ +#define EXIT_CRITICAL_REGION do { mono_atomic_store_seq (&IN_CRITICAL_REGION, 0); } while (0) + + +#endif /* * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS @@ -725,14 +691,6 @@ static __thread long *store_remset_buffer_index_addr; */ static guint32 tlab_size = (1024 * 4); -/*How much space is tolerable to be wasted from the current fragment when allocating a new TLAB*/ -#define MAX_NURSERY_TLAB_WASTE 512 - -/* fragments that are free and ready to be used for allocation */ -static Fragment *nursery_fragments = NULL; -/* freeelist of fragment structures */ -static Fragment *fragment_freelist = NULL; - #define MAX_SMALL_OBJ_SIZE SGEN_MAX_SMALL_OBJ_SIZE /* Functions supplied by the runtime to be called by the GC */ @@ -761,6 +719,7 @@ pthread_t main_gc_thread = NULL; */ /*heap limits*/ static mword max_heap_size = ((mword)0)- ((mword)1); +static mword soft_heap_limit = ((mword)0) - ((mword)1); static mword allocated_heap; /*Object was pinned during the current collection*/ @@ -789,11 +748,19 @@ mono_sgen_try_alloc_space (mword size, int space) } static void -init_heap_size_limits (glong max_heap) +init_heap_size_limits (glong max_heap, glong soft_limit) { + if (soft_limit) + soft_heap_limit = soft_limit; + if (max_heap == 0) return; + if (max_heap < soft_limit) { + fprintf 
(stderr, "max-heap-size must be at least as large as soft-heap-limit.\n"); + exit (1); + } + if (max_heap < nursery_size * 4) { fprintf (stderr, "max-heap-size must be at least 4 times larger than nursery size.\n"); exit (1); @@ -818,9 +785,6 @@ align_pointer (void *ptr) typedef SgenGrayQueue GrayQueue; -typedef void (*CopyOrMarkObjectFunc) (void**, GrayQueue*); -typedef char* (*ScanObjectFunc) (char*, GrayQueue*); - /* forward declarations */ static int stop_world (int generation); static int restart_world (int generation); @@ -834,12 +798,11 @@ static void report_registered_roots (void); static void find_pinning_ref_from_thread (char *obj, size_t size); static void update_current_thread_stack (void *start); static void finalize_in_range (CopyOrMarkObjectFunc copy_func, char *start, char *end, int generation, GrayQueue *queue); -static void add_or_remove_disappearing_link (MonoObject *obj, void **link, gboolean track, int generation); +static void process_fin_stage_entries (void); static void null_link_in_range (CopyOrMarkObjectFunc copy_func, char *start, char *end, int generation, gboolean before_finalization, GrayQueue *queue); static void null_links_for_domain (MonoDomain *domain, int generation); -static gboolean alloc_fragment_for_size (size_t size); -static int alloc_fragment_for_size_range (size_t desired_size, size_t minimum_size); -static void clear_nursery_fragments (char *next); +static void process_dislink_stage_entries (void); + static void pin_from_roots (void *start_nursery, void *end_nursery, GrayQueue *queue); static int pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery, GrayQueue *queue); static void optimize_pin_queue (int start_slot); @@ -851,7 +814,10 @@ static void finish_gray_stack (char *start_addr, char *end_addr, int generation, static gboolean need_major_collection (mword space_needed); static void major_collection (const char *reason); -static void 
mono_gc_register_disappearing_link (MonoObject *obj, void **link, gboolean track); +static gboolean collection_is_parallel (void); + +static void mono_gc_register_disappearing_link (MonoObject *obj, void **link, gboolean track, gboolean in_gc); +static gboolean mono_gc_is_critical_method (MonoMethod *method); void describe_ptr (char *ptr); void check_object (char *start); @@ -949,9 +915,9 @@ alloc_complex_descriptor (gsize *bitmap, int numbits) } gsize* -mono_sgen_get_complex_descriptor (GCVTable *vt) +mono_sgen_get_complex_descriptor (mword desc) { - return complex_descriptors + (vt->desc >> LOW_TYPE_BITS); + return complex_descriptors + (desc >> LOW_TYPE_BITS); } /* @@ -1003,12 +969,6 @@ mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size) DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set)); return (void*) desc; } - /* we know the 2-word header is ptr-free */ - if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) { - desc = DESC_TYPE_SMALL_BITMAP | (stored_size << 1) | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT); - DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set)); - return (void*) desc; - } } /* we know the 2-word header is ptr-free */ if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) { @@ -1081,14 +1041,18 @@ mono_gc_get_bitmap_for_descr (void *descr, int *numbits) return bitmap; } - case DESC_TYPE_SMALL_BITMAP: - bitmap = g_new0 (gsize, 1); + case DESC_TYPE_LARGE_BITMAP: { + gsize bmap = (d >> LOW_TYPE_BITS) << OBJECT_HEADER_WORDS; - bitmap [0] = (d >> SMALL_BITMAP_SHIFT) << OBJECT_HEADER_WORDS; - - *numbits = GC_BITS_PER_WORD; - + bitmap = g_new0 (gsize, 1); + bitmap [0] = bmap; + *numbits = 0; + while (bmap) { + (*numbits) ++; + bmap >>= 1; + } return bitmap; + } default: g_assert_not_reached (); } @@ -1363,30 +1327,6 @@ 
mono_gc_scan_for_specific_ref (MonoObject *key, gboolean precise) } } -static void -clear_current_nursery_fragment (char *next) -{ - if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) { - g_assert (next <= nursery_frag_real_end); - DEBUG (4, fprintf (gc_debug_file, "Clear nursery frag %p-%p\n", next, nursery_frag_real_end)); - memset (next, 0, nursery_frag_real_end - next); - } -} - -/* Clear all remaining nursery fragments */ -static void -clear_nursery_fragments (char *next) -{ - Fragment *frag; - if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) { - clear_current_nursery_fragment (next); - for (frag = nursery_fragments; frag; frag = frag->next) { - DEBUG (4, fprintf (gc_debug_file, "Clear nursery frag %p-%p\n", frag->fragment_start, frag->fragment_end)); - memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start); - } - } -} - static gboolean need_remove_object_for_domain (char *start, MonoDomain *domain) { @@ -1511,7 +1451,7 @@ clear_domain_process_object (char *obj, MonoDomain *domain) if (remove && ((MonoObject*)obj)->synchronisation) { void **dislink = mono_monitor_get_object_monitor_weak_link ((MonoObject*)obj); if (dislink) - mono_gc_register_disappearing_link (NULL, dislink, FALSE); + mono_gc_register_disappearing_link (NULL, dislink, FALSE, TRUE); } return remove; @@ -1561,7 +1501,10 @@ mono_gc_clear_domain (MonoDomain * domain) LOCK_GC; - clear_nursery_fragments (nursery_next); + process_fin_stage_entries (); + process_dislink_stage_entries (); + + mono_sgen_clear_nursery_fragments (); if (xdomain_checks && domain != mono_get_root_domain ()) { scan_for_registered_roots_in_domain (domain, ROOT_TYPE_NORMAL); @@ -1668,7 +1611,7 @@ void mono_sgen_add_to_global_remset (gpointer ptr) { RememberedSet *rs; - gboolean lock = major_collector.is_parallel; + gboolean lock = collection_is_parallel (); if (use_cardtable) { sgen_card_table_mark_address ((mword)ptr); @@ -1728,17 +1671,19 @@ drain_gray_stack (GrayQueue *queue, int max_objs) char 
*obj; if (current_collection_generation == GENERATION_NURSERY) { + ScanObjectFunc scan_func = mono_sgen_get_minor_scan_object (); + for (;;) { GRAY_OBJECT_DEQUEUE (queue, obj); if (!obj) return TRUE; DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", obj, safe_name (obj))); - major_collector.minor_scan_object (obj, queue); + scan_func (obj, queue); } } else { int i; - if (major_collector.is_parallel && queue == &workers_distribute_gray_queue) + if (collection_is_parallel () && queue == &workers_distribute_gray_queue) return TRUE; do { @@ -1772,30 +1717,8 @@ pin_objects_from_addresses (GCMemSection *section, void **start, void **end, voi void *addr; int idx; void **definitely_pinned = start; - Fragment *frag; - /* - * The code below starts the search from an entry in scan_starts, which might point into a nursery - * fragment containing random data. Clearing the nursery fragments takes a lot of time, and searching - * though them too, so lay arrays at each location inside a fragment where a search can start: - * - scan_locations[i] - * - start_nursery - * - the start of each fragment (the last_obj + last_obj case) - * The third encompasses the first two, since scan_locations [i] can't point inside a nursery fragment. 
- */ - for (frag = nursery_fragments; frag; frag = frag->next) { - MonoArray *o; - - g_assert (frag->fragment_end - frag->fragment_start >= sizeof (MonoArray)); - o = (MonoArray*)frag->fragment_start; - memset (o, 0, sizeof (MonoArray)); - g_assert (array_fill_vtable); - o->obj.vtable = array_fill_vtable; - /* Mark this as not a real object */ - o->obj.synchronisation = GINT_TO_POINTER (-1); - o->max_length = (frag->fragment_end - frag->fragment_start) - sizeof (MonoArray); - g_assert (frag->fragment_start + safe_object_get_size ((MonoObject*)o) == frag->fragment_end); - } + mono_sgen_nursery_allocator_prepare_for_pinning (); while (start < end) { addr = *start; @@ -1899,7 +1822,7 @@ mono_sgen_pin_objects_in_section (GCMemSection *section, GrayQueue *queue) void mono_sgen_pin_object (void *object, GrayQueue *queue) { - if (major_collector.is_parallel) { + if (collection_is_parallel ()) { LOCK_PIN_QUEUE; /*object arrives pinned*/ pin_stage_ptr (object); @@ -2216,52 +2139,49 @@ mono_sgen_update_heap_boundaries (mword low, mword high) } while (SGEN_CAS_PTR ((gpointer*)&highest_heap_address, (gpointer)high, (gpointer)old) != (gpointer)old); } -static Fragment* -alloc_fragment (void) +static unsigned long +prot_flags_for_activate (int activate) { - Fragment *frag = fragment_freelist; - if (frag) { - fragment_freelist = frag->next; - frag->next = NULL; - return frag; - } - frag = mono_sgen_alloc_internal (INTERNAL_MEM_FRAGMENT); - frag->next = NULL; - return frag; + unsigned long prot_flags = activate? 
MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE; + return prot_flags | MONO_MMAP_PRIVATE | MONO_MMAP_ANON; } - -static void -add_fragment (char *start, char *end) -{ - Fragment *fragment; - fragment = alloc_fragment (); - fragment->fragment_start = start; - fragment->fragment_limit = start; - fragment->fragment_end = end; - fragment->next = nursery_fragments; - nursery_fragments = fragment; +/* + * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes). + * This must not require any lock. + */ +void* +mono_sgen_alloc_os_memory (size_t size, int activate) +{ + void *ptr = mono_valloc (0, size, prot_flags_for_activate (activate)); + if (ptr) { + /* FIXME: CAS */ + total_alloc += size; + } + return ptr; } /* size must be a power of 2 */ void* mono_sgen_alloc_os_memory_aligned (mword size, mword alignment, gboolean activate) { - /* Allocate twice the memory to be able to put the block on an aligned address */ - char *mem = mono_sgen_alloc_os_memory (size + alignment, activate); - char *aligned; - - g_assert (mem); - - aligned = (char*)((mword)(mem + (alignment - 1)) & ~(alignment - 1)); - g_assert (aligned >= mem && aligned + size <= mem + size + alignment && !((mword)aligned & (alignment - 1))); - - if (aligned > mem) - mono_sgen_free_os_memory (mem, aligned - mem); - if (aligned + size < mem + size + alignment) - mono_sgen_free_os_memory (aligned + size, (mem + size + alignment) - (aligned + size)); + void *ptr = mono_valloc_aligned (size, alignment, prot_flags_for_activate (activate)); + if (ptr) { + /* FIXME: CAS */ + total_alloc += size; + } + return ptr; +} - return aligned; +/* + * Free the memory returned by mono_sgen_alloc_os_memory (), returning it to the OS. 
+ */ +void +mono_sgen_free_os_memory (void *addr, size_t size) +{ + mono_vfree (addr, size); + /* FIXME: CAS */ + total_alloc -= size; } /* @@ -2308,8 +2228,7 @@ alloc_nursery (void) nursery_section = section; - /* Setup the single first large fragment */ - add_fragment (nursery_start, nursery_end); + mono_sgen_nursery_allocator_set_nursery_bounds (nursery_start, nursery_end); } void* @@ -2449,32 +2368,6 @@ scan_finalizer_entries (CopyOrMarkObjectFunc copy_func, FinalizeEntry *list, Gra } } -static mword fragment_total = 0; -/* - * We found a fragment of free memory in the nursery: memzero it and if - * it is big enough, add it to the list of fragments that can be used for - * allocation. - */ -static void -add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end) -{ - DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size)); - binary_protocol_empty (frag_start, frag_size); - /* Not worth dealing with smaller fragments: need to tune */ - if (frag_size >= FRAGMENT_MIN_SIZE) { - /* memsetting just the first chunk start is bound to provide better cache locality */ - if (nursery_clear_policy == CLEAR_AT_GC) - memset (frag_start, 0, frag_size); - - add_fragment (frag_start, frag_end); - fragment_total += frag_size; - } else { - /* Clear unused fragments, pinning depends on this */ - /*TODO place an int[] here instead of the memset if size justify it*/ - memset (frag_start, 0, frag_size); - } -} - static const char* generation_name (int generation) { @@ -2485,26 +2378,6 @@ generation_name (int generation) } } -static DisappearingLinkHashTable* -get_dislink_hash_table (int generation) -{ - switch (generation) { - case GENERATION_NURSERY: return &minor_disappearing_link_hash; - case GENERATION_OLD: return &major_disappearing_link_hash; - default: g_assert_not_reached (); - } -} - -static FinalizeEntryHashTable* -get_finalize_entry_hash_table (int generation) -{ - switch (generation) { - case 
GENERATION_NURSERY: return &minor_finalizable_hash; - case GENERATION_OLD: return &major_finalizable_hash; - default: g_assert_not_reached (); - } -} - static MonoObject **finalized_array = NULL; static int finalized_array_capacity = 0; static int finalized_array_entries = 0; @@ -2527,6 +2400,53 @@ bridge_register_finalized_object (MonoObject *object) finalized_array [finalized_array_entries++] = object; } +static void +bridge_process (void) +{ + if (finalized_array_entries <= 0) + return; + + g_assert (mono_sgen_need_bridge_processing ()); + mono_sgen_bridge_processing (finalized_array_entries, finalized_array); + + finalized_array_entries = 0; +} + +CopyOrMarkObjectFunc +mono_sgen_get_copy_object (void) +{ + if (current_collection_generation == GENERATION_NURSERY) { + if (collection_is_parallel ()) + return major_collector.copy_object; + else + return major_collector.nopar_copy_object; + } else { + return major_collector.copy_or_mark_object; + } +} + +ScanObjectFunc +mono_sgen_get_minor_scan_object (void) +{ + g_assert (current_collection_generation == GENERATION_NURSERY); + + if (collection_is_parallel ()) + return major_collector.minor_scan_object; + else + return major_collector.nopar_minor_scan_object; +} + +ScanVTypeFunc +mono_sgen_get_minor_scan_vtype (void) +{ + g_assert (current_collection_generation == GENERATION_NURSERY); + + if (collection_is_parallel ()) + return major_collector.minor_scan_vtype; + else + return major_collector.nopar_minor_scan_vtype; +} + static void finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *queue) { @@ -2535,7 +2455,7 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue * int fin_ready; int ephemeron_rounds = 0; int num_loops; - CopyOrMarkObjectFunc copy_func = current_collection_generation == GENERATION_NURSERY ? 
major_collector.copy_object : major_collector.copy_or_mark_object; + CopyOrMarkObjectFunc copy_func = mono_sgen_get_copy_object (); /* * We copied all the reachable objects. Now it's the time to copy @@ -2596,11 +2516,8 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue * if (generation == GENERATION_OLD) finalize_in_range (copy_func, nursery_start, nursery_end, GENERATION_NURSERY, queue); - if (fin_ready != num_ready_finalizers) { + if (fin_ready != num_ready_finalizers) ++num_loops; - if (finalized_array != NULL) - mono_sgen_bridge_processing (finalized_array_entries, finalized_array); - } /* drain the new stack that might have been created */ DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin\n")); @@ -2663,53 +2580,6 @@ check_scan_starts (void) static int last_num_pinned = 0; -static void -build_nursery_fragments (void **start, int num_entries) -{ - char *frag_start, *frag_end; - size_t frag_size; - int i; - - while (nursery_fragments) { - Fragment *next = nursery_fragments->next; - nursery_fragments->next = fragment_freelist; - fragment_freelist = nursery_fragments; - nursery_fragments = next; - } - frag_start = nursery_start; - fragment_total = 0; - /* clear scan starts */ - memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer)); - for (i = 0; i < num_entries; ++i) { - frag_end = start [i]; - /* remove the pin bit from pinned objects */ - unpin_object (frag_end); - nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end; - frag_size = frag_end - frag_start; - if (frag_size) - add_nursery_frag (frag_size, frag_start, frag_end); - frag_size = ALIGN_UP (safe_object_get_size ((MonoObject*)start [i])); - frag_start = (char*)start [i] + frag_size; - } - nursery_last_pinned_end = frag_start; - frag_end = nursery_end; - frag_size = frag_end - frag_start; - if (frag_size) - add_nursery_frag (frag_size, frag_start, frag_end); - if 
(!nursery_fragments) { - DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", num_entries)); - for (i = 0; i < num_entries; ++i) { - DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", start [i], safe_name (start [i]), safe_object_get_size (start [i]))); - } - degraded_mode = 1; - } - - nursery_next = nursery_frag_real_end = NULL; - - /* Clear TLABs for all threads */ - clear_tlabs (); -} - static void scan_from_registered_roots (CopyOrMarkObjectFunc copy_func, char *addr_start, char *addr_end, int root_type, GrayQueue *queue) { @@ -2854,7 +2724,7 @@ mono_sgen_register_moved_object (void *obj, void *destination) g_assert (mono_profiler_events & MONO_PROFILE_GC_MOVES); /* FIXME: handle this for parallel collector */ - g_assert (!major_collector.is_parallel); + g_assert (!collection_is_parallel ()); if (moved_objects_idx == MOVED_OBJECTS_NUM) { mono_profiler_gc_moves (moved_objects, moved_objects_idx); @@ -2926,8 +2796,7 @@ init_stats (void) mono_counters_register ("# nursery copy_object() failed forwarded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_nursery_copy_object_failed_forwarded); mono_counters_register ("# nursery copy_object() failed pinned", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_nursery_copy_object_failed_pinned); - mono_counters_register ("# wasted fragments used", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_wasted_fragments_used); - mono_counters_register ("bytes in wasted fragments", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_wasted_fragments_bytes); + mono_sgen_nursery_allocator_init_heavy_stats (); mono_counters_register ("Store remsets", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_store_remsets); mono_counters_register ("Unique store remsets", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_store_remsets_unique); @@ -2960,7 +2829,7 @@ static void try_calculate_minor_collection_allowance (gboolean overwrite) { int num_major_sections, num_major_sections_saved, save_target, allowance_target; - mword los_memory_saved; + 
mword los_memory_saved, new_major, new_heap_size; if (overwrite) g_assert (need_calculate_minor_collection_allowance); @@ -2979,7 +2848,16 @@ try_calculate_minor_collection_allowance (gboolean overwrite) num_major_sections_saved = MAX (last_collection_old_num_major_sections - num_major_sections, 0); los_memory_saved = MAX (last_collection_old_los_memory_usage - last_collection_los_memory_usage, 1); - save_target = ((num_major_sections * major_collector.section_size) + los_memory_saved) / 2; + new_major = num_major_sections * major_collector.section_size; + new_heap_size = new_major + last_collection_los_memory_usage; + + /* + * FIXME: Why is save_target half the major memory plus half the + * LOS memory saved? Shouldn't it be half the major memory + * saved plus half the LOS memory saved? Or half the whole heap + * size? + */ + save_target = (new_major + los_memory_saved) / 2; /* * We aim to allow the allocation of as many sections as is @@ -3000,6 +2878,23 @@ try_calculate_minor_collection_allowance (gboolean overwrite) minor_collection_allowance = MAX (MIN (allowance_target, num_major_sections * major_collector.section_size + los_memory_usage), MIN_MINOR_COLLECTION_ALLOWANCE); + if (new_heap_size + minor_collection_allowance > soft_heap_limit) { + if (new_heap_size > soft_heap_limit) + minor_collection_allowance = MIN_MINOR_COLLECTION_ALLOWANCE; + else + minor_collection_allowance = MAX (soft_heap_limit - new_heap_size, MIN_MINOR_COLLECTION_ALLOWANCE); + } + + if (debug_print_allowance) { + mword old_major = last_collection_old_num_major_sections * major_collector.section_size; + + fprintf (gc_debug_file, "Before collection: %ld bytes (%ld major, %ld LOS)\n", + old_major + last_collection_old_los_memory_usage, old_major, last_collection_old_los_memory_usage); + fprintf (gc_debug_file, "After collection: %ld bytes (%ld major, %ld LOS)\n", + new_heap_size, new_major, last_collection_los_memory_usage); + fprintf (gc_debug_file, "Allowance: %ld bytes\n", 
minor_collection_allowance); + } + if (major_collector.have_computed_minor_collection_allowance) major_collector.have_computed_minor_collection_allowance (); @@ -3020,6 +2915,25 @@ mono_sgen_need_major_collection (mword space_needed) return need_major_collection (space_needed); } +static gboolean +collection_is_parallel (void) +{ + switch (current_collection_generation) { + case GENERATION_NURSERY: + return nursery_collection_is_parallel; + case GENERATION_OLD: + return major_collector.is_parallel; + default: + g_assert_not_reached (); + } +} + +gboolean +mono_sgen_nursery_collection_is_parallel (void) +{ + return nursery_collection_is_parallel; +} + static GrayQueue* job_gray_queue (WorkerData *worker_data) { @@ -3083,10 +2997,11 @@ collect_nursery (size_t requested_size) { gboolean needs_major; size_t max_garbage_amount; - char *orig_nursery_next; + char *nursery_next; ScanFromRemsetsJobData sfrjd; ScanFromRegisteredRootsJobData scrrjd_normal, scrrjd_wbarrier; ScanThreadDataJobData stdjd; + mword fragment_total; TV_DECLARE (all_atv); TV_DECLARE (all_btv); TV_DECLARE (atv); @@ -3104,11 +3019,12 @@ collect_nursery (size_t requested_size) degraded_mode = 0; objects_pinned = 0; - orig_nursery_next = nursery_next; - nursery_next = MAX (nursery_next, nursery_last_pinned_end); + nursery_next = mono_sgen_nursery_alloc_get_upper_alloc_bound (); /* FIXME: optimize later to use the higher address where an object can be present */ nursery_next = MAX (nursery_next, nursery_end); + nursery_alloc_bound = nursery_next; + DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start))); max_garbage_amount = nursery_next - nursery_start; g_assert (nursery_section->size >= max_garbage_amount); @@ -3118,7 +3034,7 @@ collect_nursery (size_t requested_size) atv = all_atv; /* Pinning no longer depends on clearing all nursery fragments */ - clear_current_nursery_fragment 
(orig_nursery_next); + mono_sgen_clear_current_nursery_fragment (); TV_GETTIME (btv); time_minor_pre_collection_fragment_clear += TV_ELAPSED_MS (atv, btv); @@ -3140,6 +3056,9 @@ collect_nursery (size_t requested_size) global_remset_cache_clear (); + process_fin_stage_entries (); + process_dislink_stage_entries (); + /* pin from pinned handles */ init_pinning (); mono_profiler_gc_event (MONO_GC_EVENT_MARK_START, 0); @@ -3187,7 +3106,7 @@ collect_nursery (size_t requested_size) time_minor_scan_card_table += TV_ELAPSED_MS (atv, btv); } - if (!major_collector.is_parallel) + if (!collection_is_parallel ()) drain_gray_stack (&gray_queue, -1); if (mono_profiler_get_events () & MONO_PROFILE_GC_ROOTS) @@ -3198,13 +3117,13 @@ collect_nursery (size_t requested_size) time_minor_scan_pinned += TV_ELAPSED_MS (btv, atv); /* registered roots, this includes static fields */ - scrrjd_normal.func = major_collector.copy_object; + scrrjd_normal.func = collection_is_parallel () ? major_collector.copy_object : major_collector.nopar_copy_object; scrrjd_normal.heap_start = nursery_start; scrrjd_normal.heap_end = nursery_next; scrrjd_normal.root_type = ROOT_TYPE_NORMAL; workers_enqueue_job (job_scan_from_registered_roots, &scrrjd_normal); - scrrjd_wbarrier.func = major_collector.copy_object; + scrrjd_wbarrier.func = collection_is_parallel () ? 
major_collector.copy_object : major_collector.nopar_copy_object; scrrjd_wbarrier.heap_start = nursery_start; scrrjd_wbarrier.heap_end = nursery_next; scrrjd_wbarrier.root_type = ROOT_TYPE_WBARRIER; @@ -3222,15 +3141,15 @@ collect_nursery (size_t requested_size) time_minor_scan_thread_data += TV_ELAPSED_MS (btv, atv); btv = atv; - if (major_collector.is_parallel) { + if (collection_is_parallel ()) { while (!gray_object_queue_is_empty (WORKERS_DISTRIBUTE_GRAY_QUEUE)) { workers_distribute_gray_queue_sections (); - usleep (1000); + g_usleep (1000); } } workers_join (); - if (major_collector.is_parallel) + if (collection_is_parallel ()) g_assert (gray_object_queue_is_empty (&gray_queue)); finish_gray_stack (nursery_start, nursery_next, GENERATION_NURSERY, &gray_queue); @@ -3259,7 +3178,13 @@ collect_nursery (size_t requested_size) * next allocations. */ mono_profiler_gc_event (MONO_GC_EVENT_RECLAIM_START, 0); - build_nursery_fragments (pin_queue, next_pin_slot); + fragment_total = mono_sgen_build_nursery_fragments (nursery_section, pin_queue, next_pin_slot); + if (!fragment_total) + degraded_mode = 1; + + /* Clear TLABs for all threads */ + clear_tlabs (); + mono_profiler_gc_event (MONO_GC_EVENT_RECLAIM_END, 0); TV_GETTIME (btv); time_minor_fragment_creation += TV_ELAPSED_MS (atv, btv); @@ -3365,7 +3290,7 @@ major_do_collection (const char *reason) atv = all_atv; /* Pinning depends on this */ - clear_nursery_fragments (nursery_next); + mono_sgen_clear_nursery_fragments (); TV_GETTIME (btv); time_major_pre_collection_fragment_clear += TV_ELAPSED_MS (atv, btv); @@ -3390,6 +3315,9 @@ major_do_collection (const char *reason) if (use_cardtable) card_table_clear (); + process_fin_stage_entries (); + process_dislink_stage_entries (); + TV_GETTIME (atv); init_pinning (); DEBUG (6, fprintf (gc_debug_file, "Collecting pinned addresses\n")); @@ -3496,7 +3424,7 @@ major_do_collection (const char *reason) if (major_collector.is_parallel) { while (!gray_object_queue_is_empty 
(WORKERS_DISTRIBUTE_GRAY_QUEUE)) { workers_distribute_gray_queue_sections (); - usleep (1000); + g_usleep (1000); } } workers_join (); @@ -3573,7 +3501,11 @@ major_do_collection (const char *reason) * pinned objects as we go, memzero() the empty fragments so they are ready for the * next allocations. */ - build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_num_entries); + if (!mono_sgen_build_nursery_fragments (nursery_section, nursery_section->pin_queue_start, nursery_section->pin_queue_num_entries)) + degraded_mode = 1; + + /* Clear TLABs for all threads */ + clear_tlabs (); TV_GETTIME (atv); time_major_fragment_creation += TV_ELAPSED_MS (btv, atv); @@ -3655,7 +3587,7 @@ minor_collect_or_expand_inner (size_t size) DEBUG (2, fprintf (gc_debug_file, "Heap size: %lu, LOS size: %lu\n", (unsigned long)total_alloc, (unsigned long)los_memory_usage)); restart_world (0); /* this also sets the proper pointers for the next allocation */ - if (!alloc_fragment_for_size (size)) { + if (!mono_sgen_can_alloc_size (size)) { int i; /* TypeBuilder and MonoMethod are killing mcs with fragmentation */ DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned)); @@ -3690,39 +3622,6 @@ report_internal_mem_usage (void) major_collector.report_pinned_memory_usage (); } -/* - * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes). - * This must not require any lock. - */ -void* -mono_sgen_alloc_os_memory (size_t size, int activate) -{ - void *ptr; - unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE; - - prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON; - size += pagesize - 1; - size &= ~(pagesize - 1); - ptr = mono_valloc (0, size, prot_flags); - /* FIXME: CAS */ - total_alloc += size; - return ptr; -} - -/* - * Free the memory returned by mono_sgen_alloc_os_memory (), returning it to the OS. 
- */ -void -mono_sgen_free_os_memory (void *addr, size_t size) -{ - mono_vfree (addr, size); - - size += pagesize - 1; - size &= ~(pagesize - 1); - /* FIXME: CAS */ - total_alloc -= size; -} - /* * ###################################################################### * ######## Object allocation @@ -3735,100 +3634,23 @@ mono_sgen_free_os_memory (void *addr, size_t size) * *) allocation of pinned objects */ -static void -setup_fragment (Fragment *frag, Fragment *prev, size_t size) -{ - /* remove from the list */ - if (prev) - prev->next = frag->next; - else - nursery_fragments = frag->next; - nursery_next = frag->fragment_start; - nursery_frag_real_end = frag->fragment_end; - - DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %td (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size)); - frag->next = fragment_freelist; - fragment_freelist = frag; -} - -/* - * Allocate a new nursery fragment able to hold an object of size @size. - * nursery_next and nursery_frag_real_end are set to the boundaries of the fragment. - * Return TRUE if found, FALSE otherwise. - */ -static gboolean -alloc_fragment_for_size (size_t size) -{ - Fragment *frag, *prev; - DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size)); - - if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION) { - /* Clear the remaining space, pinning depends on this */ - memset (nursery_next, 0, nursery_frag_real_end - nursery_next); - } - - prev = NULL; - for (frag = nursery_fragments; frag; frag = frag->next) { - if (size <= (frag->fragment_end - frag->fragment_start)) { - setup_fragment (frag, prev, size); - return TRUE; - } - prev = frag; - } - return FALSE; -} - -/* - * Same as alloc_fragment_for_size but if search for @desired_size fails, try to satisfy @minimum_size. - * This improves nursery usage. 
- */ -static int -alloc_fragment_for_size_range (size_t desired_size, size_t minimum_size) -{ - Fragment *frag, *prev, *min_prev; - DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, desired size: %zd minimum size %zd\n", nursery_frag_real_end, desired_size, minimum_size)); - - if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION) { - /* Clear the remaining space, pinning depends on this */ - memset (nursery_next, 0, nursery_frag_real_end - nursery_next); - } - - min_prev = GINT_TO_POINTER (-1); - prev = NULL; - - for (frag = nursery_fragments; frag; frag = frag->next) { - int frag_size = frag->fragment_end - frag->fragment_start; - if (desired_size <= frag_size) { - setup_fragment (frag, prev, desired_size); - return desired_size; +static void* +alloc_degraded (MonoVTable *vtable, size_t size, gboolean for_mature) +{ + static int last_major_gc_warned = -1; + static int num_degraded = 0; + + if (!for_mature) { + if (last_major_gc_warned < num_major_gcs) { + ++num_degraded; + if (num_degraded == 1 || num_degraded == 3) + fprintf (stderr, "Warning: Degraded allocation. Consider increasing nursery-size if the warning persists.\n"); + else if (num_degraded == 10) + fprintf (stderr, "Warning: Repeated degraded allocation. 
Consider increasing nursery-size.\n"); + last_major_gc_warned = num_major_gcs; } - if (minimum_size <= frag_size) - min_prev = prev; - - prev = frag; - } - - if (min_prev != GINT_TO_POINTER (-1)) { - int frag_size; - if (min_prev) - frag = min_prev->next; - else - frag = nursery_fragments; - - frag_size = frag->fragment_end - frag->fragment_start; - HEAVY_STAT (++stat_wasted_fragments_used); - HEAVY_STAT (stat_wasted_fragments_bytes += frag_size); - - setup_fragment (frag, min_prev, minimum_size); - return frag_size; } - return 0; -} - -static void* -alloc_degraded (MonoVTable *vtable, size_t size) -{ if (need_major_collection (0)) { mono_profiler_gc_event (MONO_GC_EVENT_START, 1); stop_world (1); @@ -3876,7 +3698,7 @@ mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size) collect_nursery (0); restart_world (0); mono_profiler_gc_event (MONO_GC_EVENT_END, 0); - if (!degraded_mode && !alloc_fragment_for_size (size) && size <= MAX_SMALL_OBJ_SIZE) { + if (!degraded_mode && !mono_sgen_can_alloc_size (size) && size <= MAX_SMALL_OBJ_SIZE) { // FIXME: g_assert_not_reached (); } @@ -3915,9 +3737,7 @@ mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size) DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); binary_protocol_alloc (p , vtable, size); g_assert (*p == NULL); - *p = vtable; - - g_assert (TLAB_NEXT == new_next); + mono_atomic_store_seq (p, vtable); return p; } @@ -3933,8 +3753,8 @@ mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size) * This avoids taking again the GC lock when registering, but this is moot when * doing thread-local allocation, so it may not be a good idea. */ - g_assert (TLAB_NEXT == new_next); if (TLAB_NEXT >= TLAB_REAL_END) { + int available_in_tlab; /* * Run out of space in the TLAB. 
When this happens, some amount of space * remains in the TLAB, but not enough to satisfy the current allocation @@ -3947,28 +3767,28 @@ mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size) * for a while, to decrease the number of useless nursery collections. */ if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) { - p = alloc_degraded (vtable, size); + p = alloc_degraded (vtable, size, FALSE); binary_protocol_alloc_degraded (p, vtable, size); return p; } - /*FIXME This codepath is current deadcode since tlab_size > MAX_SMALL_OBJ_SIZE*/ - if (size > tlab_size) { + available_in_tlab = TLAB_REAL_END - TLAB_NEXT; + if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) { /* Allocate directly from the nursery */ - if (nursery_next + size >= nursery_frag_real_end) { - if (!alloc_fragment_for_size (size)) { + do { + p = mono_sgen_nursery_alloc (size); + if (!p) { minor_collect_or_expand_inner (size); if (degraded_mode) { - p = alloc_degraded (vtable, size); + p = alloc_degraded (vtable, size, FALSE); binary_protocol_alloc_degraded (p, vtable, size); return p; + } else { + p = mono_sgen_nursery_alloc (size); } } - } - - p = (void*)nursery_next; - nursery_next += size; - if (nursery_next > nursery_frag_real_end) { + } while (!p); + if (!p) { // no space left g_assert (0); } @@ -3977,31 +3797,32 @@ mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size) memset (p, 0, size); } } else { - int alloc_size = tlab_size; - int available_in_nursery = nursery_frag_real_end - nursery_next; + int alloc_size = 0; if (TLAB_START) DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size))); + mono_sgen_nursery_retire_region (p, available_in_tlab); - if (alloc_size >= available_in_nursery) { - if (available_in_nursery > MAX_NURSERY_TLAB_WASTE && available_in_nursery > size) { - alloc_size = available_in_nursery; - } else { - alloc_size = alloc_fragment_for_size_range (tlab_size, size); - 
if (!alloc_size) { - alloc_size = tlab_size; - minor_collect_or_expand_inner (tlab_size); - if (degraded_mode) { - p = alloc_degraded (vtable, size); - binary_protocol_alloc_degraded (p, vtable, size); - return p; - } - } + do { + p = mono_sgen_nursery_alloc_range (tlab_size, size, &alloc_size); + if (!p) { + minor_collect_or_expand_inner (tlab_size); + if (degraded_mode) { + p = alloc_degraded (vtable, size, FALSE); + binary_protocol_alloc_degraded (p, vtable, size); + return p; + } else { + p = mono_sgen_nursery_alloc_range (tlab_size, size, &alloc_size); + } } + } while (!p); + + if (!p) { + // no space left + g_assert (0); } /* Allocate a new TLAB from the current nursery fragment */ - TLAB_START = nursery_next; - nursery_next += alloc_size; + TLAB_START = (char*)p; TLAB_NEXT = TLAB_START; TLAB_REAL_END = TLAB_START + alloc_size; TLAB_TEMP_END = TLAB_START + MIN (SCAN_START_SIZE, alloc_size); @@ -4013,7 +3834,6 @@ mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size) /* Allocate from the TLAB */ p = (void*)TLAB_NEXT; TLAB_NEXT += size; - g_assert (TLAB_NEXT <= TLAB_REAL_END); nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p; } @@ -4031,7 +3851,7 @@ mono_gc_alloc_obj_nolock (MonoVTable *vtable, size_t size) if (G_LIKELY (p)) { DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); binary_protocol_alloc (p, vtable, size); - *p = vtable; + mono_atomic_store_seq (p, vtable); } return p; @@ -4047,36 +3867,78 @@ mono_gc_try_alloc_obj_nolock (MonoVTable *vtable, size_t size) size = ALIGN_UP (size); g_assert (vtable->gc_descr); - if (size <= MAX_SMALL_OBJ_SIZE) { + if (size > MAX_SMALL_OBJ_SIZE) + return NULL; + + if (G_UNLIKELY (size > tlab_size)) { + /* Allocate directly from the nursery */ + p = mono_sgen_nursery_alloc (size); + if (!p) + return NULL; + + /*FIXME we should use weak memory ops here. Should help specially on x86. 
*/ + if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) + memset (p, 0, size); + } else { + int available_in_tlab; + char *real_end; /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */ p = (void**)TLAB_NEXT; /* FIXME: handle overflow */ new_next = (char*)p + size; - TLAB_NEXT = new_next; - if (G_LIKELY (new_next < TLAB_TEMP_END)) { - /* Fast path */ + real_end = TLAB_REAL_END; + available_in_tlab = real_end - (char*)p; - /* - * FIXME: We might need a memory barrier here so the change to tlab_next is - * visible before the vtable store. - */ - - HEAVY_STAT (++stat_objects_alloced); - HEAVY_STAT (stat_bytes_alloced += size); - - DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); - binary_protocol_alloc (p, vtable, size); - g_assert (*p == NULL); - *p = vtable; + if (G_LIKELY (new_next < real_end)) { + TLAB_NEXT = new_next; + } else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) { + /* Allocate directly from the nursery */ + p = mono_sgen_nursery_alloc (size); + if (!p) + return NULL; - g_assert (TLAB_NEXT == new_next); + if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) + memset (p, 0, size); + } else { + int alloc_size = 0; + + mono_sgen_nursery_retire_region (p, available_in_tlab); + new_next = mono_sgen_nursery_alloc_range (tlab_size, size, &alloc_size); + p = (void**)new_next; + if (!p) + return NULL; + + TLAB_START = (char*)new_next; + TLAB_NEXT = new_next + size; + TLAB_REAL_END = new_next + alloc_size; + TLAB_TEMP_END = new_next + MIN (SCAN_START_SIZE, alloc_size); + + if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) + memset (new_next, 0, alloc_size); + new_next += size; + } - return p; + /* Second case, we overflowed temp end */ + if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) { + nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p; + /* we just bump tlab_temp_end as well */ + TLAB_TEMP_END = MIN 
(TLAB_REAL_END, TLAB_NEXT + SCAN_START_SIZE); + DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", TLAB_NEXT, TLAB_TEMP_END)); } } - return NULL; + + HEAVY_STAT (++stat_objects_alloced); + HEAVY_STAT (stat_bytes_alloced += size); + + DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); + binary_protocol_alloc (p, vtable, size); + g_assert (*p == NULL); /* FIXME disable this in non debug builds */ + + mono_atomic_store_seq (p, vtable); + + return p; } void* @@ -4110,6 +3972,7 @@ mono_gc_alloc_vector (MonoVTable *vtable, size_t size, uintptr_t max_length) ENTER_CRITICAL_REGION; arr = mono_gc_try_alloc_obj_nolock (vtable, size); if (arr) { + /*This doesn't require fencing since EXIT_CRITICAL_REGION already does it for us*/ arr->max_length = max_length; EXIT_CRITICAL_REGION; return arr; @@ -4165,6 +4028,7 @@ mono_gc_alloc_string (MonoVTable *vtable, size_t size, gint32 len) ENTER_CRITICAL_REGION; str = mono_gc_try_alloc_obj_nolock (vtable, size); if (str) { + /*This doesn't require fencing since EXIT_CRITICAL_REGION already does it for us*/ str->length = len; EXIT_CRITICAL_REGION; return str; @@ -4208,7 +4072,7 @@ mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size) if (G_LIKELY (p)) { DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); binary_protocol_alloc_pinned (p, vtable, size); - *p = vtable; + mono_atomic_store_seq (p, vtable); } UNLOCK_GC; return p; @@ -4220,8 +4084,8 @@ mono_gc_alloc_mature (MonoVTable *vtable) void **res; size_t size = ALIGN_UP (vtable->klass->instance_size); LOCK_GC; - res = alloc_degraded (vtable, size); - *res = vtable; + res = alloc_degraded (vtable, size, TRUE); + mono_atomic_store_seq (res, vtable); UNLOCK_GC; if (G_UNLIKELY (vtable->klass->has_finalize)) mono_object_register_finalizer ((MonoObject*)res); @@ -4268,117 +4132,6 @@ queue_finalization_entry 
(FinalizeEntry *entry) { } } -/* LOCKING: requires that the GC lock is held */ -static void -rehash_fin_table (FinalizeEntryHashTable *hash_table) -{ - FinalizeEntry **finalizable_hash = hash_table->table; - mword finalizable_hash_size = hash_table->size; - int i; - unsigned int hash; - FinalizeEntry **new_hash; - FinalizeEntry *entry, *next; - int new_size = g_spaced_primes_closest (hash_table->num_registered); - - new_hash = mono_sgen_alloc_internal_dynamic (new_size * sizeof (FinalizeEntry*), INTERNAL_MEM_FIN_TABLE); - for (i = 0; i < finalizable_hash_size; ++i) { - for (entry = finalizable_hash [i]; entry; entry = next) { - hash = mono_object_hash (entry->object) % new_size; - next = entry->next; - entry->next = new_hash [hash]; - new_hash [hash] = entry; - } - } - mono_sgen_free_internal_dynamic (finalizable_hash, finalizable_hash_size * sizeof (FinalizeEntry*), INTERNAL_MEM_FIN_TABLE); - hash_table->table = new_hash; - hash_table->size = new_size; -} - -/* LOCKING: requires that the GC lock is held */ -static void -rehash_fin_table_if_necessary (FinalizeEntryHashTable *hash_table) -{ - if (hash_table->num_registered >= hash_table->size * 2) - rehash_fin_table (hash_table); -} - -/* LOCKING: requires that the GC lock is held */ -static void -finalize_in_range (CopyOrMarkObjectFunc copy_func, char *start, char *end, int generation, GrayQueue *queue) -{ - FinalizeEntryHashTable *hash_table = get_finalize_entry_hash_table (generation); - FinalizeEntry *entry, *prev; - int i; - FinalizeEntry **finalizable_hash = hash_table->table; - mword finalizable_hash_size = hash_table->size; - - if (no_finalize) - return; - for (i = 0; i < finalizable_hash_size; ++i) { - prev = NULL; - for (entry = finalizable_hash [i]; entry;) { - if ((char*)entry->object >= start && (char*)entry->object < end && !major_collector.is_object_live (entry->object)) { - gboolean is_fin_ready = object_is_fin_ready (entry->object); - char *copy = entry->object; - copy_func ((void**)©, queue); - if 
(is_fin_ready) { - char *from; - FinalizeEntry *next; - /* remove and put in fin_ready_list */ - if (prev) - prev->next = entry->next; - else - finalizable_hash [i] = entry->next; - next = entry->next; - num_ready_finalizers++; - hash_table->num_registered--; - queue_finalization_entry (entry); - bridge_register_finalized_object ((MonoObject*)copy); - /* Make it survive */ - from = entry->object; - entry->object = copy; - DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, hash_table->num_registered)); - entry = next; - continue; - } else { - char *from = entry->object; - if (hash_table == &minor_finalizable_hash && !ptr_in_nursery (copy)) { - FinalizeEntry *next = entry->next; - unsigned int major_hash; - /* remove from the list */ - if (prev) - prev->next = entry->next; - else - finalizable_hash [i] = entry->next; - hash_table->num_registered--; - - entry->object = copy; - - /* insert it into the major hash */ - rehash_fin_table_if_necessary (&major_finalizable_hash); - major_hash = mono_object_hash ((MonoObject*) copy) % - major_finalizable_hash.size; - entry->next = major_finalizable_hash.table [major_hash]; - major_finalizable_hash.table [major_hash] = entry; - major_finalizable_hash.num_registered++; - - DEBUG (5, fprintf (gc_debug_file, "Promoting finalization of object %p (%s) (was at %p) to major table\n", copy, safe_name (copy), from)); - - entry = next; - continue; - } else { - /* update pointer */ - DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s) (was at %p)\n", entry->object, safe_name (entry->object), from)); - entry->object = copy; - } - } - } - prev = entry; - entry = entry->next; - } - } -} - static int object_is_reachable (char *object, char *start, char *end) { @@ -4388,6 +4141,8 @@ object_is_reachable (char *object, char *start, char *end) return !object_is_fin_ready (object) || 
major_collector.is_object_live (object); } +#include "sgen-fin-weak-hash.c" + gboolean mono_sgen_object_is_live (void *obj) { @@ -4557,352 +4312,6 @@ mark_ephemerons_in_range (CopyOrMarkObjectFunc copy_func, char *start, char *end return nothing_marked; } -/* LOCKING: requires that the GC lock is held */ -static void -null_link_in_range (CopyOrMarkObjectFunc copy_func, char *start, char *end, int generation, gboolean before_finalization, GrayQueue *queue) -{ - DisappearingLinkHashTable *hash = get_dislink_hash_table (generation); - DisappearingLink **disappearing_link_hash = hash->table; - int disappearing_link_hash_size = hash->size; - DisappearingLink *entry, *prev; - int i; - if (!hash->num_links) - return; - for (i = 0; i < disappearing_link_hash_size; ++i) { - prev = NULL; - for (entry = disappearing_link_hash [i]; entry;) { - char *object; - gboolean track = DISLINK_TRACK (entry); - - /* - * Tracked references are processed after - * finalization handling whereas standard weak - * references are processed before. If an - * object is still not marked after finalization - * handling it means that it either doesn't have - * a finalizer or the finalizer has already run, - * so we must null a tracking reference. - */ - if (track == before_finalization) { - prev = entry; - entry = entry->next; - continue; - } - - object = DISLINK_OBJECT (entry); - - if (object >= start && object < end && !major_collector.is_object_live (object)) { - if (object_is_fin_ready (object)) { - void **p = entry->link; - DisappearingLink *old; - *p = NULL; - /* remove from list */ - if (prev) - prev->next = entry->next; - else - disappearing_link_hash [i] = entry->next; - DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, object)); - old = entry->next; - mono_sgen_free_internal (entry, INTERNAL_MEM_DISLINK); - entry = old; - hash->num_links--; - continue; - } else { - char *copy = object; - copy_func ((void**)©, queue); - - /* Update pointer if it's moved. 
If the object - * has been moved out of the nursery, we need to - * remove the link from the minor hash table to - * the major one. - * - * FIXME: what if an object is moved earlier? - */ - - if (hash == &minor_disappearing_link_hash && !ptr_in_nursery (copy)) { - void **link = entry->link; - DisappearingLink *old; - /* remove from list */ - if (prev) - prev->next = entry->next; - else - disappearing_link_hash [i] = entry->next; - old = entry->next; - mono_sgen_free_internal (entry, INTERNAL_MEM_DISLINK); - entry = old; - hash->num_links--; - - add_or_remove_disappearing_link ((MonoObject*)copy, link, - track, GENERATION_OLD); - - DEBUG (5, fprintf (gc_debug_file, "Upgraded dislink at %p to major because object %p moved to %p\n", link, object, copy)); - - continue; - } else { - *entry->link = HIDE_POINTER (copy, track); - DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->link, DISLINK_OBJECT (entry))); - } - } - } - prev = entry; - entry = entry->next; - } - } -} - -/* LOCKING: requires that the GC lock is held */ -static void -null_links_for_domain (MonoDomain *domain, int generation) -{ - DisappearingLinkHashTable *hash = get_dislink_hash_table (generation); - DisappearingLink **disappearing_link_hash = hash->table; - int disappearing_link_hash_size = hash->size; - DisappearingLink *entry, *prev; - int i; - for (i = 0; i < disappearing_link_hash_size; ++i) { - prev = NULL; - for (entry = disappearing_link_hash [i]; entry; ) { - char *object = DISLINK_OBJECT (entry); - if (object && !((MonoObject*)object)->vtable) { - DisappearingLink *next = entry->next; - - if (prev) - prev->next = next; - else - disappearing_link_hash [i] = next; - - if (*(entry->link)) { - *(entry->link) = NULL; - g_warning ("Disappearing link %p not freed", entry->link); - } else { - mono_sgen_free_internal (entry, INTERNAL_MEM_DISLINK); - } - - entry = next; - continue; - } - prev = entry; - entry = entry->next; - } - } -} - -/* LOCKING: requires that the GC lock is 
held */ -static int -finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size, - FinalizeEntryHashTable *hash_table) -{ - FinalizeEntry **finalizable_hash = hash_table->table; - mword finalizable_hash_size = hash_table->size; - FinalizeEntry *entry, *prev; - int i, count; - - if (no_finalize || !out_size || !out_array) - return 0; - count = 0; - for (i = 0; i < finalizable_hash_size; ++i) { - prev = NULL; - for (entry = finalizable_hash [i]; entry;) { - if (mono_object_domain (entry->object) == domain) { - FinalizeEntry *next; - /* remove and put in out_array */ - if (prev) - prev->next = entry->next; - else - finalizable_hash [i] = entry->next; - next = entry->next; - hash_table->num_registered--; - out_array [count ++] = entry->object; - DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, hash_table->num_registered)); - entry = next; - if (count == out_size) - return count; - continue; - } - prev = entry; - entry = entry->next; - } - } - return count; -} - -/** - * mono_gc_finalizers_for_domain: - * @domain: the unloading appdomain - * @out_array: output array - * @out_size: size of output array - * - * Store inside @out_array up to @out_size objects that belong to the unloading - * appdomain @domain. Returns the number of stored items. Can be called repeteadly - * until it returns 0. - * The items are removed from the finalizer data structure, so the caller is supposed - * to finalize them. - * @out_array should be on the stack to allow the GC to know the objects are still alive. 
- */ -int -mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size) -{ - int result; - - LOCK_GC; - result = finalizers_for_domain (domain, out_array, out_size, &minor_finalizable_hash); - if (result < out_size) { - result += finalizers_for_domain (domain, out_array + result, out_size - result, - &major_finalizable_hash); - } - UNLOCK_GC; - - return result; -} - -static void -register_for_finalization (MonoObject *obj, void *user_data, int generation) -{ - FinalizeEntryHashTable *hash_table = get_finalize_entry_hash_table (generation); - FinalizeEntry **finalizable_hash; - mword finalizable_hash_size; - FinalizeEntry *entry, *prev; - unsigned int hash; - if (no_finalize) - return; - g_assert (user_data == NULL || user_data == mono_gc_run_finalize); - hash = mono_object_hash (obj); - LOCK_GC; - rehash_fin_table_if_necessary (hash_table); - finalizable_hash = hash_table->table; - finalizable_hash_size = hash_table->size; - hash %= finalizable_hash_size; - prev = NULL; - for (entry = finalizable_hash [hash]; entry; entry = entry->next) { - if (entry->object == obj) { - if (!user_data) { - /* remove from the list */ - if (prev) - prev->next = entry->next; - else - finalizable_hash [hash] = entry->next; - hash_table->num_registered--; - DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, hash_table->num_registered)); - mono_sgen_free_internal (entry, INTERNAL_MEM_FINALIZE_ENTRY); - } - UNLOCK_GC; - return; - } - prev = entry; - } - if (!user_data) { - /* request to deregister, but already out of the list */ - UNLOCK_GC; - return; - } - entry = mono_sgen_alloc_internal (INTERNAL_MEM_FINALIZE_ENTRY); - entry->object = obj; - entry->next = finalizable_hash [hash]; - finalizable_hash [hash] = entry; - hash_table->num_registered++; - DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d) to %s table\n", entry, obj, obj->vtable->klass->name, 
hash_table->num_registered, generation_name (generation))); - UNLOCK_GC; -} - -void -mono_gc_register_for_finalization (MonoObject *obj, void *user_data) -{ - if (ptr_in_nursery (obj)) - register_for_finalization (obj, user_data, GENERATION_NURSERY); - else - register_for_finalization (obj, user_data, GENERATION_OLD); -} - -static void -rehash_dislink (DisappearingLinkHashTable *hash_table) -{ - DisappearingLink **disappearing_link_hash = hash_table->table; - int disappearing_link_hash_size = hash_table->size; - int i; - unsigned int hash; - DisappearingLink **new_hash; - DisappearingLink *entry, *next; - int new_size = g_spaced_primes_closest (hash_table->num_links); - - new_hash = mono_sgen_alloc_internal_dynamic (new_size * sizeof (DisappearingLink*), INTERNAL_MEM_DISLINK_TABLE); - for (i = 0; i < disappearing_link_hash_size; ++i) { - for (entry = disappearing_link_hash [i]; entry; entry = next) { - hash = mono_aligned_addr_hash (entry->link) % new_size; - next = entry->next; - entry->next = new_hash [hash]; - new_hash [hash] = entry; - } - } - mono_sgen_free_internal_dynamic (disappearing_link_hash, - disappearing_link_hash_size * sizeof (DisappearingLink*), INTERNAL_MEM_DISLINK_TABLE); - hash_table->table = new_hash; - hash_table->size = new_size; -} - -/* LOCKING: assumes the GC lock is held */ -static void -add_or_remove_disappearing_link (MonoObject *obj, void **link, gboolean track, int generation) -{ - DisappearingLinkHashTable *hash_table = get_dislink_hash_table (generation); - DisappearingLink *entry, *prev; - unsigned int hash; - DisappearingLink **disappearing_link_hash = hash_table->table; - int disappearing_link_hash_size = hash_table->size; - - if (hash_table->num_links >= disappearing_link_hash_size * 2) { - rehash_dislink (hash_table); - disappearing_link_hash = hash_table->table; - disappearing_link_hash_size = hash_table->size; - } - /* FIXME: add check that link is not in the heap */ - hash = mono_aligned_addr_hash (link) % 
disappearing_link_hash_size; - entry = disappearing_link_hash [hash]; - prev = NULL; - for (; entry; entry = entry->next) { - /* link already added */ - if (link == entry->link) { - /* NULL obj means remove */ - if (obj == NULL) { - if (prev) - prev->next = entry->next; - else - disappearing_link_hash [hash] = entry->next; - hash_table->num_links--; - DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d) from %s table\n", entry, hash_table->num_links, generation_name (generation))); - mono_sgen_free_internal (entry, INTERNAL_MEM_DISLINK); - *link = NULL; - } else { - *link = HIDE_POINTER (obj, track); /* we allow the change of object */ - } - return; - } - prev = entry; - } - if (obj == NULL) - return; - entry = mono_sgen_alloc_internal (INTERNAL_MEM_DISLINK); - *link = HIDE_POINTER (obj, track); - entry->link = link; - entry->next = disappearing_link_hash [hash]; - disappearing_link_hash [hash] = entry; - hash_table->num_links++; - DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p to %s table\n", entry, obj, obj->vtable->klass->name, link, generation_name (generation))); -} - -/* LOCKING: assumes the GC lock is held */ -static void -mono_gc_register_disappearing_link (MonoObject *obj, void **link, gboolean track) -{ - add_or_remove_disappearing_link (NULL, link, FALSE, GENERATION_NURSERY); - add_or_remove_disappearing_link (NULL, link, FALSE, GENERATION_OLD); - if (obj) { - if (ptr_in_nursery (obj)) - add_or_remove_disappearing_link (obj, link, track, GENERATION_NURSERY); - else - add_or_remove_disappearing_link (obj, link, track, GENERATION_OLD); - } -} - int mono_gc_invoke_finalizers (void) { @@ -5001,7 +4410,7 @@ mono_sgen_get_minor_collection_allowance (void) */ static void -rehash_roots (gboolean pinned) +rehash_roots (int root_type) { int i; unsigned int hash; @@ -5009,19 +4418,19 @@ rehash_roots (gboolean pinned) RootRecord *entry, *next; int new_size; - new_size = g_spaced_primes_closest (num_roots_entries [pinned]); + 
new_size = g_spaced_primes_closest (num_roots_entries [root_type]); new_hash = mono_sgen_alloc_internal_dynamic (new_size * sizeof (RootRecord*), INTERNAL_MEM_ROOTS_TABLE); - for (i = 0; i < roots_hash_size [pinned]; ++i) { - for (entry = roots_hash [pinned][i]; entry; entry = next) { + for (i = 0; i < roots_hash_size [root_type]; ++i) { + for (entry = roots_hash [root_type][i]; entry; entry = next) { hash = mono_aligned_addr_hash (entry->start_root) % new_size; next = entry->next; entry->next = new_hash [hash]; new_hash [hash] = entry; } } - mono_sgen_free_internal_dynamic (roots_hash [pinned], roots_hash_size [pinned] * sizeof (RootRecord*), INTERNAL_MEM_ROOTS_TABLE); - roots_hash [pinned] = new_hash; - roots_hash_size [pinned] = new_size; + mono_sgen_free_internal_dynamic (roots_hash [root_type], roots_hash_size [root_type] * sizeof (RootRecord*), INTERNAL_MEM_ROOTS_TABLE); + roots_hash [root_type] = new_hash; + roots_hash_size [root_type] = new_size; } static RootRecord* @@ -5137,13 +4546,7 @@ mono_gc_deregister_root (char* addr) * ###################################################################### */ -#if USE_SIGNAL_BASED_START_STOP_WORLD - -static MonoSemType suspend_ack_semaphore; -static MonoSemType *suspend_ack_semaphore_ptr; -static unsigned int global_stop_count = 0; - -static sigset_t suspend_signal_mask; +unsigned int mono_sgen_global_stop_count = 0; #ifdef USE_MONO_CTX static MonoContext cur_thread_ctx = {0}; @@ -5173,6 +4576,15 @@ update_current_thread_stack (void *start) gc_callbacks.thread_suspend_func (info->runtime_data, NULL); } +void +mono_sgen_fill_thread_info_for_suspend (SgenThreadInfo *info) +{ +#ifdef HAVE_KW_THREAD + /* update the remset info in the thread data structure */ + info->remset = remembered_set; +#endif +} + /* * Define this and use the "xdomain-checks" MONO_GC_DEBUG option to * have cross-domain checks in the write barrier. 
@@ -5191,24 +4603,6 @@ update_current_thread_stack (void *start) static gboolean is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip); -void -mono_sgen_wait_for_suspend_ack (int count) -{ -#if defined(__MACH__) && MONO_MACH_ARCH_SUPPORTED - /* mach thread_resume is synchronous so we dont need to wait for them */ -#else - int i, result; - - for (i = 0; i < count; ++i) { - while ((result = MONO_SEM_WAIT (suspend_ack_semaphore_ptr)) != 0) { - if (errno != EINTR) { - g_error ("sem_wait ()"); - } - } - } -#endif -} - static int restart_threads_until_none_in_managed_allocator (void) { @@ -5224,8 +4618,8 @@ restart_threads_until_none_in_managed_allocator (void) gboolean result; if (info->skip) continue; - if (!info->stack_start || info->in_critical_region || - is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip)) { + if (!info->thread_is_dying && (!info->stack_start || info->in_critical_region || + is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) { binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info)); result = mono_sgen_resume_thread (info); if (result) { @@ -5281,98 +4675,17 @@ restart_threads_until_none_in_managed_allocator (void) return num_threads_died; } -/* LOCKING: assumes the GC lock is held (by the stopping thread) */ -static void -suspend_handler (int sig, siginfo_t *siginfo, void *context) -{ - SgenThreadInfo *info; - int stop_count; - int old_errno = errno; -#ifdef USE_MONO_CTX - MonoContext monoctx; -#else - gpointer regs [ARCH_NUM_REGS]; -#endif - gpointer stack_start; - - info = mono_thread_info_current (); - if (!info) - /* This can happen while a thread is dying */ - return; - - info->stopped_domain = mono_domain_get (); - info->stopped_ip = (gpointer) ARCH_SIGCTX_IP (context); - stop_count = global_stop_count; - /* duplicate signal */ - if (0 && info->stop_count == stop_count) { - errno = old_errno; - return; - } -#ifdef HAVE_KW_THREAD - /* update the remset info in the thread data 
structure */ - info->remset = remembered_set; -#endif - stack_start = (char*) ARCH_SIGCTX_SP (context) - REDZONE_SIZE; - /* If stack_start is not within the limits, then don't set it - in info and we will be restarted. */ - if (stack_start >= info->stack_start_limit && info->stack_start <= info->stack_end) { - info->stack_start = stack_start; - -#ifdef USE_MONO_CTX - mono_sigctx_to_monoctx (context, &monoctx); - info->monoctx = &monoctx; -#else - ARCH_COPY_SIGCTX_REGS (regs, context); - info->stopped_regs = regs; -#endif - } else { - g_assert (!info->stack_start); - } - - /* Notify the JIT */ - if (gc_callbacks.thread_suspend_func) - gc_callbacks.thread_suspend_func (info->runtime_data, context); - - DEBUG (4, fprintf (gc_debug_file, "Posting suspend_ack_semaphore for suspend from %p %p\n", info, (gpointer)mono_native_thread_id_get ())); - /* notify the waiting thread */ - MONO_SEM_POST (suspend_ack_semaphore_ptr); - info->stop_count = stop_count; - - /* wait until we receive the restart signal */ - do { - info->signal = 0; - sigsuspend (&suspend_signal_mask); - } while (info->signal != restart_signal_num); - - DEBUG (4, fprintf (gc_debug_file, "Posting suspend_ack_semaphore for resume from %p %p\n", info, (gpointer)mono_native_thread_id_get ())); - /* notify the waiting thread */ - MONO_SEM_POST (suspend_ack_semaphore_ptr); - - errno = old_errno; -} - -static void -restart_handler (int sig) -{ - SgenThreadInfo *info; - int old_errno = errno; - - info = mono_thread_info_current (); - info->signal = restart_signal_num; - DEBUG (4, fprintf (gc_debug_file, "Restart handler in %p %p\n", info, (gpointer)mono_native_thread_id_get ())); - - errno = old_errno; -} - static void acquire_gc_locks (void) { LOCK_INTERRUPTION; + mono_thread_info_suspend_lock (); } static void release_gc_locks (void) { + mono_thread_info_suspend_unlock (); UNLOCK_INTERRUPTION; } @@ -5390,10 +4703,10 @@ stop_world (int generation) update_current_thread_stack (&count); - global_stop_count++; - 
DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ())); + mono_sgen_global_stop_count++; + DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", mono_sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ())); TV_GETTIME (stop_world_time); - count = mono_sgen_thread_handshake (suspend_signal_num); + count = mono_sgen_thread_handshake (TRUE); count -= restart_threads_until_none_in_managed_allocator (); g_assert (count >= 0); DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count)); @@ -5429,17 +4742,18 @@ restart_world (int generation) release_gc_locks (); - count = mono_sgen_thread_handshake (restart_signal_num); + count = mono_sgen_thread_handshake (FALSE); TV_GETTIME (end_sw); usec = TV_ELAPSED (stop_world_time, end_sw); max_pause_usec = MAX (usec, max_pause_usec); DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec)); mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation); + + bridge_process (); + return count; } -#endif /* USE_SIGNAL_BASED_START_STOP_WORLD */ - int mono_sgen_get_current_collection_generation (void) { @@ -5472,10 +4786,14 @@ mono_gc_scan_object (void *obj) { UserCopyOrMarkData *data = pthread_getspecific (user_copy_or_mark_key); - if (current_collection_generation == GENERATION_NURSERY) - major_collector.copy_object (&obj, data->queue); - else + if (current_collection_generation == GENERATION_NURSERY) { + if (collection_is_parallel ()) + major_collector.copy_object (&obj, data->queue); + else + major_collector.nopar_copy_object (&obj, data->queue); + } else { major_collector.copy_or_mark_object (&obj, data->queue); + } return obj; } @@ -5502,15 +4820,16 @@ scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise, Gray gc_callbacks.thread_mark_func (info->runtime_data, 
info->stack_start, info->stack_end, precise); set_user_copy_or_mark_data (NULL); } else if (!precise) { - conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery, PIN_TYPE_STACK); + if (!info->thread_is_dying) + conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery, PIN_TYPE_STACK); } #ifdef USE_MONO_CTX - if (!precise) + if (!info->thread_is_dying && !precise) conservatively_pin_objects_from ((void**)info->monoctx, (void**)info->monoctx + ARCH_NUM_REGS, start_nursery, end_nursery, PIN_TYPE_STACK); #else - if (!precise) + if (!info->thread_is_dying && !precise) conservatively_pin_objects_from (info->stopped_regs, info->stopped_regs + ARCH_NUM_REGS, start_nursery, end_nursery, PIN_TYPE_STACK); #endif @@ -5611,17 +4930,23 @@ handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global ptr = (void**)(*p & ~REMSET_TYPE_MASK); if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery)) return p + 1; - major_collector.minor_scan_object ((char*)ptr, queue); + mono_sgen_get_minor_scan_object () ((char*)ptr, queue); return p + 1; case REMSET_VTYPE: { + ScanVTypeFunc scan_vtype = mono_sgen_get_minor_scan_vtype (); + size_t skip_size; + ptr = (void**)(*p & ~REMSET_TYPE_MASK); if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery)) - return p + 3; + return p + 4; desc = p [1]; count = p [2]; - while (count-- > 0) - ptr = (void**) major_collector.minor_scan_vtype ((char*)ptr, desc, start_nursery, end_nursery, queue); - return p + 3; + skip_size = p [3]; + while (count-- > 0) { + scan_vtype ((char*)ptr, desc, queue); + ptr = (void**)((char*)ptr + skip_size); + } + return p + 4; } default: g_assert_not_reached (); @@ -5660,7 +4985,7 @@ collect_store_remsets (RememberedSet *remset, mword *bumper) p += 1; break; case REMSET_VTYPE: - p += 3; + p += 4; break; default: g_assert_not_reached (); @@ -5921,9 +5246,13 @@ sgen_thread_register (SgenThreadInfo* info, void *addr) 
thread_info = info; #endif +#if !defined(__MACH__) info->stop_count = -1; - info->skip = 0; info->signal = 0; +#endif + info->skip = 0; + info->doing_handshake = FALSE; + info->thread_is_dying = FALSE; info->stack_start = NULL; info->tlab_start_addr = &TLAB_START; info->tlab_next_addr = &TLAB_NEXT; @@ -6020,9 +5349,38 @@ sgen_thread_unregister (SgenThreadInfo *p) if (mono_domain_get ()) mono_thread_detach (mono_thread_current ()); + p->thread_is_dying = TRUE; + + /* + There is a race condition between a thread finishing execution and being removed + from the GC thread set. + This happens on posix systems when TLS data is being cleaned up: libpthread will + set the thread_info slot to NULL before calling the cleanup function. This + opens a window in which the thread is registered but has a NULL TLS. + + The suspend signal handler needs TLS data to know where to store thread state + data or otherwise it will simply ignore the thread. + + This solution works because the thread doing STW will wait until all threads being + suspended handshake back, so there is no race between the doing_handshake test + and the suspend_thread call. + + This is not required on systems that do synchronous STW as those can deal with + the above race at suspend time. + + FIXME: I believe we could avoid this by using mono_thread_info_lookup when + mono_thread_info_current returns NULL. Or fix mono_thread_info_lookup to do so.
+ */ +#if (defined(__MACH__) && MONO_MACH_ARCH_SUPPORTED) || !defined(HAVE_PTHREAD_KILL) LOCK_GC; +#else + while (!TRYLOCK_GC) { + if (!mono_sgen_park_current_thread_if_doing_handshake (p)) + g_usleep (50); + } +#endif - binary_protocol_thread_unregister ((gpointer)id); + binary_protocol_thread_unregister ((gpointer)mono_thread_info_get_tid (p)); DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)mono_thread_info_get_tid (p))); #if defined(__MACH__) @@ -6057,6 +5415,8 @@ sgen_thread_unregister (SgenThreadInfo *p) * this buffer, which would either clobber memory or crash. */ *p->store_remset_buffer_addr = NULL; + + mono_threads_unregister_current_thread (p); UNLOCK_GC; } @@ -6110,7 +5470,7 @@ mono_gc_set_stack_end (void *stack_end) int mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) { - return mono_threads_pthread_create (new_thread, attr, start_routine, arg); + return pthread_create (new_thread, attr, start_routine, arg); } int @@ -6168,6 +5528,8 @@ mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* val return; } DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr)); + if (value) + binary_protocol_wbarrier (field_ptr, value, value->vtable); if (use_cardtable) { *(void**)field_ptr = value; if (ptr_in_nursery (value)) @@ -6206,6 +5568,8 @@ mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* va return; } DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr)); + if (value) + binary_protocol_wbarrier (slot_ptr, value, value->vtable); if (use_cardtable) { *(void**)slot_ptr = value; if (ptr_in_nursery (value)) @@ -6241,10 +5605,22 @@ mono_gc_wbarrier_arrayref_copy (gpointer dest_ptr, gpointer src_ptr, int count) HEAVY_STAT (++stat_wbarrier_arrayref_copy); /*This check can be done without taking a lock since dest_ptr array is pinned*/ if (ptr_in_nursery (dest_ptr) || count <= 0) { - memmove 
(dest_ptr, src_ptr, count * sizeof (gpointer)); + mono_gc_memmove (dest_ptr, src_ptr, count * sizeof (gpointer)); return; } +#ifdef SGEN_BINARY_PROTOCOL + { + int i; + for (i = 0; i < count; ++i) { + gpointer dest = (gpointer*)dest_ptr + i; + gpointer obj = *((gpointer*)src_ptr + i); + if (obj) + binary_protocol_wbarrier (dest, obj, (gpointer)LOAD_VTABLE (obj)); + } + } +#endif + if (use_cardtable) { gpointer *dest = dest_ptr; gpointer *src = src_ptr; @@ -6276,7 +5652,7 @@ mono_gc_wbarrier_arrayref_copy (gpointer dest_ptr, gpointer src_ptr, int count) RememberedSet *rs; TLAB_ACCESS_INIT; LOCK_GC; - memmove (dest_ptr, src_ptr, count * sizeof (gpointer)); + mono_gc_memmove (dest_ptr, src_ptr, count * sizeof (gpointer)); rs = REMEMBERED_SET; DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", dest_ptr, count)); @@ -6447,20 +5823,59 @@ void mono_gc_wbarrier_value_copy_bitmap (gpointer _dest, gpointer _src, int size } } +#ifdef SGEN_BINARY_PROTOCOL +#undef HANDLE_PTR +#define HANDLE_PTR(ptr,obj) do { \ + gpointer o = *(gpointer*)(ptr); \ + if ((o)) { \ + gpointer d = ((char*)dest) + ((char*)(ptr) - (char*)(obj)); \ + binary_protocol_wbarrier (d, o, (gpointer) LOAD_VTABLE (o)); \ + } \ + } while (0) + +static void +scan_object_for_binary_protocol_copy_wbarrier (gpointer dest, char *start, mword desc) +{ +#define SCAN_OBJECT_NOVTABLE +#include "sgen-scan-object.h" +} +#endif void mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass) { RememberedSet *rs; - size_t size = count * mono_class_value_size (klass, NULL); + size_t element_size = mono_class_value_size (klass, NULL); + size_t size = count * element_size; TLAB_ACCESS_INIT; HEAVY_STAT (++stat_wbarrier_value_copy); g_assert (klass->valuetype); - LOCK_GC; - memmove (dest, src, size); +#ifdef SGEN_BINARY_PROTOCOL + { + int i; + for (i = 0; i < count; ++i) { + scan_object_for_binary_protocol_copy_wbarrier ((char*)dest + i * element_size, + (char*)src + i * element_size - sizeof 
(MonoObject), + (mword) klass->gc_descr); + } + } +#endif if (use_cardtable) { +#ifdef DISABLE_CRITICAL_REGION + LOCK_GC; +#else + ENTER_CRITICAL_REGION; +#endif + mono_gc_memmove (dest, src, size); sgen_card_table_mark_range ((mword)dest, size); +#ifdef DISABLE_CRITICAL_REGION + UNLOCK_GC; +#else + EXIT_CRITICAL_REGION; +#endif } else { + LOCK_GC; + mono_gc_memmove (dest, src, size); rs = REMEMBERED_SET; if (ptr_in_nursery (dest) || ptr_on_stack (dest) || !SGEN_CLASS_HAS_REFERENCES (klass)) { UNLOCK_GC; @@ -6469,10 +5884,11 @@ mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass * g_assert (klass->gc_descr_inited); DEBUG (8, fprintf (gc_debug_file, "Adding value remset at %p, count %d, descr %p for class %s (%p)\n", dest, count, klass->gc_descr, klass->name, klass)); - if (rs->store_next + 3 < rs->end_set) { + if (rs->store_next + 4 < rs->end_set) { *(rs->store_next++) = (mword)dest | REMSET_VTYPE; *(rs->store_next++) = (mword)klass->gc_descr; *(rs->store_next++) = (mword)count; + *(rs->store_next++) = (mword)element_size; UNLOCK_GC; return; } @@ -6485,8 +5901,9 @@ mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass * *(rs->store_next++) = (mword)dest | REMSET_VTYPE; *(rs->store_next++) = (mword)klass->gc_descr; *(rs->store_next++) = (mword)count; + *(rs->store_next++) = (mword)element_size; + UNLOCK_GC; } - UNLOCK_GC; } /** @@ -6506,8 +5923,11 @@ mono_gc_wbarrier_object_copy (MonoObject* obj, MonoObject *src) DEBUG (6, fprintf (gc_debug_file, "Adding object remset for %p\n", obj)); size = mono_object_class (obj)->instance_size; LOCK_GC; +#ifdef SGEN_BINARY_PROTOCOL + scan_object_for_binary_protocol_copy_wbarrier (obj, (char*)src, (mword) src->vtable->gc_descr); +#endif /* do not copy the sync state */ - memcpy ((char*)obj + sizeof (MonoObject), (char*)src + sizeof (MonoObject), + mono_gc_memmove ((char*)obj + sizeof (MonoObject), (char*)src + sizeof (MonoObject), size - sizeof (MonoObject)); if (ptr_in_nursery 
(obj) || ptr_on_stack (obj)) { UNLOCK_GC; @@ -6630,18 +6050,7 @@ find_in_remset_loc (mword *p, char *addr, gboolean *found) ptr = (void**)(*p & ~REMSET_TYPE_MASK); desc = p [1]; count = p [2]; - - switch (desc & 0x7) { - case DESC_TYPE_RUN_LENGTH: - OBJ_RUN_LEN_SIZE (skip_size, desc, ptr); - break; - case DESC_TYPE_SMALL_BITMAP: - OBJ_BITMAP_SIZE (skip_size, desc, start); - break; - default: - // FIXME: - g_assert_not_reached (); - } + skip_size = p [3]; /* The descriptor includes the size of MonoObject */ skip_size -= sizeof (MonoObject); @@ -6649,7 +6058,7 @@ find_in_remset_loc (mword *p, char *addr, gboolean *found) if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer))) *found = TRUE; - return p + 3; + return p + 4; default: g_assert_not_reached (); } @@ -6727,7 +6136,7 @@ static gboolean missing_remsets; */ #undef HANDLE_PTR #define HANDLE_PTR(ptr,obj) do { \ - if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \ + if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_end) { \ if (!find_in_remsets ((char*)(ptr)) && (!use_cardtable || !sgen_card_table_address_is_marked ((mword)ptr))) { \ fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %td in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \ binary_protocol_missing_remset ((obj), (gpointer)LOAD_VTABLE ((obj)), (char*)(ptr) - (char*)(obj), *(ptr), (gpointer)LOAD_VTABLE(*(ptr)), object_is_pinned (*(ptr))); \ @@ -6895,7 +6304,7 @@ mono_gc_walk_heap (int flags, MonoGCReferences callback, void *data) hwi.callback = callback; hwi.data = data; - clear_nursery_fragments (nursery_next); + mono_sgen_clear_nursery_fragments (); mono_sgen_scan_area_with_callback (nursery_section->data, nursery_section->end_data, walk_references, &hwi, FALSE); major_collector.iterate_objects (TRUE, TRUE, walk_references, 
&hwi); @@ -6999,17 +6408,13 @@ mono_gc_enable_events (void) void mono_gc_weak_link_add (void **link_addr, MonoObject *obj, gboolean track) { - LOCK_GC; - mono_gc_register_disappearing_link (obj, link_addr, track); - UNLOCK_GC; + mono_gc_register_disappearing_link (obj, link_addr, track, FALSE); } void mono_gc_weak_link_remove (void **link_addr) { - LOCK_GC; - mono_gc_register_disappearing_link (NULL, link_addr, FALSE); - UNLOCK_GC; + mono_gc_register_disappearing_link (NULL, link_addr, FALSE, FALSE); } MonoObject* @@ -7137,6 +6542,12 @@ mono_gc_is_gc_thread (void) return result; } +static gboolean +is_critical_method (MonoMethod *method) +{ + return mono_runtime_is_critical_method (method) || mono_gc_is_critical_method (method); +} + void mono_gc_base_init (void) { @@ -7144,10 +6555,11 @@ mono_gc_base_init (void) char *env; char **opts, **ptr; char *major_collector_opt = NULL; - struct sigaction sinfo; glong max_heap = 0; + glong soft_limit = 0; int num_workers; int result; + int dummy; do { result = InterlockedCompareExchange (&gc_initialized, -1, 0); @@ -7157,7 +6569,7 @@ mono_gc_base_init (void) return; case -1: /* being inited by another thread */ - usleep (1000); + g_usleep (1000); break; case 0: /* we will init it */ @@ -7170,11 +6582,14 @@ mono_gc_base_init (void) LOCK_INIT (gc_mutex); pagesize = mono_pagesize (); - gc_debug_file = stdout; + gc_debug_file = stderr; cb.thread_register = sgen_thread_register; cb.thread_unregister = sgen_thread_unregister; cb.thread_attach = sgen_thread_attach; + cb.mono_method_is_critical = (gpointer)is_critical_method; + cb.mono_gc_pthread_create = (gpointer)mono_gc_pthread_create; + mono_threads_init (&cb, sizeof (SgenThreadInfo)); LOCK_INIT (interruption_mutex); @@ -7196,8 +6611,8 @@ mono_gc_base_init (void) init_stats (); mono_sgen_init_internal_allocator (); + mono_sgen_init_nursery_allocator (); - mono_sgen_register_fixed_internal_mem_type (INTERNAL_MEM_FRAGMENT, sizeof (Fragment)); 
mono_sgen_register_fixed_internal_mem_type (INTERNAL_MEM_SECTION, SGEN_SIZEOF_GC_MEM_SECTION); mono_sgen_register_fixed_internal_mem_type (INTERNAL_MEM_FINALIZE_ENTRY, sizeof (FinalizeEntry)); mono_sgen_register_fixed_internal_mem_type (INTERNAL_MEM_DISLINK, sizeof (DisappearingLink)); @@ -7218,25 +6633,9 @@ mono_gc_base_init (void) * it inits the small id which is required for hazard pointer * operations. */ - suspend_ack_semaphore_ptr = &suspend_ack_semaphore; - MONO_SEM_INIT (&suspend_ack_semaphore, 0); - - sigfillset (&sinfo.sa_mask); - sinfo.sa_flags = SA_RESTART | SA_SIGINFO; - sinfo.sa_sigaction = suspend_handler; - if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) { - g_error ("failed sigaction"); - } - - sinfo.sa_handler = restart_handler; - if (sigaction (restart_signal_num, &sinfo, NULL) != 0) { - g_error ("failed sigaction"); - } - - sigfillset (&suspend_signal_mask); - sigdelset (&suspend_signal_mask, restart_signal_num); + mono_sgen_os_init (); - mono_thread_info_attach (&sinfo); + mono_thread_info_attach (&dummy); if (!major_collector_opt || !strcmp (major_collector_opt, "marksweep")) { mono_sgen_marksweep_init (&major_collector); @@ -7300,6 +6699,19 @@ mono_gc_base_init (void) } continue; } + if (g_str_has_prefix (opt, "soft-heap-limit=")) { + opt = strchr (opt, '=') + 1; + if (*opt && mono_gc_parse_environment_string_extract_number (opt, &soft_limit)) { + if (soft_limit <= 0) { + fprintf (stderr, "soft-heap-limit must be positive.\n"); + exit (1); + } + } else { + fprintf (stderr, "soft-heap-limit must be an integer.\n"); + exit (1); + } + continue; + } if (g_str_has_prefix (opt, "workers=")) { long val; char *endptr; @@ -7358,6 +6770,7 @@ mono_gc_base_init (void) if (!(major_collector.handle_gc_param && major_collector.handle_gc_param (opt))) { fprintf (stderr, "MONO_GC_PARAMS must be a comma-delimited list of one or more of the following:\n"); fprintf (stderr, " max-heap-size=N (where N is an integer, possibly with a k, m or a g suffix)\n"); 
+ fprintf (stderr, " soft-heap-limit=n (where N is an integer, possibly with a k, m or a g suffix)\n"); fprintf (stderr, " nursery-size=N (where N is an integer, possibly with a k, m or a g suffix)\n"); fprintf (stderr, " major=COLLECTOR (where COLLECTOR is `marksweep', `marksweep-par' or `copying')\n"); fprintf (stderr, " wbarrier=WBARRIER (where WBARRIER is `remset' or `cardtable')\n"); @@ -7378,7 +6791,7 @@ mono_gc_base_init (void) nursery_size = DEFAULT_NURSERY_SIZE; minor_collection_allowance = MIN_MINOR_COLLECTION_ALLOWANCE; - init_heap_size_limits (max_heap); + init_heap_size_limits (max_heap, soft_limit); alloc_nursery (); @@ -7398,6 +6811,8 @@ mono_gc_base_init (void) gc_debug_file = stderr; g_free (rf); } + } else if (!strcmp (opt, "print-allowance")) { + debug_print_allowance = TRUE; } else if (!strcmp (opt, "collect-before-allocs")) { collect_before_allocs = 1; } else if (g_str_has_prefix (opt, "collect-before-allocs=")) { @@ -7439,6 +6854,7 @@ mono_gc_base_init (void) fprintf (stderr, " disable-major\n"); fprintf (stderr, " xdomain-checks\n"); fprintf (stderr, " clear-at-gc\n"); + fprintf (stderr, " print-allowance\n"); exit (1); } } @@ -7457,12 +6873,6 @@ mono_gc_base_init (void) gc_initialized = 1; } -int -mono_gc_get_suspend_signal (void) -{ - return suspend_signal_num; -} - enum { ATYPE_NORMAL, ATYPE_VECTOR, @@ -7669,11 +7079,6 @@ create_allocator (int atype) mono_mb_emit_byte (mb, CEE_ADD); mono_mb_emit_stloc (mb, new_next_var); - /* tlab_next = new_next */ - mono_mb_emit_ldloc (mb, tlab_next_addr_var); - mono_mb_emit_ldloc (mb, new_next_var); - mono_mb_emit_byte (mb, CEE_STIND_I); - /* if (G_LIKELY (new_next < tlab_temp_end)) */ mono_mb_emit_ldloc (mb, new_next_var); EMIT_TLS_ACCESS (mb, tlab_temp_end, tlab_temp_end_offset); @@ -7704,6 +7109,15 @@ create_allocator (int atype) /* FIXME: Memory barrier */ + /* tlab_next = new_next */ + mono_mb_emit_ldloc (mb, tlab_next_addr_var); + mono_mb_emit_ldloc (mb, new_next_var); + mono_mb_emit_byte (mb, 
CEE_STIND_I); + + /*The tlab store must be visible before the vtable store. This could be replaced with a DDS but doing it with IL would be tricky. */ + mono_mb_emit_byte ((mb), MONO_CUSTOM_PREFIX); + mono_mb_emit_op (mb, CEE_MONO_MEMORY_BARRIER, StoreStoreBarrier); + /* *p = vtable; */ mono_mb_emit_ldloc (mb, p_var); mono_mb_emit_ldarg (mb, 0); @@ -7717,6 +7131,12 @@ create_allocator (int atype) mono_mb_emit_byte (mb, CEE_STIND_I); } + /* + We must make sure both vtable and max_length are globally visible before returning to managed land. + */ + mono_mb_emit_byte ((mb), MONO_CUSTOM_PREFIX); + mono_mb_emit_op (mb, CEE_MONO_MEMORY_BARRIER, StoreStoreBarrier); + /* return p */ mono_mb_emit_ldloc (mb, p_var); mono_mb_emit_byte (mb, CEE_RET); @@ -7743,7 +7163,7 @@ mono_gc_get_gc_name (void) static MonoMethod* alloc_method_cache [ATYPE_NUM]; static MonoMethod *write_barrier_method; -gboolean +static gboolean mono_gc_is_critical_method (MonoMethod *method) { int i; @@ -7876,6 +7296,67 @@ mono_gc_get_managed_allocator_types (void) return ATYPE_NUM; } +static void +emit_nursery_check (MonoMethodBuilder *mb, int *nursery_check_return_labels) +{ + memset (nursery_check_return_labels, 0, sizeof (int) * 3); +#ifdef SGEN_ALIGN_NURSERY + // if (ptr_in_nursery (ptr)) return; + /* + * Masking out the bits might be faster, but we would have to use 64 bit + * immediates, which might be slower.
+ */ + mono_mb_emit_ldarg (mb, 0); + mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS); + mono_mb_emit_byte (mb, CEE_SHR_UN); + mono_mb_emit_icon (mb, (mword)nursery_start >> DEFAULT_NURSERY_BITS); + nursery_check_return_labels [0] = mono_mb_emit_branch (mb, CEE_BEQ); + + // if (!ptr_in_nursery (*ptr)) return; + mono_mb_emit_ldarg (mb, 0); + mono_mb_emit_byte (mb, CEE_LDIND_I); + mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS); + mono_mb_emit_byte (mb, CEE_SHR_UN); + mono_mb_emit_icon (mb, (mword)nursery_start >> DEFAULT_NURSERY_BITS); + nursery_check_return_labels [1] = mono_mb_emit_branch (mb, CEE_BNE_UN); +#else + int label_continue_1, label_continue_2; + int dereferenced_var; + + // if (ptr < (nursery_start)) goto continue; + mono_mb_emit_ldarg (mb, 0); + mono_mb_emit_ptr (mb, (gpointer) nursery_start); + label_continue_1 = mono_mb_emit_branch (mb, CEE_BLT); + + // if (ptr >= nursery_end) goto continue; + mono_mb_emit_ldarg (mb, 0); + mono_mb_emit_ptr (mb, (gpointer) nursery_end); + label_continue_2 = mono_mb_emit_branch (mb, CEE_BGE); + + // Otherwise return + nursery_check_return_labels [0] = mono_mb_emit_branch (mb, CEE_BR); + + // continue: + mono_mb_patch_branch (mb, label_continue_1); + mono_mb_patch_branch (mb, label_continue_2); + + // Dereference and store in local var + dereferenced_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg); + mono_mb_emit_ldarg (mb, 0); + mono_mb_emit_byte (mb, CEE_LDIND_I); + mono_mb_emit_stloc (mb, dereferenced_var); + + // if (*ptr < nursery_start) return; + mono_mb_emit_ldloc (mb, dereferenced_var); + mono_mb_emit_ptr (mb, (gpointer) nursery_start); + nursery_check_return_labels [1] = mono_mb_emit_branch (mb, CEE_BLT); + + // if (*ptr >= nursery_end) return; + mono_mb_emit_ldloc (mb, dereferenced_var); + mono_mb_emit_ptr (mb, (gpointer) nursery_end); + nursery_check_return_labels [2] = mono_mb_emit_branch (mb, CEE_BGE); +#endif +} MonoMethod* mono_gc_get_write_barrier (void) @@ -7884,11 +7365,8 @@
mono_gc_get_write_barrier (void) MonoMethodBuilder *mb; MonoMethodSignature *sig; #ifdef MANAGED_WBARRIER - int label_no_wb_1, label_no_wb_2, label_no_wb_3, label_no_wb_4, label_need_wb, label_slow_path; -#ifndef SGEN_ALIGN_NURSERY - int label_continue_1, label_continue_2, label_no_wb_5; - int dereferenced_var; -#endif + int i, nursery_check_labels [3]; + int label_no_wb_3, label_no_wb_4, label_need_wb, label_slow_path; int buffer_var, buffer_index_var, dummy_var; #ifdef HAVE_KW_THREAD @@ -7906,8 +7384,6 @@ mono_gc_get_write_barrier (void) #endif #endif - g_assert (!use_cardtable); - // FIXME: Maybe create a separate version for ctors (the branch would be // correctly predicted more times) if (write_barrier_method) @@ -7921,62 +7397,48 @@ mono_gc_get_write_barrier (void) mb = mono_mb_new (mono_defaults.object_class, "wbarrier", MONO_WRAPPER_WRITE_BARRIER); #ifdef MANAGED_WBARRIER - if (mono_runtime_has_tls_get ()) { -#ifdef SGEN_ALIGN_NURSERY - // if (ptr_in_nursery (ptr)) return; + if (use_cardtable) { + emit_nursery_check (mb, nursery_check_labels); /* - * Masking out the bits might be faster, but we would have to use 64 bit - * immediates, which might be slower. 
- */ - mono_mb_emit_ldarg (mb, 0); - mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS); - mono_mb_emit_byte (mb, CEE_SHR_UN); - mono_mb_emit_icon (mb, (mword)nursery_start >> DEFAULT_NURSERY_BITS); - label_no_wb_1 = mono_mb_emit_branch (mb, CEE_BEQ); - - // if (!ptr_in_nursery (*ptr)) return; + addr = sgen_cardtable + ((address >> CARD_BITS) & CARD_MASK) + *addr = 1; + + sgen_cardtable: + LDC_PTR sgen_cardtable + + address >> CARD_BITS + LDARG_0 + LDC_I4 CARD_BITS + SHR_UN + if (SGEN_HAVE_OVERLAPPING_CARDS) { + LDC_PTR card_table_mask + AND + } + ADD + ldc_i4_1 + stind_i1 + */ + mono_mb_emit_ptr (mb, sgen_cardtable); mono_mb_emit_ldarg (mb, 0); - mono_mb_emit_byte (mb, CEE_LDIND_I); - mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS); + mono_mb_emit_icon (mb, CARD_BITS); mono_mb_emit_byte (mb, CEE_SHR_UN); - mono_mb_emit_icon (mb, (mword)nursery_start >> DEFAULT_NURSERY_BITS); - label_no_wb_2 = mono_mb_emit_branch (mb, CEE_BNE_UN); -#else - - // if (ptr < (nursery_start)) goto continue; - mono_mb_emit_ldarg (mb, 0); - mono_mb_emit_ptr (mb, (gpointer) nursery_start); - label_continue_1 = mono_mb_emit_branch (mb, CEE_BLT); - - // if (ptr >= nursery_end)) goto continue; - mono_mb_emit_ldarg (mb, 0); - mono_mb_emit_ptr (mb, (gpointer) nursery_end); - label_continue_2 = mono_mb_emit_branch (mb, CEE_BGE); - - // Otherwise return - label_no_wb_1 = mono_mb_emit_branch (mb, CEE_BR); - - // continue: - mono_mb_patch_branch (mb, label_continue_1); - mono_mb_patch_branch (mb, label_continue_2); - - // Dereference and store in local var - dereferenced_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg); - mono_mb_emit_ldarg (mb, 0); - mono_mb_emit_byte (mb, CEE_LDIND_I); - mono_mb_emit_stloc (mb, dereferenced_var); - - // if (*ptr < nursery_start) return; - mono_mb_emit_ldloc (mb, dereferenced_var); - mono_mb_emit_ptr (mb, (gpointer) nursery_start); - label_no_wb_2 = mono_mb_emit_branch (mb, CEE_BLT); +#ifdef SGEN_HAVE_OVERLAPPING_CARDS + mono_mb_emit_ptr (mb,
(gpointer)CARD_MASK); + mono_mb_emit_byte (mb, CEE_AND); +#endif + mono_mb_emit_byte (mb, CEE_ADD); + mono_mb_emit_icon (mb, 1); + mono_mb_emit_byte (mb, CEE_STIND_I1); - // if (*ptr >= nursery_end) return; - mono_mb_emit_ldloc (mb, dereferenced_var); - mono_mb_emit_ptr (mb, (gpointer) nursery_end); - label_no_wb_5 = mono_mb_emit_branch (mb, CEE_BGE); + // return; + for (i = 0; i < 3; ++i) { + if (nursery_check_labels [i]) + mono_mb_patch_branch (mb, nursery_check_labels [i]); + } + mono_mb_emit_byte (mb, CEE_RET); + } else if (mono_runtime_has_tls_get ()) { + emit_nursery_check (mb, nursery_check_labels); -#endif // if (ptr >= stack_end) goto need_wb; mono_mb_emit_ldarg (mb, 0); EMIT_TLS_ACCESS (mb, stack_end, stack_end_offset); @@ -8039,23 +7501,27 @@ mono_gc_get_write_barrier (void) mono_mb_emit_byte (mb, CEE_STIND_I); // return; - mono_mb_patch_branch (mb, label_no_wb_1); - mono_mb_patch_branch (mb, label_no_wb_2); + for (i = 0; i < 3; ++i) { + if (nursery_check_labels [i]) + mono_mb_patch_branch (mb, nursery_check_labels [i]); + } mono_mb_patch_branch (mb, label_no_wb_3); mono_mb_patch_branch (mb, label_no_wb_4); -#ifndef SGEN_ALIGN_NURSERY - mono_mb_patch_branch (mb, label_no_wb_5); -#endif mono_mb_emit_byte (mb, CEE_RET); // slow path mono_mb_patch_branch (mb, label_slow_path); - } -#endif - mono_mb_emit_ldarg (mb, 0); - mono_mb_emit_icall (mb, mono_gc_wbarrier_generic_nostore); - mono_mb_emit_byte (mb, CEE_RET); + mono_mb_emit_ldarg (mb, 0); + mono_mb_emit_icall (mb, mono_gc_wbarrier_generic_nostore); + mono_mb_emit_byte (mb, CEE_RET); + } else +#endif + { + mono_mb_emit_ldarg (mb, 0); + mono_mb_emit_icall (mb, mono_gc_wbarrier_generic_nostore); + mono_mb_emit_byte (mb, CEE_RET); + } res = mono_mb_create_method (mb, sig, 16); mono_mb_free (mb); @@ -8123,4 +7589,16 @@ BOOL APIENTRY mono_gc_dllmain (HMODULE module_handle, DWORD reason, LPVOID reser } #endif +NurseryClearPolicy +mono_sgen_get_nursery_clear_policy (void) +{ + return nursery_clear_policy; +} + 
+MonoVTable* +mono_sgen_get_array_fill_vtable (void) +{ + return array_fill_vtable; +} + #endif /* HAVE_SGEN_GC */