X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmetadata%2Fsgen-gc.c;h=bf4ef445b6e51a40ff61e6994ddd5121299098df;hb=282132ce157f6e45275e9a5f9db905a3cbe51d62;hp=d8dd8fddf81dc9d462e0c8fba1d7106303e9a87b;hpb=f84f760a1c8a8c0ec6ae16b7f38d14a49d329ad7;p=mono.git diff --git a/mono/metadata/sgen-gc.c b/mono/metadata/sgen-gc.c index d8dd8fddf81..bf4ef445b6e 100644 --- a/mono/metadata/sgen-gc.c +++ b/mono/metadata/sgen-gc.c @@ -4,7 +4,7 @@ * Author: * Paolo Molaro (lupus@ximian.com) * - * Copyright (C) 2005-2006 Novell, Inc + * Copyright 2005-2009 Novell, Inc (http://www.novell.com) * * Thread start/stop adapted from Boehm's GC: * Copyright (c) 1994 by Xerox Corporation. All rights reserved. @@ -32,7 +32,7 @@ * We should provide a small memory config with half the sizes * * We currently try to make as few mono assumptions as possible: - * 1) 2-word header with no GC pointers in it (firts vtable, second to store the + * 1) 2-word header with no GC pointers in it (first vtable, second to store the * forwarding ptr) * 2) gc descriptor is the second word in the vtable (first word in the class) * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME) @@ -127,7 +127,6 @@ #include #include #include -#include #include #include #include @@ -135,6 +134,7 @@ #include #include #include +#include #include #include #include "metadata/metadata-internals.h" @@ -144,7 +144,23 @@ #include "metadata/threads.h" #include "metadata/sgen-gc.h" #include "metadata/mono-gc.h" +#include "metadata/method-builder.h" +#include "metadata/profiler-private.h" +#include "utils/mono-mmap.h" +#ifdef HAVE_VALGRIND_MEMCHECK_H +#include +#endif + +#define OPDEF(a,b,c,d,e,f,g,h,i,j) \ + a = i, + +enum { +#include "mono/cil/opcode.def" + CEE_LAST +}; + +#undef OPDEF /* * ###################################################################### @@ -160,16 +176,25 @@ typedef guint64 mword; static int gc_initialized = 0; static int gc_debug_level = 0; static FILE* gc_debug_file; +/* If set, do a minor collection before every allocation */ +static gboolean collect_before_allocs = FALSE; +/* If set, do a heap consistency check before each minor collection */ +static gboolean consistency_check_at_minor_collection = FALSE; +/* void mono_gc_flush_info (void) { fflush (gc_debug_file); } +*/ + +#define MAX_DEBUG_LEVEL 8 +#define DEBUG(level,a) do {if (G_UNLIKELY ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level)) a;} while (0) -#define MAX_DEBUG_LEVEL 9 -#define DEBUG(level,a) do {if ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level) a;} while (0) -#define TV_ELAPSED(start,end) ((((end).tv_sec - (start).tv_sec) * 1000000) + end.tv_usec - start.tv_usec) +#define TV_DECLARE(name) struct timeval name +#define TV_GETTIME(tv) gettimeofday (&(tv), NULL) +#define TV_ELAPSED(start,end) (int)((((end).tv_sec - (start).tv_sec) * 1000000) + end.tv_usec - start.tv_usec) #define GC_BITS_PER_WORD (sizeof (mword) * 8) @@ -219,9 +244,10 @@ typedef struct _LOSObject LOSObject; struct _LOSObject { LOSObject *next; mword size; /* this is the object size */ - int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN */ - unsigned char role; - char *data [MONO_ZERO_LEN_ARRAY]; + int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */ + guint16 role; + guint16 scanned; + char data [MONO_ZERO_LEN_ARRAY]; }; /* Pinned objects are allocated in the LOS space if bigger than half a page @@ -269,6 +295,26 @@ struct _PinnedChunk { void *data [1]; /* 
page sizes and free lists are stored here */
 };
 
+/* The method used to clear the nursery */
+/* Clearing at nursery collections is the safest, but has bad interactions with caches.
+ * Clearing at TLAB creation is much faster, but more complex and it might expose hard
+ * to find bugs.
+ */
+typedef enum {
+	CLEAR_AT_GC,
+	CLEAR_AT_TLAB_CREATION
+} NurseryClearPolicy;
+
+static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION;
+
+/*
+ * If this is set, the nursery is aligned to an address aligned to its size, ie.
+ * a 1MB nursery will be aligned to an address divisible by 1MB. This allows us to
+ * speed up ptr_in_nursery () checks which are very frequent. This requires the
+ * nursery size to be a compile time constant.
+ */
+#define ALIGN_NURSERY 1
+
 /*
  * The young generation is divided into fragments. This is because
  * we can hand one fragment to a thread for lock-less fast alloc and
@@ -281,6 +327,8 @@ struct _PinnedChunk {
  * We should start assigning threads very small fragments: if there are many
  * threads the nursery will be full of reserved space that the threads may not
  * use at all, slowing down allocation speed.
+ * Thread local allocation is done from areas of memory Hotspot calls Thread Local
+ * Allocation Buffers (TLABs).
  */
 
 typedef struct _Fragment Fragment;
@@ -316,13 +364,21 @@ struct _RememberedSet {
 enum {
 	REMSET_LOCATION, /* just a pointer to the exact location */
 	REMSET_RANGE,    /* range of pointer fields */
-	REMSET_OBJECT,   /* mark all the object for scanning */
+	REMSET_OBJECT,   /* mark all the object for scanning */
+	REMSET_OTHER,    /* all others */
 	REMSET_TYPE_MASK = 0x3
 };
 
+/* Subtypes of REMSET_OTHER */
+enum {
+	REMSET_VTYPE, /* a valuetype described by a gc descriptor */
+	REMSET_ROOT_LOCATION, /* a location inside a root */
+};
+
 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
+static pthread_key_t remembered_set_key;
 static RememberedSet *global_remset;
-static int store_to_global_remset = 0;
+//static int store_to_global_remset = 0;
 
 /* FIXME: later choose a size that takes into account the RememberedSet struct
  * and doesn't waste any alloc padding space.
 */
@@ -341,7 +397,7 @@ typedef struct {
 /* these bits are set in the object vtable: we could merge them since an object can be
  * either pinned or forwarded but not both.
  * We store them in the vtable slot because the bits are used in the sync block for
- * other purpouses: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
+ * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
  * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
 * would be an invalid combination for the monitor and hash code).
 * The values are already shifted.
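
For orientation, here is a minimal sketch of the write barrier that feeds these remembered sets; it is not code from this patch. RememberedSet, remembered_set and alloc_remset come from the declarations above, ptr_in_nursery () is defined a few hunks below, and sketch_wbarrier_store and the refill size of 1024 are assumptions made for the example.

static void
sketch_wbarrier_store (gpointer *slot, gpointer value)
{
	RememberedSet *rs = remembered_set;
	*slot = value;
	/* only an old->new pointer can become an unrecorded root of the nursery */
	if (ptr_in_nursery (slot) || !ptr_in_nursery (value))
		return;
	if (rs->store_next < rs->end_set) {
		*(rs->store_next++) = (mword)slot;	/* REMSET_LOCATION == 0, so the tag is implicit */
		return;
	}
	rs = alloc_remset (1024, NULL);	/* the size is an assumption for this sketch */
	rs->next = remembered_set;
	remembered_set = rs;
	*(rs->store_next++) = (mword)slot;
}
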
@@ -367,6 +423,11 @@ typedef struct { ((mword*)(obj))[0] &= ~PINNED_BIT; \ } while (0) +#ifdef ALIGN_NURSERY +#define ptr_in_nursery(ptr) (((mword)(ptr) & ~((1 << DEFAULT_NURSERY_BITS) - 1)) == (mword)nursery_start) +#else +#define ptr_in_nursery(ptr) ((char*)(ptr) >= nursery_start && (char*)(ptr) < nursery_real_end) +#endif /* * Since we set bits in the vtable, use the macro to load it from the pointer to @@ -381,7 +442,7 @@ safe_name (void* obj) return vt->klass->name; } -static guint +static inline guint safe_object_get_size (MonoObject* o) { MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass; @@ -390,7 +451,7 @@ safe_object_get_size (MonoObject* o) } else if (klass->rank) { MonoArray *array = (MonoArray*)o; size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array); - if (array->bounds) { + if (G_UNLIKELY (array->bounds)) { size += 3; size &= ~3; size += sizeof (MonoArrayBounds) * klass->rank; @@ -402,12 +463,25 @@ safe_object_get_size (MonoObject* o) } } +static inline gboolean +is_maybe_half_constructed (MonoObject *o) +{ + MonoClass *klass; + + klass = ((MonoVTable*)LOAD_VTABLE (o))->klass; + if ((klass == mono_defaults.string_class && mono_string_length ((MonoString*)o) == 0) || + (klass->rank && mono_array_length ((MonoArray*)o) == 0)) + return TRUE; + else + return FALSE; +} + /* * ###################################################################### * ######## Global data. * ###################################################################### */ -static pthread_mutex_t gc_mutex = PTHREAD_MUTEX_INITIALIZER; +static LOCK_DECLARE (gc_mutex); static int gc_disabled = 0; static int num_minor_gcs = 0; static int num_major_gcs = 0; @@ -415,9 +489,11 @@ static int num_major_gcs = 0; /* good sizes are 512KB-1MB: larger ones increase a lot memzeroing time */ //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118) #define DEFAULT_NURSERY_SIZE (1024*512*2) +/* The number of trailing 0 bits in DEFAULT_NURSERY_SIZE */ +#define DEFAULT_NURSERY_BITS 20 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16) #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2) -/* to quickly find the heard of an object pinned by a conservative address +/* to quickly find the head of an object pinned by a conservative address * we keep track of the objects allocated for each SCAN_START_SIZE memory * chunk in the nursery or other memory sections. Larger values have less * memory overhead and bigger runtime cost. 4-8 KB are reasonable values. 
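
The scan starts make that lookup cheap: resolve the candidate address to its SCAN_START_SIZE bucket, then walk objects forward from there. A sketch built on the structures in this file (sketch_find_object_for_ptr is a hypothetical helper; bounds checks are elided):

static char*
sketch_find_object_for_ptr (GCMemSection *section, char *addr)
{
	char *start = section->scan_starts [(addr - (char*)section->data) / SCAN_START_SIZE];
	while (start && start <= addr) {
		size_t size;
		if (!*(void**)start) {	/* skip the zeroed padding between objects */
			start += sizeof (void*);
			continue;
		}
		size = safe_object_get_size ((MonoObject*)start);
		size = (size + ALLOC_ALIGN - 1) & ~(ALLOC_ALIGN - 1);
		if (addr < start + size)
			return start;	/* addr points into this object */
		start += size;
	}
	return NULL;
}
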
@@ -428,11 +504,12 @@ static int num_major_gcs = 0; /* This is a fixed value used for pinned chunks, not the system pagesize */ #define FREELIST_PAGESIZE 4096 -static mword pagesize = 4096; /* FIXME */ +static mword pagesize = 4096; static mword nursery_size = DEFAULT_NURSERY_SIZE; static mword next_section_size = DEFAULT_NURSERY_SIZE * 4; static mword max_section_size = DEFAULT_MAX_SECTION; static int section_size_used = 0; +static int degraded_mode = 0; static LOSObject *los_object_list = NULL; static mword los_memory_usage = 0; @@ -470,9 +547,9 @@ static FinalizeEntry **disappearing_link_hash = NULL; static mword disappearing_link_hash_size = 0; static mword finalizable_hash_size = 0; -static mword num_registered_finalizers = 0; -static mword num_ready_finalizers = 0; -static mword num_disappearing_links = 0; +static int num_registered_finalizers = 0; +static int num_ready_finalizers = 0; +static int num_disappearing_links = 0; static int no_finalize = 0; /* keep each size a multiple of ALLOC_ALIGN */ @@ -495,43 +572,72 @@ obj_is_from_pinned_alloc (char *p) { PinnedChunk *chunk = pinned_chunk_list; for (; chunk; chunk = chunk->next) { - if (p >= chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE)) + if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE)) return TRUE; } return FALSE; } +enum { + ROOT_TYPE_NORMAL = 0, /* "normal" roots */ + ROOT_TYPE_PINNED = 1, /* roots without a GC descriptor */ + ROOT_TYPE_WBARRIER = 2, /* roots with a write barrier */ + ROOT_TYPE_NUM +}; + /* registered roots: the key to the hash is the root start address */ -static RootRecord **roots_hash = NULL; -static int roots_hash_size = 0; +/* + * Different kinds of roots are kept separate to speed up pin_from_roots () for example. + */ +static RootRecord **roots_hash [ROOT_TYPE_NUM] = { NULL, NULL }; +static int roots_hash_size [ROOT_TYPE_NUM] = { 0, 0, 0 }; static mword roots_size = 0; /* amount of memory in the root set */ -static mword num_roots_entries = 0; +static int num_roots_entries [ROOT_TYPE_NUM] = { 0, 0, 0 }; /* * The current allocation cursors * We allocate objects in the nursery. * The nursery is the area between nursery_start and nursery_real_end. - * nursery_next is the pointer to the space where the next object will be allocated. - * nursery_temp_end is the pointer to the end of the temporary space reserved for - * the allocation: this allows us to allow allocations inside the fragments of the - * nursery (the empty holes between pinned objects) and it allows us to set the - * scan starts at reasonable intervals. - * nursery_next and nursery_temp_end will become per-thread vars to allow lock-free - * allocations. + * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated + * from nursery fragments. + * tlab_next is the pointer to the space inside the TLAB where the next object will + * be allocated. + * tlab_temp_end is the pointer to the end of the temporary space reserved for + * the allocation: it allows us to set the scan starts at reasonable intervals. + * tlab_real_end points to the end of the TLAB. + * nursery_frag_real_end points to the end of the currently used nursery fragment. 
 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
 * At the next allocation, the area of the nursery where objects can be present is
 * between MIN(nursery_first_pinned_start, first_fragment_start) and
- * MAX(nursery_last_pinned_end, nursery_temp_end)
+ * MAX(nursery_last_pinned_end, nursery_frag_real_end)
 */
 static char *nursery_start = NULL;
+
+/*
+ * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
+ * variables for next+temp_end ?
+ */
+static __thread char *tlab_start;
+static __thread char *tlab_next;
+static __thread char *tlab_temp_end;
+static __thread char *tlab_real_end;
+/* Used by the managed allocator */
+static __thread char **tlab_next_addr;
 static char *nursery_next = NULL;
-static char *nursery_temp_end = NULL;
-static char *nursery_real_end = NULL;
 static char *nursery_frag_real_end = NULL;
-static char *nursery_first_pinned_start = NULL;
+static char *nursery_real_end = NULL;
+//static char *nursery_first_pinned_start = NULL;
 static char *nursery_last_pinned_end = NULL;
 
+/* The size of a TLAB */
+/* The bigger the value, the less often we have to go to the slow path to allocate a new
+ * one, but the more space is wasted by threads not allocating much memory.
+ * FIXME: Tune this.
+ * FIXME: Make this self-tuning for each thread.
+ */
+static guint32 tlab_size = (1024 * 4);
+
 /* fragments that are free and ready to be used for allocation */
 static Fragment *nursery_fragments = NULL;
 /* freelist of fragment structures */
 static Fragment *fragment_freelist = NULL;
@@ -540,7 +646,7 @@ static Fragment *fragment_freelist = NULL;
 /*
  * used when moving the objects
  * When the nursery is collected, objects are copied to to_space.
- * The area between to_space and gray_objects is used as a stack
+ * The area between gray_first and gray_objects is used as a stack
 * of objects that need their fields checked for more references
 * to be copied.
 * We should optimize somehow this mechanism to avoid rescanning
@@ -548,6 +654,7 @@ static Fragment *fragment_freelist = NULL;
 * test cache misses and other graph traversal orders.
 */
 static char *to_space = NULL;
+static char *gray_first = NULL;
 static char *gray_objects = NULL;
 static char *to_space_end = NULL;
 static GCMemSection *to_space_section = NULL;
@@ -561,12 +668,6 @@ static GCMemSection *to_space_section = NULL;
 * ######################################################################
 */
 
-/*
- * Recursion is not allowed for the thread lock.
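 */

The TLAB variables above exist so the common allocation path is a lock-free pointer bump. A minimal sketch of that fast path follows (sketch_alloc_slow, which would refill the TLAB from a nursery fragment or trigger a collection, is hypothetical; size is assumed already ALLOC_ALIGN-aligned):

static void*
sketch_tlab_alloc (MonoVTable *vtable, size_t size)
{
	char *p = tlab_next;
	char *new_next = p + size;
	if (new_next <= tlab_real_end) {
		tlab_next = new_next;
		/* memory is already zeroed under CLEAR_AT_TLAB_CREATION */
		*(void**)p = vtable;
		return p;
	}
	return sketch_alloc_slow (vtable, size);
}

/*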
- */ -#define LOCK_GC pthread_mutex_lock (&gc_mutex) -#define UNLOCK_GC pthread_mutex_unlock (&gc_mutex) - #define UPDATE_HEAP_BOUNDARIES(low,high) do { \ if ((mword)(low) < lowest_heap_address) \ lowest_heap_address = (mword)(low); \ @@ -588,7 +689,7 @@ static void* get_internal_mem (size_t size); static void free_internal_mem (void *addr); static void* get_os_memory (size_t size, int activate); static void free_os_memory (void *addr, size_t size); -static void report_internal_mem_usage (void); +static G_GNUC_UNUSED void report_internal_mem_usage (void); static int stop_world (void); static int restart_world (void); @@ -597,15 +698,22 @@ static void scan_from_remsets (void *start_nursery, void *end_nursery); static void find_pinning_ref_from_thread (char *obj, size_t size); static void update_current_thread_stack (void *start); static GCMemSection* alloc_section (size_t size); -static void finalize_in_range (void **start, void **end); -static void null_link_in_range (void **start, void **end); +static void finalize_in_range (char *start, char *end); +static void null_link_in_range (char *start, char *end); static gboolean search_fragment_for_size (size_t size); static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end); static void clear_remsets (void); +static void clear_tlabs (void); +static char *find_tlab_next_from_address (char *addr); static void sweep_pinned_objects (void); +static void scan_from_pinned_objects (char *addr_start, char *addr_end); static void free_large_object (LOSObject *obj); static void free_mem_section (GCMemSection *section); +void describe_ptr (char *ptr); +void check_consistency (void); +char* check_object (char *start); + /* * ###################################################################### * ######## GC descriptors @@ -666,22 +774,29 @@ enum { #define ALLOC_ALIGN 8 -/* Root bitmap descriptors are simpler: the lower two bits describe the type +/* Root bitmap descriptors are simpler: the lower three bits describe the type * and we either have 30/62 bitmap bits or nibble-based run-length, - * or a complex descriptor + * or a complex descriptor, or a user defined marker function. */ enum { ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */ ROOT_DESC_BITMAP, - ROOT_DESC_RUN_LEN, - ROOT_DESC_LARGE_BITMAP, - ROOT_DESC_TYPE_MASK = 0x3, - ROOT_DESC_TYPE_SHIFT = 2, + ROOT_DESC_RUN_LEN, + ROOT_DESC_COMPLEX, + ROOT_DESC_USER, + ROOT_DESC_TYPE_MASK = 0x7, + ROOT_DESC_TYPE_SHIFT = 3, }; +#define MAKE_ROOT_DESC(type,val) ((type) | ((val) << ROOT_DESC_TYPE_SHIFT)) + +#define MAX_USER_DESCRIPTORS 16 + static gsize* complex_descriptors = NULL; static int complex_descriptors_size = 0; static int complex_descriptors_next = 0; +static MonoGCMarkFunc user_descriptors [MAX_USER_DESCRIPTORS]; +static int user_descriptors_next = 0; static int alloc_complex_descriptor (gsize *bitmap, int numbits) @@ -732,7 +847,7 @@ alloc_complex_descriptor (gsize *bitmap, int numbits) * Descriptor builders. 
*/ void* -mono_gc_make_descr_for_string (void) +mono_gc_make_descr_for_string (gsize *bitmap, int numbits) { return (void*) DESC_TYPE_STRING; } @@ -760,24 +875,24 @@ mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size) */ if (first_set < 0) { desc = DESC_TYPE_RUN_LENGTH | stored_size; - DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %d\n", (void*)desc, stored_size)); + DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %zd\n", (void*)desc, stored_size)); return (void*) desc; } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) { desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24); - DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %d, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set)); + DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set)); return (void*) desc; } /* we know the 2-word header is ptr-free */ if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) { desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT); - DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set)); + DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set)); return (void*) desc; } } /* we know the 2-word header is ptr-free */ if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) { desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS); - DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set)); + DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set)); return (void*) desc; } /* it's a complex object ... 
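 */

A worked example of the run-length case above (the numbers are illustrative): for a 32-byte object whose only references sit in words 2 and 3, first_set == 2, num_set == 2 and last_set == 3, so the run-length branch applies and

	desc = DESC_TYPE_RUN_LENGTH | 32 | (2 << 16) | (2 << 24);

OBJ_RUN_LEN_SIZE () later recovers the size as (desc & 0xfff8), and OBJ_RUN_LEN_FOREACH_PTR () visits only words 2 and 3.

/*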
*/ @@ -806,7 +921,7 @@ mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_ } /* Note: we also handle structs with just ref fields */ if (num_set * sizeof (gpointer) == elem_size) { - return (void*)(desc | VECTOR_SUBTYPE_REFS | ((-1LL) << 16)); + return (void*)(desc | VECTOR_SUBTYPE_REFS | ((gssize)(-1) << 16)); } /* FIXME: try run-len first */ /* Note: we can't skip the object header here, because it's not present */ @@ -827,25 +942,25 @@ mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_ (size) &= ~(ALLOC_ALIGN - 1); \ } while (0) -#define OBJ_RUN_LEN_SIZE(size,vt,obj) do { \ - (size) = (vt)->desc & 0xfff8; \ - } while (0) +#define OBJ_RUN_LEN_SIZE(size,desc,obj) do { \ + (size) = (desc) & 0xfff8; \ + } while (0) -#define OBJ_BITMAP_SIZE(size,vt,obj) do { \ - (size) = (vt)->desc & 0xfff8; \ - } while (0) +#define OBJ_BITMAP_SIZE(size,desc,obj) do { \ + (size) = (desc) & 0xfff8; \ + } while (0) //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr))) #define PREFETCH(addr) /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */ -#define OBJ_RUN_LEN_FOREACH_PTR(vt,obj) do { \ - if ((vt)->desc & 0xffff0000) { \ +#define OBJ_RUN_LEN_FOREACH_PTR(desc,obj) do { \ + if ((desc) & 0xffff0000) { \ /* there are pointers */ \ void **_objptr_end; \ void **_objptr = (void**)(obj); \ - _objptr += ((vt)->desc >> 16) & 0xff; \ - _objptr_end = _objptr + (((vt)->desc >> 24) & 0xff); \ + _objptr += ((desc) >> 16) & 0xff; \ + _objptr_end = _objptr + (((desc) >> 24) & 0xff); \ while (_objptr < _objptr_end) { \ HANDLE_PTR (_objptr, (obj)); \ _objptr++; \ @@ -856,10 +971,10 @@ mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_ /* a bitmap desc means that there are pointer references or we'd have * choosen run-length, instead: add an assert to check. 
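 */

The FOREACH macros are parameterized by a caller-supplied HANDLE_PTR, as noted above. A minimal illustration of the pattern, not from this file: a visitor that merely counts the non-NULL reference slots of an object with a run-length descriptor.

#undef HANDLE_PTR
#define HANDLE_PTR(ptr,obj)	do {	\
		if (*(ptr))	\
			num_refs++;	\
	} while (0)

static int
sketch_count_refs (char *obj, mword desc)
{
	int num_refs = 0;
	OBJ_RUN_LEN_FOREACH_PTR (desc, obj);
	return num_refs;
}

/*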
*/ -#define OBJ_BITMAP_FOREACH_PTR(vt,obj) do { \ +#define OBJ_BITMAP_FOREACH_PTR(desc,obj) do { \ /* there are pointers */ \ void **_objptr = (void**)(obj); \ - gsize _bmap = (vt)->desc >> 16; \ + gsize _bmap = (desc) >> 16; \ _objptr += OBJECT_HEADER_WORDS; \ while (_bmap) { \ if ((_bmap & 1)) { \ @@ -893,7 +1008,7 @@ mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_ bitmap_data++; \ if (0) { \ MonoObject *myobj = (MonoObject*)obj; \ - g_print ("found %d at %p (0x%x): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \ + g_print ("found %d at %p (0x%zx): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \ } \ while (bwords-- > 0) { \ gsize _bmap = *bitmap_data++; \ @@ -920,7 +1035,7 @@ mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_ char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \ if (0) { \ MonoObject *myobj = (MonoObject*)start; \ - g_print ("found %d at %p (0x%x): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \ + g_print ("found %d at %p (0x%zx): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \ } \ while (e_start < e_end) { \ void **_objptr = (void**)e_start; \ @@ -997,7 +1112,6 @@ static mword obj_references_checked = 0; #undef HANDLE_PTR #define HANDLE_PTR(ptr,obj) do { \ if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \ - MonoObject *o = (MonoObject*)(obj); \ new_obj_references++; \ /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \ } else { \ @@ -1012,6 +1126,7 @@ static mword obj_references_checked = 0; * This section of code deals with detecting the objects no longer in use * and reclaiming the memory. 
*/ +#if 0 static void __attribute__((noinline)) scan_area (char *start, char *end) { @@ -1019,8 +1134,7 @@ scan_area (char *start, char *end) size_t skip_size; int type; int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0; - char *old_start = start; - void **saved_vt; + mword desc; new_obj_references = 0; obj_references_checked = 0; while (start < end) { @@ -1032,18 +1146,19 @@ scan_area (char *start, char *end) DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name)); if (0) { MonoObject *obj = (MonoObject*)start; - g_print ("found at %p (0x%x): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name); + g_print ("found at %p (0x%zx): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name); } - type = vt->desc & 0x7; + desc = vt->desc; + type = desc & 0x7; if (type == DESC_TYPE_STRING) { STRING_SIZE (skip_size, start); start += skip_size; type_str++; continue; } else if (type == DESC_TYPE_RUN_LENGTH) { - OBJ_RUN_LEN_SIZE (skip_size, vt, start); + OBJ_RUN_LEN_SIZE (skip_size, desc, start); g_assert (skip_size); - OBJ_RUN_LEN_FOREACH_PTR (vt,start); + OBJ_RUN_LEN_FOREACH_PTR (desc,start); start += skip_size; type_rlen++; continue; @@ -1061,9 +1176,9 @@ scan_area (char *start, char *end) type_vector++; continue; } else if (type == DESC_TYPE_SMALL_BITMAP) { - OBJ_BITMAP_SIZE (skip_size, vt, start); + OBJ_BITMAP_SIZE (skip_size, desc, start); g_assert (skip_size); - OBJ_BITMAP_FOREACH_PTR (vt,start); + OBJ_BITMAP_FOREACH_PTR (desc,start); start += skip_size; type_bitmap++; continue; @@ -1107,19 +1222,158 @@ scan_area (char *start, char *end) type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/ } +static void __attribute__((noinline)) +scan_area_for_domain (MonoDomain *domain, char *start, char *end) +{ + GCVTable *vt; + size_t skip_size; + int type, remove; + mword desc; + + while (start < end) { + if (!*(void**)start) { + start += sizeof (void*); /* should be ALLOC_ALIGN, really */ + continue; + } + vt = (GCVTable*)LOAD_VTABLE (start); + /* handle threads someway (maybe insert the root domain vtable?) 
*/ + if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) { + DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start))); + remove = 1; + } else { + remove = 0; + } + desc = vt->desc; + type = desc & 0x7; + if (type == DESC_TYPE_STRING) { + STRING_SIZE (skip_size, start); + if (remove) memset (start, 0, skip_size); + start += skip_size; + continue; + } else if (type == DESC_TYPE_RUN_LENGTH) { + OBJ_RUN_LEN_SIZE (skip_size, desc, start); + g_assert (skip_size); + if (remove) memset (start, 0, skip_size); + start += skip_size; + continue; + } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too + skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE; + skip_size *= mono_array_length ((MonoArray*)start); + skip_size += sizeof (MonoArray); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + if (type == DESC_TYPE_ARRAY) { + /* account for the bounds */ + } + if (remove) memset (start, 0, skip_size); + start += skip_size; + continue; + } else if (type == DESC_TYPE_SMALL_BITMAP) { + OBJ_BITMAP_SIZE (skip_size, desc, start); + g_assert (skip_size); + if (remove) memset (start, 0, skip_size); + start += skip_size; + continue; + } else if (type == DESC_TYPE_LARGE_BITMAP) { + skip_size = safe_object_get_size ((MonoObject*)start); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + if (remove) memset (start, 0, skip_size); + start += skip_size; + continue; + } else if (type == DESC_TYPE_COMPLEX) { + /* this is a complex object */ + skip_size = safe_object_get_size ((MonoObject*)start); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + if (remove) memset (start, 0, skip_size); + start += skip_size; + continue; + } else if (type == DESC_TYPE_COMPLEX_ARR) { + /* this is an array of complex structs */ + skip_size = mono_array_element_size (((MonoVTable*)vt)->klass); + skip_size *= mono_array_length ((MonoArray*)start); + skip_size += sizeof (MonoArray); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + if (type == DESC_TYPE_ARRAY) { + /* account for the bounds */ + } + if (remove) memset (start, 0, skip_size); + start += skip_size; + continue; + } else { + g_assert (0); + } + } +} + +/* + * When appdomains are unloaded we can easily remove objects that have finalizers, + * but all the others could still be present in random places on the heap. + * We need a sweep to get rid of them even though it's going to be costly + * with big heaps. + * The reason we need to remove them is because we access the vtable and class + * structures to know the object size and the reference bitmap: once the domain is + * unloaded the point to random memory. + */ +void +mono_gc_clear_domain (MonoDomain * domain) +{ + GCMemSection *section; + LOCK_GC; + for (section = section_list; section; section = section->next) { + scan_area_for_domain (domain, section->data, section->end_data); + } + /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */ + UNLOCK_GC; +} +#endif + +/* + * add_to_global_remset: + * + * The global remset contains locations which point into newspace after + * a minor collection. This can happen if the objects they point to are pinned. 
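 */

For reference, the two-word encoding this function emits would be consumed roughly as follows on the next minor collection. This is a sketch only, since scan_from_remsets () itself is not part of this hunk, and sketch_handle_remset_entry is a hypothetical helper:

static mword*
sketch_handle_remset_entry (mword *p)
{
	switch (*p & REMSET_TYPE_MASK) {
	case REMSET_OTHER:
		if (p [1] == REMSET_ROOT_LOCATION) {
			void **root = (void**)(p [0] & ~(mword)REMSET_TYPE_MASK);
			/* copy_object (*root, ...) and keep the entry while the target stays pinned */
		}
		return p + 2;	/* REMSET_OTHER entries take two words */
	default:
		return p + 1;	/* a plain REMSET_LOCATION entry is a single word */
	}
}

/*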
+ */ static void -add_to_global_remset (gpointer ptr) +add_to_global_remset (gpointer ptr, gboolean root) { RememberedSet *rs; + DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr)); - if (global_remset->store_next < global_remset->end_set) { - *(global_remset->store_next++) = (mword)ptr; + + /* + * FIXME: If an object remains pinned, we need to add it at every minor collection. + * To avoid uncontrolled growth of the global remset, only add each pointer once. + */ + if (global_remset->store_next + 3 < global_remset->end_set) { + if (root) { + *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER; + *(global_remset->store_next++) = (mword)REMSET_ROOT_LOCATION; + } else { + *(global_remset->store_next++) = (mword)ptr; + } return; } rs = alloc_remset (global_remset->end_set - global_remset->data, NULL); rs->next = global_remset; global_remset = rs; - *(global_remset->store_next++) = (mword)ptr; + if (root) { + *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER; + *(global_remset->store_next++) = (mword)REMSET_LOCATION; + } else { + *(global_remset->store_next++) = (mword)ptr; + } + + { + int global_rs_size = 0; + + for (rs = global_remset; rs; rs = rs->next) { + global_rs_size += rs->store_next - rs->data; + } + DEBUG (4, fprintf (gc_debug_file, "Global remset now has size %d\n", global_rs_size)); + } } /* @@ -1145,7 +1399,13 @@ add_to_global_remset (gpointer ptr) static char* __attribute__((noinline)) copy_object (char *obj, char *from_space_start, char *from_space_end) { - if (obj >= from_space_start && obj < from_space_end) { + static void *copy_labels [] = { &&LAB_0, &&LAB_1, &&LAB_2, &&LAB_3, &&LAB_4, &&LAB_5, &&LAB_6, &&LAB_7, &&LAB_8 }; + + /* + * FIXME: The second set of checks is only needed if we are called for tospace + * objects too. + */ + if (obj >= from_space_start && obj < from_space_end && (obj < to_space || obj >= to_space_end)) { MonoVTable *vt; char *forwarded; mword objsize; @@ -1163,20 +1423,42 @@ copy_object (char *obj, char *from_space_start, char *from_space_end) objsize = safe_object_get_size ((MonoObject*)obj); objsize += ALLOC_ALIGN - 1; objsize &= ~(ALLOC_ALIGN - 1); - DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %d)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize)); + DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %zd)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize)); /* FIXME: handle pinned allocs: * Large objects are simple, at least until we always follow the rule: * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it. * At the end of major collections, we walk the los list and if * the object is pinned, it is marked, otherwise it can be freed. 
*/ - if (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj))) { - DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %d\n", obj, safe_name (obj), objsize)); + if (G_UNLIKELY (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj)))) { + DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %zd\n", obj, safe_name (obj), objsize)); pin_object (obj); return obj; } /* ok, the object is not pinned, we can move it */ /* use a optimized memcpy here */ + if (objsize <= sizeof (gpointer) * 8) { + mword *dest = (mword*)gray_objects; + goto *copy_labels [objsize / sizeof (gpointer)]; + LAB_8: + (dest) [7] = ((mword*)obj) [7]; + LAB_7: + (dest) [6] = ((mword*)obj) [6]; + LAB_6: + (dest) [5] = ((mword*)obj) [5]; + LAB_5: + (dest) [4] = ((mword*)obj) [4]; + LAB_4: + (dest) [3] = ((mword*)obj) [3]; + LAB_3: + (dest) [2] = ((mword*)obj) [2]; + LAB_2: + (dest) [1] = ((mword*)obj) [1]; + LAB_1: + (dest) [0] = ((mword*)obj) [0]; + LAB_0: + ; + } else { #if 0 { int ecx; @@ -1192,13 +1474,14 @@ copy_object (char *obj, char *from_space_start, char *from_space_end) #else memcpy (gray_objects, obj, objsize); #endif + } /* adjust array->bounds */ vt = ((MonoObject*)obj)->vtable; g_assert (vt->gc_descr); - if (vt->rank && ((MonoArray*)obj)->bounds) { + if (G_UNLIKELY (vt->rank && ((MonoArray*)obj)->bounds)) { MonoArray *array = (MonoArray*)gray_objects; - array->bounds = (char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj); - DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %d, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array))); + array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj)); + DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %zd, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array))); } /* set the forwarding pointer */ forward_object (obj, gray_objects); @@ -1213,12 +1496,12 @@ copy_object (char *obj, char *from_space_start, char *from_space_end) #undef HANDLE_PTR #define HANDLE_PTR(ptr,obj) do { \ - if (*(ptr)) { \ - void *__old = *(ptr); \ - *(ptr) = copy_object (*(ptr), from_start, from_end); \ + void *__old = *(ptr); \ + if (__old) { \ + *(ptr) = copy_object (__old, from_start, from_end); \ DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \ - if (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) \ - add_to_global_remset ((ptr)); \ + if (G_UNLIKELY (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) && !ptr_in_nursery (ptr)) \ + add_to_global_remset ((ptr), FALSE); \ } \ } while (0) @@ -1233,22 +1516,22 @@ scan_object (char *start, char* from_start, char* from_end) { GCVTable *vt; size_t skip_size; - int type; - void **saved_vt; + mword desc; vt = (GCVTable*)LOAD_VTABLE (start); //type = vt->desc & 0x7; /* gcc should be smart enough to remove the bounds check, but it isn't:( */ - switch (vt->desc & 0x7) { + desc = vt->desc; + switch (desc & 0x7) { //if (type == DESC_TYPE_STRING) { case DESC_TYPE_STRING: STRING_SIZE (skip_size, start); return start + skip_size; //} else if (type == DESC_TYPE_RUN_LENGTH) { case DESC_TYPE_RUN_LENGTH: - OBJ_RUN_LEN_FOREACH_PTR (vt,start); - OBJ_RUN_LEN_SIZE (skip_size, vt, start); + OBJ_RUN_LEN_FOREACH_PTR (desc,start); + OBJ_RUN_LEN_SIZE (skip_size, desc, start); 
g_assert (skip_size); return start + skip_size; //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too @@ -1266,8 +1549,8 @@ scan_object (char *start, char* from_start, char* from_end) return start + skip_size; //} else if (type == DESC_TYPE_SMALL_BITMAP) { case DESC_TYPE_SMALL_BITMAP: - OBJ_BITMAP_FOREACH_PTR (vt,start); - OBJ_BITMAP_SIZE (skip_size, vt, start); + OBJ_BITMAP_FOREACH_PTR (desc,start); + OBJ_BITMAP_SIZE (skip_size, desc, start); return start + skip_size; //} else if (type == DESC_TYPE_LARGE_BITMAP) { case DESC_TYPE_LARGE_BITMAP: @@ -1302,6 +1585,64 @@ scan_object (char *start, char* from_start, char* from_end) return NULL; } +/* + * drain_gray_stack: + * + * Scan objects in the gray stack until the stack is empty. This should be called + * frequently after each object is copied, to achieve better locality and cache + * usage. + */ +static void inline +drain_gray_stack (char *start_addr, char *end_addr) +{ + char *gray_start = gray_first; + + while (gray_start < gray_objects) { + DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start))); + gray_start = scan_object (gray_start, start_addr, end_addr); + } + + gray_first = gray_start; +} + +/* + * scan_vtype: + * + * Scan the valuetype pointed to by START, described by DESC for references to + * other objects between @from_start and @from_end and copy them to the gray_objects area. + * Returns a pointer to the end of the object. + */ +static char* +scan_vtype (char *start, mword desc, char* from_start, char* from_end) +{ + size_t skip_size; + + /* The descriptors include info about the MonoObject header as well */ + start -= sizeof (MonoObject); + + switch (desc & 0x7) { + case DESC_TYPE_RUN_LENGTH: + OBJ_RUN_LEN_FOREACH_PTR (desc,start); + OBJ_RUN_LEN_SIZE (skip_size, desc, start); + g_assert (skip_size); + return start + skip_size; + case DESC_TYPE_SMALL_BITMAP: + OBJ_BITMAP_FOREACH_PTR (desc,start); + OBJ_BITMAP_SIZE (skip_size, desc, start); + return start + skip_size; + case DESC_TYPE_LARGE_BITMAP: + case DESC_TYPE_COMPLEX: + // FIXME: + g_assert_not_reached (); + break; + default: + // The other descriptors can't happen with vtypes + g_assert_not_reached (); + break; + } + return NULL; +} + /* * Addresses from start to end are already sorted. This function finds the object header * for each address and pins the object. The addresses must be inside the passed section. @@ -1341,12 +1682,11 @@ pin_objects_from_addresses (GCMemSection *section, void **start, void **end, voi search_start = start_nursery; } if (search_start < last_obj) - search_start = last_obj + last_obj_size; + search_start = (char*)last_obj + last_obj_size; /* now addr should be in an object a short distance from search_start * Note that search_start must point to zeroed mem or point to an object. 
*/ do { - int already_pinned; if (!*(void**)search_start) { mword p = (mword)search_start; p += sizeof (gpointer); @@ -1359,7 +1699,7 @@ pin_objects_from_addresses (GCMemSection *section, void **start, void **end, voi last_obj_size = safe_object_get_size ((MonoObject*)search_start); last_obj_size += ALLOC_ALIGN - 1; last_obj_size &= ~(ALLOC_ALIGN - 1); - DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %d\n", last_obj, safe_name (last_obj), last_obj_size)); + DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %zd\n", last_obj, safe_name (last_obj), last_obj_size)); if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) { DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count)); pin_object (search_start); @@ -1396,10 +1736,23 @@ new_gap (int gap) return gap; } +#if 0 +static int +compare_addr (const void *a, const void *b) +{ + return *(const void **)a - *(const void **)b; +} +#endif + /* sort the addresses in array in increasing order */ static void sort_addresses (void **array, int size) { + /* + * qsort is slower as predicted. + * qsort (array, size, sizeof (gpointer), compare_addr); + * return; + */ int gap = size; int swapped, end; while (TRUE) { @@ -1421,7 +1774,7 @@ sort_addresses (void **array, int size) } } -static void +static G_GNUC_UNUSED void print_nursery_gaps (void* start_nursery, void *end_nursery) { int i; @@ -1429,11 +1782,11 @@ print_nursery_gaps (void* start_nursery, void *end_nursery) gpointer next; for (i = 0; i < next_pin_slot; ++i) { next = pin_queue [i]; - fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, next-first); + fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first); first = next; } next = end_nursery; - fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, next-first); + fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first); } /* reduce the info in the pin queue, removing duplicate pointers and sorting them */ @@ -1441,12 +1794,11 @@ static void optimize_pin_queue (int start_slot) { void **start, **cur, **end; - int count, i; /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */ /* it may be better to keep ranges of pinned memory instead of individually pinning objects */ DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot)); if ((next_pin_slot - start_slot) > 1) - sort_addresses (pin_queue + start_slot, next_pin_slot); + sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot); start = cur = pin_queue + start_slot; end = pin_queue + next_pin_slot; while (cur < end) { @@ -1506,12 +1858,21 @@ conservatively_pin_objects_from (void **start, void **end, void *start_nursery, if (next_pin_slot >= pin_queue_size) realloc_pin_queue (); pin_queue [next_pin_slot++] = (void*)addr; - DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", addr)); + DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr)); count++; } start++; } DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count)); + +#ifdef HAVE_VALGRIND_MEMCHECK_H + /* + * The pinning addresses might come from undefined memory, this is normal. Since they + * are used in lots of functions, we make the memory defined here instead of having + * to add a supression for those functions. 
+ */ + VALGRIND_MAKE_MEM_DEFINED (pin_queue, next_pin_slot * sizeof (pin_queue [0])); +#endif } /* @@ -1528,7 +1889,7 @@ conservatively_pin_objects_from (void **start, void **end, void *start_nursery, * is no write in the old generation area where the pinned object is referenced * and we may not consider it as reachable. */ -static void +static G_GNUC_UNUSED void mark_pinned_objects (int generation) { } @@ -1536,14 +1897,14 @@ mark_pinned_objects (int generation) /* * Debugging function: find in the conservative roots where @obj is being pinned. */ -static void +static G_GNUC_UNUSED void find_pinning_reference (char *obj, size_t size) { RootRecord *root; int i; char *endobj = obj + size; - for (i = 0; i < roots_hash_size; ++i) { - for (root = roots_hash [i]; root; root = root->next) { + for (i = 0; i < roots_hash_size [0]; ++i) { + for (root = roots_hash [0][i]; root; root = root->next) { /* if desc is non-null it has precise info */ if (!root->root_desc) { char ** start = (char**)root->start_root; @@ -1569,15 +1930,12 @@ pin_from_roots (void *start_nursery, void *end_nursery) { RootRecord *root; int i; - DEBUG (3, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d entries)\n", (int)roots_size, num_roots_entries)); + DEBUG (2, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d/%d entries)\n", (int)roots_size, num_roots_entries [ROOT_TYPE_NORMAL], num_roots_entries [ROOT_TYPE_PINNED])); /* objects pinned from the API are inside these roots */ - for (i = 0; i < roots_hash_size; ++i) { - for (root = roots_hash [i]; root; root = root->next) { - /* if desc is non-null it has precise info */ - if (root->root_desc) - continue; + for (i = 0; i < roots_hash_size [ROOT_TYPE_PINNED]; ++i) { + for (root = roots_hash [ROOT_TYPE_PINNED][i]; root; root = root->next) { DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root)); - conservatively_pin_objects_from ((void**)root->start_root, root->end_root, start_nursery, end_nursery); + conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery); } } /* now deal with the thread stacks @@ -1590,6 +1948,19 @@ pin_from_roots (void *start_nursery, void *end_nursery) pin_thread_data (start_nursery, end_nursery); } +/* Copy function called from user defined mark functions */ +static char *user_copy_n_start; +static char *user_copy_n_end; + +static void* +user_copy (void *addr) +{ + if (addr) + return copy_object (addr, user_copy_n_start, user_copy_n_end); + else + return NULL; +} + /* * The memory area from start_root to end_root contains pointers to objects. 
* Their position is precisely described by @desc (this means that the pointer @@ -1605,14 +1976,45 @@ precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, while (desc) { if ((desc & 1) && *start_root) { *start_root = copy_object (*start_root, n_start, n_end); - DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); \ + DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); + drain_gray_stack (n_start, n_end); } desc >>= 1; start_root++; } return; + case ROOT_DESC_COMPLEX: { + gsize *bitmap_data = complex_descriptors + (desc >> ROOT_DESC_TYPE_SHIFT); + int bwords = (*bitmap_data) - 1; + void **start_run = start_root; + bitmap_data++; + while (bwords-- > 0) { + gsize bmap = *bitmap_data++; + void **objptr = start_run; + while (bmap) { + if ((bmap & 1) && *objptr) { + *objptr = copy_object (*objptr, n_start, n_end); + DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", objptr, *objptr)); + drain_gray_stack (n_start, n_end); + } + bmap >>= 1; + ++objptr; + } + start_run += GC_BITS_PER_WORD; + } + break; + } + case ROOT_DESC_USER: { + MonoGCMarkFunc marker = user_descriptors [desc >> ROOT_DESC_TYPE_SHIFT]; + + user_copy_n_start = n_start; + user_copy_n_end = n_end; + marker (start_root, user_copy); + break; + } case ROOT_DESC_RUN_LEN: - case ROOT_DESC_LARGE_BITMAP: + g_assert_not_reached (); + default: g_assert_not_reached (); } } @@ -1642,29 +2044,44 @@ alloc_nursery (void) char *data; int scan_starts; Fragment *frag; + int alloc_size; if (nursery_section) return; - DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %d\n", nursery_size)); + DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %zd\n", nursery_size)); /* later we will alloc a larger area for the nursery but only activate * what we need. The rest will be used as expansion if we have too many pinned * objects in the existing nursery. 
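 *
 * For illustration (addresses invented): with ALIGN_NURSERY and
 * DEFAULT_NURSERY_BITS == 20, the code below over-allocates two nurseries
 * worth of memory and rounds the start up to a 1MB boundary, e.g.
 *
 *   data          = 0x7f3a1234000
 *   nursery_start = (data + 0xfffff) & ~0xfffff = 0x7f3a1300000
 *
 * which is what lets ptr_in_nursery () test membership with one mask and
 * one compare.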
*/ /* FIXME: handle OOM */ section = get_internal_mem (sizeof (GCMemSection)); - data = get_os_memory (nursery_size, TRUE); - nursery_start = nursery_next = data; - nursery_real_end = data + nursery_size; - nursery_temp_end = data + SCAN_START_SIZE; + +#ifdef ALIGN_NURSERY + /* Allocate twice the memory to be able to put the nursery at an aligned address */ + g_assert (nursery_size == DEFAULT_NURSERY_SIZE); + + alloc_size = nursery_size * 2; + data = get_os_memory (alloc_size, TRUE); + nursery_start = (void*)(((mword)data + (1 << DEFAULT_NURSERY_BITS) - 1) & ~((1 << DEFAULT_NURSERY_BITS) - 1)); + g_assert ((char*)nursery_start + nursery_size <= ((char*)data + alloc_size)); + /* FIXME: Use the remaining size for something else, if it is big enough */ +#else + alloc_size = nursery_size; + data = get_os_memory (alloc_size, TRUE); + nursery_start = data; +#endif + nursery_real_end = nursery_start + nursery_size; UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end); - total_alloc += nursery_size; - DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", nursery_size, total_alloc)); + nursery_next = nursery_start; + total_alloc += alloc_size; + DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", nursery_size, total_alloc)); section->data = section->next_data = data; - section->size = nursery_size; + section->size = alloc_size; section->end_data = nursery_real_end; - scan_starts = nursery_size / SCAN_START_SIZE; + scan_starts = alloc_size / SCAN_START_SIZE; section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts); section->num_scan_start = scan_starts; + section->role = MEMORY_ROLE_GEN0; /* add to the section list */ section->next = section_list; @@ -1685,7 +2102,7 @@ alloc_nursery (void) * Update roots in the old generation. Since we currently don't have the * info from the write barriers, we just scan all the objects. 
*/ -static void +static G_GNUC_UNUSED void scan_old_generation (char *start, char* end) { GCMemSection *section; @@ -1710,7 +2127,7 @@ scan_old_generation (char *start, char* end) } /* scan the old object space, too */ for (big_object = los_object_list; big_object; big_object = big_object->next) { - DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %d\n", big_object->data, safe_name (big_object->data), big_object->size)); + DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size)); scan_object (big_object->data, start, end); } /* scan the list of objects ready for finalization */ @@ -1730,9 +2147,10 @@ static void add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end) { Fragment *fragment; - DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %d\n", frag_start, frag_end, frag_size)); + DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size)); /* memsetting just the first chunk start is bound to provide better cache locality */ - memset (frag_start, 0, frag_size); + if (nursery_clear_policy == CLEAR_AT_GC) + memset (frag_start, 0, frag_size); /* Not worth dealing with smaller fragments: need to tune */ if (frag_size >= FRAGMENT_MIN_SIZE) { fragment = alloc_fragment (); @@ -1742,14 +2160,34 @@ add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end) fragment->next = nursery_fragments; nursery_fragments = fragment; fragment_total += frag_size; + } else { + /* Clear unused fragments, pinning depends on this */ + memset (frag_start, 0, frag_size); + } +} + +static int +scan_needed_big_objects (char *start_addr, char *end_addr) +{ + LOSObject *big_object; + int count = 0; + for (big_object = los_object_list; big_object; big_object = big_object->next) { + if (!big_object->scanned && object_is_pinned (big_object->data)) { + DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size)); + scan_object (big_object->data, start_addr, end_addr); + big_object->scanned = TRUE; + count++; + } } + return count; } static void -drain_gray_stack (char *start_addr, char *end_addr) +finish_gray_stack (char *start_addr, char *end_addr) { - struct timeval atv, btv; - int fin_ready; + TV_DECLARE (atv); + TV_DECLARE (btv); + int fin_ready, bigo_scanned_num; char *gray_start; /* @@ -1762,16 +2200,15 @@ drain_gray_stack (char *start_addr, char *end_addr) * We need to walk the LO list as well in search of marked big objects * (use a flag since this is needed only on major collections). We need to loop * here as well, so keep a counter of marked LO (increasing it in copy_object). + * To achieve better cache locality and cache usage, we drain the gray stack + * frequently, after each object is copied, and just finish the work here. 
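 *
 * Schematically, the code below runs to a fixpoint (illustration only):
 *
 *   do {
 *       fin_ready = num_ready_finalizers;
 *       finalize_in_range (...);         // may resurrect, i.e. gray, objects
 *       bigo_scanned_num = scan_needed_big_objects (...);
 *       ... drain the gray area again ...
 *   } while (fin_ready != num_ready_finalizers || bigo_scanned_num);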
*/ - gettimeofday (&btv, NULL); - gray_start = to_space; - DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start))); + gray_start = gray_first; while (gray_start < gray_objects) { DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start))); gray_start = scan_object (gray_start, start_addr, end_addr); } - gettimeofday (&atv, NULL); - DEBUG (2, fprintf (gc_debug_file, "Gray stack scan: %d usecs\n", TV_ELAPSED (btv, atv))); + TV_GETTIME (atv); //scan_old_generation (start_addr, end_addr); DEBUG (2, fprintf (gc_debug_file, "Old generation done\n")); /* walk the finalization queue and move also the objects that need to be @@ -1784,6 +2221,7 @@ drain_gray_stack (char *start_addr, char *end_addr) do { fin_ready = num_ready_finalizers; finalize_in_range (start_addr, end_addr); + bigo_scanned_num = scan_needed_big_objects (start_addr, end_addr); /* drain the new stack that might have been created */ DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start))); @@ -1791,7 +2229,7 @@ drain_gray_stack (char *start_addr, char *end_addr) DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start))); gray_start = scan_object (gray_start, start_addr, end_addr); } - } while (fin_ready != num_ready_finalizers); + } while (fin_ready != num_ready_finalizers || bigo_scanned_num); DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space))); to_space = gray_start; @@ -1806,7 +2244,7 @@ drain_gray_stack (char *start_addr, char *end_addr) * called. */ null_link_in_range (start_addr, end_addr); - gettimeofday (&btv, NULL); + TV_GETTIME (btv); DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv))); } @@ -1815,7 +2253,6 @@ static int last_num_pinned = 0; static void build_nursery_fragments (int start_pin, int end_pin) { - Fragment *fragment; char *frag_start, *frag_end; size_t frag_size; int i; @@ -1839,6 +2276,30 @@ build_nursery_fragments (int start_pin, int end_pin) frag_size += ALLOC_ALIGN - 1; frag_size &= ~(ALLOC_ALIGN - 1); frag_start = (char*)pin_queue [i] + frag_size; + /* + * pin_queue [i] might point to a half-constructed string or vector whose + * length field is not set. In that case, frag_start points inside the + * (zero initialized) object. Find the end of the object by scanning forward. + * + */ + if (is_maybe_half_constructed (pin_queue [i])) { + char *tlab_end; + + /* This is also hit for zero length arrays/strings */ + + /* Find the end of the TLAB which contained this allocation */ + tlab_end = find_tlab_next_from_address (pin_queue [i]); + + if (tlab_end) { + while ((frag_start < tlab_end) && *(mword*)frag_start == 0) + frag_start += sizeof (mword); + } else { + /* + * FIXME: The object is either not allocated in a TLAB, or it isn't a + * half constructed object. 
+ */ + } + } } nursery_last_pinned_end = frag_start; frag_end = nursery_real_end; @@ -1846,13 +2307,18 @@ build_nursery_fragments (int start_pin, int end_pin) if (frag_size) add_nursery_frag (frag_size, frag_start, frag_end); if (!nursery_fragments) { - g_warning ("Nursery fully pinned (%d)", end_pin - start_pin); + DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin)); for (i = start_pin; i < end_pin; ++i) { - DEBUG (1, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i]))); + DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i]))); } - g_assert_not_reached (); + degraded_mode = 1; } -} + + nursery_next = nursery_frag_real_end = NULL; + + /* Clear TLABs for all threads */ + clear_tlabs (); +} /* FIXME: later reduce code duplication here with the above * We don't keep track of section fragments for non-nursery sections yet, so @@ -1873,7 +2339,11 @@ build_section_fragments (GCMemSection *section) frag_end = pin_queue [i]; /* remove the pin bit from pinned objects */ unpin_object (frag_end); - section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end; + if (frag_end >= section->data + section->size) { + frag_end = section->data + section->size; + } else { + section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end; + } frag_size = frag_end - frag_start; if (frag_size) memset (frag_start, 0, frag_size); @@ -1889,6 +2359,19 @@ build_section_fragments (GCMemSection *section) memset (frag_start, 0, frag_size); } +static void +scan_from_registered_roots (char *addr_start, char *addr_end, int root_type) +{ + int i; + RootRecord *root; + for (i = 0; i < roots_hash_size [root_type]; ++i) { + for (root = roots_hash [root_type][i]; root; root = root->next) { + DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc)); + precisely_scan_objects_from ((void**)root->start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc); + } + } +} + /* * Collect objects in the nursery. 
*/ @@ -1898,41 +2381,62 @@ collect_nursery (size_t requested_size) GCMemSection *section; size_t max_garbage_amount; int i; - char *gray_start; - RootRecord *root; - Fragment *fragment; - char *frag_start, *frag_end; - size_t frag_size; - struct timeval atv, btv; + char *orig_nursery_next; + Fragment *frag; + TV_DECLARE (all_atv); + TV_DECLARE (all_btv); + TV_DECLARE (atv); + TV_DECLARE (btv); + degraded_mode = 0; + orig_nursery_next = nursery_next; nursery_next = MAX (nursery_next, nursery_last_pinned_end); /* FIXME: optimize later to use the higher address where an object can be present */ nursery_next = MAX (nursery_next, nursery_real_end); + if (consistency_check_at_minor_collection) + check_consistency (); + DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start))); max_garbage_amount = nursery_next - nursery_start; + + /* Clear all remaining nursery fragments, pinning depends on this */ + if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) { + g_assert (orig_nursery_next <= nursery_frag_real_end); + memset (orig_nursery_next, 0, nursery_frag_real_end - orig_nursery_next); + for (frag = nursery_fragments; frag; frag = frag->next) { + memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start); + } + } + /* * not enough room in the old generation to store all the possible data from * the nursery in a single continuous space. + * We reset to_space if we allocated objects in degraded mode. */ + if (to_space_section) + to_space = gray_objects = gray_first = to_space_section->next_data; if ((to_space_end - to_space) < max_garbage_amount) { section = alloc_section (nursery_section->size * 4); g_assert (nursery_section->size >= max_garbage_amount); - to_space = gray_objects = section->data; + to_space = gray_objects = gray_first = section->next_data; to_space_end = section->end_data; to_space_section = section; } + DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section)); nursery_section->next_data = nursery_next; num_minor_gcs++; + mono_stats.minor_gc_count ++; /* world must be stopped already */ - gettimeofday (&atv, NULL); + TV_GETTIME (all_atv); + TV_GETTIME (atv); /* pin from pinned handles */ pin_from_roots (nursery_start, nursery_next); /* identify pinned objects */ optimize_pin_queue (0); next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next); - gettimeofday (&btv, NULL); + TV_GETTIME (btv); DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv))); DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot)); @@ -1943,9 +2447,8 @@ collect_nursery (size_t requested_size) scan_from_remsets (nursery_start, nursery_next); /* we don't have complete write barrier yet, so we scan all the old generation sections */ - gettimeofday (&atv, NULL); + TV_GETTIME (atv); DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv))); - /* FIXME: later scan also alloc_pinned objects */ /* the pinned objects are roots */ for (i = 0; i < next_pin_slot; ++i) { @@ -1953,27 +2456,24 @@ collect_nursery (size_t requested_size) scan_object (pin_queue [i], nursery_start, nursery_next); } /* registered roots, this includes static fields */ - for (i = 0; i < roots_hash_size; ++i) { - for (root = roots_hash [i]; root; root = root->next) { - /* 
if desc is non-null it has precise info */ - if (!root->root_desc) - continue; - DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc)); - precisely_scan_objects_from ((void**)root->start_root, root->end_root, nursery_start, nursery_next, root->root_desc); - } - } - gettimeofday (&btv, NULL); + scan_from_registered_roots (nursery_start, nursery_next, ROOT_TYPE_NORMAL); + /* alloc_pinned objects */ + scan_from_pinned_objects (nursery_start, nursery_next); + TV_GETTIME (btv); DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv))); - drain_gray_stack (nursery_start, nursery_next); + finish_gray_stack (nursery_start, nursery_next); /* walk the pin_queue, build up the fragment list of free memory, unmark * pinned objects as we go, memzero() the empty fragments so they are ready for the * next allocations. */ build_nursery_fragments (0, next_pin_slot); - gettimeofday (&atv, NULL); - DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %d bytes available\n", TV_ELAPSED (btv, atv), fragment_total)); + TV_GETTIME (atv); + DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %zd bytes available\n", TV_ELAPSED (btv, atv), fragment_total)); + + TV_GETTIME (all_btv); + mono_stats.minor_gc_time_usecs += TV_ELAPSED (all_atv, all_btv); /* prepare the pin queue for the next collection */ last_num_pinned = next_pin_slot; @@ -1987,19 +2487,17 @@ collect_nursery (size_t requested_size) static void major_collection (void) { - GCMemSection *section, *prev_section, *next_section; + GCMemSection *section, *prev_section; LOSObject *bigobj, *prevbo; - size_t max_garbage_amount; int i; - char *gray_start; - RootRecord *root; PinnedChunk *chunk; FinalizeEntry *fin; - Fragment *fragment; - char *frag_start, *frag_end; - size_t frag_size; - int fin_ready, count; - struct timeval atv, btv; + Fragment *frag; + int count; + TV_DECLARE (all_atv); + TV_DECLARE (all_btv); + TV_DECLARE (atv); + TV_DECLARE (btv); /* FIXME: only use these values for the precise scan * note that to_space pointers should be excluded anyway... 
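build_nursery_fragments (), called above, turns the sorted pin queue into the free-fragment list: every gap between consecutive pinned objects becomes an allocatable fragment. A self-contained sketch under assumed inputs (a sorted array of pinned-object addresses plus their sizes; all names illustrative):

#include <stddef.h>

typedef struct { char *start; char *end; } Fragment;

/* pinned [] holds pinned-object start addresses sorted by address,
 * sizes [] the (aligned) size of each pinned object */
static int
build_fragments (char *nursery_start, char *nursery_end,
                 char **pinned, size_t *sizes, int num_pinned,
                 Fragment *out, int max_out)
{
	int i, count = 0;
	char *frag_start = nursery_start;
	for (i = 0; i < num_pinned; ++i) {
		if (pinned [i] > frag_start && count < max_out) {
			out [count].start = frag_start;	/* gap before this pinned object */
			out [count].end = pinned [i];
			count++;
		}
		frag_start = pinned [i] + sizes [i];
	}
	if (frag_start < nursery_end && count < max_out) {
		out [count].start = frag_start;	/* tail of the nursery */
		out [count].end = nursery_end;
		count++;
	}
	return count;
}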
*/ @@ -2007,8 +2505,20 @@ major_collection (void) char *heap_end = (char*)-1; size_t copy_space_required = 0; + degraded_mode = 0; DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs)); num_major_gcs++; + mono_stats.major_gc_count ++; + + /* Clear all remaining nursery fragments, pinning depends on this */ + if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) { + g_assert (nursery_next <= nursery_frag_real_end); + memset (nursery_next, 0, nursery_frag_real_end - nursery_next); + for (frag = nursery_fragments; frag; frag = frag->next) { + memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start); + } + } + /* * FIXME: implement Mark/Compact * Until that is done, we can just apply mostly the same alg as for the nursery: @@ -2019,6 +2529,7 @@ major_collection (void) collect_nursery (0); return; } + TV_GETTIME (all_atv); /* FIXME: make sure the nursery next_data ptr is updated */ nursery_section->next_data = nursery_real_end; /* we should also coalesce scanning from sections close to each other @@ -2027,10 +2538,10 @@ major_collection (void) /* The remsets are not useful for a major collection */ clear_remsets (); /* world must be stopped already */ - gettimeofday (&atv, NULL); + TV_GETTIME (atv); DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n")); for (section = section_list; section; section = section->next) { - section->pin_queue_start = count = next_pin_slot; + section->pin_queue_start = count = section->pin_queue_end = next_pin_slot; pin_from_roots (section->data, section->next_data); if (count != next_pin_slot) { int reduced_to; @@ -2050,7 +2561,7 @@ major_collection (void) if (next_pin_slot != count) { next_pin_slot = count; pin_object (bigobj->data); - DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %d from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size)); + DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %zd from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size)); } } /* look for pinned addresses for pinned-alloc objects */ @@ -2065,14 +2576,14 @@ major_collection (void) } } - gettimeofday (&btv, NULL); + TV_GETTIME (btv); DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv))); DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot)); /* allocate the big to space */ - DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %d\n", copy_space_required)); + DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %zd\n", copy_space_required)); section = alloc_section (copy_space_required); - to_space = gray_objects = section->data; + to_space = gray_objects = gray_first = section->next_data; to_space_end = section->end_data; to_space_section = section; @@ -2082,37 +2593,38 @@ major_collection (void) * mark any section without pinned objects, so we can free it since we will be able to * move all the objects. 
*/ - /* the pinned objects are roots */ + /* the pinned objects are roots (big objects are included in this list, too) */ for (i = 0; i < next_pin_slot; ++i) { DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i]))); scan_object (pin_queue [i], heap_start, heap_end); } /* registered roots, this includes static fields */ - for (i = 0; i < roots_hash_size; ++i) { - for (root = roots_hash [i]; root; root = root->next) { - /* if desc is non-null it has precise info */ - if (!root->root_desc) - continue; - DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc)); - precisely_scan_objects_from ((void**)root->start_root, root->end_root, heap_start, heap_end, root->root_desc); - } - } + scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_NORMAL); + scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_WBARRIER); + /* alloc_pinned objects */ + scan_from_pinned_objects (heap_start, heap_end); /* scan the list of objects ready for finalization */ for (fin = fin_ready_list; fin; fin = fin->next) { DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object))); fin->object = copy_object (fin->object, heap_start, heap_end); } - gettimeofday (&atv, NULL); + TV_GETTIME (atv); DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv))); + /* we need to go over the big object list to see if any was marked and scan it + * And we need to make this in a loop, considering that objects referenced by finalizable + * objects could reference big objects (this happens in finish_gray_stack ()) + */ + scan_needed_big_objects (heap_start, heap_end); /* all the objects in the heap */ - drain_gray_stack (heap_start, heap_end); + finish_gray_stack (heap_start, heap_end); /* sweep the big objects list */ prevbo = NULL; for (bigobj = los_object_list; bigobj;) { if (object_is_pinned (bigobj->data)) { unpin_object (bigobj->data); + bigobj->scanned = FALSE; } else { LOSObject *to_free; /* not referenced anywhere, so we can free it */ @@ -2165,6 +2677,8 @@ major_collection (void) */ build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end); + TV_GETTIME (all_btv); + mono_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv); /* prepare the pin queue for the next collection */ next_pin_slot = 0; if (fin_ready_list) { @@ -2203,12 +2717,13 @@ alloc_section (size_t size) section->end_data = data + new_size; UPDATE_HEAP_BOUNDARIES (data, section->end_data); total_alloc += new_size; - DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", new_size, total_alloc)); + DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", new_size, total_alloc)); section->data = data; section->size = new_size; scan_starts = new_size / SCAN_START_SIZE; section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts); section->num_scan_start = scan_starts; + section->role = MEMORY_ROLE_GEN1; /* add to the section list */ section->next = section_list; @@ -2222,7 +2737,7 @@ free_mem_section (GCMemSection *section) { char *data = section->data; size_t size = section->size; - DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %d\n", data, size)); + DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %zd\n", data, size)); free_os_memory (data, size); free_internal_mem (section); total_alloc -= size; @@ -2237,9 +2752,6 @@ free_mem_section 
(GCMemSection *section) static void __attribute__((noinline)) minor_collect_or_expand_inner (size_t size) { - GCMemSection *section; - char *data; - char *old_next_p = nursery_next; int do_minor_collection = 1; if (!nursery_section) { @@ -2247,21 +2759,19 @@ minor_collect_or_expand_inner (size_t size) return; } if (do_minor_collection) { - GCMemSection *old_section = section_list; - stop_world (); collect_nursery (size); - DEBUG (2, fprintf (gc_debug_file, "Heap size: %d, LOS size: %d\n", total_alloc, los_memory_usage)); + DEBUG (2, fprintf (gc_debug_file, "Heap size: %zd, LOS size: %zd\n", total_alloc, los_memory_usage)); restart_world (); /* this also sets the proper pointers for the next allocation */ if (!search_fragment_for_size (size)) { int i; /* TypeBuilder and MonoMethod are killing mcs with fragmentation */ - g_warning ("nursery collection didn't find enough room for %d alloc (%d pinned)", size, last_num_pinned); + DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned)); for (i = 0; i < last_num_pinned; ++i) { - DEBUG (1, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i]))); + DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i]))); } - g_assert_not_reached (); + degraded_mode = 1; } } //report_internal_mem_usage (); @@ -2276,10 +2786,6 @@ minor_collect_or_expand_inner (size_t size) * Internal memory can be handled with a freelist for small objects. */ -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - /* * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes). * This must not require any lock. @@ -2288,21 +2794,12 @@ static void* get_os_memory (size_t size, int activate) { void *ptr; - unsigned long prot_flags = activate? PROT_READ|PROT_WRITE: PROT_NONE; + unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE; + prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON; size += pagesize - 1; size &= ~(pagesize - 1); - ptr = mmap (0, size, prot_flags, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (ptr == (void*)-1) { - int fd = open ("/dev/zero", O_RDONLY); - if (fd != -1) { - ptr = mmap (0, size, prot_flags, MAP_PRIVATE, fd, 0); - close (fd); - } - if (ptr == (void*)-1) { - return NULL; - } - } + ptr = mono_valloc (0, size, prot_flags); return ptr; } @@ -2347,7 +2844,7 @@ report_pinned_chunk (PinnedChunk *chunk, int seq) { /* * Debug reporting. 
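get_os_memory () above now defers to mono_valloc (); on POSIX systems that boils down to rounding the request up to a page multiple and mapping anonymous memory, roughly as in this illustrative standalone version (MAP_ANONYMOUS is spelled MAP_ANON on some older systems, as the removed fallback code acknowledged):

#include <stddef.h>
#include <sys/mman.h>
#include <unistd.h>

static void*
os_alloc (size_t size, int activate)
{
	size_t pagesize = (size_t)sysconf (_SC_PAGESIZE);
	int prot = activate ? (PROT_READ | PROT_WRITE) : PROT_NONE;
	void *ptr;

	size += pagesize - 1;		/* round up to a page multiple */
	size &= ~(pagesize - 1);
	ptr = mmap (NULL, size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	return ptr == MAP_FAILED ? NULL : ptr;
}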
*/ -static void +static G_GNUC_UNUSED void report_internal_mem_usage (void) { PinnedChunk *chunk; int i; @@ -2410,13 +2907,13 @@ static void sweep_pinned_objects (void) { PinnedChunk *chunk; - int i, j, obj_size; + int i, obj_size; char *p, *endp; void **ptr; void *end_chunk; for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) { end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE; - DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (ranhe: %p-%p)\n", chunk, chunk->start_data, end_chunk)); + DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk)); for (i = 0; i < chunk->num_pages; ++i) { obj_size = chunk->page_sizes [i]; if (!obj_size) @@ -2443,6 +2940,40 @@ sweep_pinned_objects (void) } } +static void +scan_from_pinned_objects (char *addr_start, char *addr_end) +{ + PinnedChunk *chunk; + int i, obj_size; + char *p, *endp; + void **ptr; + void *end_chunk; + for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) { + end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE; + DEBUG (6, fprintf (gc_debug_file, "Scanning pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk)); + for (i = 0; i < chunk->num_pages; ++i) { + obj_size = chunk->page_sizes [i]; + if (!obj_size) + continue; + p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data; + endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE; + DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp)); + while (p + obj_size <= endp) { + ptr = (void**)p; + DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr)); + /* if the first word (the vtable) is outside the chunk we have an object */ + if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) { + DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of alloc_pinned %p (%s)\n", i, ptr, safe_name (ptr))); + // FIXME: Put objects without references into separate chunks + // which do not need to be scanned + scan_object ((char*)ptr, addr_start, addr_end); + } + p += obj_size; + } + } + } +} + /* * Find the slot number in the freelist for memory chunks that * can contain @size objects. 
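The next hunk fixes build_freelist (), which threads a free list through a page of equal-sized slots: each free slot's first word holds the address of the next free slot, so allocation is a single pointer pop. A self-contained sketch of that representation (names simplified, no chunk bookkeeping, assumes slot_size >= sizeof (void*)):

#include <stddef.h>

static void*
thread_freelist (char *page, size_t page_size, size_t slot_size)
{
	void **p = (void**)page;
	void **head = p;
	/* each free slot's first word links to the next slot */
	while ((char*)p + 2 * slot_size <= page + page_size) {
		*p = (void*)((char*)p + slot_size);
		p = (void**)*p;
	}
	*p = NULL;	/* last slot terminates the list */
	return head;
}

/* an allocation then pops the head in O(1) */
static void*
freelist_alloc (void **free_list)
{
	void *res = *free_list;
	if (res)
		*free_list = *(void**)res;
	return res;
}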
@@ -2470,11 +3001,11 @@ build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char * void **p, **end; int count = 0; /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/ - p = start_page; + p = (void**)start_page; end = (void**)(end_page - size); g_assert (!chunk->free_list [slot]); chunk->free_list [slot] = p; - while ((char*)p + size <= end) { + while ((char*)p + size <= (char*)end) { count++; *p = (void*)((char*)p + size); p = *p; @@ -2514,7 +3045,7 @@ alloc_pinned_chunk (size_t size) /* allocate the first page to the freelist */ chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE; build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE)); - DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %d\n", chunk, size)); + DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %zd\n", chunk, size)); min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data); max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size)); return chunk; @@ -2587,6 +3118,7 @@ static void* get_internal_mem (size_t size) { return calloc (1, size); +#if 0 int slot; void *res = NULL; PinnedChunk *pchunk; @@ -2610,13 +3142,14 @@ get_internal_mem (size_t size) internal_chunk_list = pchunk; res = get_chunk_freelist (pchunk, slot); return res; +#endif } static void free_internal_mem (void *addr) { free (addr); - return; +#if 0 PinnedChunk *pchunk; for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) { /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/ @@ -2632,6 +3165,7 @@ free_internal_mem (void *addr) } printf ("free of %p failed\n", addr); g_assert_not_reached (); +#endif } /* @@ -2650,7 +3184,7 @@ static void free_large_object (LOSObject *obj) { size_t size = obj->size; - DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %d\n", obj->data, obj->size)); + DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %zd\n", obj->data, obj->size)); los_memory_usage -= size; size += sizeof (LOSObject); @@ -2676,7 +3210,7 @@ alloc_large_inner (MonoVTable *vtable, size_t size) int just_did_major_gc = FALSE; if (los_memory_usage > next_los_collection) { - DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %d (los already: %u, limit: %u)\n", size, los_memory_usage, next_los_collection)); + DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %zd (los already: %zu, limit: %zu)\n", size, los_memory_usage, next_los_collection)); just_did_major_gc = TRUE; stop_world (); major_collection (); @@ -2699,7 +3233,7 @@ alloc_large_inner (MonoVTable *vtable, size_t size) los_object_list = obj; los_memory_usage += size; los_num_objects++; - DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %d\n", obj->data, vtable, vtable->klass->name, size)); + DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %zd\n", obj->data, vtable, vtable->klass->name, size)); return obj->data; } @@ -2711,7 +3245,12 @@ static gboolean search_fragment_for_size (size_t size) { Fragment *frag, *prev; - DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %d\n", nursery_frag_real_end, size)); + DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size)); + + if (nursery_frag_real_end > nursery_next && nursery_clear_policy == 
CLEAR_AT_TLAB_CREATION) + /* Clear the remaining space, pinning depends on this */ + memset (nursery_next, 0, nursery_frag_real_end - nursery_next); + prev = NULL; for (frag = nursery_fragments; frag; frag = frag->next) { if (size <= (frag->fragment_end - frag->fragment_start)) { @@ -2722,9 +3261,8 @@ search_fragment_for_size (size_t size) nursery_fragments = frag->next; nursery_next = frag->fragment_start; nursery_frag_real_end = frag->fragment_end; - nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + size + SCAN_START_SIZE); - DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %d (req: %d)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size)); + DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %zd (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size)); frag->next = fragment_freelist; fragment_freelist = frag; return TRUE; @@ -2734,6 +3272,32 @@ search_fragment_for_size (size_t size) return FALSE; } +/* + * size is already rounded up and we hold the GC lock. + */ +static void* +alloc_degraded (MonoVTable *vtable, size_t size) +{ + GCMemSection *section; + void **p = NULL; + for (section = section_list; section; section = section->next) { + if (section != nursery_section && (section->end_data - section->next_data) >= size) { + p = (void**)section->next_data; + break; + } + } + if (!p) { + section = alloc_section (nursery_section->size * 4); + /* FIXME: handle OOM */ + p = (void**)section->next_data; + } + section->next_data += size; + degraded_mode += size; + DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section)); + *p = vtable; + return p; +} + /* * Provide a variant that takes just the vtable for small fixed-size objects. * The aligned size is already computed and stored in vt->gc_descr. @@ -2747,54 +3311,160 @@ mono_gc_alloc_obj (MonoVTable *vtable, size_t size) { /* FIXME: handle OOM */ void **p; + char *new_next; int dummy; + gboolean res; size += ALLOC_ALIGN - 1; size &= ~(ALLOC_ALIGN - 1); g_assert (vtable->gc_descr); - LOCK_GC; - p = (void**)nursery_next; + if (G_UNLIKELY (collect_before_allocs)) { + int dummy; + + if (nursery_section) { + LOCK_GC; + + update_current_thread_stack (&dummy); + stop_world (); + collect_nursery (0); + restart_world (); + if (!degraded_mode && !search_fragment_for_size (size)) { + // FIXME: + g_assert_not_reached (); + } + UNLOCK_GC; + } + } + + /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */ + + p = (void**)tlab_next; /* FIXME: handle overflow */ - nursery_next += size; - if (nursery_next >= nursery_temp_end) { - /* there are two cases: the object is too big or we need to collect */ - /* there can be another case (from ORP), if we cooperate with the runtime a bit: - * objects that need finalizers can have the high bit set in their size - * so the above check fails and we can readily add the object to the queue. - * This avoids taking again the GC lock when registering, but this is moot when - * doing thread-local allocation, so it may not be a good idea. + new_next = (char*)p + size; + tlab_next = new_next; + + if (G_LIKELY (new_next < tlab_temp_end)) { + /* Fast path */ + + /* + * FIXME: We might need a memory barrier here so the change to tlab_next is + * visible before the vtable store. 
*/ - if (size > MAX_SMALL_OBJ_SIZE) { - /* get ready for possible collection */ - update_current_thread_stack (&dummy); - nursery_next -= size; - p = alloc_large_inner (vtable, size); - } else { - if (nursery_next >= nursery_frag_real_end) { - nursery_next -= size; - if (!search_fragment_for_size (size)) { - /* get ready for possible collection */ - update_current_thread_stack (&dummy); - minor_collect_or_expand_inner (size); + + DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); + *p = vtable; + + return p; + } + + /* Slow path */ + + /* there are two cases: the object is too big or we run out of space in the TLAB */ + /* we also reach here when the thread does its first allocation after a minor + * collection, since the tlab_ variables are initialized to NULL. + * there can be another case (from ORP), if we cooperate with the runtime a bit: + * objects that need finalizers can have the high bit set in their size + * so the above check fails and we can readily add the object to the queue. + * This avoids taking the GC lock again when registering, but this is moot when + * doing thread-local allocation, so it may not be a good idea. + */ + LOCK_GC; + if (size > MAX_SMALL_OBJ_SIZE) { + /* get ready for possible collection */ + update_current_thread_stack (&dummy); + tlab_next -= size; + p = alloc_large_inner (vtable, size); + } else { + if (tlab_next >= tlab_real_end) { + /* + * Run out of space in the TLAB. When this happens, some amount of space + * remains in the TLAB, but not enough to satisfy the current allocation + * request. Currently, we retire the TLAB in all cases, later we could + * keep it if the remaining space is above a threshold, and satisfy the + * allocation directly from the nursery. + */ + tlab_next -= size; + /* when running in degraded mode, we continue allocating that way + * for a while, to decrease the number of useless nursery collections.
+ */ + if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) { + p = alloc_degraded (vtable, size); + UNLOCK_GC; + return p; + } + + if (size > tlab_size) { + /* Allocate directly from the nursery */ + if (nursery_next + size >= nursery_frag_real_end) { + if (!search_fragment_for_size (size)) { + /* get ready for possible collection */ + update_current_thread_stack (&dummy); + minor_collect_or_expand_inner (size); + if (degraded_mode) { + p = alloc_degraded (vtable, size); + UNLOCK_GC; + return p; + } + } } - /* nursery_next changed by minor_collect_or_expand_inner () */ + p = (void*)nursery_next; nursery_next += size; - if (nursery_next > nursery_temp_end) { + if (nursery_next > nursery_frag_real_end) { // no space left g_assert (0); } + + if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) + memset (p, 0, size); } else { - /* record the scan start so we can find pinned objects more easily */ - nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = p; - /* we just bump nursery_temp_end as well */ - nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + SCAN_START_SIZE); - DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", nursery_next, nursery_temp_end)); + if (tlab_start) + DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", tlab_start, tlab_real_end, (long)(tlab_real_end - tlab_next - size))); + + if (nursery_next + tlab_size >= nursery_frag_real_end) { + res = search_fragment_for_size (tlab_size); + if (!res) { + /* get ready for possible collection */ + update_current_thread_stack (&dummy); + minor_collect_or_expand_inner (tlab_size); + if (degraded_mode) { + p = alloc_degraded (vtable, size); + UNLOCK_GC; + return p; + } + } + } + + /* Allocate a new TLAB from the current nursery fragment */ + tlab_start = nursery_next; + nursery_next += tlab_size; + tlab_next = tlab_start; + tlab_real_end = tlab_start + tlab_size; + tlab_temp_end = tlab_start + MIN (SCAN_START_SIZE, tlab_size); + + if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) + memset (tlab_start, 0, tlab_size); + + /* Allocate from the TLAB */ + p = (void*)tlab_next; + tlab_next += size; + g_assert (tlab_next <= tlab_real_end); + + nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p; } + } else { + /* Reached tlab_temp_end */ + + /* record the scan start so we can find pinned objects more easily */ + nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p; + /* we just bump tlab_temp_end as well */ + tlab_temp_end = MIN (tlab_real_end, tlab_next + SCAN_START_SIZE); + DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", tlab_next, tlab_temp_end)); } } - DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size)); + + DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); *p = vtable; UNLOCK_GC; @@ -2822,7 +3492,7 @@ mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size) p = alloc_from_freelist (size); memset (p, 0, size); } - DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size)); + DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size)); *p = vtable; UNLOCK_GC; return p; @@ -2842,7 +3512,7 @@ mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size) 
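A condensed sketch of the bump-pointer scheme the rewritten mono_gc_alloc_obj () implements: the fast path is a thread-local pointer bump plus a vtable store, and everything else falls through to a refill path taken under the GC lock. Here calloc stands in for carving a zeroed TLAB out of a nursery fragment; alignment, LOS routing and retiring bookkeeping are omitted, and retired TLABs are simply leaked in this toy version:

#include <stddef.h>
#include <stdlib.h>

#define TLAB_SIZE 4096	/* stand-in for the collector's tlab_size */

static __thread char *my_tlab_next;
static __thread char *my_tlab_end;

static void*
tlab_alloc (size_t size, void *vtable)
{
	char *p = my_tlab_next;
	if (p && p + size <= my_tlab_end) {
		/* fast path: bump a thread-local pointer, store the vtable */
		my_tlab_next = p + size;
		*(void**)p = vtable;
		return p;
	}
	/* slow path: retire the TLAB and grab a fresh, zeroed one (calloc
	 * stands in for taking the GC lock and carving a nursery fragment) */
	if (size > TLAB_SIZE)
		return NULL;	/* real code routes large objects to the LOS */
	p = calloc (1, TLAB_SIZE);
	if (!p)
		return NULL;
	my_tlab_next = p + size;
	my_tlab_end = p + TLAB_SIZE;
	*(void**)p = vtable;
	return p;
}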
#define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj)) static void -finalize_in_range (void **start, void **end) +finalize_in_range (char *start, char *end) { FinalizeEntry *entry, *prev; int i; @@ -2851,7 +3521,7 @@ finalize_in_range (void **start, void **end) for (i = 0; i < finalizable_hash_size; ++i) { prev = NULL; for (entry = finalizable_hash [i]; entry;) { - if (entry->object >= start && entry->object < end) { + if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) { if (object_is_fin_ready (entry->object)) { char *from; FinalizeEntry *next; @@ -2884,14 +3554,14 @@ finalize_in_range (void **start, void **end) } static void -null_link_in_range (void **start, void **end) +null_link_in_range (char *start, char *end) { FinalizeEntry *entry, *prev; int i; for (i = 0; i < disappearing_link_hash_size; ++i) { prev = NULL; for (entry = disappearing_link_hash [i]; entry;) { - if (entry->object >= start && entry->object < end) { + if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) { if (object_is_fin_ready (entry->object)) { void **p = entry->data; FinalizeEntry *old; @@ -2905,6 +3575,7 @@ null_link_in_range (void **start, void **end) old = entry->next; free_internal_mem (entry); entry = old; + num_disappearing_links--; continue; } else { void **link; @@ -2923,6 +3594,57 @@ null_link_in_range (void **start, void **end) } } +/** + * mono_gc_finalizers_for_domain: + * @domain: the unloading appdomain + * @out_array: output array + * @out_size: size of output array + * + * Store inside @out_array up to @out_size objects that belong to the unloading + * appdomain @domain. Returns the number of stored items. Can be called repeatedly + * until it returns 0. + * The items are removed from the finalizer data structure, so the caller is supposed + * to finalize them. + * @out_array should be on the stack to allow the GC to know the objects are still alive.
+ */ +int +mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size) +{ + FinalizeEntry *entry, *prev; + int i, count; + if (no_finalize || !out_size || !out_array) + return 0; + count = 0; + LOCK_GC; + for (i = 0; i < finalizable_hash_size; ++i) { + prev = NULL; + for (entry = finalizable_hash [i]; entry;) { + if (mono_object_domain (entry->object) == domain) { + FinalizeEntry *next; + /* remove and put in out_array */ + if (prev) + prev->next = entry->next; + else + finalizable_hash [i] = entry->next; + next = entry->next; + num_registered_finalizers--; + out_array [count ++] = entry->object; + DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers)); + entry = next; + if (count == out_size) { + UNLOCK_GC; + return count; + } + continue; + } + prev = entry; + entry = entry->next; + } + } + UNLOCK_GC; + return count; +} + static void rehash_fin_table (void) { @@ -3028,14 +3750,26 @@ mono_gc_register_disappearing_link (MonoObject *obj, void *link) /* FIXME: add check that link is not in the heap */ hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size; entry = disappearing_link_hash [hash]; + prev = NULL; for (; entry; entry = entry->next) { /* link already added */ if (link == entry->data) { - /* FIXME: NULL obj means remove */ - entry->object = obj; /* we allow the change of object */ + /* NULL obj means remove */ + if (obj == NULL) { + if (prev) + prev->next = entry->next; + else + disappearing_link_hash [hash] = entry->next; + num_disappearing_links--; + DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links)); + free_internal_mem (entry); + } else { + entry->object = obj; /* we allow the change of object */ + } UNLOCK_GC; return; } + prev = entry; } entry = get_internal_mem (sizeof (FinalizeEntry)); entry->object = obj; @@ -3101,45 +3835,67 @@ mono_gc_add_memory_pressure (gint64 value) */ static void -rehash_roots (void) +rehash_roots (gboolean pinned) { int i; unsigned int hash; RootRecord **new_hash; RootRecord *entry, *next; - int new_size = g_spaced_primes_closest (num_roots_entries); + int new_size; + new_size = g_spaced_primes_closest (num_roots_entries [pinned]); new_hash = get_internal_mem (new_size * sizeof (RootRecord*)); - for (i = 0; i < roots_hash_size; ++i) { - for (entry = roots_hash [i]; entry; entry = next) { + for (i = 0; i < roots_hash_size [pinned]; ++i) { + for (entry = roots_hash [pinned][i]; entry; entry = next) { hash = mono_aligned_addr_hash (entry->start_root) % new_size; next = entry->next; entry->next = new_hash [hash]; new_hash [hash] = entry; } } - free_internal_mem (roots_hash); - roots_hash = new_hash; - roots_hash_size = new_size; + free_internal_mem (roots_hash [pinned]); + roots_hash [pinned] = new_hash; + roots_hash_size [pinned] = new_size; +} + +static RootRecord* +find_root (int root_type, char *start, guint32 addr_hash) +{ + RootRecord *new_root; + + guint32 hash = addr_hash % roots_hash_size [root_type]; + for (new_root = roots_hash [root_type][hash]; new_root; new_root = new_root->next) { + /* we allow changing the size and the descriptor (for thread statics etc) */ + if (new_root->start_root == start) { + return new_root; + } + } + + return NULL; } /* * We do not coalesce roots. 
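A hypothetical caller of mono_gc_finalizers_for_domain () following the contract documented above: the buffer lives on the stack so the collector keeps the returned objects alive, and the call is repeated until it returns 0 (run_finalizer is an assumed stand-in for the runtime's finalizer-invocation helper):

extern void run_finalizer (MonoObject *obj);	/* assumed runtime helper */

static void
finalize_domain_objects (MonoDomain *domain)
{
	MonoObject *objs [64];	/* on the stack, so the GC keeps these alive */
	int count;

	while ((count = mono_gc_finalizers_for_domain (domain, objs, 64)) > 0) {
		int i;
		for (i = 0; i < count; ++i)
			run_finalizer (objs [i]);
	}
}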
*/ -int -mono_gc_register_root (char *start, size_t size, void *descr) +static int +mono_gc_register_root_inner (char *start, size_t size, void *descr, int root_type) { RootRecord *new_root; - unsigned int hash = mono_aligned_addr_hash (start); + unsigned int hash, addr_hash = mono_aligned_addr_hash (start); + int i; LOCK_GC; - if (num_roots_entries >= roots_hash_size * 2) - rehash_roots (); - hash %= roots_hash_size; - for (new_root = roots_hash [hash]; new_root; new_root = new_root->next) { + for (i = 0; i < ROOT_TYPE_NUM; ++i) { + if (num_roots_entries [i] >= roots_hash_size [i] * 2) + rehash_roots (i); + } + for (i = 0; i < ROOT_TYPE_NUM; ++i) { + new_root = find_root (i, start, addr_hash); /* we allow changing the size and the descriptor (for thread statics etc) */ - if (new_root->start_root == start) { + if (new_root) { size_t old_size = new_root->end_root - new_root->start_root; new_root->end_root = new_root->start_root + size; + g_assert (((new_root->root_desc != 0) && (descr != NULL)) || + ((new_root->root_desc == 0) && (descr == NULL))); new_root->root_desc = (mword)descr; roots_size += size; roots_size -= old_size; @@ -3153,9 +3909,10 @@ mono_gc_register_root (char *start, size_t size, void *descr) new_root->end_root = new_root->start_root + size; new_root->root_desc = (mword)descr; roots_size += size; - num_roots_entries++; - new_root->next = roots_hash [hash]; - roots_hash [hash] = new_root; + hash = addr_hash % roots_hash_size [root_type]; + num_roots_entries [root_type]++; + new_root->next = roots_hash [root_type] [hash]; + roots_hash [root_type][hash] = new_root; DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size)); } else { UNLOCK_GC; @@ -3165,28 +3922,45 @@ mono_gc_register_root (char *start, size_t size, void *descr) return TRUE; } +int +mono_gc_register_root (char *start, size_t size, void *descr) +{ + return mono_gc_register_root_inner (start, size, descr, descr ? 
ROOT_TYPE_NORMAL : ROOT_TYPE_PINNED); +} + +int +mono_gc_register_root_wbarrier (char *start, size_t size, void *descr) +{ + return mono_gc_register_root_inner (start, size, descr, ROOT_TYPE_WBARRIER); +} + void mono_gc_deregister_root (char* addr) { - RootRecord *tmp, *prev = NULL; - unsigned int hash = mono_aligned_addr_hash (addr); + RootRecord *tmp, *prev; + unsigned int hash, addr_hash = mono_aligned_addr_hash (addr); + int root_type; + LOCK_GC; - hash %= roots_hash_size; - tmp = roots_hash [hash]; - while (tmp) { - if (tmp->start_root == (char*)addr) { - if (prev) - prev->next = tmp->next; - else - roots_hash [hash] = tmp->next; - roots_size -= (tmp->end_root - tmp->start_root); - num_roots_entries--; - DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root)); - free_internal_mem (tmp); - break; + for (root_type = 0; root_type < ROOT_TYPE_NUM; ++root_type) { + hash = addr_hash % roots_hash_size [root_type]; + tmp = roots_hash [root_type][hash]; + prev = NULL; + while (tmp) { + if (tmp->start_root == (char*)addr) { + if (prev) + prev->next = tmp->next; + else + roots_hash [root_type][hash] = tmp->next; + roots_size -= (tmp->end_root - tmp->start_root); + num_roots_entries [root_type]--; + DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root)); + free_internal_mem (tmp); + break; + } + prev = tmp; + tmp = tmp->next; } - prev = tmp; - tmp = tmp->next; } UNLOCK_GC; } @@ -3197,29 +3971,22 @@ mono_gc_deregister_root (char* addr) * ###################################################################### */ -#undef pthread_create -#undef pthread_join -#undef pthread_detach - -typedef struct { - void *(*start_routine) (void *); - void *arg; - int flags; - sem_t registered; -} SgenThreadStartInfo; - /* eventually share with MonoThread? 
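Illustrative use of the registration API above, for runtime code that keeps object references in non-GC memory. Passing a NULL descriptor requests conservative scanning, which mono_gc_register_root () now maps to ROOT_TYPE_PINNED; cached_objects is a made-up example, not an existing root:

static MonoObject *cached_objects [16];	/* hypothetical non-GC-heap globals */

static void
install_roots (void)
{
	/* NULL descriptor: scan conservatively (ROOT_TYPE_PINNED above) */
	mono_gc_register_root ((char*)cached_objects, sizeof (cached_objects), NULL);
}

static void
remove_roots (void)
{
	mono_gc_deregister_root ((char*)cached_objects);
}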
*/ typedef struct _SgenThreadInfo SgenThreadInfo; struct _SgenThreadInfo { SgenThreadInfo *next; - pthread_t id; + ARCH_THREAD_TYPE id; unsigned int stop_count; /* to catch duplicate signals */ int signal; int skip; void *stack_end; void *stack_start; - RememberedSet **remset; + char **tlab_next_addr; + char **tlab_start_addr; + char **tlab_temp_end_addr; + char **tlab_real_end_addr; + RememberedSet *remset; }; /* FIXME: handle large/small config */ @@ -3227,6 +3994,9 @@ struct _SgenThreadInfo { #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u) static SgenThreadInfo* thread_table [THREAD_HASH_SIZE]; + +#if USE_SIGNAL_BASED_START_STOP_WORLD + static sem_t suspend_ack_semaphore; static unsigned int global_stop_count = 0; static int suspend_signal_num = SIGPWR; @@ -3236,13 +4006,13 @@ static mword cur_thread_regs [ARCH_NUM_REGS] = {0}; /* LOCKING: assumes the GC lock is held */ static SgenThreadInfo* -thread_info_lookup (pthread_t id) +thread_info_lookup (ARCH_THREAD_TYPE id) { unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE; SgenThreadInfo *info; info = thread_table [hash]; - while (info && !pthread_equal (info->id, id)) { + while (info && !ARCH_THREAD_EQUALS (info->id, id)) { info = info->next; } return info; @@ -3252,7 +4022,7 @@ static void update_current_thread_stack (void *start) { void *ptr = cur_thread_regs; - SgenThreadInfo *info = thread_info_lookup (pthread_self ()); + SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ()); info->stack_start = align_pointer (&ptr); ARCH_STORE_REGS (ptr); } @@ -3279,7 +4049,7 @@ thread_handshake (int signum) for (i = 0; i < THREAD_HASH_SIZE; ++i) { for (info = thread_table [i]; info; info = info->next) { DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum))); - if (pthread_equal (info->id, me)) { + if (ARCH_THREAD_EQUALS (info->id, me)) { DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id)); continue; } @@ -3290,7 +4060,7 @@ thread_handshake (int signum) DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info)); count++; } else { - DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", info->id, result, strerror (result))); + DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result))); info->skip = 1; } } @@ -3360,7 +4130,7 @@ restart_handler (int sig) errno = old_errno; } -static struct timeval stop_world_time; +static TV_DECLARE (stop_world_time); static unsigned long max_pause_usec = 0; /* LOCKING: assumes the GC lock is held */ @@ -3370,8 +4140,8 @@ stop_world (void) int count; global_stop_count++; - DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (pthread_self ()), (gpointer)pthread_self ())); - gettimeofday (&stop_world_time, NULL); + DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ())); + TV_GETTIME (stop_world_time); count = thread_handshake (suspend_signal_num); DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count)); return count; @@ -3382,18 +4152,19 @@ static int restart_world (void) { int count; - struct timeval end_sw; + TV_DECLARE (end_sw); unsigned long usec; count = thread_handshake (restart_signal_num); - gettimeofday (&end_sw, NULL); - usec = (end_sw.tv_sec - stop_world_time.tv_sec) * 1000000; - usec += end_sw.tv_usec - 
stop_world_time.tv_usec; + TV_GETTIME (end_sw); + usec = TV_ELAPSED (stop_world_time, end_sw); max_pause_usec = MAX (usec, max_pause_usec); DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec)); return count; } +#endif /* USE_SIGNAL_BASED_START_STOP_WORLD */ + /* * Identify objects pinned in a thread stack and its registers. */ @@ -3406,15 +4177,15 @@ pin_thread_data (void *start_nursery, void *end_nursery) for (i = 0; i < THREAD_HASH_SIZE; ++i) { for (info = thread_table [i]; info; info = info->next) { if (info->skip) { - DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, info->stack_end - info->stack_start)); + DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start)); continue; } - DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, info->stack_end - info->stack_start)); + DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %zd, pinned=%d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, next_pin_slot)); conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery); } } - DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers\n")); - conservatively_pin_objects_from (cur_thread_regs, cur_thread_regs + ARCH_NUM_REGS, start_nursery, end_nursery); + DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers, pinned=%d\n", next_pin_slot)); + conservatively_pin_objects_from ((void*)cur_thread_regs, (void*)(cur_thread_regs + ARCH_NUM_REGS), start_nursery, end_nursery); } static void @@ -3431,7 +4202,7 @@ find_pinning_ref_from_thread (char *obj, size_t size) continue; while (start < (char**)info->stack_end) { if (*start >= obj && *start < endobj) { - DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, info->id, start, info->stack_start, info->stack_end)); + DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, (gpointer)info->id, start, info->stack_start, info->stack_end)); } start++; } @@ -3456,33 +4227,75 @@ handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global { void **ptr; mword count; + mword desc; /* FIXME: exclude stack locations */ switch ((*p) & REMSET_TYPE_MASK) { case REMSET_LOCATION: ptr = (void**)(*p); - if ((ptr < start_nursery || ptr >= end_nursery) && ptr_in_heap (ptr)) { + //__builtin_prefetch (ptr); + if (((void*)ptr < start_nursery || (void*)ptr >= end_nursery) && ptr_in_heap (ptr)) { *ptr = copy_object (*ptr, start_nursery, end_nursery); DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr)); - if (!global && *ptr >= start_nursery && *ptr < end_nursery) - add_to_global_remset (ptr); + if (!global && *ptr >= start_nursery && *ptr < end_nursery) { + /* + * If the object is pinned, each reference to it from nonpinned objects + * becomes part of the global remset, which can grow very large. 
+ */ + DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr))); + add_to_global_remset (ptr, FALSE); + } } else { DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr)); } return p + 1; case REMSET_RANGE: ptr = (void**)(*p & ~REMSET_TYPE_MASK); - if ((ptr >= start_nursery && ptr < end_nursery) || !ptr_in_heap (ptr)) + if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr)) return p + 2; count = p [1]; while (count-- > 0) { *ptr = copy_object (*ptr, start_nursery, end_nursery); DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count)); if (!global && *ptr >= start_nursery && *ptr < end_nursery) - add_to_global_remset (ptr); + add_to_global_remset (ptr, FALSE); ++ptr; } return p + 2; + case REMSET_OBJECT: + ptr = (void**)(*p & ~REMSET_TYPE_MASK); + if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr)) + return p + 1; + scan_object (*ptr, start_nursery, end_nursery); + return p + 1; + case REMSET_OTHER: { + ptr = (void**)(*p & ~REMSET_TYPE_MASK); + + switch (p [1]) { + case REMSET_VTYPE: + if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr)) + return p + 3; + desc = p [2]; + scan_vtype ((char*)ptr, desc, start_nursery, end_nursery); + return p + 3; + case REMSET_ROOT_LOCATION: + /* Same as REMSET_LOCATION, but the address is not required to be in the heap */ + *ptr = copy_object (*ptr, start_nursery, end_nursery); + DEBUG (9, fprintf (gc_debug_file, "Overwrote root location remset at %p with %p\n", ptr, *ptr)); + if (!global && *ptr >= start_nursery && *ptr < end_nursery) { + /* + * If the object is pinned, each reference to it from nonpinned objects + * becomes part of the global remset, which can grow very large. + */ + DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr))); + add_to_global_remset (ptr, TRUE); + } + return p + 2; + default: + g_assert_not_reached (); + } + break; + } default: g_assert_not_reached (); } @@ -3495,20 +4308,45 @@ scan_from_remsets (void *start_nursery, void *end_nursery) int i; SgenThreadInfo *info; RememberedSet *remset, *next; - mword *p; + mword *p, *next_p, *store_pos; /* the global one */ for (remset = global_remset; remset; remset = remset->next) { - DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %d\n", remset->data, remset->store_next, remset->store_next - remset->data)); - for (p = remset->data; p < remset->store_next;) { - p = handle_remset (p, start_nursery, end_nursery, TRUE); + DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data)); + store_pos = remset->data; + for (p = remset->data; p < remset->store_next; p = next_p) { + mword ptr; + + next_p = handle_remset (p, start_nursery, end_nursery, TRUE); + + /* + * Clear global remsets of locations which no longer point to the + * nursery. Otherwise, they could grow indefinitely between major + * collections. 
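handle_remset () relies on recorded addresses being pointer-aligned, which leaves the low two bits of each stored word free for the REMSET_* type tag. A standalone sketch of the encode/decode discipline (enum and type names shortened so they do not clash with the real ones):

#include <stdio.h>

typedef unsigned long word;	/* stands in for mword */

enum { RS_LOCATION, RS_RANGE, RS_OBJECT, RS_OTHER, RS_TYPE_MASK = 0x3 };

static word
rs_encode (void *addr, int type)
{
	/* addr is at least 4-byte aligned, so the low two bits are free */
	return (word)addr | (word)type;
}

static void
rs_decode (word w)
{
	void *addr = (void*)(w & ~(word)RS_TYPE_MASK);
	switch (w & RS_TYPE_MASK) {
	case RS_LOCATION: printf ("single slot at %p\n", addr); break;
	case RS_RANGE:    printf ("slot range at %p, count in next word\n", addr); break;
	case RS_OBJECT:   printf ("whole object at %p\n", addr); break;
	case RS_OTHER:    printf ("subtyped entry at %p, subtype in next word\n", addr); break;
	}
}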
+ */ + ptr = (p [0] & ~REMSET_TYPE_MASK); + if ((p [0] & REMSET_TYPE_MASK) == REMSET_LOCATION) { + if (ptr_in_nursery (*(void**)ptr)) + *store_pos ++ = p [0]; + } else { + g_assert ((p [0] & REMSET_TYPE_MASK) == REMSET_OTHER); + g_assert (p [1] == REMSET_ROOT_LOCATION); + if (ptr_in_nursery (*(void**)ptr)) { + *store_pos ++ = p [0]; + *store_pos ++ = p [1]; + } + } } + + /* Truncate the remset */ + remset->store_next = store_pos; } + /* the per-thread ones */ for (i = 0; i < THREAD_HASH_SIZE; ++i) { for (info = thread_table [i]; info; info = info->next) { for (remset = info->remset; remset; remset = next) { - DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %d\n", info, remset->data, remset->store_next, remset->store_next - remset->data)); + DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data)); for (p = remset->data; p < remset->store_next;) { p = handle_remset (p, start_nursery, end_nursery, FALSE); } @@ -3562,19 +4400,64 @@ clear_remsets (void) } } -/* LOCKING: assumes the GC lock is held */ -static SgenThreadInfo* +/* + * Clear the thread local TLAB variables for all threads. + */ +static void +clear_tlabs (void) +{ + SgenThreadInfo *info; + int i; + + for (i = 0; i < THREAD_HASH_SIZE; ++i) { + for (info = thread_table [i]; info; info = info->next) { + /* A new TLAB will be allocated when the thread does its first allocation */ + *info->tlab_start_addr = NULL; + *info->tlab_next_addr = NULL; + *info->tlab_temp_end_addr = NULL; + *info->tlab_real_end_addr = NULL; + } + } +} + +/* + * Find the tlab_next value of the TLAB which contains ADDR. + */ +static char* +find_tlab_next_from_address (char *addr) +{ + SgenThreadInfo *info; + int i; + + for (i = 0; i < THREAD_HASH_SIZE; ++i) { + for (info = thread_table [i]; info; info = info->next) { + if (addr >= *info->tlab_start_addr && addr < *info->tlab_next_addr) + return *info->tlab_next_addr; + } + } + + return NULL; +} + +/* LOCKING: assumes the GC lock is held */ +static SgenThreadInfo* gc_register_current_thread (void *addr) { int hash; SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo)); if (!info) return NULL; - info->id = pthread_self (); + info->id = ARCH_GET_THREAD (); info->stop_count = -1; info->skip = 0; info->signal = 0; info->stack_start = NULL; + info->tlab_start_addr = &tlab_start; + info->tlab_next_addr = &tlab_next; + info->tlab_temp_end_addr = &tlab_temp_end; + info->tlab_real_end_addr = &tlab_real_end; + + tlab_next_addr = &tlab_next; /* try to get it with attributes first */ #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK) @@ -3585,6 +4468,7 @@ gc_register_current_thread (void *addr) pthread_getattr_np (pthread_self (), &attr); pthread_attr_getstack (&attr, &sstart, &size); info->stack_end = (char*)sstart + size; + pthread_attr_destroy (&attr); } #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP) info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ()); @@ -3604,6 +4488,7 @@ gc_register_current_thread (void *addr) thread_table [hash] = info; remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info); + pthread_setspecific (remembered_set_key, remembered_set); DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash)); return info; } @@ -3614,13 +4499,14 @@ unregister_current_thread (void) int hash; SgenThreadInfo *prev = NULL; 
SgenThreadInfo *p; - pthread_t id = pthread_self (); + RememberedSet *rset; + ARCH_THREAD_TYPE id = ARCH_GET_THREAD (); hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE; p = thread_table [hash]; assert (p); DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id)); - while (!pthread_equal (p->id, id)) { + while (!ARCH_THREAD_EQUALS (p->id, id)) { prev = p; p = p->next; } @@ -3629,9 +4515,49 @@ unregister_current_thread (void) } else { prev->next = p->next; } + rset = p->remset; + /* FIXME: transfer remsets if any */ + while (rset) { + RememberedSet *next = rset->next; + free_internal_mem (rset); + rset = next; + } free (p); } +static void +unregister_thread (void *k) +{ + LOCK_GC; + unregister_current_thread (); + UNLOCK_GC; +} + +gboolean +mono_gc_register_thread (void *baseptr) +{ + SgenThreadInfo *info; + LOCK_GC; + info = thread_info_lookup (ARCH_GET_THREAD ()); + if (info == NULL) + info = gc_register_current_thread (baseptr); + UNLOCK_GC; + return info != NULL; +} + +#if USE_PTHREAD_INTERCEPT + +#undef pthread_create +#undef pthread_join +#undef pthread_detach + +typedef struct { + void *(*start_routine) (void *); + void *arg; + int flags; + sem_t registered; +} SgenThreadStartInfo; + static void* gc_start_thread (void *arg) { @@ -3646,9 +4572,12 @@ gc_start_thread (void *arg) UNLOCK_GC; sem_post (&(start_info->registered)); result = start_func (t_arg); + /* + * this is done by the pthread key dtor LOCK_GC; unregister_current_thread (); UNLOCK_GC; + */ return result; } @@ -3689,17 +4618,7 @@ mono_gc_pthread_detach (pthread_t thread) return pthread_detach (thread); } -gboolean -mono_gc_register_thread (void *baseptr) -{ - SgenThreadInfo *info; - LOCK_GC; - info = thread_info_lookup (pthread_self ()); - if (info == NULL) - info = gc_register_current_thread (baseptr); - UNLOCK_GC; - return info != NULL; -} +#endif /* USE_PTHREAD_INTERCEPT */ /* * ###################################################################### @@ -3729,7 +4648,7 @@ void mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value) { RememberedSet *rs; - if (field_ptr >= nursery_start && field_ptr < nursery_real_end) { + if (ptr_in_nursery (field_ptr)) { *(void**)field_ptr = value; return; } @@ -3743,7 +4662,7 @@ mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* val rs = alloc_remset (rs->end_set - rs->data, (void*)1); rs->next = remembered_set; remembered_set = rs; - thread_info_lookup (pthread_self())->remset = rs; + thread_info_lookup (ARCH_GET_THREAD ())->remset = rs; *(rs->store_next++) = (mword)field_ptr; *(void**)field_ptr = value; } @@ -3752,7 +4671,7 @@ void mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value) { RememberedSet *rs = remembered_set; - if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end) { + if (ptr_in_nursery (slot_ptr)) { *(void**)slot_ptr = value; return; } @@ -3765,7 +4684,7 @@ mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* va rs = alloc_remset (rs->end_set - rs->data, (void*)1); rs->next = remembered_set; remembered_set = rs; - thread_info_lookup (pthread_self())->remset = rs; + thread_info_lookup (ARCH_GET_THREAD ())->remset = rs; *(rs->store_next++) = (mword)slot_ptr; *(void**)slot_ptr = value; } @@ -3774,7 +4693,7 @@ void mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count) { RememberedSet *rs = remembered_set; - if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end) + if (ptr_in_nursery (slot_ptr)) return; 
DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count)); if (rs->store_next + 1 < rs->end_set) { @@ -3785,7 +4704,7 @@ mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count) rs = alloc_remset (rs->end_set - rs->data, (void*)1); rs->next = remembered_set; remembered_set = rs; - thread_info_lookup (pthread_self())->remset = rs; + thread_info_lookup (ARCH_GET_THREAD ())->remset = rs; *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE; *(rs->store_next++) = count; } @@ -3793,13 +4712,14 @@ mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count) void mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value) { - RememberedSet *rs = remembered_set; - if (ptr >= nursery_start && ptr < nursery_real_end) { + RememberedSet *rs; + if (ptr_in_nursery (ptr)) { DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr)); *(void**)ptr = value; return; } - DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", ptr)); + rs = remembered_set; + DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p (%s)\n", ptr, value ? safe_name (value) : "null")); /* FIXME: ensure it is on the heap */ if (rs->store_next < rs->end_set) { *(rs->store_next++) = (mword)ptr; @@ -3809,18 +4729,453 @@ mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value) rs = alloc_remset (rs->end_set - rs->data, (void*)1); rs->next = remembered_set; remembered_set = rs; - thread_info_lookup (pthread_self())->remset = rs; + thread_info_lookup (ARCH_GET_THREAD ())->remset = rs; *(rs->store_next++) = (mword)ptr; *(void**)ptr = value; } +void +mono_gc_wbarrier_set_root (gpointer ptr, MonoObject *value) +{ + RememberedSet *rs = remembered_set; + if (ptr_in_nursery (ptr)) + return; + DEBUG (8, fprintf (gc_debug_file, "Adding root remset at %p (%s)\n", ptr, value ? safe_name (value) : "null")); + + if (rs->store_next + 2 < rs->end_set) { + *(rs->store_next++) = (mword)ptr | REMSET_OTHER; + *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION; + *(void**)ptr = value; + return; + } + rs = alloc_remset (rs->end_set - rs->data, (void*)1); + rs->next = remembered_set; + remembered_set = rs; + thread_info_lookup (ARCH_GET_THREAD ())->remset = rs; + *(rs->store_next++) = (mword)ptr | REMSET_OTHER; + *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION; + + *(void**)ptr = value; +} + void mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass) { - if (dest >= nursery_start && dest < nursery_real_end) { + RememberedSet *rs = remembered_set; + if (ptr_in_nursery (dest)) + return; + DEBUG (8, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name)); + + if (rs->store_next + 2 < rs->end_set) { + *(rs->store_next++) = (mword)dest | REMSET_OTHER; + *(rs->store_next++) = (mword)REMSET_VTYPE; + *(rs->store_next++) = (mword)klass->gc_descr; + return; + } + rs = alloc_remset (rs->end_set - rs->data, (void*)1); + rs->next = remembered_set; + remembered_set = rs; + thread_info_lookup (ARCH_GET_THREAD ())->remset = rs; + *(rs->store_next++) = (mword)dest | REMSET_OTHER; + *(rs->store_next++) = (mword)REMSET_VTYPE; + *(rs->store_next++) = (mword)klass->gc_descr; +} + +/** + * mono_gc_wbarrier_object: + * + * Write barrier to call when obj is the result of a clone or copy of an object. 
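Taken together, the barriers in this hunk impose one rule on the runtime: a reference is never stored into a possibly-old location directly, it goes through a barrier so the old-to-young edge lands in the thread's remembered set. A sketch of what a call site looks like; set_cached_value and its arguments are hypothetical, and in practice the JIT emits the barrier call:

static void
set_cached_value (MonoObject *holder, MonoObject **field, MonoObject *value)
{
	/* a plain "*field = value;" would be wrong once holder is outside
	 * the nursery: the old-to-young edge would never be scanned */
	mono_gc_wbarrier_set_field (holder, field, value);
}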
*/ +void +mono_gc_wbarrier_object (MonoObject* obj) +{ + RememberedSet *rs = remembered_set; + DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj)); + if (rs->store_next < rs->end_set) { + *(rs->store_next++) = (mword)obj | REMSET_OBJECT; + return; + } + rs = alloc_remset (rs->end_set - rs->data, (void*)1); + rs->next = remembered_set; + remembered_set = rs; + thread_info_lookup (ARCH_GET_THREAD ())->remset = rs; + *(rs->store_next++) = (mword)obj | REMSET_OBJECT; +} + +/* + * ###################################################################### + * ######## Collector debugging + * ###################################################################### + */ + +const char *descriptor_types [] = { + "run_length", + "small_bitmap", + "string", + "complex", + "vector", + "array", + "large_bitmap", + "complex_arr" +}; + +void +describe_ptr (char *ptr) +{ + GCMemSection *section; + MonoVTable *vtable; + mword desc; + int type; + + if (ptr_in_nursery (ptr)) { + printf ("Pointer inside nursery.\n"); + } else { + for (section = section_list; section;) { + if (ptr >= section->data && ptr < section->data + section->size) + break; + section = section->next; + } + + if (section) { + printf ("Pointer inside oldspace.\n"); + } else if (obj_is_from_pinned_alloc (ptr)) { + printf ("Pointer is inside a pinned chunk.\n"); + } else { + printf ("Pointer unknown.\n"); + return; + } + } + + if (object_is_pinned (ptr)) + printf ("Object is pinned.\n"); + + if (object_is_forwarded (ptr)) + printf ("Object is forwarded.\n"); + + // FIXME: Handle pointers to the inside of objects + vtable = (MonoVTable*)LOAD_VTABLE (ptr); + + printf ("VTable: %p\n", vtable); + if (vtable == NULL) { + printf ("VTable is invalid (empty).\n"); + return; + } + if (ptr_in_nursery (vtable)) { + printf ("VTable is invalid (points inside nursery).\n"); return; } - DEBUG (1, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name)); + printf ("Class: %s\n", vtable->klass->name); + + desc = ((GCVTable*)vtable)->desc; + printf ("Descriptor: %lx\n", desc); + + type = desc & 0x7; + printf ("Descriptor type: %d (%s)\n", type, descriptor_types [type]); +} + +static mword* +find_in_remset_loc (mword *p, char *addr, gboolean *found) +{ + void **ptr; + mword count, desc; + size_t skip_size; + + switch ((*p) & REMSET_TYPE_MASK) { + case REMSET_LOCATION: + if (*p == (mword)addr) + *found = TRUE; + return p + 1; + case REMSET_RANGE: + ptr = (void**)(*p & ~REMSET_TYPE_MASK); + count = p [1]; + if ((void**)addr >= ptr && (void**)addr < ptr + count) + *found = TRUE; + return p + 2; + case REMSET_OBJECT: + ptr = (void**)(*p & ~REMSET_TYPE_MASK); + count = safe_object_get_size ((MonoObject*)ptr); + count += (ALLOC_ALIGN - 1); + count &= ~(ALLOC_ALIGN - 1); + count /= sizeof (mword); + if ((void**)addr >= ptr && (void**)addr < ptr + count) + *found = TRUE; + return p + 1; + case REMSET_OTHER: { + switch (p [1]) { + case REMSET_VTYPE: + ptr = (void**)(*p & ~REMSET_TYPE_MASK); + desc = p [2]; + + switch (desc & 0x7) { + case DESC_TYPE_RUN_LENGTH: + OBJ_RUN_LEN_SIZE (skip_size, desc, ptr); + /* The descriptor includes the size of MonoObject */ + skip_size -= sizeof (MonoObject); + if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer))) + *found = TRUE; + break; + default: + // FIXME: + g_assert_not_reached (); + } + + return p + 3; + case REMSET_ROOT_LOCATION: + return p + 2; + default: + g_assert_not_reached (); + } + break; + } + default: + g_assert_not_reached (); + } +
+	return NULL;
+}
+
+/*
+ * Return whether ADDR occurs in the remembered sets
+ */
+static gboolean
+find_in_remsets (char *addr)
+{
+	int i;
+	SgenThreadInfo *info;
+	RememberedSet *remset;
+	mword *p;
+	gboolean found = FALSE;
+
+	/* the global one */
+	for (remset = global_remset; remset; remset = remset->next) {
+		DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
+		for (p = remset->data; p < remset->store_next;) {
+			p = find_in_remset_loc (p, addr, &found);
+			if (found)
+				return TRUE;
+		}
+	}
+	/* the per-thread ones */
+	for (i = 0; i < THREAD_HASH_SIZE; ++i) {
+		for (info = thread_table [i]; info; info = info->next) {
+			for (remset = info->remset; remset; remset = remset->next) {
+				DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
+				for (p = remset->data; p < remset->store_next;) {
+					p = find_in_remset_loc (p, addr, &found);
+					if (found)
+						return TRUE;
+				}
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+#undef HANDLE_PTR
+#define HANDLE_PTR(ptr,obj)	do {	\
+		if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) {	\
+			if (!find_in_remsets ((char*)(ptr))) { \
+				fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %zd in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \
+				g_assert_not_reached (); \
+			} \
+		} \
+	} while (0)
+
+/*
+ * Check that each object reference inside the area that points into the nursery
+ * can be found in the remembered sets.
+ */
+static void __attribute__((noinline))
+check_remsets_for_area (char *start, char *end)
+{
+	GCVTable *vt;
+	size_t skip_size;
+	int type;
+	int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
+	mword desc;
+	new_obj_references = 0;
+	obj_references_checked = 0;
+	while (start < end) {
+		if (!*(void**)start) {
+			start += sizeof (void*); /* should be ALLOC_ALIGN, really */
+			continue;
+		}
+		vt = (GCVTable*)LOAD_VTABLE (start);
+		DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
+		if (0) {
+			MonoObject *obj = (MonoObject*)start;
+			g_print ("found at %p (0x%lx): %s.%s\n", start, (long)vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
+		}
+		desc = vt->desc;
+		type = desc & 0x7;
+		if (type == DESC_TYPE_STRING) {
+			STRING_SIZE (skip_size, start);
+			start += skip_size;
+			type_str++;
+			continue;
+		} else if (type == DESC_TYPE_RUN_LENGTH) {
+			OBJ_RUN_LEN_SIZE (skip_size, desc, start);
+			g_assert (skip_size);
+			OBJ_RUN_LEN_FOREACH_PTR (desc,start);
+			start += skip_size;
+			type_rlen++;
+			continue;
+		} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
+			skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
+			skip_size *= mono_array_length ((MonoArray*)start);
+			skip_size += sizeof (MonoArray);
+			skip_size += (ALLOC_ALIGN - 1);
+			skip_size &= ~(ALLOC_ALIGN - 1);
+			OBJ_VECTOR_FOREACH_PTR (vt, start);
+			if (((MonoArray*)start)->bounds) {
+				/* account for the bounds */
+				skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
+			}
+			start += skip_size;
+			type_vector++;
+			continue;
+		} else if (type == DESC_TYPE_SMALL_BITMAP) {
+			OBJ_BITMAP_SIZE (skip_size, desc, start);
+			g_assert (skip_size);
+			OBJ_BITMAP_FOREACH_PTR (desc,start);
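+			/*
+			 * Rough sketch of what the bitmap walk above amounts to
+			 * (illustrative, not the actual macro expansion; 'bmap',
+			 * 'fields' and bitmap_extracted_from() are hypothetical):
+			 * the descriptor carries a bitmap with one bit per
+			 * pointer-sized slot, and HANDLE_PTR (redefined above to
+			 * consult the remembered sets) is applied to every slot
+			 * whose bit is set:
+			 *
+			 *   void **fields = (void**)(start + sizeof (MonoObject));
+			 *   mword bmap = bitmap_extracted_from (desc);
+			 *   for (int i = 0; bmap; bmap >>= 1, ++i)
+			 *       if (bmap & 1)
+			 *           HANDLE_PTR (fields + i, start);
+			 */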
+			start += skip_size;
+			type_bitmap++;
+			continue;
+		} else if (type == DESC_TYPE_LARGE_BITMAP) {
+			skip_size = safe_object_get_size ((MonoObject*)start);
+			skip_size += (ALLOC_ALIGN - 1);
+			skip_size &= ~(ALLOC_ALIGN - 1);
+			OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
+			start += skip_size;
+			type_lbit++;
+			continue;
+		} else if (type == DESC_TYPE_COMPLEX) {
+			/* this is a complex object */
+			skip_size = safe_object_get_size ((MonoObject*)start);
+			skip_size += (ALLOC_ALIGN - 1);
+			skip_size &= ~(ALLOC_ALIGN - 1);
+			OBJ_COMPLEX_FOREACH_PTR (vt, start);
+			start += skip_size;
+			type_complex++;
+			continue;
+		} else if (type == DESC_TYPE_COMPLEX_ARR) {
+			/* this is an array of complex structs */
+			skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
+			skip_size *= mono_array_length ((MonoArray*)start);
+			skip_size += sizeof (MonoArray);
+			skip_size += (ALLOC_ALIGN - 1);
+			skip_size &= ~(ALLOC_ALIGN - 1);
+			OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
+			if (((MonoArray*)start)->bounds) {
+				/* account for the bounds */
+				skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
+			}
+			start += skip_size;
+			type_complex++;
+			continue;
+		} else {
+			g_assert (0);
+		}
+	}
+}
+
+/*
+ * Perform a consistency check of the heap.
+ *
+ * Assumes the world is stopped.
+ */
+void
+check_consistency (void)
+{
+	GCMemSection *section;
+
+	// Need to add more checks
+	// FIXME: Create a general heap enumeration function and use that
+
+	DEBUG (1, fprintf (gc_debug_file, "Begin heap consistency check...\n"));
+
+	// Check that oldspace->newspace pointers are registered with the collector
+	for (section = section_list; section; section = section->next) {
+		if (section->role == MEMORY_ROLE_GEN0)
+			continue;
+		DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
+		check_remsets_for_area (section->data, section->next_data);
+	}
+
+	DEBUG (1, fprintf (gc_debug_file, "Heap consistency check done.\n"));
+}
+
+/* Check that the reference is valid */
+#undef HANDLE_PTR
+#define HANDLE_PTR(ptr,obj)	do {	\
+		if (*(ptr)) { \
+			g_assert (safe_name (*(ptr)) != NULL); \
+		} \
+	} while (0)
+
+/*
+ * check_object:
+ *
+ * Perform a consistency check on an object. Currently we only check that the
+ * reference fields are valid.
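+ *
+ * A hypothetical usage sketch (the 'section' fields below match the ones used
+ * by check_consistency above): since check_object returns the address just
+ * past the object it examined, a whole section can be validated by chaining
+ * the calls:
+ *
+ *   char *p = section->data;
+ *   while (p < section->next_data)
+ *       p = check_object (p);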
+ */ +char* +check_object (char *start) +{ + GCVTable *vt; + size_t skip_size; + mword desc; + + if (!start) + return NULL; + + vt = (GCVTable*)LOAD_VTABLE (start); + //type = vt->desc & 0x7; + + desc = vt->desc; + switch (desc & 0x7) { + case DESC_TYPE_STRING: + STRING_SIZE (skip_size, start); + return start + skip_size; + case DESC_TYPE_RUN_LENGTH: + OBJ_RUN_LEN_FOREACH_PTR (desc,start); + OBJ_RUN_LEN_SIZE (skip_size, desc, start); + g_assert (skip_size); + return start + skip_size; + case DESC_TYPE_ARRAY: + case DESC_TYPE_VECTOR: + OBJ_VECTOR_FOREACH_PTR (vt, start); + skip_size = safe_object_get_size ((MonoObject*)start); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + return start + skip_size; + case DESC_TYPE_SMALL_BITMAP: + OBJ_BITMAP_FOREACH_PTR (desc,start); + OBJ_BITMAP_SIZE (skip_size, desc, start); + return start + skip_size; + case DESC_TYPE_LARGE_BITMAP: + OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start); + skip_size = safe_object_get_size ((MonoObject*)start); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + return start + skip_size; + case DESC_TYPE_COMPLEX: + OBJ_COMPLEX_FOREACH_PTR (vt, start); + /* this is a complex object */ + skip_size = safe_object_get_size ((MonoObject*)start); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + return start + skip_size; + case DESC_TYPE_COMPLEX_ARR: + OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start); + /* this is an array of complex structs */ + skip_size = safe_object_get_size ((MonoObject*)start); + skip_size += (ALLOC_ALIGN - 1); + skip_size &= ~(ALLOC_ALIGN - 1); + return start + skip_size; + } + g_assert_not_reached (); + return NULL; } /* @@ -3832,7 +5187,6 @@ mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass * void mono_gc_collect (int generation) { - SgenThreadInfo *info; LOCK_GC; update_current_thread_stack (&generation); stop_world (); @@ -3851,11 +5205,18 @@ mono_gc_max_generation (void) return 1; } +int +mono_gc_collection_count (int generation) +{ + if (generation == 0) + return num_minor_gcs; + return num_major_gcs; +} + gint64 mono_gc_get_used_size (void) { gint64 tot = 0; - LOSObject *bigo; GCMemSection *section; LOCK_GC; tot = los_memory_usage; @@ -3899,7 +5260,7 @@ mono_object_is_alive (MonoObject* o) int mono_gc_get_generation (MonoObject *obj) { - if ((char*)obj >= nursery_start && (char*)obj < nursery_real_end) + if (ptr_in_nursery (obj)) return 0; return 1; } @@ -3933,12 +5294,23 @@ void* mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits) { if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) { - mword desc = ROOT_DESC_BITMAP | (bitmap [0] << ROOT_DESC_TYPE_SHIFT); - return (void*)desc; + return (void*)MAKE_ROOT_DESC (ROOT_DESC_BITMAP, bitmap [0]); + } else { + mword complex = alloc_complex_descriptor (bitmap, numbits + 1); + return (void*)MAKE_ROOT_DESC (ROOT_DESC_COMPLEX, complex); } - /* conservative scanning */ - DEBUG (3, fprintf (gc_debug_file, "Conservative root descr for size: %d\n", numbits)); - return NULL; +} + +void* +mono_gc_make_root_descr_user (MonoGCMarkFunc marker) +{ + void *descr; + + g_assert (user_descriptors_next < MAX_USER_DESCRIPTORS); + descr = (void*)MAKE_ROOT_DESC (ROOT_DESC_USER, (mword)user_descriptors_next); + user_descriptors [user_descriptors_next ++] = marker; + + return descr; } void* @@ -3967,7 +5339,7 @@ mono_gc_is_gc_thread (void) { gboolean result; LOCK_GC; - result = thread_info_lookup (pthread_self ()) != NULL; + result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL; 
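+	/* the thread hash table is shared state, so it is only inspected with the GC lock held */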
UNLOCK_GC; return result; } @@ -3976,30 +5348,45 @@ void mono_gc_base_init (void) { char *env; + char **opts, **ptr; struct sigaction sinfo; + LOCK_INIT (gc_mutex); LOCK_GC; if (gc_initialized) { UNLOCK_GC; return; } - gc_initialized = TRUE; + pagesize = mono_pagesize (); gc_debug_file = stderr; - /* format: MONO_GC_DEBUG=l[,filename] where l is a debug level 0-9 */ if ((env = getenv ("MONO_GC_DEBUG"))) { - if (env [0] >= '0' && env [0] <= '9') { - gc_debug_level = atoi (env); - env++; - } - if (env [0] == ',') - env++; - if (env [0]) { - char *rf = g_strdup_printf ("%s.%d", env, getpid ()); - gc_debug_file = fopen (rf, "wb"); - if (!gc_debug_file) - gc_debug_file = stderr; - g_free (rf); + opts = g_strsplit (env, ",", -1); + for (ptr = opts; ptr && *ptr; ptr ++) { + char *opt = *ptr; + if (opt [0] >= '0' && opt [0] <= '9') { + gc_debug_level = atoi (opt); + opt++; + if (opt [0] == ':') + opt++; + if (opt [0]) { + char *rf = g_strdup_printf ("%s.%d", opt, getpid ()); + gc_debug_file = fopen (rf, "wb"); + if (!gc_debug_file) + gc_debug_file = stderr; + g_free (rf); + } + } else if (!strcmp (opt, "collect-before-allocs")) { + collect_before_allocs = TRUE; + } else if (!strcmp (opt, "check-at-minor-collections")) { + consistency_check_at_minor_collection = TRUE; + } else { + fprintf (stderr, "Invalid format for the MONO_GC_DEBUG env variable: '%s'\n", env); + fprintf (stderr, "The format is: MONO_GC_DEBUG=[l[:filename]|