X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fsgen%2Fsgen-marksweep.c;h=3f14d5ee7acab33c1d35ccb0b84b3640bc9eb7e8;hb=HEAD;hp=3b006682346fec47f4f4c7d0cd4cd7c2cf6e1aef;hpb=e382aa00b577f3908b4d822984f1e4b343741a6b;p=mono.git diff --git a/mono/sgen/sgen-marksweep.c b/mono/sgen/sgen-marksweep.c index 3b006682346..3f14d5ee7ac 100644 --- a/mono/sgen/sgen-marksweep.c +++ b/mono/sgen/sgen-marksweep.c @@ -1,5 +1,6 @@ -/* - * sgen-marksweep.c: The Mark & Sweep major collector. +/** + * \file + * The Mark & Sweep major collector. * * Author: * Mark Probst @@ -31,16 +32,17 @@ #include "mono/sgen/sgen-thread-pool.h" #include "mono/sgen/sgen-client.h" #include "mono/utils/mono-memory-model.h" +#include "mono/utils/mono-proclib.h" -#if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT) -#define MS_BLOCK_SIZE ARCH_MIN_MS_BLOCK_SIZE -#define MS_BLOCK_SIZE_SHIFT ARCH_MIN_MS_BLOCK_SIZE_SHIFT -#else -#define MS_BLOCK_SIZE_SHIFT 14 /* INT FASTENABLE */ -#define MS_BLOCK_SIZE (1 << MS_BLOCK_SIZE_SHIFT) -#endif -#define MAJOR_SECTION_SIZE MS_BLOCK_SIZE -#define CARDS_PER_BLOCK (MS_BLOCK_SIZE / CARD_SIZE_IN_BYTES) +static int ms_block_size; + +/* + * Blocks must be at least this size, meaning that if we detect a + * page size lower than this, we'll use this instead. + */ +#define MS_BLOCK_SIZE_MIN (1024 * 16) + +#define CARDS_PER_BLOCK (ms_block_size / CARD_SIZE_IN_BYTES) /* * Don't allocate single blocks, but alloc a contingent of this many @@ -48,16 +50,22 @@ */ #define MS_BLOCK_ALLOC_NUM 32 +#define MS_NUM_MARK_WORDS ((ms_block_size / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8)) + +/* + * Use this instead of sizeof (MSBlockInfo) since the mark_words + * array size depends on page size at runtime. + */ +#define SIZEOF_MS_BLOCK_INFO (sizeof (MSBlockInfo) + sizeof (guint32) * (MS_NUM_MARK_WORDS - MONO_ZERO_LEN_ARRAY)) + /* * Number of bytes before the first object in a block. At the start * of a block is the MSBlockHeader, then opional padding, then come - * the objects, so this must be >= sizeof (MSBlockHeader). + * the objects, so this must be >= SIZEOF_MS_BLOCK_INFO. 
*/ -#define MS_BLOCK_SKIP ((sizeof (MSBlockHeader) + 15) & ~15) +#define MS_BLOCK_SKIP ((SIZEOF_MS_BLOCK_INFO + 15) & ~15) -#define MS_BLOCK_FREE (MS_BLOCK_SIZE - MS_BLOCK_SKIP) - -#define MS_NUM_MARK_WORDS (MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8) +#define MS_BLOCK_FREE (ms_block_size - MS_BLOCK_SKIP) /* * Blocks progress from one state to the next: @@ -107,14 +115,14 @@ struct _MSBlockInfo { void ** volatile free_list; MSBlockInfo * volatile next_free; guint8 * volatile cardtable_mod_union; - guint32 mark_words [MS_NUM_MARK_WORDS]; + guint32 mark_words [MONO_ZERO_LEN_ARRAY]; }; #define MS_BLOCK_FOR_BLOCK_INFO(b) ((char*)(b)) #define MS_BLOCK_OBJ(b,i) ((GCObject *)(MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (b)->obj_size * (i))) #define MS_BLOCK_OBJ_FOR_SIZE(b,i,obj_size) (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (obj_size) * (i)) -#define MS_BLOCK_DATA_FOR_OBJ(o) ((char*)((mword)(o) & ~(mword)(MS_BLOCK_SIZE - 1))) +#define MS_BLOCK_DATA_FOR_OBJ(o) ((char*)((mword)(o) & ~(mword)(ms_block_size - 1))) typedef struct { MSBlockInfo info; @@ -149,7 +157,7 @@ typedef struct { } while (0) -#define MS_OBJ_ALLOCED(o,b) (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + MS_BLOCK_SIZE)) +#define MS_OBJ_ALLOCED(o,b) (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + ms_block_size)) #define MS_BLOCK_OBJ_SIZE_FACTOR (pow (2.0, 1.0 / 3)) @@ -181,10 +189,17 @@ enum { SWEEP_STATE_COMPACTING }; +typedef enum { + SGEN_SWEEP_SERIAL = FALSE, + SGEN_SWEEP_CONCURRENT = TRUE, +} SgenSweepMode; + static volatile int sweep_state = SWEEP_STATE_SWEPT; static gboolean concurrent_mark; -static gboolean concurrent_sweep = TRUE; +static gboolean concurrent_sweep = DEFAULT_SWEEP_MODE; + +int sweep_pool_context = -1; #define BLOCK_IS_TAGGED_HAS_REFERENCES(bl) SGEN_POINTER_IS_TAGGED_1 ((bl)) #define BLOCK_TAG_HAS_REFERENCES(bl) SGEN_POINTER_TAG_1 ((bl)) @@ -202,6 +217,7 @@ static SgenArrayList allocated_blocks = SGEN_ARRAY_LIST_INIT (NULL, sgen_array_l /* non-allocated block free-list */ static void *empty_blocks = NULL; static size_t num_empty_blocks = 0; +static gboolean compact_blocks = FALSE; /* * We can iterate the block list also while sweep is in progress but we @@ -224,6 +240,16 @@ static size_t num_empty_blocks = 0; (bl) = BLOCK_UNTAG ((bl)); #define END_FOREACH_BLOCK_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT; } +#define FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK(bl,begin,end,index,hr) { \ + volatile gpointer *slot; \ + SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE (&allocated_blocks, begin, end, slot, index) { \ + (bl) = (MSBlockInfo *) (*slot); \ + if (!(bl)) \ + continue; \ + (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl)); \ + (bl) = BLOCK_UNTAG ((bl)); +#define END_FOREACH_BLOCK_RANGE_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT_RANGE; } + static volatile size_t num_major_sections = 0; /* * One free block list for each block object size. We add and remove blocks from these @@ -294,6 +320,7 @@ ms_find_block_obj_size_index (size_t size) #define FREE_BLOCKS_FROM(lists,p,r) (lists [((p) ? MS_BLOCK_FLAG_PINNED : 0) | ((r) ? MS_BLOCK_FLAG_REFS : 0)]) #define FREE_BLOCKS(p,r) (FREE_BLOCKS_FROM (free_block_lists, (p), (r))) +#define FREE_BLOCKS_LOCAL(p,r) (FREE_BLOCKS_FROM (((MSBlockInfo***)mono_native_tls_get_value (worker_block_free_list_key)), (p), (r))) #define MS_BLOCK_OBJ_SIZE_INDEX(s) \ (((s)+7)>>3 < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES ? 
\ @@ -301,7 +328,7 @@ ms_find_block_obj_size_index (size_t size) ms_find_block_obj_size_index ((s))) static void* -major_alloc_heap (mword nursery_size, mword nursery_align, int the_nursery_bits) +major_alloc_heap (mword nursery_size, mword nursery_align) { char *start; if (nursery_align) @@ -315,7 +342,7 @@ major_alloc_heap (mword nursery_size, mword nursery_align, int the_nursery_bits) static void update_heap_boundaries_for_block (MSBlockInfo *block) { - sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE); + sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size); } /* @@ -337,7 +364,7 @@ ms_get_empty_block (void) */ int alloc_num = MS_BLOCK_ALLOC_NUM; for (;;) { - p = (char *)sgen_alloc_os_memory_aligned (MS_BLOCK_SIZE * alloc_num, MS_BLOCK_SIZE, + p = (char *)sgen_alloc_os_memory_aligned (ms_block_size * alloc_num, ms_block_size, (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE), alloc_num == 1 ? "major heap section" : NULL, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); if (p) @@ -356,7 +383,7 @@ ms_get_empty_block (void) empty = empty_blocks; *(void**)block = empty; } while (SGEN_CAS_PTR ((gpointer*)&empty_blocks, block, empty) != empty); - p += MS_BLOCK_SIZE; + p += ms_block_size; } SGEN_ATOMIC_ADD_P (num_empty_blocks, alloc_num); @@ -380,7 +407,7 @@ ms_get_empty_block (void) *(void**)block = NULL; - g_assert (!((mword)block & (MS_BLOCK_SIZE - 1))); + g_assert (!((mword)block & (ms_block_size - 1))); return block; } @@ -395,10 +422,10 @@ ms_free_block (MSBlockInfo *info) void *empty; char *block = MS_BLOCK_FOR_BLOCK_INFO (info); - sgen_memgov_release_space (MS_BLOCK_SIZE, SPACE_MAJOR); + sgen_memgov_release_space (ms_block_size, SPACE_MAJOR); if (info->cardtable_mod_union) - sgen_card_table_free_mod_union (info->cardtable_mod_union, block, MS_BLOCK_SIZE); - memset (block, 0, MS_BLOCK_SIZE); + sgen_card_table_free_mod_union (info->cardtable_mod_union, block, ms_block_size); + memset (block, 0, ms_block_size); do { empty = empty_blocks; @@ -407,7 +434,7 @@ ms_free_block (MSBlockInfo *info) SGEN_ATOMIC_ADD_P (num_empty_blocks, 1); - binary_protocol_block_free (block, MS_BLOCK_SIZE); + binary_protocol_block_free (block, ms_block_size); } static gboolean @@ -522,7 +549,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references) char *obj_start; int i; - if (!sgen_memgov_try_alloc_space (MS_BLOCK_SIZE, SPACE_MAJOR)) + if (!sgen_memgov_try_alloc_space (ms_block_size, SPACE_MAJOR)) return FALSE; info = (MSBlockInfo*)ms_get_empty_block (); @@ -548,7 +575,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references) update_heap_boundaries_for_block (info); - binary_protocol_block_alloc (info, MS_BLOCK_SIZE); + binary_protocol_block_alloc (info, ms_block_size); /* build free list */ obj_start = MS_BLOCK_FOR_BLOCK_INFO (info) + MS_BLOCK_SKIP; @@ -576,7 +603,7 @@ ptr_is_in_major_block (char *ptr, char **start, gboolean *pinned) MSBlockInfo *block; FOREACH_BLOCK_NO_LOCK (block) { - if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) { + if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) { int count = MS_BLOCK_FREE / block->obj_size; int i; @@ -692,6 +719,55 @@ major_alloc_object (GCVTable vtable, size_t size, gboolean has_references) return alloc_obj (vtable, size, FALSE, has_references); } +/* + * This can only be called by sgen 
workers. While this is called we assume + * that no other thread is accessing the block free lists. The world should + * be stopped and the gc thread should be waiting for workers to finish. + */ +static GCObject* +major_alloc_object_par (GCVTable vtable, size_t size, gboolean has_references) +{ + int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size); + MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (FALSE, has_references); + MSBlockInfo **free_blocks_local = FREE_BLOCKS_LOCAL (FALSE, has_references); + void *obj; + + if (free_blocks_local [size_index]) { +get_slot: + obj = unlink_slot_from_free_list_uncontested (free_blocks_local, size_index); + } else { + MSBlockInfo *block; +get_block: + block = free_blocks [size_index]; + if (!block) { + if (G_UNLIKELY (!ms_alloc_block (size_index, FALSE, has_references))) + return NULL; + goto get_block; + } else { + MSBlockInfo *next_free = block->next_free; + /* + * Once a block is removed from the main list, it cannot return on the list until + * all the workers are finished and sweep is starting. This means we don't need + * to account for ABA problems. + */ + if (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], next_free, block) != block) + goto get_block; + block->next_free = free_blocks_local [size_index]; + free_blocks_local [size_index] = block; + + goto get_slot; + } + } + + /* FIXME: assumes object layout */ + *(GCVTable*)obj = vtable; + + /* FIXME is it worth CAS-ing here */ + total_allocated_major += block_obj_sizes [size_index]; + + return (GCObject *)obj; +} + /* * We're not freeing the block if it's empty. We leave that work for * the next major collection. @@ -867,7 +943,7 @@ major_finish_sweep_checking (void) wait: job = sweep_job; if (job) - sgen_thread_pool_job_wait (job); + sgen_thread_pool_job_wait (sweep_pool_context, job); SGEN_ASSERT (0, !sweep_job, "Why did the sweep job not null itself?"); SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "How is the sweep job done but we're not swept?"); } @@ -912,7 +988,7 @@ major_is_valid_object (char *object) int idx; char *obj; - if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= object)) + if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= object)) continue; idx = MS_BLOCK_OBJ_INDEX (object, block); @@ -939,7 +1015,7 @@ major_describe_pointer (char *ptr) int w, b; gboolean marked; - if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= ptr)) + if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= ptr)) continue; SGEN_LOG (0, "major-ptr (block %p sz %d pin %d ref %d)\n", @@ -1040,13 +1116,13 @@ get_cardtable_mod_union_for_block (MSBlockInfo *block, gboolean allocate) return mod_union; else if (!allocate) return NULL; - mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); other = (guint8 *)SGEN_CAS_PTR ((gpointer*)&block->cardtable_mod_union, mod_union, NULL); if (!other) { SGEN_ASSERT (0, block->cardtable_mod_union == mod_union, "Why did CAS not replace?"); return mod_union; } - sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); return other; } @@ -1094,7 +1170,7 @@ major_block_is_evacuating (MSBlockInfo *block) 
if (!MS_MARK_BIT ((block), __word, __bit)) { \ MS_SET_MARK_BIT ((block), __word, __bit); \ if (sgen_gc_descr_has_references (desc)) \ - GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \ + GRAY_OBJECT_ENQUEUE_SERIAL ((queue), (obj), (desc)); \ binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \ INC_NUM_MAJOR_OBJECTS_MARKED (); \ } \ @@ -1107,7 +1183,7 @@ major_block_is_evacuating (MSBlockInfo *block) MS_SET_MARK_BIT_PAR ((block), __word, __bit, first); \ if (first) { \ if (sgen_gc_descr_has_references (desc)) \ - GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \ + GRAY_OBJECT_ENQUEUE_PARALLEL ((queue), (obj), (desc)); \ binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \ INC_NUM_MAJOR_OBJECTS_MARKED (); \ } \ @@ -1128,6 +1204,7 @@ pin_major_object (GCObject *obj, SgenGrayQueue *queue) MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue); } +#define COPY_OR_MARK_PARALLEL #include "sgen-major-copy-object.h" static long long @@ -1148,7 +1225,7 @@ major_get_and_reset_num_major_objects_marked (void) #endif /* gcc 4.2.1 from xcode4 crashes on sgen_card_table_get_card_address () when this is enabled */ -#if defined(PLATFORM_MACOSX) +#if defined(HOST_DARWIN) #if MONO_GNUC_VERSION <= 40300 #undef PREFETCH_CARDS #endif @@ -1178,6 +1255,12 @@ static guint64 stat_drain_loops; #define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_no_evacuation #include "sgen-marksweep-drain-gray-stack.h" +#define COPY_OR_MARK_PARALLEL +#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_par_no_evacuation +#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_par_no_evacuation +#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_par_no_evacuation +#include "sgen-marksweep-drain-gray-stack.h" + #define COPY_OR_MARK_WITH_EVACUATION #define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_with_evacuation #define SCAN_OBJECT_FUNCTION_NAME major_scan_object_with_evacuation @@ -1186,6 +1269,15 @@ static guint64 stat_drain_loops; #define SCAN_PTR_FIELD_FUNCTION_NAME major_scan_ptr_field_with_evacuation #include "sgen-marksweep-drain-gray-stack.h" +#define COPY_OR_MARK_PARALLEL +#define COPY_OR_MARK_WITH_EVACUATION +#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_par_with_evacuation +#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_par_with_evacuation +#define SCAN_VTYPE_FUNCTION_NAME major_scan_vtype_par_with_evacuation +#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_par_with_evacuation +#define SCAN_PTR_FIELD_FUNCTION_NAME major_scan_ptr_field_par_with_evacuation +#include "sgen-marksweep-drain-gray-stack.h" + #define COPY_OR_MARK_CONCURRENT #define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_concurrent_no_evacuation #define SCAN_OBJECT_FUNCTION_NAME major_scan_object_concurrent_no_evacuation @@ -1238,6 +1330,15 @@ drain_gray_stack (SgenGrayQueue *queue) return drain_gray_stack_no_evacuation (queue); } +static gboolean +drain_gray_stack_par (SgenGrayQueue *queue) +{ + if (major_is_evacuating ()) + return drain_gray_stack_par_with_evacuation (queue); + else + return drain_gray_stack_par_no_evacuation (queue); +} + static gboolean drain_gray_stack_concurrent (SgenGrayQueue *queue) { @@ -1280,6 +1381,12 @@ major_copy_or_mark_object_concurrent_finish_canonical (GCObject **ptr, SgenGrayQ major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue); } +static void +major_copy_or_mark_object_concurrent_par_finish_canonical (GCObject **ptr, SgenGrayQueue *queue) 
+{ + major_copy_or_mark_object_par_with_evacuation (ptr, *ptr, queue); +} + static void mark_pinned_objects_in_block (MSBlockInfo *block, size_t first_entry, size_t last_entry, SgenGrayQueue *queue) { @@ -1318,10 +1425,10 @@ static inline void sweep_block_for_size (MSBlockInfo *block, int count, int obj_size) { int obj_index; + void *obj = MS_BLOCK_OBJ_FOR_SIZE (block, 0, obj_size); - for (obj_index = 0; obj_index < count; ++obj_index) { + for (obj_index = 0; obj_index < count; ++obj_index, obj = (void*)((mword)obj + obj_size)) { int word, bit; - void *obj = MS_BLOCK_OBJ_FOR_SIZE (block, obj_index, obj_size); MS_CALC_MARK_BIT (word, bit, obj); if (MS_MARK_BIT (block, word, bit)) { @@ -1350,7 +1457,7 @@ try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state); gboolean success = old_state == expected_state; if (success) - binary_protocol_block_set_state (block, MS_BLOCK_SIZE, old_state, new_state); + binary_protocol_block_set_state (block, ms_block_size, old_state, new_state); return success; } @@ -1359,7 +1466,7 @@ set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state) { SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set"); block->state = new_state; - binary_protocol_block_set_state (block, MS_BLOCK_SIZE, expected_state, new_state); + binary_protocol_block_set_state (block, ms_block_size, expected_state, new_state); } /* @@ -1473,6 +1580,25 @@ sgen_worker_clear_free_block_lists (WorkerData *worker) } } +static void +sgen_worker_clear_free_block_lists_evac (WorkerData *worker) +{ + int i, j; + + if (!worker->free_block_lists) + return; + + for (i = 0; i < MS_BLOCK_TYPE_MAX; i++) { + for (j = 0; j < num_block_obj_sizes; j++) { + if (((MSBlockInfo***) worker->free_block_lists) [i][j]) + SGEN_ASSERT (0, !((MSBlockInfo***) worker->free_block_lists) [i][j]->next_free, "Why do we have linked free blocks on the workers"); + + if (evacuate_block_obj_sizes [j]) + ((MSBlockInfo***) worker->free_block_lists) [i][j] = NULL; + } + } +} + static void sweep_start (void) { @@ -1489,7 +1615,10 @@ sweep_start (void) free_blocks [j] = NULL; } - sgen_workers_foreach (sgen_worker_clear_free_block_lists); + sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists); + sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists); + + compact_blocks = TRUE; } static void sweep_finish (void); @@ -1705,7 +1834,7 @@ sweep_job_func (void *thread_data_untyped, SgenThreadPoolJob *job) */ if (concurrent_sweep && lazy_sweep) { sweep_blocks_job = sgen_thread_pool_job_alloc ("sweep_blocks", sweep_blocks_job_func, sizeof (SgenThreadPoolJob)); - sgen_thread_pool_job_enqueue (sweep_blocks_job); + sgen_thread_pool_job_enqueue (sweep_pool_context, sweep_blocks_job); } sweep_finish (); @@ -1754,7 +1883,7 @@ major_sweep (void) SGEN_ASSERT (0, !sweep_job, "We haven't finished the last sweep?"); if (concurrent_sweep) { sweep_job = sgen_thread_pool_job_alloc ("sweep", sweep_job_func, sizeof (SgenThreadPoolJob)); - sgen_thread_pool_job_enqueue (sweep_job); + sgen_thread_pool_job_enqueue (sweep_pool_context, sweep_job); } else { sweep_job_func (NULL, NULL); } @@ -1863,6 +1992,18 @@ major_start_nursery_collection (void) #endif old_num_major_sections = num_major_sections; + + /* Compact the block list if it hasn't been compacted in a while and nobody is using it */ + if (compact_blocks && !sweep_in_progress () && !sweep_blocks_job && 
!sgen_concurrent_collection_in_progress ()) { + /* + * We support null elements in the array but do regular compaction to avoid + * excessive traversal of the array and to facilitate splitting into well + * balanced sections for parallel modes. We compact as soon as possible after + * sweep. + */ + sgen_array_list_remove_nulls (&allocated_blocks); + compact_blocks = FALSE; + } } static void @@ -1956,6 +2097,10 @@ major_start_major_collection (void) sgen_evacuation_freelist_blocks (&free_block_lists [MS_BLOCK_FLAG_REFS][i], i); } + /* We expect workers to have very few blocks on the freelist, just evacuate them */ + sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists_evac); + sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists_evac); + if (lazy_sweep && concurrent_sweep) { /* * sweep_blocks_job is created before sweep_finish, which we wait for above @@ -1964,7 +2109,7 @@ major_start_major_collection (void) */ SgenThreadPoolJob *job = sweep_blocks_job; if (job) - sgen_thread_pool_job_wait (job); + sgen_thread_pool_job_wait (sweep_pool_context, job); } if (lazy_sweep && !concurrent_sweep) @@ -2022,7 +2167,7 @@ major_free_swept_blocks (size_t section_reserve) { SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks"); -#ifdef TARGET_WIN32 +#if defined(HOST_WIN32) || defined(HOST_ORBIS) || defined (HOST_WASM) /* * sgen_free_os_memory () asserts in mono_vfree () because windows doesn't like freeing the middle of * a VirtualAlloc ()-ed block. @@ -2088,7 +2233,7 @@ major_free_swept_blocks (size_t section_reserve) SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong"); - if ((char*)block != ((char*)empty_block_arr [d-1]) + MS_BLOCK_SIZE) { + if ((char*)block != ((char*)empty_block_arr [d-1]) + ms_block_size) { first = d; continue; } @@ -2102,7 +2247,7 @@ major_free_swept_blocks (size_t section_reserve) * we're iterating. 
*/ int j; - sgen_free_os_memory (empty_block_arr [first], MS_BLOCK_SIZE * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); + sgen_free_os_memory (empty_block_arr [first], ms_block_size * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); for (j = first; j <= d; ++j) empty_block_arr [j] = NULL; dest = first; @@ -2152,7 +2297,7 @@ major_free_swept_blocks (size_t section_reserve) while (num_empty_blocks > section_reserve) { void *next = *(void**)empty_blocks; - sgen_free_os_memory (empty_blocks, MS_BLOCK_SIZE, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); + sgen_free_os_memory (empty_blocks, ms_block_size, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); empty_blocks = next; /* * Needs not be atomic because this is running @@ -2173,7 +2318,7 @@ major_pin_objects (SgenGrayQueue *queue) FOREACH_BLOCK_NO_LOCK (block) { size_t first_entry, last_entry; SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning."); - sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE, + sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size, &first_entry, &last_entry); mark_pinned_objects_in_block (block, first_entry, last_entry, queue); } END_FOREACH_BLOCK_NO_LOCK; @@ -2229,7 +2374,7 @@ static size_t get_bytes_survived_last_sweep (void) { SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Can only query unswept sections after sweep"); - return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * MS_BLOCK_SIZE; + return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * ms_block_size; } static gboolean @@ -2283,7 +2428,7 @@ major_iterate_block_ranges (sgen_cardtable_block_callback callback) FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) { if (has_references) - callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); } END_FOREACH_BLOCK_NO_LOCK; } @@ -2296,7 +2441,7 @@ major_iterate_live_block_ranges (sgen_cardtable_block_callback callback) major_finish_sweep_checking (); FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) { if (has_references) - callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); } END_FOREACH_BLOCK_NO_LOCK; } @@ -2317,7 +2462,7 @@ static guint8* initial_skip_card (guint8 *card_data) { mword *cards = (mword*)card_data; - mword card; + mword card = 0; int i; for (i = 0; i < CARD_WORDS_PER_BLOCK; ++i) { card = cards [i]; @@ -2345,17 +2490,22 @@ initial_skip_card (guint8 *card_data) #define MS_BLOCK_OBJ_INDEX_FAST(o,b,os) (((char*)(o) - ((b) + MS_BLOCK_SKIP)) / (os)) #define MS_BLOCK_OBJ_FAST(b,os,i) ((b) + MS_BLOCK_SKIP + (os) * (i)) -#define MS_OBJ_ALLOCED_FAST(o,b) (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + MS_BLOCK_SIZE)) +#define MS_OBJ_ALLOCED_FAST(o,b) (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + ms_block_size)) static void scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, ScanCopyContext ctx) { SgenGrayQueue *queue = ctx.queue; ScanObjectFunc scan_func = ctx.ops->scan_object; + /* + * FIXME: On systems with very large pages, we allocate fairly large + * arrays on the stack here. This shouldn't be a problem once block + * size is no longer required to be a multiple of the system page size. 
+ */ #ifndef SGEN_HAVE_OVERLAPPING_CARDS - guint8 cards_copy [CARDS_PER_BLOCK]; + guint8 *cards_copy = alloca (sizeof (guint8) * CARDS_PER_BLOCK); #endif - guint8 cards_preclean [CARDS_PER_BLOCK]; + guint8 *cards_preclean = alloca (sizeof (guint8) * CARDS_PER_BLOCK); gboolean small_objects; int block_obj_size; char *block_start; @@ -2483,10 +2633,24 @@ scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, Scan } static void -major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count) +major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count, int block_count) { MSBlockInfo *block; gboolean has_references, was_sweeping, skip_scan; + int first_block, last_block, index; + + /* + * The last_block's index is at least (num_major_sections - 1) since we + * can have nulls in the allocated_blocks list. The last worker will + * scan the left-overs of the list. We expect few null entries in the + * allocated_blocks list, therefore using num_major_sections for computing + * block_count shouldn't affect work distribution. + */ + first_block = block_count * job_index; + if (job_index == job_split_count - 1) + last_block = allocated_blocks.next_slot; + else + last_block = block_count * (job_index + 1); if (!concurrent_mark) g_assert (scan_type == CARDTABLE_SCAN_GLOBAL); @@ -2496,11 +2660,9 @@ major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job was_sweeping = sweep_in_progress (); binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION); - FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) { - if (__index % job_split_count != job_index) - continue; + FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK (block, first_block, last_block, index, has_references) { #ifdef PREFETCH_CARDS - int prefetch_index = __index + 6 * job_split_count; + int prefetch_index = index + 6; if (prefetch_index < allocated_blocks.next_slot) { MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index)); PREFETCH_READ (prefetch_block); @@ -2511,7 +2673,6 @@ major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job } } #endif - if (!has_references) continue; skip_scan = FALSE; @@ -2535,16 +2696,16 @@ major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job * sweep start since we are in a nursery collection. 
Also avoid CAS-ing */ if (sweep_in_progress ()) { - skip_scan = !ensure_block_is_checked_for_sweeping (__index, TRUE, NULL); + skip_scan = !ensure_block_is_checked_for_sweeping (index, TRUE, NULL); } else if (was_sweeping) { /* Recheck in case sweep finished after dereferencing the slot */ - skip_scan = *sgen_array_list_get_slot (&allocated_blocks, __index) == 0; + skip_scan = *sgen_array_list_get_slot (&allocated_blocks, index) == 0; } } } if (!skip_scan) scan_card_table_for_block (block, scan_type, ctx); - } END_FOREACH_BLOCK_NO_LOCK; + } END_FOREACH_BLOCK_RANGE_NO_LOCK; binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION); } @@ -2598,7 +2759,7 @@ update_cardtable_mod_union (void) if (has_dirty_cards) { size_t num_cards; guint8 *mod_union = get_cardtable_mod_union_for_block (block, TRUE); - sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE, &num_cards); + sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size, &num_cards); SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong"); } } END_FOREACH_BLOCK_NO_LOCK; @@ -2610,21 +2771,36 @@ static void post_param_init (SgenMajorCollector *collector) { collector->sweeps_lazily = lazy_sweep; - collector->needs_thread_pool = concurrent_mark || concurrent_sweep; } -/* We are guaranteed to be called by the worker in question */ +/* + * We are guaranteed to be called by the worker in question. + * This provides initialization for threads that plan to do + * parallel object allocation. We need to store these lists + * in additional data structures so we can traverse them + * at major/sweep start. + */ static void -sgen_worker_init_callback (gpointer worker_untyped) +sgen_init_block_free_lists (gpointer *list_p) { int i; - WorkerData *worker = (WorkerData*) worker_untyped; - MSBlockInfo ***worker_free_blocks = (MSBlockInfo ***) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo**) * MS_BLOCK_TYPE_MAX, INTERNAL_MEM_MS_TABLES, TRUE); + MSBlockInfo ***worker_free_blocks = (MSBlockInfo ***) mono_native_tls_get_value (worker_block_free_list_key); + + /* + * For simplification, a worker thread uses the same free block lists, + * regardless of the context it is part of (major/minor). 
+ */ + if (worker_free_blocks) { + *list_p = (gpointer)worker_free_blocks; + return; + } + + worker_free_blocks = (MSBlockInfo ***) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo**) * MS_BLOCK_TYPE_MAX, INTERNAL_MEM_MS_TABLES, TRUE); for (i = 0; i < MS_BLOCK_TYPE_MAX; i++) worker_free_blocks [i] = (MSBlockInfo **) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE); - worker->free_block_lists = worker_free_blocks; + *list_p = (gpointer)worker_free_blocks; mono_native_tls_set_value (worker_block_free_list_key, worker_free_blocks); } @@ -2634,7 +2810,15 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr { int i; - sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, sizeof (MSBlockInfo)); + ms_block_size = mono_pagesize (); + + if (ms_block_size < MS_BLOCK_SIZE_MIN) + ms_block_size = MS_BLOCK_SIZE_MIN; + + sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, SIZEOF_MS_BLOCK_INFO); + + if (mono_cpu_count () <= 1) + is_parallel = FALSE; num_block_obj_sizes = ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, NULL); block_obj_sizes = (int *)sgen_alloc_internal_dynamic (sizeof (int) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE); @@ -2665,6 +2849,10 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i) g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i)); + /* We can do this because we always init the minor before the major */ + if (is_parallel || sgen_get_minor_collector ()->is_parallel) + mono_native_tls_alloc (&worker_block_free_list_key, NULL); + mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced); mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed); mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept); @@ -2673,12 +2861,11 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_individual); mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced_less_ideal); - collector->section_size = MAJOR_SECTION_SIZE; + collector->section_size = ms_block_size; concurrent_mark = is_concurrent; collector->is_concurrent = is_concurrent; collector->is_parallel = is_parallel; - collector->needs_thread_pool = is_concurrent || concurrent_sweep; collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked; collector->supports_cardtable = TRUE; @@ -2688,6 +2875,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr collector->alloc_degraded = major_alloc_degraded; collector->alloc_object = major_alloc_object; + collector->alloc_object_par = major_alloc_object_par; collector->free_pinned_object = free_pinned_object; collector->iterate_objects = major_iterate_objects; collector->free_non_pinned_object = major_free_non_pinned_object; @@ -2723,9 +2911,11 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr collector->is_valid_object = major_is_valid_object; collector->describe_pointer = major_describe_pointer; collector->count_cards = major_count_cards; + 
collector->init_block_free_lists = sgen_init_block_free_lists; collector->major_ops_serial.copy_or_mark_object = major_copy_or_mark_object_canonical; collector->major_ops_serial.scan_object = major_scan_object_with_evacuation; + collector->major_ops_serial.scan_ptr_field = major_scan_ptr_field_with_evacuation; collector->major_ops_serial.drain_gray_stack = drain_gray_stack; if (is_concurrent) { collector->major_ops_concurrent_start.copy_or_mark_object = major_copy_or_mark_object_concurrent_canonical; @@ -2747,12 +2937,11 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr collector->major_ops_conc_par_start.scan_ptr_field = major_scan_ptr_field_concurrent_par_with_evacuation; collector->major_ops_conc_par_start.drain_gray_stack = drain_gray_stack_concurrent_par; - /* FIXME use parallel obj ops */ - collector->major_ops_conc_par_finish = collector->major_ops_concurrent_finish; - - collector->worker_init_cb = sgen_worker_init_callback; - - mono_native_tls_alloc (&worker_block_free_list_key, NULL); + collector->major_ops_conc_par_finish.copy_or_mark_object = major_copy_or_mark_object_concurrent_par_finish_canonical; + collector->major_ops_conc_par_finish.scan_object = major_scan_object_par_with_evacuation; + collector->major_ops_conc_par_finish.scan_vtype = major_scan_vtype_par_with_evacuation; + collector->major_ops_conc_par_finish.scan_ptr_field = major_scan_ptr_field_par_with_evacuation; + collector->major_ops_conc_par_finish.drain_gray_stack = drain_gray_stack_par; } } @@ -2781,7 +2970,15 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr SGEN_ASSERT (0, SGEN_MAX_SMALL_OBJ_SIZE <= MS_BLOCK_FREE / 2, "MAX_SMALL_OBJ_SIZE must be at most MS_BLOCK_FREE / 2"); /*cardtable requires major pages to be 8 cards aligned*/ - g_assert ((MS_BLOCK_SIZE % (8 * CARD_SIZE_IN_BYTES)) == 0); + g_assert ((ms_block_size % (8 * CARD_SIZE_IN_BYTES)) == 0); + + if (is_concurrent && is_parallel) + sgen_workers_create_context (GENERATION_OLD, mono_cpu_count ()); + else if (is_concurrent) + sgen_workers_create_context (GENERATION_OLD, 1); + + if (concurrent_sweep) + sweep_pool_context = sgen_thread_pool_create_context (1, NULL, NULL, NULL, NULL, NULL); } void
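
The central change in this patch is that the block size stops being the compile-time constant MS_BLOCK_SIZE and is instead derived from the OS page size at initialization, which in turn makes the mark_words array at the tail of MSBlockInfo runtime-sized (hence MONO_ZERO_LEN_ARRAY and SIZEOF_MS_BLOCK_INFO replacing sizeof (MSBlockInfo)). Below is a minimal standalone sketch of that sizing scheme, not part of the patch: the struct layout is reduced to one field, SGEN_ALLOC_ALIGN is a stand-in value, and sysconf () substitutes for the mono_pagesize () call the patch actually uses.

#include <stdio.h>
#include <unistd.h>

#define SGEN_ALLOC_ALIGN 8              /* stand-in for sgen's allocation alignment */
#define MS_BLOCK_SIZE_MIN (1024 * 16)   /* same floor as the patch */
#define MONO_ZERO_LEN_ARRAY 1           /* portable zero-length-array idiom */

static int ms_block_size;

typedef struct {
	int obj_size;
	/* other header fields elided; mark_words must stay last */
	unsigned int mark_words [MONO_ZERO_LEN_ARRAY];
} MSBlockInfo;

/* One mark bit per SGEN_ALLOC_ALIGN bytes of block, rounded up to whole words. */
#define MS_NUM_MARK_WORDS \
	((ms_block_size / SGEN_ALLOC_ALIGN + sizeof (unsigned int) * 8 - 1) / (sizeof (unsigned int) * 8))

/* Effective struct size: fixed header plus the runtime-sized mark_words tail. */
#define SIZEOF_MS_BLOCK_INFO \
	(sizeof (MSBlockInfo) + sizeof (unsigned int) * (MS_NUM_MARK_WORDS - MONO_ZERO_LEN_ARRAY))

int
main (void)
{
	ms_block_size = (int) sysconf (_SC_PAGESIZE);   /* the patch uses mono_pagesize () */
	if (ms_block_size < MS_BLOCK_SIZE_MIN)
		ms_block_size = MS_BLOCK_SIZE_MIN;

	printf ("block size:      %d bytes\n", ms_block_size);
	printf ("mark words:      %zu per block\n", (size_t) MS_NUM_MARK_WORDS);
	printf ("block info size: %zu bytes\n", (size_t) SIZEOF_MS_BLOCK_INFO);
	return 0;
}

On a typical 4 KiB-page system the MS_BLOCK_SIZE_MIN floor applies and blocks stay at 16 KiB, matching the old compile-time default of 1 << 14; on systems with larger pages, blocks (and their mark bitmaps) grow with the page.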
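
The other subtle addition is major_alloc_object_par, where GC workers allocate in parallel: each worker first consumes its thread-local free-block cache and only falls back to CAS-popping a block off the shared free list. The following standalone sketch shows just that block-acquisition step under simplified assumptions: Block and C11 atomics stand in for MSBlockInfo and SGEN_CAS_PTR, and the per-block slot allocation (unlink_slot_from_free_list_uncontested in the patch) is omitted. No ABA counter is needed, for the reason the patch's comment gives: a block popped from the shared list cannot return to it until all workers have finished and sweep is starting.

#include <stdatomic.h>
#include <stddef.h>

typedef struct Block {
	struct Block *next_free;
	/* ... free-slot list, mark bits, etc. ... */
} Block;

static _Atomic (Block *) shared_free_list;      /* global list, CAS-popped */
static _Thread_local Block *local_free_list;    /* per-worker cache */

static Block *
grab_block (void)
{
	if (local_free_list)
		return local_free_list;         /* fast path, no atomics;
						   slot allocation from the
						   cached block is omitted */

	for (;;) {
		Block *block = atomic_load (&shared_free_list);
		if (!block)
			return NULL;            /* caller must allocate a fresh block */
		Block *next = block->next_free;
		/*
		 * Safe without ABA protection: a popped block cannot
		 * reappear on shared_free_list while workers are running.
		 */
		if (atomic_compare_exchange_weak (&shared_free_list, &block, next)) {
			/* cache it locally so later allocations skip the CAS */
			block->next_free = local_free_list;
			local_free_list = block;
			return block;
		}
	}
}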