From: Vlad Brezae Date: Fri, 3 Mar 2017 10:38:15 +0000 (+0200) Subject: [sgen] Parallel nursery collections X-Git-Url: http://wien.tomnetworks.com/gitweb/?p=mono.git;a=commitdiff_plain;h=8bc93c4b39d03fc339f0e097954c6e2a4494b3ff [sgen] Parallel nursery collections Scans the card table and the roots on the workers, including draining the stack. --- diff --git a/mono/sgen/sgen-cardtable.c b/mono/sgen/sgen-cardtable.c index 471fff2353d..a1fac4ada5d 100644 --- a/mono/sgen/sgen-cardtable.c +++ b/mono/sgen/sgen-cardtable.c @@ -49,11 +49,6 @@ guint64 remarked_cards; static guint64 large_objects; static guint64 bloby_objects; #endif -static guint64 major_card_scan_time; -static guint64 los_card_scan_time; - -static guint64 last_major_scan_time; -static guint64 last_los_scan_time; mword sgen_card_table_number_of_cards_in_range (mword address, mword size) @@ -415,11 +410,8 @@ sgen_card_table_clear_cards (void) } static void -sgen_card_table_scan_remsets (ScanCopyContext ctx) +sgen_card_table_start_scan_remsets (void) { - SGEN_TV_DECLARE (atv); - SGEN_TV_DECLARE (btv); - #ifdef SGEN_HAVE_OVERLAPPING_CARDS /*FIXME we should have a bit on each block/los object telling if the object have marked cards.*/ /*First we copy*/ @@ -430,17 +422,6 @@ sgen_card_table_scan_remsets (ScanCopyContext ctx) /*Then we clear*/ sgen_card_table_clear_cards (); #endif - SGEN_TV_GETTIME (atv); - sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1); - SGEN_TV_GETTIME (btv); - last_major_scan_time = SGEN_TV_ELAPSED (atv, btv); - major_card_scan_time += last_major_scan_time; - sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1); - SGEN_TV_GETTIME (atv); - last_los_scan_time = SGEN_TV_ELAPSED (btv, atv); - los_card_scan_time += last_los_scan_time; - - sgen_wbroots_scan_card_table (ctx); } guint8* @@ -581,9 +562,6 @@ sgen_card_table_init (SgenRememberedSet *remset) mono_counters_register ("cardtable large objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &large_objects); 
mono_counters_register ("cardtable bloby objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &bloby_objects); #endif - mono_counters_register ("cardtable major scan time", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &major_card_scan_time); - mono_counters_register ("cardtable los scan time", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &los_card_scan_time); - remset->wbarrier_set_field = sgen_card_table_wbarrier_set_field; remset->wbarrier_arrayref_copy = sgen_card_table_wbarrier_arrayref_copy; @@ -592,7 +570,7 @@ sgen_card_table_init (SgenRememberedSet *remset) remset->wbarrier_generic_nostore = sgen_card_table_wbarrier_generic_nostore; remset->record_pointer = sgen_card_table_record_pointer; - remset->scan_remsets = sgen_card_table_scan_remsets; + remset->start_scan_remsets = sgen_card_table_start_scan_remsets; remset->clear_cards = sgen_card_table_clear_cards; diff --git a/mono/sgen/sgen-copy-object.h b/mono/sgen/sgen-copy-object.h index 1e3d3918b65..925af17275d 100644 --- a/mono/sgen/sgen-copy-object.h +++ b/mono/sgen/sgen-copy-object.h @@ -124,6 +124,12 @@ copy_object_no_checks_par (GCObject *obj, SgenGrayQueue *queue) GRAY_OBJECT_ENQUEUE_PARALLEL (queue, (GCObject *)destination, sgen_vtable_get_descriptor (vt)); } } else { + /* + * Unlikely case. Clear the allocated object so it doesn't confuse nursery + * card table scanning, since it can contain old invalid refs. 
+ * FIXME make sure it is not a problem if another thread scans it while we clear + */ + mono_gc_bzero_aligned (destination, objsize); destination = final_destination; } } diff --git a/mono/sgen/sgen-gc.c b/mono/sgen/sgen-gc.c index a76440fce16..e83cbb68b78 100644 --- a/mono/sgen/sgen-gc.c +++ b/mono/sgen/sgen-gc.c @@ -267,6 +267,8 @@ static guint64 stat_pinned_objects = 0; static guint64 time_minor_pre_collection_fragment_clear = 0; static guint64 time_minor_pinning = 0; static guint64 time_minor_scan_remsets = 0; +static guint64 time_minor_scan_major_blocks = 0; +static guint64 time_minor_scan_los = 0; static guint64 time_minor_scan_pinned = 0; static guint64 time_minor_scan_roots = 0; static guint64 time_minor_finish_gray_stack = 0; @@ -426,8 +428,6 @@ sgen_workers_get_job_gray_queue (WorkerData *worker_data, SgenGrayQueue *default static void gray_queue_redirect (SgenGrayQueue *queue) { - SGEN_ASSERT (0, concurrent_collection_in_progress, "Where are we redirecting the gray queue to, without a concurrent collection?"); - sgen_workers_take_from_queue (queue); } @@ -1233,6 +1233,8 @@ init_stats (void) mono_counters_register ("Minor fragment clear", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_pre_collection_fragment_clear); mono_counters_register ("Minor pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_pinning); mono_counters_register ("Minor scan remembered set", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_remsets); + mono_counters_register ("Minor scan major blocks", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_major_blocks); + mono_counters_register ("Minor scan los", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_los); mono_counters_register ("Minor scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_pinned); mono_counters_register ("Minor scan roots", MONO_COUNTER_GC | 
MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_roots); mono_counters_register ("Minor fragment creation", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_fragment_creation); @@ -1336,7 +1338,20 @@ scan_copy_context_for_scan_job (void *worker_data_untyped, ScanJob *job) static void job_remembered_set_scan (void *worker_data_untyped, SgenThreadPoolJob *job) { - remset.scan_remsets (scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job)); + SGEN_TV_DECLARE (atv); + SGEN_TV_DECLARE (btv); + ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job); + + SGEN_TV_GETTIME (atv); + sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1); + SGEN_TV_GETTIME (btv); + time_minor_scan_major_blocks += SGEN_TV_ELAPSED (atv, btv); + + sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1); + SGEN_TV_GETTIME (atv); + time_minor_scan_los += SGEN_TV_ELAPSED (btv, atv); + + sgen_wbroots_scan_card_table (ctx); } typedef struct { @@ -1535,13 +1550,13 @@ enqueue_scan_from_roots_jobs (SgenGrayQueue *gc_thread_gray_queue, char *heap_st static gboolean collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_queue) { - gboolean needs_major; + gboolean needs_major, is_parallel = FALSE; size_t max_garbage_amount; char *nursery_next; mword fragment_total; ScanJob *sj; SgenGrayQueue gc_thread_gray_queue; - SgenObjectOperations *object_ops; + SgenObjectOperations *object_ops_nopar, *object_ops_par = NULL; ScanCopyContext ctx; TV_DECLARE (atv); TV_DECLARE (btv); @@ -1556,10 +1571,16 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_ binary_protocol_collection_begin (gc_stats.minor_gc_count, GENERATION_NURSERY); - if (sgen_concurrent_collection_in_progress ()) - object_ops = &sgen_minor_collector.serial_ops_with_concurrent_major; - else - object_ops = &sgen_minor_collector.serial_ops; + if (sgen_concurrent_collection_in_progress ()) { + /* 
FIXME Support parallel nursery collections with concurrent major */ + object_ops_nopar = &sgen_minor_collector.serial_ops_with_concurrent_major; + } else { + object_ops_nopar = &sgen_minor_collector.serial_ops; + if (sgen_minor_collector.is_parallel) { + object_ops_par = &sgen_minor_collector.parallel_ops; + is_parallel = TRUE; + } + } if (do_verify_nursery || do_dump_nursery_content) sgen_debug_verify_nursery (do_dump_nursery_content); @@ -1596,8 +1617,8 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_ sgen_memgov_minor_collection_start (); - init_gray_queue (&gc_thread_gray_queue, FALSE); - ctx = CONTEXT_FROM_OBJECT_OPERATIONS (object_ops, &gc_thread_gray_queue); + init_gray_queue (&gc_thread_gray_queue, is_parallel); + ctx = CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, &gc_thread_gray_queue); gc_stats.minor_gc_count ++; @@ -1629,10 +1650,12 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_ SGEN_LOG (2, "Finding pinned pointers: %zd in %lld usecs", sgen_get_pinned_count (), (long long)TV_ELAPSED (btv, atv)); SGEN_LOG (4, "Start scan with %zd pinned objects", sgen_get_pinned_count ()); + remset.start_scan_remsets (); + sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan remset", job_remembered_set_scan, sizeof (ScanJob)); - sj->ops = object_ops; + sj->ops = is_parallel ? 
object_ops_par : object_ops_nopar; sj->gc_thread_gray_queue = &gc_thread_gray_queue; - sgen_workers_enqueue_job (&sj->job, FALSE); + sgen_workers_enqueue_job (&sj->job, is_parallel); /* we don't have complete write barrier yet, so we scan all the old generation sections */ TV_GETTIME (btv); @@ -1647,7 +1670,13 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_ TV_GETTIME (atv); time_minor_scan_pinned += TV_ELAPSED (btv, atv); - enqueue_scan_from_roots_jobs (&gc_thread_gray_queue, sgen_get_nursery_start (), nursery_next, object_ops, FALSE); + enqueue_scan_from_roots_jobs (&gc_thread_gray_queue, sgen_get_nursery_start (), nursery_next, is_parallel ? object_ops_par : object_ops_nopar, is_parallel); + + if (is_parallel) { + gray_queue_redirect (&gc_thread_gray_queue); + sgen_workers_start_all_workers (object_ops_nopar, object_ops_par, NULL); + sgen_workers_join (); + } TV_GETTIME (btv); time_minor_scan_roots += TV_ELAPSED (atv, btv); @@ -3337,9 +3366,9 @@ sgen_gc_init (void) if (major_collector.post_param_init) major_collector.post_param_init (&major_collector); - if (major_collector.needs_thread_pool) { + if (major_collector.needs_thread_pool || sgen_minor_collector.is_parallel) { int num_workers = 1; - if (major_collector.is_parallel) { + if (major_collector.is_parallel || sgen_minor_collector.is_parallel) { /* FIXME Detect the number of physical cores, instead of logical */ num_workers = mono_cpu_count () / 2; if (num_workers < 1) @@ -3403,6 +3432,12 @@ sgen_get_major_collector (void) return &major_collector; } +SgenMinorCollector* +sgen_get_minor_collector (void) +{ + return &sgen_minor_collector; +} + SgenRememberedSet* sgen_get_remset (void) { diff --git a/mono/sgen/sgen-gc.h b/mono/sgen/sgen-gc.h index 9f88578efce..4ff60166cca 100644 --- a/mono/sgen/sgen-gc.h +++ b/mono/sgen/sgen-gc.h @@ -703,6 +703,7 @@ void sgen_marksweep_init (SgenMajorCollector *collector); void sgen_marksweep_conc_init (SgenMajorCollector *collector); void 
sgen_marksweep_conc_par_init (SgenMajorCollector *collector); SgenMajorCollector* sgen_get_major_collector (void); +SgenMinorCollector* sgen_get_minor_collector (void); typedef struct _SgenRememberedSet { @@ -713,7 +714,7 @@ typedef struct _SgenRememberedSet { void (*wbarrier_generic_nostore) (gpointer ptr); void (*record_pointer) (gpointer ptr); - void (*scan_remsets) (ScanCopyContext ctx); + void (*start_scan_remsets) (void); void (*clear_cards) (void); diff --git a/mono/sgen/sgen-marksweep.c b/mono/sgen/sgen-marksweep.c index 3492ff6058a..365f6478781 100644 --- a/mono/sgen/sgen-marksweep.c +++ b/mono/sgen/sgen-marksweep.c @@ -2769,6 +2769,12 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i) g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i)); + /* We can do this because we always init the minor before the major */ + if (is_parallel || sgen_get_minor_collector ()->is_parallel) { + mono_native_tls_alloc (&worker_block_free_list_key, NULL); + collector->worker_init_cb = sgen_worker_init_callback; + } + mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced); mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed); mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept); @@ -2858,10 +2864,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr collector->major_ops_conc_par_finish.scan_vtype = major_scan_vtype_par_with_evacuation; collector->major_ops_conc_par_finish.scan_ptr_field = major_scan_ptr_field_par_with_evacuation; collector->major_ops_conc_par_finish.drain_gray_stack = drain_gray_stack_par; - - collector->worker_init_cb = sgen_worker_init_callback; - - mono_native_tls_alloc (&worker_block_free_list_key, NULL); } } diff --git 
a/mono/sgen/sgen-workers.c b/mono/sgen/sgen-workers.c index f5169f45ab2..e1633f3f4cc 100644 --- a/mono/sgen/sgen-workers.c +++ b/mono/sgen/sgen-workers.c @@ -204,18 +204,17 @@ sgen_workers_enqueue_job (SgenThreadPoolJob *job, gboolean enqueue) static gboolean workers_get_work (WorkerData *data) { - SgenMajorCollector *major; + SgenMajorCollector *major = sgen_get_major_collector (); + SgenMinorCollector *minor = sgen_get_minor_collector (); + GrayQueueSection *section; g_assert (sgen_gray_object_queue_is_empty (&data->private_gray_queue)); + g_assert (major->is_concurrent || minor->is_parallel); - /* If we're concurrent, steal from the workers distribute gray queue. */ - major = sgen_get_major_collector (); - if (major->is_concurrent) { - GrayQueueSection *section = sgen_section_gray_queue_dequeue (&workers_distribute_gray_queue); - if (section) { - sgen_gray_object_enqueue_section (&data->private_gray_queue, section, major->is_parallel); - return TRUE; - } + section = sgen_section_gray_queue_dequeue (&workers_distribute_gray_queue); + if (section) { + sgen_gray_object_enqueue_section (&data->private_gray_queue, section, major->is_parallel); + return TRUE; } /* Nobody to steal from */ @@ -227,10 +226,13 @@ static gboolean workers_steal_work (WorkerData *data) { SgenMajorCollector *major = sgen_get_major_collector (); + SgenMinorCollector *minor = sgen_get_minor_collector (); + int generation = sgen_get_current_collection_generation (); GrayQueueSection *section = NULL; int i, current_worker; - if (!major->is_parallel) + if ((generation == GENERATION_OLD && !major->is_parallel) || + (generation == GENERATION_NURSERY && !minor->is_parallel)) return FALSE; /* If we're parallel, steal from other workers' private gray queues */ @@ -275,10 +277,11 @@ thread_pool_init_func (void *data_untyped) { WorkerData *data = (WorkerData *)data_untyped; SgenMajorCollector *major = sgen_get_major_collector (); + SgenMinorCollector *minor = sgen_get_minor_collector (); 
sgen_client_thread_register_worker (); - if (!major->is_concurrent) + if (!major->is_concurrent && !minor->is_parallel) return; init_private_gray_queue (data); @@ -314,7 +317,6 @@ marker_idle_func (void *data_untyped) WorkerData *data = (WorkerData *)data_untyped; SGEN_ASSERT (0, continue_idle_func (data_untyped), "Why are we called when we're not supposed to work?"); - SGEN_ASSERT (0, sgen_concurrent_collection_in_progress (), "The worker should only mark in concurrent collections."); if (data->state == STATE_WORK_ENQUEUED) { set_state (data, STATE_WORK_ENQUEUED, STATE_WORKING); @@ -357,7 +359,7 @@ init_distribute_gray_queue (void) void sgen_workers_init_distribute_gray_queue (void) { - SGEN_ASSERT (0, sgen_get_major_collector ()->is_concurrent, + SGEN_ASSERT (0, sgen_get_major_collector ()->is_concurrent || sgen_get_minor_collector ()->is_parallel, "Why should we init the distribute gray queue if we don't need it?"); init_distribute_gray_queue (); } @@ -368,7 +370,7 @@ sgen_workers_init (int num_workers, SgenWorkerCallback callback) int i; void **workers_data_ptrs = (void **)alloca(num_workers * sizeof(void *)); - if (!sgen_get_major_collector ()->is_concurrent) { + if (!sgen_get_major_collector ()->is_concurrent && !sgen_get_minor_collector ()->is_parallel) { sgen_thread_pool_init (num_workers, thread_pool_init_func, NULL, NULL, NULL, NULL); return; }