[sgen] Parallel nursery collections
author Vlad Brezae <brezaevlad@gmail.com>
Fri, 3 Mar 2017 10:38:15 +0000 (12:38 +0200)
committer Vlad Brezae <brezaevlad@gmail.com>
Tue, 4 Apr 2017 13:00:04 +0000 (16:00 +0300)
Scans the card table and the roots on the workers, including draining the stack.

mono/sgen/sgen-cardtable.c
mono/sgen/sgen-copy-object.h
mono/sgen/sgen-gc.c
mono/sgen/sgen-gc.h
mono/sgen/sgen-marksweep.c
mono/sgen/sgen-workers.c

index 471fff2353ddcb9299f3419d2e03744eccb013a4..a1fac4ada5da4a212f1f63b2fb19e7cbbf072216 100644 (file)
@@ -49,11 +49,6 @@ guint64 remarked_cards;
 static guint64 large_objects;
 static guint64 bloby_objects;
 #endif
-static guint64 major_card_scan_time;
-static guint64 los_card_scan_time;
-
-static guint64 last_major_scan_time;
-static guint64 last_los_scan_time;
 
 mword
 sgen_card_table_number_of_cards_in_range (mword address, mword size)
@@ -415,11 +410,8 @@ sgen_card_table_clear_cards (void)
 }
 
 static void
-sgen_card_table_scan_remsets (ScanCopyContext ctx)
+sgen_card_table_start_scan_remsets (void)
 {
-       SGEN_TV_DECLARE (atv);
-       SGEN_TV_DECLARE (btv);
-
 #ifdef SGEN_HAVE_OVERLAPPING_CARDS
        /*FIXME we should have a bit on each block/los object telling if the object have marked cards.*/
        /*First we copy*/
@@ -430,17 +422,6 @@ sgen_card_table_scan_remsets (ScanCopyContext ctx)
        /*Then we clear*/
        sgen_card_table_clear_cards ();
 #endif
-       SGEN_TV_GETTIME (atv);
-       sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1);
-       SGEN_TV_GETTIME (btv);
-       last_major_scan_time = SGEN_TV_ELAPSED (atv, btv); 
-       major_card_scan_time += last_major_scan_time;
-       sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1);
-       SGEN_TV_GETTIME (atv);
-       last_los_scan_time = SGEN_TV_ELAPSED (btv, atv);
-       los_card_scan_time += last_los_scan_time;
-
-       sgen_wbroots_scan_card_table (ctx);
 }
 
 guint8*
@@ -581,9 +562,6 @@ sgen_card_table_init (SgenRememberedSet *remset)
        mono_counters_register ("cardtable large objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &large_objects);
        mono_counters_register ("cardtable bloby objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &bloby_objects);
 #endif
-       mono_counters_register ("cardtable major scan time", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &major_card_scan_time);
-       mono_counters_register ("cardtable los scan time", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &los_card_scan_time);
-
 
        remset->wbarrier_set_field = sgen_card_table_wbarrier_set_field;
        remset->wbarrier_arrayref_copy = sgen_card_table_wbarrier_arrayref_copy;
@@ -592,7 +570,7 @@ sgen_card_table_init (SgenRememberedSet *remset)
        remset->wbarrier_generic_nostore = sgen_card_table_wbarrier_generic_nostore;
        remset->record_pointer = sgen_card_table_record_pointer;
 
-       remset->scan_remsets = sgen_card_table_scan_remsets;
+       remset->start_scan_remsets = sgen_card_table_start_scan_remsets;
 
        remset->clear_cards = sgen_card_table_clear_cards;
 
index 1e3d3918b65ce7187ff5a09ff6a9f7e4223cdfe3..925af17275dd67a1d2606d0c302b1ba3d7a4c38b 100644 (file)
@@ -124,6 +124,12 @@ copy_object_no_checks_par (GCObject *obj, SgenGrayQueue *queue)
                                GRAY_OBJECT_ENQUEUE_PARALLEL (queue, (GCObject *)destination, sgen_vtable_get_descriptor (vt));
                        }
                } else {
+                       /*
+                        * Unlikely case. Clear the allocated object so it doesn't confuse nursery
+                        * card table scanning, since it can contain old invalid refs.
+                        * FIXME make sure it is not a problem if another thread scans it while we clear
+                        */
+                       mono_gc_bzero_aligned (destination, objsize);
                        destination = final_destination;
                }
        }
index a76440fce16e4b99d443c8d26b58a8d5307aceae..e83cbb68b782c627592ae216289abdaec205cf29 100644 (file)
@@ -267,6 +267,8 @@ static guint64 stat_pinned_objects = 0;
 static guint64 time_minor_pre_collection_fragment_clear = 0;
 static guint64 time_minor_pinning = 0;
 static guint64 time_minor_scan_remsets = 0;
+static guint64 time_minor_scan_major_blocks = 0;
+static guint64 time_minor_scan_los = 0;
 static guint64 time_minor_scan_pinned = 0;
 static guint64 time_minor_scan_roots = 0;
 static guint64 time_minor_finish_gray_stack = 0;
@@ -426,8 +428,6 @@ sgen_workers_get_job_gray_queue (WorkerData *worker_data, SgenGrayQueue *default
 static void
 gray_queue_redirect (SgenGrayQueue *queue)
 {
-       SGEN_ASSERT (0, concurrent_collection_in_progress, "Where are we redirecting the gray queue to, without a concurrent collection?");
-
        sgen_workers_take_from_queue (queue);
 }
 
@@ -1233,6 +1233,8 @@ init_stats (void)
        mono_counters_register ("Minor fragment clear", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_pre_collection_fragment_clear);
        mono_counters_register ("Minor pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_pinning);
        mono_counters_register ("Minor scan remembered set", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_remsets);
+       mono_counters_register ("Minor scan major blocks", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_major_blocks);
+       mono_counters_register ("Minor scan los", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_los);
        mono_counters_register ("Minor scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_pinned);
        mono_counters_register ("Minor scan roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_roots);
        mono_counters_register ("Minor fragment creation", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_fragment_creation);
@@ -1336,7 +1338,20 @@ scan_copy_context_for_scan_job (void *worker_data_untyped, ScanJob *job)
 static void
 job_remembered_set_scan (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
-       remset.scan_remsets (scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job));
+       SGEN_TV_DECLARE (atv);
+       SGEN_TV_DECLARE (btv);
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job);
+
+       SGEN_TV_GETTIME (atv);
+       sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1);
+       SGEN_TV_GETTIME (btv);
+       time_minor_scan_major_blocks += SGEN_TV_ELAPSED (atv, btv);
+
+       sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1);
+       SGEN_TV_GETTIME (atv);
+       time_minor_scan_los += SGEN_TV_ELAPSED (btv, atv);
+
+       sgen_wbroots_scan_card_table (ctx);
 }
 
 typedef struct {
@@ -1535,13 +1550,13 @@ enqueue_scan_from_roots_jobs (SgenGrayQueue *gc_thread_gray_queue, char *heap_st
 static gboolean
 collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_queue)
 {
-       gboolean needs_major;
+       gboolean needs_major, is_parallel = FALSE;
        size_t max_garbage_amount;
        char *nursery_next;
        mword fragment_total;
        ScanJob *sj;
        SgenGrayQueue gc_thread_gray_queue;
-       SgenObjectOperations *object_ops;
+       SgenObjectOperations *object_ops_nopar, *object_ops_par = NULL;
        ScanCopyContext ctx;
        TV_DECLARE (atv);
        TV_DECLARE (btv);
@@ -1556,10 +1571,16 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
 
        binary_protocol_collection_begin (gc_stats.minor_gc_count, GENERATION_NURSERY);
 
-       if (sgen_concurrent_collection_in_progress ())
-               object_ops = &sgen_minor_collector.serial_ops_with_concurrent_major;
-       else
-               object_ops = &sgen_minor_collector.serial_ops;
+       if (sgen_concurrent_collection_in_progress ()) {
+               /* FIXME Support parallel nursery collections with concurrent major */
+               object_ops_nopar = &sgen_minor_collector.serial_ops_with_concurrent_major;
+       } else {
+               object_ops_nopar = &sgen_minor_collector.serial_ops;
+               if (sgen_minor_collector.is_parallel) {
+                       object_ops_par = &sgen_minor_collector.parallel_ops;
+                       is_parallel = TRUE;
+               }
+       }
 
        if (do_verify_nursery || do_dump_nursery_content)
                sgen_debug_verify_nursery (do_dump_nursery_content);
@@ -1596,8 +1617,8 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
 
        sgen_memgov_minor_collection_start ();
 
-       init_gray_queue (&gc_thread_gray_queue, FALSE);
-       ctx = CONTEXT_FROM_OBJECT_OPERATIONS (object_ops, &gc_thread_gray_queue);
+       init_gray_queue (&gc_thread_gray_queue, is_parallel);
+       ctx = CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, &gc_thread_gray_queue);
 
        gc_stats.minor_gc_count ++;
 
@@ -1629,10 +1650,12 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        SGEN_LOG (2, "Finding pinned pointers: %zd in %lld usecs", sgen_get_pinned_count (), (long long)TV_ELAPSED (btv, atv));
        SGEN_LOG (4, "Start scan with %zd pinned objects", sgen_get_pinned_count ());
 
+       remset.start_scan_remsets ();
+
        sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan remset", job_remembered_set_scan, sizeof (ScanJob));
-       sj->ops = object_ops;
+       sj->ops = is_parallel ? object_ops_par : object_ops_nopar;
        sj->gc_thread_gray_queue = &gc_thread_gray_queue;
-       sgen_workers_enqueue_job (&sj->job, FALSE);
+       sgen_workers_enqueue_job (&sj->job, is_parallel);
 
        /* we don't have complete write barrier yet, so we scan all the old generation sections */
        TV_GETTIME (btv);
@@ -1647,7 +1670,13 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        TV_GETTIME (atv);
        time_minor_scan_pinned += TV_ELAPSED (btv, atv);
 
-       enqueue_scan_from_roots_jobs (&gc_thread_gray_queue, sgen_get_nursery_start (), nursery_next, object_ops, FALSE);
+       enqueue_scan_from_roots_jobs (&gc_thread_gray_queue, sgen_get_nursery_start (), nursery_next, is_parallel ? object_ops_par : object_ops_nopar, is_parallel);
+
+       if (is_parallel) {
+               gray_queue_redirect (&gc_thread_gray_queue);
+               sgen_workers_start_all_workers (object_ops_nopar, object_ops_par, NULL);
+               sgen_workers_join ();
+       }
 
        TV_GETTIME (btv);
        time_minor_scan_roots += TV_ELAPSED (atv, btv);
@@ -3337,9 +3366,9 @@ sgen_gc_init (void)
        if (major_collector.post_param_init)
                major_collector.post_param_init (&major_collector);
 
-       if (major_collector.needs_thread_pool) {
+       if (major_collector.needs_thread_pool || sgen_minor_collector.is_parallel) {
                int num_workers = 1;
-               if (major_collector.is_parallel) {
+               if (major_collector.is_parallel || sgen_minor_collector.is_parallel) {
                        /* FIXME Detect the number of physical cores, instead of logical */
                        num_workers = mono_cpu_count () / 2;
                        if (num_workers < 1)
@@ -3403,6 +3432,12 @@ sgen_get_major_collector (void)
        return &major_collector;
 }
 
+SgenMinorCollector*
+sgen_get_minor_collector (void)
+{
+       return &sgen_minor_collector;
+}
+
 SgenRememberedSet*
 sgen_get_remset (void)
 {
index 9f88578efce4891bff2b9d435838e0715171334c..4ff60166cca9d66ec79ea4e08ea3c306bf0d10be 100644 (file)
@@ -703,6 +703,7 @@ void sgen_marksweep_init (SgenMajorCollector *collector);
 void sgen_marksweep_conc_init (SgenMajorCollector *collector);
 void sgen_marksweep_conc_par_init (SgenMajorCollector *collector);
 SgenMajorCollector* sgen_get_major_collector (void);
+SgenMinorCollector* sgen_get_minor_collector (void);
 
 
 typedef struct _SgenRememberedSet {
@@ -713,7 +714,7 @@ typedef struct _SgenRememberedSet {
        void (*wbarrier_generic_nostore) (gpointer ptr);
        void (*record_pointer) (gpointer ptr);
 
-       void (*scan_remsets) (ScanCopyContext ctx);
+       void (*start_scan_remsets) (void);
 
        void (*clear_cards) (void);
 
index 3492ff6058ab709e4b5a04f6ced0861d0a74a7a7..365f64787812f03981c64145cd51e38b01664d7a 100644 (file)
@@ -2769,6 +2769,12 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i)
                g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i));
 
+       /* We can do this because we always init the minor before the major */
+       if (is_parallel || sgen_get_minor_collector ()->is_parallel) {
+               mono_native_tls_alloc (&worker_block_free_list_key, NULL);
+               collector->worker_init_cb = sgen_worker_init_callback;
+       }
+
        mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced);
        mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed);
        mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept);
@@ -2858,10 +2864,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
                        collector->major_ops_conc_par_finish.scan_vtype = major_scan_vtype_par_with_evacuation;
                        collector->major_ops_conc_par_finish.scan_ptr_field = major_scan_ptr_field_par_with_evacuation;
                        collector->major_ops_conc_par_finish.drain_gray_stack = drain_gray_stack_par;
-
-                       collector->worker_init_cb = sgen_worker_init_callback;
-
-                       mono_native_tls_alloc (&worker_block_free_list_key, NULL);
                }
        }
 
index f5169f45ab2e711c87ac1a59d41007ef8715b236..e1633f3f4ccf40010d25425427ea2784c6ae1c4e 100644 (file)
@@ -204,18 +204,17 @@ sgen_workers_enqueue_job (SgenThreadPoolJob *job, gboolean enqueue)
 static gboolean
 workers_get_work (WorkerData *data)
 {
-       SgenMajorCollector *major;
+       SgenMajorCollector *major = sgen_get_major_collector ();
+       SgenMinorCollector *minor = sgen_get_minor_collector ();
+       GrayQueueSection *section;
 
        g_assert (sgen_gray_object_queue_is_empty (&data->private_gray_queue));
+       g_assert (major->is_concurrent || minor->is_parallel);
 
-       /* If we're concurrent, steal from the workers distribute gray queue. */
-       major = sgen_get_major_collector ();
-       if (major->is_concurrent) {
-               GrayQueueSection *section = sgen_section_gray_queue_dequeue (&workers_distribute_gray_queue);
-               if (section) {
-                       sgen_gray_object_enqueue_section (&data->private_gray_queue, section, major->is_parallel);
-                       return TRUE;
-               }
+       section = sgen_section_gray_queue_dequeue (&workers_distribute_gray_queue);
+       if (section) {
+               sgen_gray_object_enqueue_section (&data->private_gray_queue, section, major->is_parallel);
+               return TRUE;
        }
 
        /* Nobody to steal from */
@@ -227,10 +226,13 @@ static gboolean
 workers_steal_work (WorkerData *data)
 {
        SgenMajorCollector *major = sgen_get_major_collector ();
+       SgenMinorCollector *minor = sgen_get_minor_collector ();
+       int generation = sgen_get_current_collection_generation ();
        GrayQueueSection *section = NULL;
        int i, current_worker;
 
-       if (!major->is_parallel)
+       if ((generation == GENERATION_OLD && !major->is_parallel) ||
+                       (generation == GENERATION_NURSERY && !minor->is_parallel))
                return FALSE;
 
        /* If we're parallel, steal from other workers' private gray queues  */
@@ -275,10 +277,11 @@ thread_pool_init_func (void *data_untyped)
 {
        WorkerData *data = (WorkerData *)data_untyped;
        SgenMajorCollector *major = sgen_get_major_collector ();
+       SgenMinorCollector *minor = sgen_get_minor_collector ();
 
        sgen_client_thread_register_worker ();
 
-       if (!major->is_concurrent)
+       if (!major->is_concurrent && !minor->is_parallel)
                return;
 
        init_private_gray_queue (data);
@@ -314,7 +317,6 @@ marker_idle_func (void *data_untyped)
        WorkerData *data = (WorkerData *)data_untyped;
 
        SGEN_ASSERT (0, continue_idle_func (data_untyped), "Why are we called when we're not supposed to work?");
-       SGEN_ASSERT (0, sgen_concurrent_collection_in_progress (), "The worker should only mark in concurrent collections.");
 
        if (data->state == STATE_WORK_ENQUEUED) {
                set_state (data, STATE_WORK_ENQUEUED, STATE_WORKING);
@@ -357,7 +359,7 @@ init_distribute_gray_queue (void)
 void
 sgen_workers_init_distribute_gray_queue (void)
 {
-       SGEN_ASSERT (0, sgen_get_major_collector ()->is_concurrent,
+       SGEN_ASSERT (0, sgen_get_major_collector ()->is_concurrent || sgen_get_minor_collector ()->is_parallel,
                        "Why should we init the distribute gray queue if we don't need it?");
        init_distribute_gray_queue ();
 }
@@ -368,7 +370,7 @@ sgen_workers_init (int num_workers, SgenWorkerCallback callback)
        int i;
        void **workers_data_ptrs = (void **)alloca(num_workers * sizeof(void *));
 
-       if (!sgen_get_major_collector ()->is_concurrent) {
+       if (!sgen_get_major_collector ()->is_concurrent && !sgen_get_minor_collector ()->is_parallel) {
                sgen_thread_pool_init (num_workers, thread_pool_init_func, NULL, NULL, NULL, NULL);
                return;
        }