[sgen] Parallel major blocks precleaning
author Vlad Brezae <brezaevlad@gmail.com>
Tue, 31 May 2016 21:45:19 +0000 (00:45 +0300)
committer Vlad Brezae <brezaevlad@gmail.com>
Thu, 19 Jan 2017 22:45:09 +0000 (00:45 +0200)
We allow splitting a major_scan_card_table into multiple smaller jobs. For precleaning, we enqueue num_workers precleaning jobs, each one precleaning one in every num_workers blocks from the list.

mono/sgen/sgen-cardtable.c
mono/sgen/sgen-gc.c
mono/sgen/sgen-gc.h
mono/sgen/sgen-marksweep.c
mono/sgen/sgen-workers.c
mono/sgen/sgen-workers.h

index af9fd1e33d525a20df807957f744911e64deb73a..8a85ef97e46fc302a8adc4af3b0197b3e11adaec 100644 (file)
@@ -438,7 +438,7 @@ sgen_card_table_scan_remsets (ScanCopyContext ctx)
        sgen_card_table_clear_cards ();
 #endif
        SGEN_TV_GETTIME (atv);
-       sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx);
+       sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1);
        SGEN_TV_GETTIME (btv);
        last_major_scan_time = SGEN_TV_ELAPSED (atv, btv); 
        major_card_scan_time += last_major_scan_time;
index b45e5b43a5df4e529f0393d7d0ed8bd6d7f04870..133e2e747c75de21569da8725c7a66e5977e5420 100644 (file)
@@ -1324,6 +1324,11 @@ typedef struct {
        SgenGrayQueue *gc_thread_gray_queue;
 } ScanJob;
 
+typedef struct {
+       ScanJob scan_job;
+       int job_index;
+} ParallelScanJob;
+
 static ScanCopyContext
 scan_copy_context_for_scan_job (void *worker_data_untyped, ScanJob *job)
 {
@@ -1390,7 +1395,7 @@ job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJo
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
-       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx);
+       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, 0, 1);
 }
 
 static void
@@ -1406,20 +1411,19 @@ job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob
 static void
 job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
-       ScanJob *job_data = (ScanJob*)job;
-       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
+       ParallelScanJob *job_data = (ParallelScanJob*)job;
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
 
        g_assert (concurrent_collection_in_progress);
 
-       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx);
+       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
 }
 
 static void
 job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
-       WorkerData *worker_data = (WorkerData *)worker_data_untyped;
        ScanJob *job_data = (ScanJob*)job;
-       ScanCopyContext ctx = CONTEXT_FROM_OBJECT_OPERATIONS (job_data->ops, sgen_workers_get_job_gray_queue (worker_data));
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
 
@@ -1429,9 +1433,8 @@ job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
 static void
 job_scan_last_pinned (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
-       WorkerData *worker_data = (WorkerData *)worker_data_untyped;
        ScanJob *job_data = (ScanJob*)job;
-       ScanCopyContext ctx = CONTEXT_FROM_OBJECT_OPERATIONS (job_data->ops, sgen_workers_get_job_gray_queue (worker_data));
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
 
@@ -1442,11 +1445,17 @@ static void
 workers_finish_callback (void)
 {
        ScanJob *sj;
-       /* Mod union preclean job */
-       sj = (ScanJob*)sgen_thread_pool_job_alloc ("preclean mod union cardtable", job_major_mod_union_preclean, sizeof (ScanJob));
-       sj->ops = sgen_workers_get_idle_func_object_ops ();
-       sj->gc_thread_gray_queue = NULL;
-       sgen_workers_enqueue_job (&sj->job, TRUE);
+       int split_count = sgen_workers_get_job_split_count ();
+       int i;
+       /* Mod union preclean jobs */
+       for (i = 0; i < split_count; i++) {
+               ParallelScanJob *psj;
+               psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean major mod union cardtable", job_major_mod_union_preclean, sizeof (ParallelScanJob));
+               psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
+               psj->scan_job.gc_thread_gray_queue = NULL;
+               psj->job_index = i;
+               sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+       }
 
        sj = (ScanJob*)sgen_thread_pool_job_alloc ("preclean los mod union cardtable", job_los_mod_union_preclean, sizeof (ScanJob));
        sj->ops = sgen_workers_get_idle_func_object_ops ();
index 2ac2445b36645a3b51694e358f5fab2b4eafc382..89afb69e06986882dc51886af2f2f3eb28ce3a53 100644 (file)
@@ -648,7 +648,7 @@ struct _SgenMajorCollector {
        void (*free_non_pinned_object) (GCObject *obj, size_t size);
        void (*pin_objects) (SgenGrayQueue *queue);
        void (*pin_major_object) (GCObject *obj, SgenGrayQueue *queue);
-       void (*scan_card_table) (CardTableScanType scan_type, ScanCopyContext ctx);
+       void (*scan_card_table) (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count);
        void (*iterate_live_block_ranges) (sgen_cardtable_block_callback callback);
        void (*iterate_block_ranges) (sgen_cardtable_block_callback callback);
        void (*update_cardtable_mod_union) (void);
index 72a70c693024ee425ac2b401aae023bc974d2b60..9e160f5159deae9083e6af815eb09de24b07ce63 100644 (file)
@@ -2465,7 +2465,7 @@ scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, Scan
 }
 
 static void
-major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx)
+major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count)
 {
        MSBlockInfo *block;
        gboolean has_references, was_sweeping, skip_scan;
@@ -2479,8 +2479,10 @@ major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx)
 
        binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
        FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
+               if (__index % job_split_count != job_index)
+                       continue;
 #ifdef PREFETCH_CARDS
-               int prefetch_index = __index + 6;
+               int prefetch_index = __index + 6 * job_split_count;
                if (prefetch_index < allocated_blocks.next_slot) {
                        MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
                        PREFETCH_READ (prefetch_block);
index 49f5ccf3548cb0a9f677dc266cb8b4563bbe3265..2d85a38f8972d10129cd5e9b3a6e1ec8303c596a 100644 (file)
@@ -439,4 +439,16 @@ sgen_workers_get_idle_func_object_ops (void)
        return idle_func_object_ops;
 }
 
+/*
+ * If we have a single worker, splitting into multiple jobs makes no sense. With
+ * more than one worker, we split into a larger number of jobs so that, in case
+ * the work load is uneven, a worker that finished quickly can take up more jobs
+ * than another one.
+ */
+int
+sgen_workers_get_job_split_count (void)
+{
+       return (workers_num > 1) ? workers_num * 4 : 1;
+}
+
 #endif
index cae2da501233942c1811e778df83b4b8c5593ad5..48dc3bb07287844e664d0ccd48d17c6cfbf3c6aa 100644 (file)
@@ -36,5 +36,6 @@ gboolean sgen_workers_are_working (void);
 void sgen_workers_assert_gray_queue_is_empty (void);
 void sgen_workers_take_from_queue_and_awake (SgenGrayQueue *queue);
 SgenObjectOperations* sgen_workers_get_idle_func_object_ops (void);
+int sgen_workers_get_job_split_count (void);
 
 #endif