[sgen] Add free lists for each workers
[mono.git] / mono / sgen / sgen-gc.c
index 842144e56f4aed079bbed1dd86e77c6b57ee5920..ea5b232afe6f42409e632b56677ceb781dcbcdd9 100644 (file)
@@ -290,6 +290,8 @@ static SGEN_TV_DECLARE (time_major_conc_collection_end);
 
 int gc_debug_level = 0;
 FILE* gc_debug_file;
+static char* gc_params_options;
+static char* gc_debug_options;
 
 /*
 void
@@ -990,6 +992,24 @@ mono_gc_get_logfile (void)
        return gc_debug_file;
 }
 
+void
+mono_gc_params_set (const char* options)
+{
+       if (gc_params_options)
+               g_free (gc_params_options);
+
+       gc_params_options = g_strdup (options);
+}
+
+void
+mono_gc_debug_set (const char* options)
+{
+       if (gc_debug_options)
+               g_free (gc_debug_options);
+
+       gc_debug_options = g_strdup (options);
+}
+
 static void
 scan_finalizer_entries (SgenPointerQueue *fin_queue, ScanCopyContext ctx)
 {
@@ -1169,7 +1189,6 @@ finish_gray_stack (int generation, ScanCopyContext ctx)
 
        g_assert (sgen_gray_object_queue_is_empty (queue));
 
-       sgen_gray_object_queue_trim_free_list (queue);
        binary_protocol_finish_gray_stack_end (sgen_timestamp (), generation);
 }
 
@@ -1305,6 +1324,11 @@ typedef struct {
        SgenGrayQueue *gc_thread_gray_queue;
 } ScanJob;
 
+typedef struct {
+       ScanJob scan_job;
+       int job_index;
+} ParallelScanJob;
+
 static ScanCopyContext
 scan_copy_context_for_scan_job (void *worker_data_untyped, ScanJob *job)
 {
@@ -1371,7 +1395,7 @@ job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJo
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
-       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx);
+       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, 0, 1);
 }
 
 static void
@@ -1381,23 +1405,72 @@ job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
-       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx);
+       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, 0, 1);
+}
+
+static void
+job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+{
+       ParallelScanJob *job_data = (ParallelScanJob*)job;
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
+
+       g_assert (concurrent_collection_in_progress);
+
+       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
 }
 
 static void
-job_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+{
+       ParallelScanJob *job_data = (ParallelScanJob*)job;
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
+
+       g_assert (concurrent_collection_in_progress);
+
+       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
+}
+
+static void
+job_scan_last_pinned (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
        ScanJob *job_data = (ScanJob*)job;
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
 
-       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx);
-       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx);
-
        sgen_scan_pin_queue_objects (ctx);
 }
 
+static void
+workers_finish_callback (void)
+{
+       ParallelScanJob *psj;
+       ScanJob *sj;
+       int split_count = sgen_workers_get_job_split_count ();
+       int i;
+       /* Mod union preclean jobs */
+       for (i = 0; i < split_count; i++) {
+               psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean major mod union cardtable", job_major_mod_union_preclean, sizeof (ParallelScanJob));
+               psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
+               psj->scan_job.gc_thread_gray_queue = NULL;
+               psj->job_index = i;
+               sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+       }
+
+       for (i = 0; i < split_count; i++) {
+               psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean los mod union cardtable", job_los_mod_union_preclean, sizeof (ParallelScanJob));
+               psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
+               psj->scan_job.gc_thread_gray_queue = NULL;
+               psj->job_index = i;
+               sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+       }
+
+       sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan last pinned", job_scan_last_pinned, sizeof (ScanJob));
+       sj->ops = sgen_workers_get_idle_func_object_ops ();
+       sj->gc_thread_gray_queue = NULL;
+       sgen_workers_enqueue_job (&sj->job, TRUE);
+}
+
 static void
 init_gray_queue (SgenGrayQueue *gc_thread_gray_queue, gboolean use_workers)
 {
@@ -1461,7 +1534,7 @@ enqueue_scan_from_roots_jobs (SgenGrayQueue *gc_thread_gray_queue, char *heap_st
  * Return whether any objects were late-pinned due to being out of memory.
  */
 static gboolean
-collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
+collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_queue)
 {
        gboolean needs_major;
        size_t max_garbage_amount;
@@ -1529,11 +1602,6 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
 
        gc_stats.minor_gc_count ++;
 
-       if (whole_heap_check_before_collection) {
-               sgen_clear_nursery_fragments ();
-               sgen_check_whole_heap (finish_up_concurrent_mark);
-       }
-
        sgen_process_fin_stage_entries ();
 
        /* pin from pinned handles */
@@ -1552,6 +1620,11 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        if (remset_consistency_checks)
                sgen_check_remset_consistency ();
 
+       if (whole_heap_check_before_collection) {
+               sgen_clear_nursery_fragments ();
+               sgen_check_whole_heap (FALSE);
+       }
+
        TV_GETTIME (atv);
        time_minor_pinning += TV_ELAPSED (btv, atv);
        SGEN_LOG (2, "Finding pinned pointers: %zd in %lld usecs", sgen_get_pinned_count (), (long long)TV_ELAPSED (btv, atv));
@@ -1697,7 +1770,7 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
        sgen_clear_nursery_fragments ();
 
        if (whole_heap_check_before_collection)
-               sgen_check_whole_heap (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT);
+               sgen_check_whole_heap (TRUE);
 
        TV_GETTIME (btv);
        time_major_pre_collection_fragment_clear += TV_ELAPSED (atv, btv);
@@ -1786,13 +1859,6 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
        if (old_next_pin_slot)
                *old_next_pin_slot = sgen_get_pinned_count ();
 
-       /*
-        * We don't actually pin when starting a concurrent collection, so the remset
-        * consistency check won't work.
-        */
-       if (remset_consistency_checks && mode != COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT)
-               sgen_check_remset_consistency ();
-
        TV_GETTIME (btv);
        time_major_pinning += TV_ELAPSED (atv, btv);
        SGEN_LOG (2, "Finding pinned pointers: %zd in %lld usecs", sgen_get_pinned_count (), (long long)TV_ELAPSED (atv, btv));
@@ -1807,7 +1873,12 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
                         * We force the finish of the worker with the new object ops context
                         * which can also do copying. We need to have finished pinning.
                         */
-                       sgen_workers_start_all_workers (object_ops, NULL);
+                       /* FIXME Implement parallel copying and get rid of this ineffective hack */
+                       if (major_collector.is_parallel)
+                               sgen_workers_start_all_workers (&major_collector.major_ops_conc_par_start, NULL);
+                       else
+                               sgen_workers_start_all_workers (object_ops, NULL);
+
                        sgen_workers_join ();
                }
        }
@@ -1835,12 +1906,7 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
         */
        if (mode == COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT) {
                if (precleaning_enabled) {
-                       ScanJob *sj;
-                       /* Mod union preclean job */
-                       sj = (ScanJob*)sgen_thread_pool_job_alloc ("preclean mod union cardtable", job_mod_union_preclean, sizeof (ScanJob));
-                       sj->ops = object_ops;
-                       sj->gc_thread_gray_queue = NULL;
-                       sgen_workers_start_all_workers (object_ops, &sj->job);
+                       sgen_workers_start_all_workers (object_ops, workers_finish_callback);
                } else {
                        sgen_workers_start_all_workers (object_ops, NULL);
                }
@@ -1895,7 +1961,11 @@ major_start_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason,
                g_assert (major_collector.is_concurrent);
                concurrent_collection_in_progress = TRUE;
 
-               object_ops = &major_collector.major_ops_concurrent_start;
+               if (major_collector.is_parallel)
+                       object_ops = &major_collector.major_ops_conc_par_start;
+               else
+                       object_ops = &major_collector.major_ops_concurrent_start;
+
        } else {
                object_ops = &major_collector.major_ops_serial;
        }
@@ -1931,7 +2001,10 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason
        TV_GETTIME (btv);
 
        if (concurrent_collection_in_progress) {
-               object_ops = &major_collector.major_ops_concurrent_finish;
+               if (major_collector.is_parallel)
+                       object_ops = &major_collector.major_ops_conc_par_finish;
+               else
+                       object_ops = &major_collector.major_ops_concurrent_finish;
 
                major_copy_or_mark_from_roots (gc_thread_gray_queue, NULL, COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT, object_ops);
 
@@ -1975,9 +2048,6 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason
        reset_heap_boundaries ();
        sgen_update_heap_boundaries ((mword)sgen_get_nursery_start (), (mword)sgen_get_nursery_end ());
 
-       if (whole_heap_check_before_collection)
-               sgen_check_whole_heap (FALSE);
-
        /* walk the pin_queue, build up the fragment list of free memory, unmark
         * pinned objects as we go, memzero() the empty fragments so they are ready for the
         * next allocations.
@@ -2257,7 +2327,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                if (concurrent_collection_in_progress)
                        major_update_concurrent_collection ();
 
-               if (collect_nursery (reason, FALSE, NULL, FALSE) && !concurrent_collection_in_progress) {
+               if (collect_nursery (reason, FALSE, NULL) && !concurrent_collection_in_progress) {
                        overflow_generation_to_collect = GENERATION_OLD;
                        overflow_reason = "Minor overflow";
                }
@@ -2267,7 +2337,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
        } else {
                SGEN_ASSERT (0, generation_to_collect == GENERATION_OLD, "We should have handled nursery collections above");
                if (major_collector.is_concurrent && !wait_to_finish) {
-                       collect_nursery ("Concurrent start", FALSE, NULL, FALSE);
+                       collect_nursery ("Concurrent start", FALSE, NULL);
                        major_start_concurrent_collection (reason);
                        oldest_generation_collected = GENERATION_NURSERY;
                } else if (major_do_collection (reason, FALSE, wait_to_finish)) {
@@ -2285,7 +2355,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                 */
 
                if (overflow_generation_to_collect == GENERATION_NURSERY)
-                       collect_nursery (overflow_reason, TRUE, NULL, FALSE);
+                       collect_nursery (overflow_reason, TRUE, NULL);
                else
                        major_do_collection (overflow_reason, TRUE, wait_to_finish);
 
@@ -2746,6 +2816,8 @@ sgen_gc_init (void)
        char **opts, **ptr;
        char *major_collector_opt = NULL;
        char *minor_collector_opt = NULL;
+       char *params_opts = NULL;
+       char *debug_opts = NULL;
        size_t max_heap = 0;
        size_t soft_limit = 0;
        int result;
@@ -2783,8 +2855,12 @@ sgen_gc_init (void)
 
        mono_coop_mutex_init (&sgen_interruption_mutex);
 
-       if ((env = g_getenv (MONO_GC_PARAMS_NAME))) {
-               opts = g_strsplit (env, ",", -1);
+       if ((env = g_getenv (MONO_GC_PARAMS_NAME)) || gc_params_options) {
+               params_opts = g_strdup_printf ("%s,%s", gc_params_options ? gc_params_options : "", env ? env : "");
+       }
+
+       if (params_opts) {
+               opts = g_strsplit (params_opts, ",", -1);
                for (ptr = opts; *ptr; ++ptr) {
                        char *opt = *ptr;
                        if (g_str_has_prefix (opt, "major=")) {
@@ -2835,6 +2911,8 @@ sgen_gc_init (void)
                sgen_marksweep_init (&major_collector);
        } else if (!strcmp (major_collector_opt, "marksweep-conc")) {
                sgen_marksweep_conc_init (&major_collector);
+       } else if (!strcmp (major_collector_opt, "marksweep-conc-par")) {
+               sgen_marksweep_conc_par_init (&major_collector);
        } else {
                sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using `" DEFAULT_MAJOR_NAME "` instead.", "Unknown major collector `%s'.", major_collector_opt);
                goto use_default_major;
@@ -2986,15 +3064,22 @@ sgen_gc_init (void)
        if (minor_collector_opt)
                g_free (minor_collector_opt);
 
+       if (params_opts)
+               g_free (params_opts);
+
        alloc_nursery ();
 
        sgen_pinning_init ();
        sgen_cement_init (cement_enabled);
 
-       if ((env = g_getenv (MONO_GC_DEBUG_NAME))) {
+       if ((env = g_getenv (MONO_GC_DEBUG_NAME)) || gc_debug_options) {
+               debug_opts = g_strdup_printf ("%s,%s", gc_debug_options ? gc_debug_options  : "", env ? env : "");
+       }
+
+       if (debug_opts) {
                gboolean usage_printed = FALSE;
 
-               opts = g_strsplit (env, ",", -1);
+               opts = g_strsplit (debug_opts, ",", -1);
                for (ptr = opts; ptr && *ptr; ptr ++) {
                        char *opt = *ptr;
                        if (!strcmp (opt, ""))
@@ -3125,14 +3210,25 @@ sgen_gc_init (void)
                g_strfreev (opts);
        }
 
+       if (debug_opts)
+               g_free (debug_opts);
+
        if (check_mark_bits_after_major_collection)
                nursery_clear_policy = CLEAR_AT_GC;
 
        if (major_collector.post_param_init)
                major_collector.post_param_init (&major_collector);
 
-       if (major_collector.needs_thread_pool)
-               sgen_workers_init (1);
+       if (major_collector.needs_thread_pool) {
+               int num_workers = 1;
+               if (major_collector.is_parallel) {
+                       /* FIXME Detect the number of physical cores, instead of logical */
+                       num_workers = mono_cpu_count () / 2;
+                       if (num_workers < 1)
+                               num_workers = 1;
+               }
+               sgen_workers_init (num_workers, (SgenWorkerCallback) major_collector.worker_init_cb);
+       }
 
        sgen_memgov_init (max_heap, soft_limit, debug_print_allowance, allowance_ratio, save_target);
 
@@ -3259,7 +3355,7 @@ sgen_check_whole_heap_stw (void)
 {
        sgen_stop_world (0);
        sgen_clear_nursery_fragments ();
-       sgen_check_whole_heap (FALSE);
+       sgen_check_whole_heap (TRUE);
        sgen_restart_world (0);
 }