[sgen] Disable concurrent queue redirection
diff --git a/mono/sgen/sgen-gc.c b/mono/sgen/sgen-gc.c
index f29b15d6368208075b12363b09007fbb9a7aa634..a45a9a106b03f0ce8035bd87202ac7faacf7c972 100644
@@ -288,11 +288,10 @@ static guint64 time_max = 0;
 static SGEN_TV_DECLARE (time_major_conc_collection_start);
 static SGEN_TV_DECLARE (time_major_conc_collection_end);
 
-static SGEN_TV_DECLARE (last_minor_collection_start_tv);
-static SGEN_TV_DECLARE (last_minor_collection_end_tv);
-
 int gc_debug_level = 0;
 FILE* gc_debug_file;
+static char* gc_params_options;
+static char* gc_debug_options;
 
 /*
 void
@@ -399,9 +398,6 @@ static mword objects_pinned;
  * ######################################################################
  */
 
-/* FIXME: get rid of this */
-typedef SgenGrayQueue GrayQueue;
-
 /* forward declarations */
 static void scan_from_registered_roots (char *addr_start, char *addr_end, int root_type, ScanCopyContext ctx);
 
@@ -428,12 +424,11 @@ sgen_workers_get_job_gray_queue (WorkerData *worker_data, SgenGrayQueue *default
 }
 
 static void
-gray_queue_enable_redirect (SgenGrayQueue *queue)
+gray_queue_redirect (SgenGrayQueue *queue)
 {
        SGEN_ASSERT (0, concurrent_collection_in_progress, "Where are we redirecting the gray queue to, without a concurrent collection?");
 
-       sgen_gray_queue_set_alloc_prepare (queue, sgen_workers_take_from_queue_and_awake);
-       sgen_workers_take_from_queue_and_awake (queue);
+       sgen_workers_take_from_queue (queue);
 }
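Behavioral note, inferred from the hunk above: the alloc-prepare hook presumably handed filled gray-queue sections to the concurrent workers (and woke them) whenever the queue grew a new section; after this change sections are forwarded only where gray_queue_redirect () is called explicitly, just before the workers are started (see the COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT hunk further down), and waking the workers is left to sgen_workers_start_all_workers ().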
 
 void
@@ -516,7 +511,7 @@ gboolean
 sgen_drain_gray_stack (ScanCopyContext ctx)
 {
        ScanObjectFunc scan_func = ctx.ops->scan_object;
-       GrayQueue *queue = ctx.queue;
+       SgenGrayQueue *queue = ctx.queue;
 
        if (ctx.ops->drain_gray_stack)
                return ctx.ops->drain_gray_stack (queue);
@@ -713,7 +708,7 @@ pin_objects_in_nursery (gboolean do_scan_objects, ScanCopyContext ctx)
  * when we can't promote an object because we're out of memory.
  */
 void
-sgen_pin_object (GCObject *object, GrayQueue *queue)
+sgen_pin_object (GCObject *object, SgenGrayQueue *queue)
 {
        SGEN_ASSERT (0, sgen_ptr_in_nursery (object), "We're only supposed to use this for pinning nursery objects when out of memory.");
 
@@ -996,6 +991,24 @@ mono_gc_get_logfile (void)
        return gc_debug_file;
 }
 
+void
+mono_gc_params_set (const char* options)
+{
+       if (gc_params_options)
+               g_free (gc_params_options);
+
+       gc_params_options = g_strdup (options);
+}
+
+void
+mono_gc_debug_set (const char* options)
+{
+       if (gc_debug_options)
+               g_free (gc_debug_options);
+
+       gc_debug_options = g_strdup (options);
+}
+
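These setters let the embedder stage GC options programmatically before sgen_gc_init () runs; the staged strings are merged with the MONO_GC_PARAMS / MONO_GC_DEBUG environment variables when the options are parsed (see the sgen_gc_init hunks below). A minimal usage sketch, with illustrative option values (major=marksweep-conc-par comes from this patch; the other option names are assumed from sgen's usual MONO_GC_PARAMS / MONO_GC_DEBUG vocabulary):

    /* Hypothetical embedder code, called before the runtime initializes the GC. */
    mono_gc_params_set ("major=marksweep-conc-par,nursery-size=4m");
    mono_gc_debug_set ("print-allowance");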
 static void
 scan_finalizer_entries (SgenPointerQueue *fin_queue, ScanCopyContext ctx)
 {
@@ -1175,7 +1188,6 @@ finish_gray_stack (int generation, ScanCopyContext ctx)
 
        g_assert (sgen_gray_object_queue_is_empty (queue));
 
-       sgen_gray_object_queue_trim_free_list (queue);
        binary_protocol_finish_gray_stack_end (sgen_timestamp (), generation);
 }
 
@@ -1311,6 +1323,11 @@ typedef struct {
        SgenGrayQueue *gc_thread_gray_queue;
 } ScanJob;
 
+typedef struct {
+       ScanJob scan_job;
+       int job_index;
+} ParallelScanJob;
+
 static ScanCopyContext
 scan_copy_context_for_scan_job (void *worker_data_untyped, ScanJob *job)
 {
@@ -1377,7 +1394,7 @@ job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJo
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
-       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx);
+       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, 0, 1);
 }
 
 static void
@@ -1387,23 +1404,72 @@ job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
-       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx);
+       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, 0, 1);
 }
 
 static void
-job_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+{
+       ParallelScanJob *job_data = (ParallelScanJob*)job;
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
+
+       g_assert (concurrent_collection_in_progress);
+
+       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
+}
+
+static void
+job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+{
+       ParallelScanJob *job_data = (ParallelScanJob*)job;
+       ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
+
+       g_assert (concurrent_collection_in_progress);
+
+       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
+}
+
+static void
+job_scan_last_pinned (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
        ScanJob *job_data = (ScanJob*)job;
        ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
 
        g_assert (concurrent_collection_in_progress);
 
-       major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx);
-       sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx);
-
        sgen_scan_pin_queue_objects (ctx);
 }
 
+static void
+workers_finish_callback (void)
+{
+       ParallelScanJob *psj;
+       ScanJob *sj;
+       int split_count = sgen_workers_get_job_split_count ();
+       int i;
+       /* Mod union preclean jobs */
+       for (i = 0; i < split_count; i++) {
+               psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean major mod union cardtable", job_major_mod_union_preclean, sizeof (ParallelScanJob));
+               psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
+               psj->scan_job.gc_thread_gray_queue = NULL;
+               psj->job_index = i;
+               sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+       }
+
+       for (i = 0; i < split_count; i++) {
+               psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean los mod union cardtable", job_los_mod_union_preclean, sizeof (ParallelScanJob));
+               psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
+               psj->scan_job.gc_thread_gray_queue = NULL;
+               psj->job_index = i;
+               sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+       }
+
+       sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan last pinned", job_scan_last_pinned, sizeof (ScanJob));
+       sj->ops = sgen_workers_get_idle_func_object_ops ();
+       sj->gc_thread_gray_queue = NULL;
+       sgen_workers_enqueue_job (&sj->job, TRUE);
+}
+
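workers_finish_callback is registered below as the finish callback passed to sgen_workers_start_all_workers () when precleaning is enabled; it splits the mod-union preclean across the workers by giving each ParallelScanJob a job_index out of sgen_workers_get_job_split_count () total jobs. The scan_card_table implementations that consume the (job_index, split_count) pair live in sgen-marksweep.c / sgen-los.c and are not part of this file; a plausible sketch of such a split, assuming a simple interleaved partition (num_blocks and the per-block scan are hypothetical placeholders):

    /* Each job scans a disjoint, interleaved subset of the block indices, so
     * the split_count jobs together cover every block exactly once. */
    int i;
    for (i = job_index; i < num_blocks; i += split_count) {
            /* scan the mod-union cards of block i with ctx here */
    }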
 static void
 init_gray_queue (SgenGrayQueue *gc_thread_gray_queue, gboolean use_workers)
 {
@@ -1467,7 +1533,7 @@ enqueue_scan_from_roots_jobs (SgenGrayQueue *gc_thread_gray_queue, char *heap_st
  * Return whether any objects were late-pinned due to being out of memory.
  */
 static gboolean
-collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
+collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_queue)
 {
        gboolean needs_major;
        size_t max_garbage_amount;
@@ -1475,10 +1541,12 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        mword fragment_total;
        ScanJob *sj;
        SgenGrayQueue gc_thread_gray_queue;
-       SgenObjectOperations *object_ops = &sgen_minor_collector.serial_ops;
+       SgenObjectOperations *object_ops;
        ScanCopyContext ctx;
        TV_DECLARE (atv);
        TV_DECLARE (btv);
+       SGEN_TV_DECLARE (last_minor_collection_start_tv);
+       SGEN_TV_DECLARE (last_minor_collection_end_tv);
 
        if (disable_minor_collections)
                return TRUE;
@@ -1488,6 +1556,11 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
 
        binary_protocol_collection_begin (gc_stats.minor_gc_count, GENERATION_NURSERY);
 
+       if (sgen_concurrent_collection_in_progress ())
+               object_ops = &sgen_minor_collector.serial_ops_with_concurrent_major;
+       else
+               object_ops = &sgen_minor_collector.serial_ops;
+
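The minor collector now chooses its object ops up front: when a concurrent major collection is in progress it uses serial_ops_with_concurrent_major, presumably so nursery copying cooperates with the in-flight concurrent mark, and otherwise it keeps the plain serial_ops. This replaces the old fixed initialization of object_ops at its declaration.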
        if (do_verify_nursery || do_dump_nursery_content)
                sgen_debug_verify_nursery (do_dump_nursery_content);
 
@@ -1528,11 +1601,6 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
 
        gc_stats.minor_gc_count ++;
 
-       if (whole_heap_check_before_collection) {
-               sgen_clear_nursery_fragments ();
-               sgen_check_whole_heap (finish_up_concurrent_mark);
-       }
-
        sgen_process_fin_stage_entries ();
 
        /* pin from pinned handles */
@@ -1551,6 +1619,11 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        if (remset_consistency_checks)
                sgen_check_remset_consistency ();
 
+       if (whole_heap_check_before_collection) {
+               sgen_clear_nursery_fragments ();
+               sgen_check_whole_heap (FALSE);
+       }
+
        TV_GETTIME (atv);
        time_minor_pinning += TV_ELAPSED (btv, atv);
        SGEN_LOG (2, "Finding pinned pointers: %zd in %lld usecs", sgen_get_pinned_count (), (long long)TV_ELAPSED (btv, atv));
@@ -1696,7 +1769,7 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
        sgen_clear_nursery_fragments ();
 
        if (whole_heap_check_before_collection)
-               sgen_check_whole_heap (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT);
+               sgen_check_whole_heap (TRUE);
 
        TV_GETTIME (btv);
        time_major_pre_collection_fragment_clear += TV_ELAPSED (atv, btv);
@@ -1785,13 +1858,6 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
        if (old_next_pin_slot)
                *old_next_pin_slot = sgen_get_pinned_count ();
 
-       /*
-        * We don't actually pin when starting a concurrent collection, so the remset
-        * consistency check won't work.
-        */
-       if (remset_consistency_checks && mode != COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT)
-               sgen_check_remset_consistency ();
-
        TV_GETTIME (btv);
        time_major_pinning += TV_ELAPSED (atv, btv);
        SGEN_LOG (2, "Finding pinned pointers: %zd in %lld usecs", sgen_get_pinned_count (), (long long)TV_ELAPSED (atv, btv));
@@ -1806,7 +1872,12 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
                         * We force the finish of the worker with the new object ops context
                         * which can also do copying. We need to have finished pinning.
                         */
-                       sgen_workers_start_all_workers (object_ops, NULL);
+                       /* FIXME Implement parallel copying and get rid of this ineffective hack */
+                       if (major_collector.is_parallel)
+                               sgen_workers_start_all_workers (&major_collector.major_ops_conc_par_start, NULL);
+                       else
+                               sgen_workers_start_all_workers (object_ops, NULL);
+
                        sgen_workers_join ();
                }
        }
@@ -1833,17 +1904,12 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
         * the roots.
         */
        if (mode == COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT) {
+               gray_queue_redirect (gc_thread_gray_queue);
                if (precleaning_enabled) {
-                       ScanJob *sj;
-                       /* Mod union preclean job */
-                       sj = (ScanJob*)sgen_thread_pool_job_alloc ("preclean mod union cardtable", job_mod_union_preclean, sizeof (ScanJob));
-                       sj->ops = object_ops;
-                       sj->gc_thread_gray_queue = NULL;
-                       sgen_workers_start_all_workers (object_ops, &sj->job);
+                       sgen_workers_start_all_workers (object_ops, workers_finish_callback);
                } else {
                        sgen_workers_start_all_workers (object_ops, NULL);
                }
-               gray_queue_enable_redirect (gc_thread_gray_queue);
        }
 
        if (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT) {
@@ -1894,7 +1960,11 @@ major_start_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason,
                g_assert (major_collector.is_concurrent);
                concurrent_collection_in_progress = TRUE;
 
-               object_ops = &major_collector.major_ops_concurrent_start;
+               if (major_collector.is_parallel)
+                       object_ops = &major_collector.major_ops_conc_par_start;
+               else
+                       object_ops = &major_collector.major_ops_concurrent_start;
+
        } else {
                object_ops = &major_collector.major_ops_serial;
        }
@@ -1930,7 +2000,10 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason
        TV_GETTIME (btv);
 
        if (concurrent_collection_in_progress) {
-               object_ops = &major_collector.major_ops_concurrent_finish;
+               if (major_collector.is_parallel)
+                       object_ops = &major_collector.major_ops_conc_par_finish;
+               else
+                       object_ops = &major_collector.major_ops_concurrent_finish;
 
                major_copy_or_mark_from_roots (gc_thread_gray_queue, NULL, COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT, object_ops);
 
@@ -2173,11 +2246,8 @@ major_finish_concurrent_collection (gboolean forced)
        major_finish_collection (&gc_thread_gray_queue, "finishing", FALSE, -1, forced);
        sgen_gray_object_queue_dispose (&gc_thread_gray_queue);
 
-       if (whole_heap_check_before_collection)
-               sgen_check_whole_heap (FALSE);
-
        TV_GETTIME (total_end);
-       gc_stats.major_gc_time += TV_ELAPSED (total_start, total_end) - TV_ELAPSED (last_minor_collection_start_tv, last_minor_collection_end_tv);
+       gc_stats.major_gc_time += TV_ELAPSED (total_start, total_end);
 
        current_collection_generation = -1;
 }
@@ -2256,7 +2326,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                if (concurrent_collection_in_progress)
                        major_update_concurrent_collection ();
 
-               if (collect_nursery (reason, FALSE, NULL, FALSE) && !concurrent_collection_in_progress) {
+               if (collect_nursery (reason, FALSE, NULL) && !concurrent_collection_in_progress) {
                        overflow_generation_to_collect = GENERATION_OLD;
                        overflow_reason = "Minor overflow";
                }
@@ -2266,7 +2336,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
        } else {
                SGEN_ASSERT (0, generation_to_collect == GENERATION_OLD, "We should have handled nursery collections above");
                if (major_collector.is_concurrent && !wait_to_finish) {
-                       collect_nursery ("Concurrent start", FALSE, NULL, FALSE);
+                       collect_nursery ("Concurrent start", FALSE, NULL);
                        major_start_concurrent_collection (reason);
                        oldest_generation_collected = GENERATION_NURSERY;
                } else if (major_do_collection (reason, FALSE, wait_to_finish)) {
@@ -2284,7 +2354,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                 */
 
                if (overflow_generation_to_collect == GENERATION_NURSERY)
-                       collect_nursery (overflow_reason, TRUE, NULL, FALSE);
+                       collect_nursery (overflow_reason, TRUE, NULL);
                else
                        major_do_collection (overflow_reason, TRUE, wait_to_finish);
 
@@ -2402,6 +2472,13 @@ sgen_object_is_live (GCObject *obj)
  */
 
 static volatile gboolean pending_unqueued_finalizer = FALSE;
+volatile gboolean sgen_suspend_finalizers = FALSE;
+
+void
+sgen_set_suspend_finalizers (void)
+{
+       sgen_suspend_finalizers = TRUE;
+}
 
 int
 sgen_gc_invoke_finalizers (void)
@@ -2457,6 +2534,8 @@ sgen_gc_invoke_finalizers (void)
 gboolean
 sgen_have_pending_finalizers (void)
 {
+       if (sgen_suspend_finalizers)
+               return FALSE;
        return pending_unqueued_finalizer || !sgen_pointer_queue_is_empty (&fin_ready_queue) || !sgen_pointer_queue_is_empty (&critical_fin_queue);
 }
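sgen_set_suspend_finalizers () is a one-way switch: once it is set, sgen_have_pending_finalizers () reports FALSE even while the ready and critical queues still hold entries, so the client stops draining finalizers. A minimal sketch of the observable effect (the call site, e.g. during shutdown, is an assumption, not part of this patch):

    sgen_set_suspend_finalizers ();
    g_assert (!sgen_have_pending_finalizers ());   /* holds regardless of queued finalizers */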
 
@@ -2535,11 +2614,7 @@ sgen_get_current_collection_generation (void)
 void*
 sgen_thread_register (SgenThreadInfo* info, void *stack_bottom_fallback)
 {
-#ifndef HAVE_KW_THREAD
        info->tlab_start = info->tlab_next = info->tlab_temp_end = info->tlab_real_end = NULL;
-#endif
-
-       sgen_init_tlab_info (info);
 
        sgen_client_thread_register (info, stack_bottom_fallback);
 
@@ -2740,6 +2815,8 @@ sgen_gc_init (void)
        char **opts, **ptr;
        char *major_collector_opt = NULL;
        char *minor_collector_opt = NULL;
+       char *params_opts = NULL;
+       char *debug_opts = NULL;
        size_t max_heap = 0;
        size_t soft_limit = 0;
        int result;
@@ -2777,8 +2854,12 @@ sgen_gc_init (void)
 
        mono_coop_mutex_init (&sgen_interruption_mutex);
 
-       if ((env = g_getenv (MONO_GC_PARAMS_NAME))) {
-               opts = g_strsplit (env, ",", -1);
+       if ((env = g_getenv (MONO_GC_PARAMS_NAME)) || gc_params_options) {
+               params_opts = g_strdup_printf ("%s,%s", gc_params_options ? gc_params_options : "", env ? env : "");
+       }
+
+       if (params_opts) {
+               opts = g_strsplit (params_opts, ",", -1);
                for (ptr = opts; *ptr; ++ptr) {
                        char *opt = *ptr;
                        if (g_str_has_prefix (opt, "major=")) {
@@ -2829,6 +2910,8 @@ sgen_gc_init (void)
                sgen_marksweep_init (&major_collector);
        } else if (!strcmp (major_collector_opt, "marksweep-conc")) {
                sgen_marksweep_conc_init (&major_collector);
+       } else if (!strcmp (major_collector_opt, "marksweep-conc-par")) {
+               sgen_marksweep_conc_par_init (&major_collector);
        } else {
                sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using `" DEFAULT_MAJOR_NAME "` instead.", "Unknown major collector `%s'.", major_collector_opt);
                goto use_default_major;
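With the option parsing above, the new parallel concurrent collector is selected with major=marksweep-conc-par, either in the MONO_GC_PARAMS environment variable (assuming MONO_GC_PARAMS_NAME expands to it, as usual) or via the mono_gc_params_set () call added earlier in this patch.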
@@ -2980,15 +3063,22 @@ sgen_gc_init (void)
        if (minor_collector_opt)
                g_free (minor_collector_opt);
 
+       if (params_opts)
+               g_free (params_opts);
+
        alloc_nursery ();
 
        sgen_pinning_init ();
        sgen_cement_init (cement_enabled);
 
-       if ((env = g_getenv (MONO_GC_DEBUG_NAME))) {
+       if ((env = g_getenv (MONO_GC_DEBUG_NAME)) || gc_debug_options) {
+               debug_opts = g_strdup_printf ("%s,%s", gc_debug_options ? gc_debug_options  : "", env ? env : "");
+       }
+
+       if (debug_opts) {
                gboolean usage_printed = FALSE;
 
-               opts = g_strsplit (env, ",", -1);
+               opts = g_strsplit (debug_opts, ",", -1);
                for (ptr = opts; ptr && *ptr; ptr ++) {
                        char *opt = *ptr;
                        if (!strcmp (opt, ""))
@@ -3119,14 +3209,25 @@ sgen_gc_init (void)
                g_strfreev (opts);
        }
 
+       if (debug_opts)
+               g_free (debug_opts);
+
        if (check_mark_bits_after_major_collection)
                nursery_clear_policy = CLEAR_AT_GC;
 
        if (major_collector.post_param_init)
                major_collector.post_param_init (&major_collector);
 
-       if (major_collector.needs_thread_pool)
-               sgen_workers_init (1);
+       if (major_collector.needs_thread_pool) {
+               int num_workers = 1;
+               if (major_collector.is_parallel) {
+                       /* FIXME Detect the number of physical cores, instead of logical */
+                       num_workers = mono_cpu_count () / 2;
+                       if (num_workers < 1)
+                               num_workers = 1;
+               }
+               sgen_workers_init (num_workers, (SgenWorkerCallback) major_collector.worker_init_cb);
+       }
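For example, if mono_cpu_count () reports 8 logical CPUs (4 physical cores with SMT, which is what the FIXME above is about), the parallel collector gets 8 / 2 = 4 workers; a count of 1 would compute 0 and is clamped back to 1. Non-parallel collectors that need the thread pool keep the previous single worker.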
 
        sgen_memgov_init (max_heap, soft_limit, debug_print_allowance, allowance_ratio, save_target);
 
@@ -3253,7 +3354,7 @@ sgen_check_whole_heap_stw (void)
 {
        sgen_stop_world (0);
        sgen_clear_nursery_fragments ();
-       sgen_check_whole_heap (FALSE);
+       sgen_check_whole_heap (TRUE);
        sgen_restart_world (0);
 }