[sgen] Disable parallel mode if not enough cores
[mono.git] / mono / sgen / sgen-gc.c
index 8662ffbc1e4bd41188bdd27957ec4f9851008a40..03dfc5cfddd49c9ea8021b4439ec785992753ca2 100644 (file)
@@ -288,6 +288,9 @@ static guint64 time_major_fragment_creation = 0;
 
 static guint64 time_max = 0;
 
+static int sgen_max_pause_time = SGEN_MAX_PAUSE_TIME;
+static float sgen_max_pause_margin = SGEN_MAX_PAUSE_MARGIN;
+
 static SGEN_TV_DECLARE (time_major_conc_collection_start);
 static SGEN_TV_DECLARE (time_major_conc_collection_end);
 
@@ -946,40 +949,51 @@ sgen_update_heap_boundaries (mword low, mword high)
  * in the nursery. The nursery is stored in nursery_section.
  */
 static void
-alloc_nursery (void)
+alloc_nursery (gboolean dynamic, size_t min_size, size_t max_size)
 {
-       GCMemSection *section;
        char *data;
        size_t scan_starts;
-       size_t alloc_size;
 
-       if (nursery_section)
-               return;
-       SGEN_LOG (2, "Allocating nursery size: %zu", (size_t)sgen_nursery_size);
-       /* later we will alloc a larger area for the nursery but only activate
-        * what we need. The rest will be used as expansion if we have too many pinned
-        * objects in the existing nursery.
-        */
-       /* FIXME: handle OOM */
-       section = (GCMemSection *)sgen_alloc_internal (INTERNAL_MEM_SECTION);
+       if (dynamic) {
+               if (!min_size)
+                       min_size = SGEN_DEFAULT_NURSERY_MIN_SIZE;
+               if (!max_size)
+                       max_size = SGEN_DEFAULT_NURSERY_MAX_SIZE;
+       } else {
+               SGEN_ASSERT (0, min_size == max_size, "We can't have nursery ranges for static configuration.");
+               if (!min_size)
+                       min_size = max_size = SGEN_DEFAULT_NURSERY_SIZE;
+       }
 
-       alloc_size = sgen_nursery_size;
+       SGEN_ASSERT (0, !nursery_section, "Why are we allocating the nursery twice?");
+       SGEN_LOG (2, "Allocating nursery size: %zu, initial %zu", max_size, min_size);
 
-       /* If there isn't enough space even for the nursery we should simply abort. */
-       g_assert (sgen_memgov_try_alloc_space (alloc_size, SPACE_NURSERY));
+       /* FIXME: handle OOM */
+       nursery_section = (GCMemSection *)sgen_alloc_internal (INTERNAL_MEM_SECTION);
 
-       data = (char *)major_collector.alloc_heap (alloc_size, alloc_size, sgen_nursery_bits);
-       sgen_update_heap_boundaries ((mword)data, (mword)(data + sgen_nursery_size));
-       SGEN_LOG (4, "Expanding nursery size (%p-%p): %lu, total: %lu", data, data + alloc_size, (unsigned long)sgen_nursery_size, (unsigned long)sgen_gc_get_total_heap_allocation ());
-       section->data = data;
-       section->end_data = data + sgen_nursery_size;
-       scan_starts = (alloc_size + SCAN_START_SIZE - 1) / SCAN_START_SIZE;
-       section->scan_starts = (char **)sgen_alloc_internal_dynamic (sizeof (char*) * scan_starts, INTERNAL_MEM_SCAN_STARTS, TRUE);
-       section->num_scan_start = scan_starts;
+       /* If there isn't enough space even for the nursery we should simply abort. */
+       g_assert (sgen_memgov_try_alloc_space (max_size, SPACE_NURSERY));
 
-       nursery_section = section;
+       /*
+        * The nursery section range represents the memory section where objects
+        * can be found. This is used when iterating for objects in the nursery,
+        * pinning etc. sgen_nursery_max_size represents the total allocated space
+        * for the nursery. sgen_nursery_size represents the current size of the
+        * nursery and it is used for allocation limits, heuristics etc. The
+        * nursery section is not always identical to the current nursery size
+        * because it can contain pinned objects from when the nursery was larger.
+        *
+        * sgen_nursery_size <= nursery_section size <= sgen_nursery_max_size
+        */
+       data = (char *)major_collector.alloc_heap (max_size, max_size);
+       sgen_update_heap_boundaries ((mword)data, (mword)(data + max_size));
+       nursery_section->data = data;
+       nursery_section->end_data = data + min_size;
+       scan_starts = (max_size + SCAN_START_SIZE - 1) / SCAN_START_SIZE;
+       nursery_section->scan_starts = (char **)sgen_alloc_internal_dynamic (sizeof (char*) * scan_starts, INTERNAL_MEM_SCAN_STARTS, TRUE);
+       nursery_section->num_scan_start = scan_starts;
 
-       sgen_nursery_allocator_set_nursery_bounds (data, data + sgen_nursery_size);
+       sgen_nursery_allocator_set_nursery_bounds (data, min_size, max_size);
 }
 
 FILE *
@@ -1332,6 +1346,16 @@ scan_copy_context_for_scan_job (void *worker_data_untyped, ScanJob *job)
 {
        WorkerData *worker_data = (WorkerData *)worker_data_untyped;
 
+       if (!job->ops) {
+               /*
+                * For jobs enqueued on workers we set the ops at job runtime in order
+                * to be able to profit from on the fly optimized object ops or other
+                * object ops changes, like forced concurrent finish.
+                */
+               SGEN_ASSERT (0, sgen_workers_is_worker_thread (mono_native_thread_id_get ()), "We need a context for the scan job");
+               job->ops = sgen_workers_get_idle_func_object_ops ();
+       }
+
        return CONTEXT_FROM_OBJECT_OPERATIONS (job->ops, sgen_workers_get_job_gray_queue (worker_data, job->gc_thread_gray_queue));
 }
 
@@ -1480,7 +1504,6 @@ workers_finish_callback (void)
        /* Mod union preclean jobs */
        for (i = 0; i < split_count; i++) {
                psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean major mod union cardtable", job_major_mod_union_preclean, sizeof (ParallelScanJob));
-               psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
                psj->scan_job.gc_thread_gray_queue = NULL;
                psj->job_index = i;
                sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
@@ -1488,14 +1511,12 @@ workers_finish_callback (void)
 
        for (i = 0; i < split_count; i++) {
                psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean los mod union cardtable", job_los_mod_union_preclean, sizeof (ParallelScanJob));
-               psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
                psj->scan_job.gc_thread_gray_queue = NULL;
                psj->job_index = i;
                sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
        }
 
        sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan last pinned", job_scan_last_pinned, sizeof (ScanJob));
-       sj->ops = sgen_workers_get_idle_func_object_ops ();
        sj->gc_thread_gray_queue = NULL;
        sgen_workers_enqueue_job (&sj->job, TRUE);
 }
@@ -1601,6 +1622,7 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        SgenGrayQueue gc_thread_gray_queue;
        SgenObjectOperations *object_ops_nopar, *object_ops_par = NULL;
        ScanCopyContext ctx;
+       int duration;
        TV_DECLARE (atv);
        TV_DECLARE (btv);
        SGEN_TV_DECLARE (last_minor_collection_start_tv);
@@ -1688,7 +1710,7 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
 
        remset.start_scan_remsets ();
 
-       enqueue_scan_remembered_set_jobs (&gc_thread_gray_queue, is_parallel ? object_ops_par : object_ops_nopar, is_parallel);
+       enqueue_scan_remembered_set_jobs (&gc_thread_gray_queue, is_parallel ? NULL : object_ops_nopar, is_parallel);
 
        /* we don't have complete write barrier yet, so we scan all the old generation sections */
        TV_GETTIME (btv);
@@ -1703,7 +1725,7 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        TV_GETTIME (atv);
        time_minor_scan_pinned += TV_ELAPSED (btv, atv);
 
-       enqueue_scan_from_roots_jobs (&gc_thread_gray_queue, nursery_section->data, nursery_section->end_data, is_parallel ? object_ops_par : object_ops_nopar, is_parallel);
+       enqueue_scan_from_roots_jobs (&gc_thread_gray_queue, nursery_section->data, nursery_section->end_data, is_parallel ? NULL : object_ops_nopar, is_parallel);
 
        if (is_parallel) {
                gray_queue_redirect (&gc_thread_gray_queue);
@@ -1732,6 +1754,14 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_
        if (remset_consistency_checks)
                sgen_check_remset_consistency ();
 
+
+       TV_GETTIME (btv);
+       duration = (int)(TV_ELAPSED (last_minor_collection_start_tv, btv) / 10000);
+       if (duration > (sgen_max_pause_time * sgen_max_pause_margin))
+               sgen_resize_nursery (TRUE);
+       else
+               sgen_resize_nursery (FALSE);
+
        /* walk the pin_queue, build up the fragment list of free memory, unmark
         * pinned objects as we go, memzero() the empty fragments so they are ready for the
         * next allocations.
@@ -1978,13 +2008,13 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
                        ParallelScanJob *psj;
 
                        psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan mod union cardtable", job_scan_major_mod_union_card_table, sizeof (ParallelScanJob));
-                       psj->scan_job.ops = object_ops_par ? object_ops_par : object_ops_nopar;
+                       psj->scan_job.ops = parallel ? NULL : object_ops_nopar;
                        psj->scan_job.gc_thread_gray_queue = gc_thread_gray_queue;
                        psj->job_index = i;
                        sgen_workers_enqueue_job (&psj->scan_job.job, parallel);
 
                        psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan LOS mod union cardtable", job_scan_los_mod_union_card_table, sizeof (ParallelScanJob));
-                       psj->scan_job.ops = object_ops_par ? object_ops_par : object_ops_nopar;
+                       psj->scan_job.ops = parallel ? NULL : object_ops_nopar;
                        psj->scan_job.gc_thread_gray_queue = gc_thread_gray_queue;
                        psj->job_index = i;
                        sgen_workers_enqueue_job (&psj->scan_job.job, parallel);
@@ -2984,6 +3014,8 @@ sgen_gc_init (void)
        gboolean debug_print_allowance = FALSE;
        double allowance_ratio = 0, save_target = 0;
        gboolean cement_enabled = TRUE;
+       gboolean dynamic_nursery = FALSE;
+       size_t min_nursery_size = 0, max_nursery_size = 0;
 
        do {
                result = InterlockedCompareExchange (&gc_initialized, -1, 0);
@@ -3132,10 +3164,8 @@ sgen_gc_init (void)
                                                continue;
                                        }
 
-                                       sgen_nursery_size = val;
-                                       sgen_nursery_bits = 0;
-                                       while (ONE_P << (++ sgen_nursery_bits) != sgen_nursery_size)
-                                               ;
+                                       min_nursery_size = max_nursery_size = val;
+                                       dynamic_nursery = FALSE;
                                } else {
                                        sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using default value.", "`nursery-size` must be an integer.");
                                        continue;
@@ -3179,6 +3209,19 @@ sgen_gc_init (void)
                                continue;
                        }
 
+                       if (!strcmp (opt, "dynamic-nursery")) {
+                               if (sgen_minor_collector.is_split)
+                                       sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using default value.",
+                                                       "dynamic-nursery not supported with split-nursery.");
+                               else
+                                       dynamic_nursery = TRUE;
+                               continue;
+                       }
+                       if (!strcmp (opt, "no-dynamic-nursery")) {
+                               dynamic_nursery = FALSE;
+                               continue;
+                       }
+
                        if (major_collector.handle_gc_param && major_collector.handle_gc_param (opt))
                                continue;
 
@@ -3201,6 +3244,7 @@ sgen_gc_init (void)
                        fprintf (stderr, "  minor=COLLECTOR (where COLLECTOR is `simple' or `split')\n");
                        fprintf (stderr, "  wbarrier=WBARRIER (where WBARRIER is `remset' or `cardtable')\n");
                        fprintf (stderr, "  [no-]cementing\n");
+                       fprintf (stderr, "  [no-]dynamic-nursery\n");
                        if (major_collector.print_gc_param_usage)
                                major_collector.print_gc_param_usage ();
                        if (sgen_minor_collector.print_gc_param_usage)
@@ -3225,7 +3269,7 @@ sgen_gc_init (void)
        if (params_opts)
                g_free (params_opts);
 
-       alloc_nursery ();
+       alloc_nursery (dynamic_nursery, min_nursery_size, max_nursery_size);
 
        sgen_pinning_init ();
        sgen_cement_init (cement_enabled);
@@ -3391,12 +3435,15 @@ sgen_gc_init (void)
        if (major_collector.is_concurrent || sgen_minor_collector.is_parallel) {
                int num_workers = 1;
                if (major_collector.is_parallel || sgen_minor_collector.is_parallel) {
-                       /* FIXME Detect the number of physical cores, instead of logical */
-                       num_workers = mono_cpu_count () / 2;
-                       if (num_workers < 1)
+                       num_workers = mono_cpu_count ();
+                       if (num_workers <= 1) {
                                num_workers = 1;
+                               major_collector.is_parallel = FALSE;
+                               sgen_minor_collector.is_parallel = FALSE;
+                       }
                }
-               sgen_workers_init (num_workers, (SgenWorkerCallback) major_collector.worker_init_cb);
+               if (major_collector.is_concurrent || sgen_minor_collector.is_parallel)
+                       sgen_workers_init (num_workers, (SgenWorkerCallback) major_collector.worker_init_cb);
        }
 
        sgen_memgov_init (max_heap, soft_limit, debug_print_allowance, allowance_ratio, save_target);