[sgen] Always use one concurrent worker
[mono.git] / mono / sgen / sgen-workers.c
index f6c8859bd688381d43b21ea995076d9c0913dbe2..cc1930bcc2f443a65b4c536208051e89a119e94f 100644 (file)
@@ -20,6 +20,7 @@
 #include "mono/sgen/sgen-client.h"
 
 static int workers_num;
+static int active_workers_num;
 static volatile gboolean forced_stop;
 static WorkerData *workers_data;
 static SgenWorkerCallback worker_init_cb;
@@ -31,6 +32,8 @@ static SgenWorkerCallback worker_init_cb;
  * gracefully finished, so it can restart them.
  */
 static mono_mutex_t finished_lock;
+static volatile gboolean workers_finished;
+static int worker_awakenings;
 
 static SgenSectionGrayQueue workers_distribute_gray_queue;
 static gboolean workers_distribute_gray_queue_inited;
@@ -60,6 +63,7 @@ enum {
 typedef gint32 State;
 
 static SgenObjectOperations * volatile idle_func_object_ops;
+static SgenObjectOperations *idle_func_object_ops_par, *idle_func_object_ops_nopar;
 /*
  * finished_callback is called only when the workers finish work normally (when they
  * are not forced to finish). The callback is used to enqueue preclean jobs.
@@ -94,7 +98,16 @@ sgen_workers_ensure_awake (void)
        int i;
        gboolean need_signal = FALSE;
 
-       for (i = 0; i < workers_num; i++) {
+       /*
+        * All workers are awakened; make sure we reset the parallel context.
+        * We only call this function when starting the workers, so nobody is running,
+        * or when the last worker is enqueuing preclean work. In both cases no worker
+        * can still be scanning with the nopar context, which makes the switch safe.
+        */
+       idle_func_object_ops = (active_workers_num > 1) ? idle_func_object_ops_par : idle_func_object_ops_nopar;
+       workers_finished = FALSE;
+
+       for (i = 0; i < active_workers_num; i++) {
                State old_state;
                gboolean did_set_state;
 
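A minimal standalone sketch of the context selection done on wake-up above, using hypothetical stand-in types (ObjectOps, pick_idle_ops) rather than the real SgenObjectOperations machinery:

    #include <stdio.h>

    /* Stand-ins for the par/nopar SgenObjectOperations tables (assumed names). */
    typedef struct { const char *name; } ObjectOps;

    static ObjectOps ops_par   = { "parallel scan/copy" };
    static ObjectOps ops_nopar = { "non-parallel scan/copy" };

    /* Mirrors the line in sgen_workers_ensure_awake: with more than one active
     * worker we scan with the parallel context, otherwise the cheaper
     * non-parallel one is enough. */
    static ObjectOps *
    pick_idle_ops (int active_workers_num)
    {
        return (active_workers_num > 1) ? &ops_par : &ops_nopar;
    }

    int
    main (void)
    {
        printf ("1 worker  -> %s\n", pick_idle_ops (1)->name);
        printf ("4 workers -> %s\n", pick_idle_ops (4)->name);
        return 0;
    }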
@@ -125,18 +138,20 @@ worker_try_finish (WorkerData *data)
 
        mono_os_mutex_lock (&finished_lock);
 
-       for (i = 0; i < workers_num; i++) {
+       for (i = 0; i < active_workers_num; i++) {
                if (state_is_working_or_enqueued (workers_data [i].state))
                        working++;
        }
 
        if (working == 1) {
                SgenWorkersFinishCallback callback = finish_callback;
+               SGEN_ASSERT (0, idle_func_object_ops == idle_func_object_ops_nopar, "Why are we finishing with parallel context");
                /* We are the last one left. Enqueue preclean job if we have one and awake everybody */
                SGEN_ASSERT (0, data->state != STATE_NOT_WORKING, "How did we get from doing idle work to NOT WORKING without setting it ourselves?");
                if (callback) {
                        finish_callback = NULL;
                        callback ();
+                       worker_awakenings = 0;
                        /* Make sure each worker has a chance of seeing the enqueued jobs */
                        sgen_workers_ensure_awake ();
                        SGEN_ASSERT (0, data->state == STATE_WORK_ENQUEUED, "Why did we fail to set our own state to ENQUEUED");
@@ -153,6 +168,15 @@ worker_try_finish (WorkerData *data)
                SGEN_ASSERT (0, old_state == STATE_WORKING, "What other possibility is there?");
        } while (!set_state (data, old_state, STATE_NOT_WORKING));
 
+       /*
+        * If we are the second to last to finish, we switch the scan context to the
+        * non-parallel version so the last worker finishes faster. This keeps performance
+        * on par with non-parallel mode even if we failed to distribute the work properly.
+        */
+       if (working == 2)
+               idle_func_object_ops = idle_func_object_ops_nopar;
+
+       workers_finished = TRUE;
        mono_os_mutex_unlock (&finished_lock);
 
        binary_protocol_worker_finish (sgen_timestamp (), forced_stop);
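A simplified, single-threaded sketch of the finish protocol above (hypothetical names and state encoding; the real code does this under finished_lock with the per-worker state machine): the second-to-last worker downgrades the shared scan context, and the last one may enqueue preclean work and wake everybody again.

    #include <stdio.h>

    #define NUM_WORKERS 4

    /* 1 = still working or has work enqueued, 0 = not working (assumed encoding). */
    static int worker_state [NUM_WORKERS] = { 0, 1, 0, 1 };
    static int use_parallel_context = 1;

    static void
    try_finish (int me)
    {
        int i, working = 0;

        for (i = 0; i < NUM_WORKERS; i++) {
            if (worker_state [i])
                working++;
        }

        if (working == 1) {
            /* We are the last one left: this is where preclean jobs would be
             * enqueued and all workers woken up again. */
            printf ("worker %d is last, may enqueue preclean work\n", me);
        }

        worker_state [me] = 0;

        if (working == 2) {
            /* Second to last: let the last worker finish with the cheaper
             * non-parallel context. */
            use_parallel_context = 0;
            printf ("worker %d downgraded context to non-parallel\n", me);
        }
    }

    int
    main (void)
    {
        try_finish (1);   /* two workers still working -> downgrade */
        try_finish (3);   /* last one left */
        return 0;
    }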
@@ -176,20 +200,6 @@ sgen_workers_enqueue_job (SgenThreadPoolJob *job, gboolean enqueue)
        sgen_thread_pool_job_enqueue (job);
 }
 
-void
-sgen_workers_wait_for_jobs_finished (void)
-{
-       sgen_thread_pool_wait_for_all_jobs ();
-       /*
-        * If the idle task was never triggered or it finished before the last job did and
-        * then didn't get triggered again, we might end up in the situation of having
-        * something in the gray queue yet the idle task not working.  The easiest way to
-        * make sure this doesn't stay that way is to just trigger it again after all jobs
-        * have finished.
-        */
-       sgen_workers_ensure_awake ();
-}
-
 static gboolean
 workers_get_work (WorkerData *data)
 {
@@ -202,7 +212,7 @@ workers_get_work (WorkerData *data)
        if (major->is_concurrent) {
                GrayQueueSection *section = sgen_section_gray_queue_dequeue (&workers_distribute_gray_queue);
                if (section) {
-                       sgen_gray_object_enqueue_section (&data->private_gray_queue, section);
+                       sgen_gray_object_enqueue_section (&data->private_gray_queue, section, major->is_parallel);
                        return TRUE;
                }
        }
@@ -212,6 +222,37 @@ workers_get_work (WorkerData *data)
        return FALSE;
 }
 
+static gboolean
+workers_steal_work (WorkerData *data)
+{
+       SgenMajorCollector *major = sgen_get_major_collector ();
+       GrayQueueSection *section = NULL;
+       int i, current_worker;
+
+       if (!major->is_parallel)
+               return FALSE;
+
+       /* If we're parallel, steal from other workers' private gray queues */
+       g_assert (sgen_gray_object_queue_is_empty (&data->private_gray_queue));
+
+       current_worker = (int) (data - workers_data);
+
+       for (i = 1; i < active_workers_num && !section; i++) {
+               int steal_worker = (current_worker + i) % active_workers_num;
+               if (state_is_working_or_enqueued (workers_data [steal_worker].state))
+                       section = sgen_gray_object_steal_section (&workers_data [steal_worker].private_gray_queue);
+       }
+
+       if (section) {
+               sgen_gray_object_enqueue_section (&data->private_gray_queue, section, TRUE);
+               return TRUE;
+       }
+
+       /* Nobody to steal from */
+       g_assert (sgen_gray_object_queue_is_empty (&data->private_gray_queue));
+       return FALSE;
+}
+
 static void
 concurrent_enqueue_check (GCObject *obj)
 {
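The stealing loop above visits victims round-robin, starting from the worker right after the thief, so concurrent thieves tend to probe different victims first. A standalone sketch of just that victim order (assumed worker numbering only, no real gray-queue types):

    #include <stdio.h>

    /* Print the order in which `current_worker` would try to steal, mirroring
     * the (current_worker + i) % active_workers_num walk above. */
    static void
    print_steal_order (int current_worker, int active_workers_num)
    {
        int i;

        printf ("worker %d steals from:", current_worker);
        for (i = 1; i < active_workers_num; i++)
            printf (" %d", (current_worker + i) % active_workers_num);
        printf ("\n");
    }

    int
    main (void)
    {
        print_steal_order (0, 4);   /* 1 2 3 */
        print_steal_order (2, 4);   /* 3 0 1 */
        return 0;
    }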
@@ -257,6 +298,15 @@ continue_idle_func (void *data_untyped)
        }
 }
 
+static gboolean
+should_work_func (void *data_untyped)
+{
+       WorkerData *data = (WorkerData*)data_untyped;
+       int current_worker = (int) (data - workers_data);
+
+       return current_worker < active_workers_num;
+}
+
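should_work_func derives a worker's index by pointer subtraction from the base of the workers_data array, and only the first active_workers_num pool threads are allowed to participate. A small self-contained illustration (stand-in WorkerData, not the real struct):

    #include <stdio.h>

    typedef struct { int dummy; } WorkerData;   /* stand-in for the real WorkerData */

    static WorkerData workers_data [8];
    static int active_workers_num = 2;

    static int
    should_work (WorkerData *data)
    {
        /* Element index of `data` inside workers_data[]. */
        int current_worker = (int) (data - workers_data);
        return current_worker < active_workers_num;
    }

    int
    main (void)
    {
        printf ("worker 1: %d\n", should_work (&workers_data [1]));  /* 1: active */
        printf ("worker 5: %d\n", should_work (&workers_data [5]));  /* 0: parked */
        return 0;
    }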
 static void
 marker_idle_func (void *data_untyped)
 {
@@ -270,12 +320,18 @@ marker_idle_func (void *data_untyped)
                SGEN_ASSERT (0, data->state != STATE_NOT_WORKING, "How did we get from WORK ENQUEUED to NOT WORKING?");
        }
 
-       if (!forced_stop && (!sgen_gray_object_queue_is_empty (&data->private_gray_queue) || workers_get_work (data))) {
+       if (!forced_stop && (!sgen_gray_object_queue_is_empty (&data->private_gray_queue) || workers_get_work (data) || workers_steal_work (data))) {
                ScanCopyContext ctx = CONTEXT_FROM_OBJECT_OPERATIONS (idle_func_object_ops, &data->private_gray_queue);
 
                SGEN_ASSERT (0, !sgen_gray_object_queue_is_empty (&data->private_gray_queue), "How is our gray queue empty if we just got work?");
 
                sgen_drain_gray_stack (ctx);
+
+               if (data->private_gray_queue.num_sections > 16 && workers_finished && worker_awakenings < active_workers_num) {
+                       /* We bound the number of worker awakenings just to be sure */
+                       worker_awakenings++;
+                       sgen_workers_ensure_awake ();
+               }
        } else {
                worker_try_finish (data);
        }
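The new branch above lets a worker that discovers a large backlog (more than 16 gray-queue sections) restart its idle peers, but only a bounded number of times per cycle so it cannot cause a wake-up storm. A sketch of just that condition (SECTION_WAKEUP_THRESHOLD is an assumed name for the literal 16; the real code checks the live gray queue and volatile flags):

    #include <stdio.h>

    #define SECTION_WAKEUP_THRESHOLD 16

    static int active_workers_num = 4;
    static int workers_finished = 1;      /* the other workers already ran out of work */
    static int worker_awakenings = 0;

    /* Returns 1 when the other workers should be woken up again. */
    static int
    should_reawaken (int num_sections)
    {
        if (num_sections > SECTION_WAKEUP_THRESHOLD
                && workers_finished
                && worker_awakenings < active_workers_num) {
            worker_awakenings++;          /* bound the number of wake-ups */
            return 1;
        }
        return 0;
    }

    int
    main (void)
    {
        int i;

        /* With a big backlog we wake the others at most active_workers_num times. */
        for (i = 0; i < 6; i++)
            printf ("attempt %d: wake=%d\n", i, should_reawaken (64));
        return 0;
    }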
@@ -310,7 +366,7 @@ sgen_workers_init (int num_workers, SgenWorkerCallback callback)
        void **workers_data_ptrs = (void **)alloca(num_workers * sizeof(void *));
 
        if (!sgen_get_major_collector ()->is_concurrent) {
-               sgen_thread_pool_init (num_workers, thread_pool_init_func, NULL, NULL, NULL);
+               sgen_thread_pool_init (num_workers, thread_pool_init_func, NULL, NULL, NULL, NULL);
                return;
        }
 
@@ -318,6 +374,7 @@ sgen_workers_init (int num_workers, SgenWorkerCallback callback)
        //g_print ("initing %d workers\n", num_workers);
 
        workers_num = num_workers;
+       active_workers_num = num_workers;
 
        workers_data = (WorkerData *)sgen_alloc_internal_dynamic (sizeof (WorkerData) * num_workers, INTERNAL_MEM_WORKER_DATA, TRUE);
        memset (workers_data, 0, sizeof (WorkerData) * num_workers);
@@ -329,7 +386,7 @@ sgen_workers_init (int num_workers, SgenWorkerCallback callback)
 
        worker_init_cb = callback;
 
-       sgen_thread_pool_init (num_workers, thread_pool_init_func, marker_idle_func, continue_idle_func, workers_data_ptrs);
+       sgen_thread_pool_init (num_workers, thread_pool_init_func, marker_idle_func, continue_idle_func, should_work_func, workers_data_ptrs);
 
        mono_counters_register ("# workers finished", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_workers_num_finished);
 }
@@ -347,11 +404,24 @@ sgen_workers_stop_all_workers (void)
 }
 
 void
-sgen_workers_start_all_workers (SgenObjectOperations *object_ops, SgenWorkersFinishCallback callback)
+sgen_workers_set_num_active_workers (int num_workers)
 {
+       if (num_workers) {
+               SGEN_ASSERT (0, num_workers <= workers_num, "We can't start more workers than we initialized");
+               active_workers_num = num_workers;
+       } else {
+               active_workers_num = workers_num;
+       }
+}
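The setter lets a collection cycle shrink the active pool without re-initializing the thread pool; passing 0 restores the full pool. A self-contained sketch of that semantics (plain statics standing in for the real ones), with a hypothetical call forcing a single active worker, which is presumably how the "one concurrent worker" policy in the commit title gets applied:

    #include <stdio.h>

    static int workers_num = 4;          /* threads created at init time */
    static int active_workers_num = 4;   /* threads allowed to work this cycle */

    /* Mirrors sgen_workers_set_num_active_workers: 0 means "use them all". */
    static void
    set_num_active_workers (int num_workers)
    {
        if (num_workers)
            active_workers_num = num_workers;
        else
            active_workers_num = workers_num;
    }

    int
    main (void)
    {
        set_num_active_workers (1);      /* e.g. force a single concurrent worker */
        printf ("active: %d/%d\n", active_workers_num, workers_num);
        set_num_active_workers (0);      /* back to the full pool */
        printf ("active: %d/%d\n", active_workers_num, workers_num);
        return 0;
    }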
+
+void
+sgen_workers_start_all_workers (SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par, SgenWorkersFinishCallback callback)
+{
+       idle_func_object_ops_par = object_ops_par;
+       idle_func_object_ops_nopar = object_ops_nopar;
        forced_stop = FALSE;
-       idle_func_object_ops = object_ops;
        finish_callback = callback;
+       worker_awakenings = 0;
        mono_memory_write_barrier ();
 
        sgen_workers_ensure_awake ();
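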
@@ -369,7 +439,7 @@ sgen_workers_join (void)
        /* At this point all the workers have stopped. */
 
        SGEN_ASSERT (0, sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue), "Why is there still work left to do?");
-       for (i = 0; i < workers_num; ++i)
+       for (i = 0; i < active_workers_num; ++i)
                SGEN_ASSERT (0, sgen_gray_object_queue_is_empty (&workers_data [i].private_gray_queue), "Why is there still work left to do?");
 }
 
@@ -387,7 +457,7 @@ sgen_workers_have_idle_work (void)
        if (!sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue))
                return TRUE;
 
-       for (i = 0; i < workers_num; ++i) {
+       for (i = 0; i < active_workers_num; ++i) {
                if (!sgen_gray_object_queue_is_empty (&workers_data [i].private_gray_queue))
                        return TRUE;
        }
@@ -400,7 +470,7 @@ sgen_workers_all_done (void)
 {
        int i;
 
-       for (i = 0; i < workers_num; i++) {
+       for (i = 0; i < active_workers_num; i++) {
                if (state_is_working_or_enqueued (workers_data [i].state))
                        return FALSE;
        }
@@ -421,28 +491,24 @@ sgen_workers_assert_gray_queue_is_empty (void)
 }
 
 void
-sgen_workers_take_from_queue_and_awake (SgenGrayQueue *queue)
+sgen_workers_take_from_queue (SgenGrayQueue *queue)
 {
-       gboolean wake = FALSE;
+       sgen_gray_object_spread (queue, sgen_workers_get_job_split_count ());
 
        for (;;) {
                GrayQueueSection *section = sgen_gray_object_dequeue_section (queue);
                if (!section)
                        break;
                sgen_section_gray_queue_enqueue (&workers_distribute_gray_queue, section);
-               wake = TRUE;
        }
 
-       if (wake) {
-               SGEN_ASSERT (0, sgen_concurrent_collection_in_progress (), "Why is there work to take when there's no concurrent collection in progress?");
-               sgen_workers_ensure_awake ();
-       }
+       SGEN_ASSERT (0, !sgen_workers_are_working (), "We should fully populate the distribute gray queue before we start the workers");
 }
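sgen_workers_take_from_queue now only distributes work; the wake-up happens separately when the workers are started. The spread call first breaks the queue into enough sections that every active worker can dequeue one. A rough, self-contained sketch of that splitting idea (plain counts instead of real GrayQueueSection lists, so only the intended distribution is shown):

    #include <stdio.h>

    /* Split `total` queued objects into `split_count` sections of roughly equal
     * size, the way the distribute queue should end up populated before the
     * workers are woken (simplified model, not the real section layout). */
    static void
    spread (int total, int split_count)
    {
        int i;

        for (i = 0; i < split_count; i++) {
            int size = total / split_count + (i < total % split_count ? 1 : 0);
            if (size)
                printf ("section %d: %d objects\n", i, size);
        }
    }

    int
    main (void)
    {
        /* 4 active workers -> split count 16 (see sgen_workers_get_job_split_count) */
        spread (1000, 16);
        return 0;
    }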
 
 SgenObjectOperations*
 sgen_workers_get_idle_func_object_ops (void)
 {
-       return idle_func_object_ops;
+       return (idle_func_object_ops_par) ? idle_func_object_ops_par : idle_func_object_ops_nopar;
 }
 
 /*
@@ -454,7 +520,7 @@ sgen_workers_get_idle_func_object_ops (void)
 int
 sgen_workers_get_job_split_count (void)
 {
-       return (workers_num > 1) ? workers_num * 4 : 1;
+       return (active_workers_num > 1) ? active_workers_num * 4 : 1;
 }
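The split count scales with the number of active workers so that jobs are divided into enough independent pieces to keep everyone busy; with a single worker, splitting would only add overhead. A worked example of the heuristic:

    #include <stdio.h>

    static int
    job_split_count (int active_workers_num)
    {
        return (active_workers_num > 1) ? active_workers_num * 4 : 1;
    }

    int
    main (void)
    {
        printf ("%d\n", job_split_count (1));   /* 1  */
        printf ("%d\n", job_split_count (2));   /* 8  */
        printf ("%d\n", job_split_count (4));   /* 16 */
        return 0;
    }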
 
 void