3 * Stop the world functionality
6 * Paolo Molaro (lupus@ximian.com)
7 * Rodrigo Kumpera (kumpera@gmail.com)
9 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11 * Copyright 2011 Xamarin, Inc.
12 * Copyright (C) 2012 Xamarin Inc
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
/* Short local aliases for the SGen timing macros used throughout this file. */
31 #define TV_DECLARE SGEN_TV_DECLARE
32 #define TV_GETTIME SGEN_TV_GETTIME
33 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations of the unified-suspend helpers defined later in this file. */
35 static void sgen_unified_suspend_restart_world (void);
36 static void sgen_unified_suspend_stop_world (void);
/*
 * Timestamp taken at the end of the most recent world restart (assigned in
 * sgen_client_restart_world) and read by mono_time_since_last_stw.
 */
38 static TV_DECLARE (end_of_last_stw);
/*
 * mono_time_since_last_stw:
 *
 * Samples the current time with TV_GETTIME and returns
 * TV_ELAPSED (end_of_last_stw, current_time), i.e. the time elapsed since
 * end_of_last_stw was last recorded. The unit is whatever SGEN_TV_ELAPSED
 * produces; that macro is not defined in this file.
 */
40 guint64 mono_time_since_last_stw ()
/*
 * A zero end_of_last_stw is special-cased.
 * NOTE(review): the statement guarded by this check is not visible in this
 * excerpt; presumably it is an early return for the case where no
 * stop-the-world has completed yet. Confirm against the full file.
 */
42 if (end_of_last_stw == 0)
45 TV_DECLARE (current_time);
46 TV_GETTIME (current_time);
47 return TV_ELAPSED (end_of_last_stw, current_time);
/*
 * Number of stop-the-world requests so far: incremented once per call to
 * sgen_client_stop_world, before the threads are suspended, and printed in
 * its log line.
 */
50 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 *
 * Rounds an address up to the next multiple of sizeof (gpointer) by adding
 * sizeof (gpointer) - 1 and then clearing the low bits with a mask.
 * The mask is only correct when sizeof (gpointer) is a power of two.
 * NOTE(review): the declaration of p and the return statement are not
 * visible in this excerpt; presumably p is an integer copy of ptr and the
 * rounded value is returned as a pointer. Confirm against the full file.
 */
53 align_pointer (void *ptr)
56 p += sizeof (gpointer) - 1;
57 p &= ~ (sizeof (gpointer) - 1);
/*
 * update_current_thread_stack:
 *
 * Records the stack start and register context of the calling thread in its
 * own SgenThreadInfo, then invokes the thread_suspend_func GC callback if
 * one is registered.
 *
 * NOTE(review): the start parameter is not referenced by any line visible
 * in this excerpt, and the declaration of stack_guard is not visible either;
 * presumably stack_guard is a local whose address approximates the current
 * stack position. Confirm against the full file.
 */
62 update_current_thread_stack (void *start)
65 SgenThreadInfo *info = mono_thread_info_current ();
/* Use the pointer-aligned address of stack_guard as the stack start. */
67 info->client_info.stack_start = align_pointer (&stack_guard);
/* The stack start must be non-NULL and lie in [stack_start_limit, stack_end). */
68 g_assert (info->client_info.stack_start);
69 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/*
 * Capture the current register context when not cross compiling and the
 * architecture provides MonoContext.
 * NOTE(review): the preprocessor lines separating the two alternatives are
 * not visible in this excerpt; presumably the g_error below is the fallback
 * branch. Confirm against the full file.
 */
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
74 g_error ("Sgen STW requires a working mono-context");
/* Pass the runtime data and the captured context to the callback, if any. */
77 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * acquire_gc_locks:
 *
 * Takes the thread-info suspend lock. Counterpart of release_gc_locks.
 * NOTE(review): only this statement is visible in this excerpt; the full
 * function may take additional locks. Confirm against the full file.
 */
82 acquire_gc_locks (void)
85 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 * Releases the thread-info suspend lock. Counterpart of acquire_gc_locks.
 * NOTE(review): only this statement is visible in this excerpt; the full
 * function may release additional locks. Confirm against the full file.
 */
89 release_gc_locks (void)
91 mono_thread_info_suspend_unlock ();
/*
 * Timestamp taken in sgen_client_stop_world just before threads are
 * suspended; the pause length is measured from it when the world restarts.
 */
95 static TV_DECLARE (stop_world_time);
/* Longest pause observed so far, updated on every restart. */
96 static unsigned long max_pause_usec = 0;
/*
 * Accumulated time spent in the suspend and resume handshakes. Both are
 * exposed as counters by mono_sgen_init_stw.
 */
98 static guint64 time_stop_world;
99 static guint64 time_restart_world;
101 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_stop_world:
 * @generation: generation being collected; forwarded to the profiler events
 * and to sgen_memgov_collection_start.
 *
 * Stops the world: emits the pre-stop profiler events, processes toggle
 * refs, records the stack and context of the calling thread, bumps
 * sgen_global_stop_count, suspends the other threads through
 * sgen_unified_suspend_stop_world, adds the handshake duration to
 * time_stop_world, and finally notifies the memory governor and resets the
 * bridge data when bridge processing is needed.
 */
103 sgen_client_stop_world (int generation)
105 TV_DECLARE (end_handshake);
107 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
/*
 * NOTE(review): the lock acquisition implied by the _LOCKED event name is
 * not visible in this excerpt; presumably acquire_gc_locks is called between
 * these two events. Confirm against the full file.
 */
111 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
113 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
114 sgen_process_togglerefs ();
/* The calling thread is never suspended, so record its own stack and context here. */
116 update_current_thread_stack (&generation);
118 sgen_global_stop_count++;
119 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
/* Start of the pause; also the reference point for the pause time computed at restart. */
120 TV_GETTIME (stop_world_time);
122 sgen_unified_suspend_stop_world ();
124 SGEN_LOG (3, "world stopped");
126 mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
/* Account the time the suspend handshake took. */
128 TV_GETTIME (end_handshake);
129 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
131 sgen_memgov_collection_start (generation);
132 if (sgen_need_bridge_processing ())
133 sgen_bridge_reset_data ();
136 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_restart_world:
 * @generation: generation that was collected; forwarded to the profiler events.
 * @stw_time: NOTE(review): not referenced by any line visible in this
 * excerpt; presumably receives the pause time. Confirm against the full file.
 *
 * Restarts the world: flushes pending GC-move profiler data, clears the
 * recorded stack start and context of every thread, resumes the threads
 * through sgen_unified_suspend_restart_world, updates the timing statistics
 * (time_restart_world, max_pause_usec, end_of_last_stw) and emits the
 * post-start profiler events.
 *
 * NOTE(review): the declarations of end_sw and usec, the statement that
 * samples end_sw, and the release of the suspend lock described by the
 * trailing comment are not visible in this excerpt.
 */
138 sgen_client_restart_world (int generation, gint64 *stw_time)
141 TV_DECLARE (start_handshake);
144 /* notify the profiler of the leftovers */
145 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
146 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
147 mono_sgen_gc_event_moves ();
149 mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
/* Forget the stack bounds and contexts captured while the world was stopped. */
151 FOREACH_THREAD (info) {
152 info->client_info.stack_start = NULL;
153 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
156 TV_GETTIME (start_handshake);
158 sgen_unified_suspend_restart_world ();
/*
 * Restart handshake time is measured from start_handshake; the total pause
 * is measured from stop_world_time, which was set when the world was stopped.
 */
161 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
162 usec = TV_ELAPSED (stop_world_time, end_sw);
163 max_pause_usec = MAX (usec, max_pause_usec);
164 end_of_last_stw = end_sw;
166 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
168 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
171 * We must release the thread info suspend lock after doing
172 * the thread handshake. Otherwise, if the GC stops the world
173 * and a thread is in the process of starting up, but has not
174 * yet registered (it's not in the thread_list), it is
175 * possible that the thread does register while the world is
176 * stopped. When restarting the GC will then try to restart
177 * said thread, but since it never got the suspend signal, it
178 * cannot answer the restart signal, so a deadlock results.
182 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
/*
 * mono_sgen_init_stw:
 *
 * Registers the accumulated stop and restart handshake times as GC time
 * counters named World stop and World restart.
 */
188 mono_sgen_init_stw (void)
190 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
191 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
194 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify.
 * @reason: out parameter; callers in this file print it as a number in
 * debug output and sometimes pass NULL.
 *
 * Decides whether a thread takes part in the current stop-the-world.
 * The visible checks, in order, single out threads that:
 *   - have client_info.gc_disabled set,
 *   - have client_info.skip set,
 *   - are the calling thread,
 *   - belong to the sweep thread pool or are SGen worker threads,
 *   - are no longer live.
 * Callers treat a true result as meaning the thread must be suspended and
 * later resumed.
 *
 * NOTE(review): the return statements and the assignments to reason are not
 * visible in this excerpt; presumably each check above returns false with a
 * distinct reason code and the fall-through returns true. Confirm against
 * the full file, including how a NULL reason is handled.
 */
197 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
200 A thread explicitly asked to be skiped because it holds no managed state.
201 This is used by TP and finalizer threads.
202 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
204 if (info->client_info.gc_disabled) {
211 We have detected that this thread is failing/dying, ignore it.
212 FIXME: can't we merge this with thread_is_dying?
214 if (info->client_info.skip) {
221 Suspending the current thread will deadlock us, bad idea.
223 if (info == mono_thread_info_current ()) {
230 We can't suspend the workers that will do all the heavy lifting.
231 FIXME Use some state bit in SgenThreadInfo for this.
233 if (sgen_thread_pool_is_thread_pool_thread (major_collector.get_sweep_pool (), mono_thread_info_get_tid (info)) ||
234 sgen_workers_is_worker_thread (mono_thread_info_get_tid (info))) {
241 The thread has signaled that it started to detach, ignore it.
242 FIXME: can't we merge this with skip
244 if (!mono_thread_info_is_live (info)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 * Suspends every thread that takes part in the current stop-the-world and
 * records, for each of them, the context and stack start the collector will
 * scan. The visible code works in four phases:
 *
 * 1. Initial suspend: begin the global suspend, reset the skip and
 *    suspend_done flags of every thread, and request a suspend for each
 *    thread accepted by sgen_is_thread_in_current_stw. skip is set when
 *    mono_thread_info_begin_suspend reports failure.
 *
 * 2. Mark the calling thread as done and wait for the pending suspend
 *    operations to complete.
 *
 * 3. Retry rounds: every participating thread not yet marked done is
 *    inspected. A thread that is not in a critical location is marked
 *    suspend_done. Otherwise its suspend count is checked to be exactly 1
 *    (g_error if not), it is resumed, and restart_counter is bumped when the
 *    resume succeeded. After waiting for those operations, the code tests
 *    restart_counter == 0, yields on the first round (sleep_duration < 0) or
 *    sleeps for sleep_duration and then grows it by 10, and finally requests
 *    a new suspend for the threads that are still running.
 *
 * 4. Finalization: non-participating threads are asserted not to be marked
 *    done (except the calling thread). For participating threads the saved
 *    suspend context is copied into client_info.ctx and stack_start is set
 *    to the context stack pointer minus REDZONE_SIZE; it is reset to NULL
 *    when that address falls outside [stack_start_limit, stack_end). The
 *    stopped instruction pointer is written to the binary protocol log.
 *
 * NOTE(review): the loop construct around phase 3, the continue and break
 * statements, the branch that reinitializes sleep_duration, and the
 * declarations of reason, suspend_count and stopped_ip are not visible in
 * this excerpt; presumably phase 3 repeats until a round restarts no thread.
 * Confirm against the full file.
 */
254 sgen_unified_suspend_stop_world (void)
/* Negative means no retry round has slept yet. */
256 int sleep_duration = -1;
258 mono_threads_begin_global_suspend ();
259 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
/* Phase 1: reset the per-thread flags and request the initial suspend. */
261 FOREACH_THREAD (info) {
262 info->client_info.skip = FALSE;
263 info->client_info.suspend_done = FALSE;
266 if (!sgen_is_thread_in_current_stw (info, &reason)) {
267 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
/* A failed suspend request marks the thread as skipped for this stop-the-world. */
271 info->client_info.skip = !mono_thread_info_begin_suspend (info);
273 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
/* Phase 2: the calling thread needs no handshake; wait for the others. */
276 mono_thread_info_current ()->client_info.suspend_done = TRUE;
277 mono_threads_wait_pending_operations ();
/* Phase 3: counts the threads resumed in this round. */
280 gint restart_counter = 0;
282 FOREACH_THREAD (info) {
286 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
287 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
292 All threads that reach here are pristine suspended. This means the following:
294 - We haven't accepted the previous suspend as good.
295 - We haven't gave up on it for this STW (it's either bad or asked not to)
297 if (!mono_thread_info_in_critical_location (info)) {
298 info->client_info.suspend_done = TRUE;
300 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
304 suspend_count = mono_thread_info_suspend_count (info);
305 if (!(suspend_count == 1))
306 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
308 info->client_info.skip = !mono_thread_info_begin_resume (info);
309 if (!info->client_info.skip)
310 restart_counter += 1;
312 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
315 mono_threads_wait_pending_operations ();
317 if (restart_counter == 0)
320 if (sleep_duration < 0) {
321 mono_thread_info_yield ();
324 g_usleep (sleep_duration);
325 sleep_duration += 10;
328 FOREACH_THREAD (info) {
330 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
331 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
335 if (!mono_thread_info_is_running (info)) {
336 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
340 info->client_info.skip = !mono_thread_info_begin_suspend (info);
342 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
345 mono_threads_wait_pending_operations ();
348 FOREACH_THREAD (info) {
352 if (!sgen_is_thread_in_current_stw (info, &reason)) {
353 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
355 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
359 g_assert (info->client_info.suspend_done);
361 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
363 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
364 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
366 if (info->client_info.stack_start < info->client_info.stack_start_limit
367 || info->client_info.stack_start >= info->client_info.stack_end) {
369 * Thread context is in unhandled state, most likely because it is
370 * dying. We don't scan it.
371 * FIXME We should probably rework and check the valid flag instead.
373 info->client_info.stack_start = NULL;
/* Log where the thread was stopped in the binary protocol. */
376 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
378 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
380 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
381 mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
/*
 * sgen_unified_suspend_restart_world:
 *
 * Resumes every thread that sgen_is_thread_in_current_stw reports as part
 * of the current stop-the-world, logs the restart to the binary protocol,
 * waits for the pending resume operations and ends the global suspend.
 *
 * NOTE(review): the declaration of reason and the else keyword separating
 * the RESUME and IGNORE branches are not visible in this excerpt.
 */
386 sgen_unified_suspend_restart_world (void)
388 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
389 FOREACH_THREAD (info) {
391 if (sgen_is_thread_in_current_stw (info, &reason)) {
/*
 * NOTE(review): the resume request is issued inside g_assert, so the thread
 * is only resumed if g_assert always evaluates its argument. Confirm this
 * holds for the GLib flavor and build flags in use, or hoist the call out
 * of the assertion.
 */
392 g_assert (mono_thread_info_begin_resume (info));
393 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
395 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
397 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
/* Wait until all resume requests have been processed before leaving the global suspend. */
401 mono_threads_wait_pending_operations ();
402 mono_threads_end_global_suspend ();