2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
/* Local shorthands for the SGen timing macros. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Unified-suspend entry points; both are defined further down in this file. */
static void sgen_unified_suspend_stop_world (void);
static void sgen_unified_suspend_restart_world (void);

/*
 * Timestamp taken when the world was last restarted. It is still 0 (static
 * storage) until sgen_client_restart_world () has stored a value.
 */
static TV_DECLARE (end_of_last_stw);
38 guint64 mono_time_since_last_stw ()
40 if (end_of_last_stw == 0)
43 TV_DECLARE (current_time);
44 TV_GETTIME (current_time);
45 return TV_ELAPSED (end_of_last_stw, current_time);
48 unsigned int sgen_global_stop_count = 0;
51 align_pointer (void *ptr)
54 p += sizeof (gpointer) - 1;
55 p &= ~ (sizeof (gpointer) - 1);
60 update_current_thread_stack (void *start)
63 SgenThreadInfo *info = mono_thread_info_current ();
65 info->client_info.stack_start = align_pointer (&stack_guard);
66 g_assert (info->client_info.stack_start);
67 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
69 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
70 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
72 g_error ("Sgen STW requires a working mono-context");
75 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
76 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
80 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
84 if (!mono_thread_internal_current ())
85 /* Happens during thread attach */
90 if (!sgen_has_critical_method ())
94 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
95 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
96 * to register the jit info for all GC critical methods after they are JITted/loaded.
98 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
102 return sgen_is_critical_method (mono_jit_info_get_method (ji));
106 restart_threads_until_none_in_managed_allocator (void)
108 int num_threads_died = 0;
109 int sleep_duration = -1;
112 int restart_count = 0, restarted_count = 0;
113 /* restart all threads that stopped in the
115 FOREACH_THREAD (info) {
117 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
119 if (mono_thread_info_is_live (info) &&
120 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
121 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
122 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
123 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
124 result = sgen_resume_thread (info);
128 info->client_info.skip = 1;
131 /* we set the stopped_ip to
132 NULL for threads which
133 we're not restarting so
134 that we can easily identify
136 info->client_info.stopped_ip = NULL;
137 info->client_info.stopped_domain = NULL;
138 info->client_info.suspend_done = TRUE;
141 /* if no threads were restarted, we're done */
142 if (restart_count == 0)
145 /* wait for the threads to signal their restart */
146 sgen_wait_for_suspend_ack (restart_count);
148 if (sleep_duration < 0) {
149 mono_thread_info_yield ();
152 g_usleep (sleep_duration);
153 sleep_duration += 10;
156 /* stop them again */
157 FOREACH_THREAD (info) {
159 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
161 result = sgen_suspend_thread (info);
166 info->client_info.skip = 1;
169 /* some threads might have died */
170 num_threads_died += restart_count - restarted_count;
171 /* wait for the threads to signal their suspension
173 sgen_wait_for_suspend_ack (restarted_count);
176 return num_threads_died;
180 acquire_gc_locks (void)
183 mono_thread_info_suspend_lock ();
187 release_gc_locks (void)
189 mono_thread_info_suspend_unlock ();
193 static TV_DECLARE (stop_world_time);
194 static unsigned long max_pause_usec = 0;
196 static guint64 time_stop_world;
197 static guint64 time_restart_world;
199 /* LOCKING: assumes the GC lock is held */
201 sgen_client_stop_world (int generation)
203 TV_DECLARE (end_handshake);
205 /* notify the profiler of the leftovers */
206 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
207 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
208 mono_sgen_gc_event_moves ();
212 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
213 sgen_process_togglerefs ();
215 update_current_thread_stack (&generation);
217 sgen_global_stop_count++;
218 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
219 TV_GETTIME (stop_world_time);
221 if (mono_thread_info_unified_management_enabled ()) {
222 sgen_unified_suspend_stop_world ();
225 count = sgen_thread_handshake (TRUE);
226 dead = restart_threads_until_none_in_managed_allocator ();
228 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
231 SGEN_LOG (3, "world stopped");
233 TV_GETTIME (end_handshake);
234 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
236 sgen_memgov_collection_start (generation);
237 if (sgen_need_bridge_processing ())
238 sgen_bridge_reset_data ();
241 /* LOCKING: assumes the GC lock is held */
243 sgen_client_restart_world (int generation, gint64 *stw_time)
246 TV_DECLARE (start_handshake);
249 /* notify the profiler of the leftovers */
250 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
251 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
252 mono_sgen_gc_event_moves ();
254 FOREACH_THREAD (info) {
255 info->client_info.stack_start = NULL;
256 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
259 TV_GETTIME (start_handshake);
261 if (mono_thread_info_unified_management_enabled ())
262 sgen_unified_suspend_restart_world ();
264 sgen_thread_handshake (FALSE);
267 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
268 usec = TV_ELAPSED (stop_world_time, end_sw);
269 max_pause_usec = MAX (usec, max_pause_usec);
270 end_of_last_stw = end_sw;
272 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
275 * We must release the thread info suspend lock after doing
276 * the thread handshake. Otherwise, if the GC stops the world
277 * and a thread is in the process of starting up, but has not
278 * yet registered (it's not in the thread_list), it is
279 * possible that the thread does register while the world is
280 * stopped. When restarting the GC will then try to restart
281 * said thread, but since it never got the suspend signal, it
282 * cannot answer the restart signal, so a deadlock results.
290 mono_sgen_init_stw (void)
292 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
293 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
296 /* Unified suspend code */
299 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
302 A thread explicitly asked to be skiped because it holds no managed state.
303 This is used by TP and finalizer threads.
304 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
306 if (info->client_info.gc_disabled) {
313 We have detected that this thread is failing/dying, ignore it.
314 FIXME: can't we merge this with thread_is_dying?
316 if (info->client_info.skip) {
323 Suspending the current thread will deadlock us, bad idea.
325 if (info == mono_thread_info_current ()) {
332 We can't suspend the workers that will do all the heavy lifting.
333 FIXME Use some state bit in SgenThreadInfo for this.
335 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
342 The thread has signaled that it started to detach, ignore it.
343 FIXME: can't we merge this with skip
345 if (!mono_thread_info_is_live (info)) {
355 sgen_unified_suspend_stop_world (void)
358 int sleep_duration = -1;
360 mono_threads_begin_global_suspend ();
361 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
363 FOREACH_THREAD (info) {
365 info->client_info.skip = FALSE;
366 info->client_info.suspend_done = FALSE;
367 if (sgen_is_thread_in_current_stw (info, &reason)) {
368 info->client_info.skip = !mono_thread_info_begin_suspend (info);
369 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
371 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
375 mono_thread_info_current ()->client_info.suspend_done = TRUE;
376 mono_threads_wait_pending_operations ();
380 FOREACH_THREAD (info) {
382 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
383 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
388 All threads that reach here are pristine suspended. This means the following:
390 - We haven't accepted the previous suspend as good.
391 - We haven't gave up on it for this STW (it's either bad or asked not to)
393 if (mono_thread_info_in_critical_location (info)) {
395 gint suspend_count = mono_thread_info_suspend_count (info);
396 if (!(suspend_count == 1))
397 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
398 res = mono_thread_info_begin_resume (info);
399 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
403 info->client_info.skip = TRUE;
405 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
406 g_assert (!info->client_info.in_critical_region);
407 info->client_info.suspend_done = TRUE;
411 if (restart_counter == 0)
413 mono_threads_wait_pending_operations ();
415 if (sleep_duration < 0) {
416 mono_thread_info_yield ();
419 g_usleep (sleep_duration);
420 sleep_duration += 10;
423 FOREACH_THREAD (info) {
425 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
426 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
430 if (mono_thread_info_is_running (info)) {
431 gboolean res = mono_thread_info_begin_suspend (info);
432 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
434 info->client_info.skip = TRUE;
438 mono_threads_wait_pending_operations ();
441 FOREACH_THREAD (info) {
443 if (sgen_is_thread_in_current_stw (info, &reason)) {
444 MonoThreadUnwindState *state;
446 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
447 g_assert (info->client_info.suspend_done);
449 state = mono_thread_info_get_suspend_state (info);
451 info->client_info.ctx = state->ctx;
453 if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
454 /* thread is starting or detaching, nothing to scan here */
455 info->client_info.stopped_domain = NULL;
456 info->client_info.stopped_ip = NULL;
457 info->client_info.stack_start = NULL;
459 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
460 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
461 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
462 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
464 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
465 if (!info->client_info.stack_start
466 || info->client_info.stack_start < info->client_info.stack_start_limit
467 || info->client_info.stack_start >= info->client_info.stack_end) {
468 g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
469 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
473 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
475 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
476 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
482 sgen_unified_suspend_restart_world (void)
484 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
485 FOREACH_THREAD (info) {
487 if (sgen_is_thread_in_current_stw (info, &reason)) {
488 g_assert (mono_thread_info_begin_resume (info));
489 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
491 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
493 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
497 mono_threads_wait_pending_operations ();
498 mono_threads_end_global_suspend ();