2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
/* Short local aliases for the SGen timing macros. */
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations; both functions are defined further down in this file. */
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
/* Assigned from end_sw in sgen_client_restart_world; read by mono_time_since_last_stw. */
36 static TV_DECLARE (end_of_last_stw);
/*
 * mono_time_since_last_stw:
 *
 *   Returns TV_ELAPSED (end_of_last_stw, now): the time elapsed since
 * end_of_last_stw was last stored, in whatever unit TV_ELAPSED yields.
 * end_of_last_stw == 0 is special-cased first.
 * NOTE(review): the statement guarded by that check is not visible here --
 * presumably an early return for "no STW has completed yet"; confirm.
 */
38 guint64 mono_time_since_last_stw ()
40 if (end_of_last_stw == 0)
43 TV_DECLARE (current_time);
44 TV_GETTIME (current_time);
45 return TV_ELAPSED (end_of_last_stw, current_time);
/* Incremented in sgen_client_stop_world and printed in its "stopping world" log line. */
48 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 *
 *   Rounds an address up to a multiple of sizeof (gpointer): add
 * sizeof (gpointer) - 1, then clear the low bits with the inverted mask.
 * NOTE(review): the declaration of p and the return statement are not
 * visible here -- presumably p is the integer form of ptr and the rounded
 * value is returned as a pointer; confirm.
 */
51 align_pointer (void *ptr)
54 p += sizeof (gpointer) - 1;
55 p &= ~ (sizeof (gpointer) - 1);
/*
 * update_current_thread_stack:
 *
 *   Records the calling thread's stack position and register context in its
 * SgenThreadInfo:
 *   - stack_start becomes the pointer-aligned address of the local
 *     stack_guard, and is asserted to be non-NULL and inside
 *     [stack_start_limit, stack_end).
 *   - when not cross compiling and MONO_ARCH_HAS_MONO_CONTEXT is set, the
 *     current context is captured into client_info.ctx.
 *   - if the GC callbacks provide thread_suspend_func, it is called with the
 *     thread's runtime_data, NULL and the captured context.
 * NOTE(review): the g_error line presumably sits in the #else branch of the
 * preprocessor conditional (the #else/#endif lines are not visible); confirm.
 * NOTE(review): the start parameter is not referenced by any visible line.
 */
60 update_current_thread_stack (void *start)
63 SgenThreadInfo *info = mono_thread_info_current ();
65 info->client_info.stack_start = align_pointer (&stack_guard);
66 g_assert (info->client_info.stack_start);
67 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
69 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
70 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
72 g_error ("Sgen STW requires a working mono-context");
75 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
76 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * is_ip_in_managed_allocator:
 *
 *   Looks up the JIT info covering ip in domain and returns what
 * sgen_is_critical_method reports for the method that owns it.
 * Two early checks come first: no internal current thread (thread attach),
 * and sgen_has_critical_method () being false.
 * NOTE(review): the statements taken by those early checks, and the handling
 * of a failed JIT info lookup, are not visible here -- presumably each
 * returns FALSE; confirm.
 */
80 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
84 if (!mono_thread_internal_current ())
85 /* Happens during thread attach */
90 if (!sgen_has_critical_method ())
94 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
95 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
96 * to register the jit info for all GC critical methods after they are JITted/loaded.
98 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
102 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 *   Used by the non-unified stop path.  A live thread is resumed when it has
 * no stack_start, is inside a critical region, or was stopped at an IP that
 * is_ip_in_managed_allocator accepts.  Every other thread that is not already
 * skipped, GC-disabled or done gets stopped_ip/stopped_domain cleared and
 * suspend_done set.  After waiting for the resumed threads to acknowledge,
 * the function yields (or sleeps, lengthening the sleep by 10 each time) and
 * suspends again every thread that still has a stopped_ip.  It stops when a
 * pass restarts no thread.
 *   Returns num_threads_died, the accumulated restart_count - restarted_count.
 * NOTE(review): the enclosing loop, the checks on the results of
 * sgen_resume_thread/sgen_suspend_thread and the counter updates are not
 * visible here; the two "skip = 1" assignments presumably mark threads whose
 * resume or suspend failed -- confirm.
 */
106 restart_threads_until_none_in_managed_allocator (void)
108 int num_threads_died = 0;
109 int sleep_duration = -1;
112 int restart_count = 0, restarted_count = 0;
113 /* restart all threads that stopped in the
115 FOREACH_THREAD (info) {
117 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
119 if (mono_thread_info_is_live (info) &&
120 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
121 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
122 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
123 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
124 result = sgen_resume_thread (info);
128 info->client_info.skip = 1;
131 /* we set the stopped_ip to
132 NULL for threads which
133 we're not restarting so
134 that we can easily identify
136 info->client_info.stopped_ip = NULL;
137 info->client_info.stopped_domain = NULL;
138 info->client_info.suspend_done = TRUE;
141 /* if no threads were restarted, we're done */
142 if (restart_count == 0)
145 /* wait for the threads to signal their restart */
146 sgen_wait_for_suspend_ack (restart_count);
148 if (sleep_duration < 0) {
149 mono_thread_info_yield ();
152 g_usleep (sleep_duration);
153 sleep_duration += 10;
156 /* stop them again */
157 FOREACH_THREAD (info) {
159 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
161 result = sgen_suspend_thread (info);
166 info->client_info.skip = 1;
169 /* some threads might have died */
170 num_threads_died += restart_count - restarted_count;
171 /* wait for the threads to signal their suspension
173 sgen_wait_for_suspend_ack (restarted_count);
176 return num_threads_died;
// acquire_gc_locks: takes the thread-info suspend lock.
// NOTE(review): other statements of this function may not be visible here.
180 acquire_gc_locks (void)
183 mono_thread_info_suspend_lock ();
// release_gc_locks: releases the thread-info suspend lock.
// NOTE(review): other statements of this function may not be visible here.
187 release_gc_locks (void)
189 mono_thread_info_suspend_unlock ();
// Timestamp taken in sgen_client_stop_world right before threads are stopped.
193 static TV_DECLARE (stop_world_time);
// Largest stop-to-restart pause observed so far; updated in sgen_client_restart_world.
194 static unsigned long max_pause_usec = 0;
// Accumulated stop and restart handshake durations; registered as counters in mono_sgen_init_stw.
196 static guint64 time_stop_world;
197 static guint64 time_restart_world;
199 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_stop_world:
 *
 *   Stops the world for a collection of the given generation.  In order:
 * flushes pending GC-move profiler events, processes togglerefs, records the
 * calling thread's stack and context, bumps sgen_global_stop_count, and then
 * stops the other threads -- through sgen_unified_suspend_stop_world when
 * unified thread management is enabled, otherwise through
 * sgen_thread_handshake (TRUE) followed by
 * restart_threads_until_none_in_managed_allocator.  The handshake duration is
 * added to time_stop_world.  Finally the memory governor is told the
 * collection is starting and bridge data is reset if bridge processing is
 * needed.
 * NOTE(review): the condition guarding the g_error call and the point where
 * the GC locks are acquired are not visible here.
 */
201 sgen_client_stop_world (int generation)
203 TV_DECLARE (end_handshake);
205 /* notify the profiler of the leftovers */
206 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
207 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
208 mono_sgen_gc_event_moves ();
212 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
213 sgen_process_togglerefs ();
215 update_current_thread_stack (&generation);
217 sgen_global_stop_count++;
218 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
219 TV_GETTIME (stop_world_time);
221 if (mono_thread_info_unified_management_enabled ()) {
222 sgen_unified_suspend_stop_world ();
225 count = sgen_thread_handshake (TRUE);
226 dead = restart_threads_until_none_in_managed_allocator ();
228 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
231 SGEN_LOG (3, "world stopped");
233 TV_GETTIME (end_handshake);
234 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
236 sgen_memgov_collection_start (generation);
237 if (sgen_need_bridge_processing ())
238 sgen_bridge_reset_data ();
241 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_restart_world:
 *
 *   Restarts the world after a collection.  In order: flushes pending
 * GC-move profiler events, clears every thread's stack_start and saved
 * context, resumes the threads (sgen_unified_suspend_restart_world when
 * unified thread management is enabled, sgen_thread_handshake (FALSE)
 * otherwise), then updates time_restart_world, max_pause_usec and
 * end_of_last_stw from the end_sw timestamp and logs the pause.  The pause
 * length is also stored in timing [0].stw_time.
 * NOTE(review): the code that fills end_sw, the release of the GC locks
 * described by the comment lines below, and any NULL check on timing are not
 * visible here.
 */
243 sgen_client_restart_world (int generation, GGTimingInfo *timing)
246 TV_DECLARE (start_handshake);
249 /* notify the profiler of the leftovers */
250 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
251 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
252 mono_sgen_gc_event_moves ();
254 FOREACH_THREAD (info) {
255 info->client_info.stack_start = NULL;
256 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
259 TV_GETTIME (start_handshake);
261 if (mono_thread_info_unified_management_enabled ())
262 sgen_unified_suspend_restart_world ();
264 sgen_thread_handshake (FALSE);
267 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
268 usec = TV_ELAPSED (stop_world_time, end_sw);
269 max_pause_usec = MAX (usec, max_pause_usec);
270 end_of_last_stw = end_sw;
272 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
275 * We must release the thread info suspend lock after doing
276 * the thread handshake. Otherwise, if the GC stops the world
277 * and a thread is in the process of starting up, but has not
278 * yet registered (it's not in the thread_list), it is
279 * possible that the thread does register while the world is
280 * stopped. When restarting the GC will then try to restart
281 * said thread, but since it never got the suspend signal, it
282 * cannot answer the restart signal, so a deadlock results.
287 timing [0].stw_time = usec;
/*
 * mono_sgen_init_stw:
 *
 *   Registers time_stop_world and time_restart_world as GC time counters
 * named "World stop" and "World restart".
 */
292 mono_sgen_init_stw (void)
294 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
295 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
298 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 *
 *   Tells callers whether info takes part in the current stop-the-world;
 * callers in this file treat a true result as "suspend/resume this thread".
 * The visible checks run in this order: gc_disabled, skip, info being the
 * calling thread, info being a thread-pool thread, info no longer live.
 * NOTE(review): the return statements and the writes to *reason are not
 * visible here.  Presumably each matching check stores a distinct code in
 * *reason and returns FALSE, and a thread passing all checks returns TRUE.
 * Some callers pass NULL for reason, so it is presumably NULL-tolerant --
 * confirm.
 */
301 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
304 A thread explicitly asked to be skiped because it holds no managed state.
305 This is used by TP and finalizer threads.
306 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
308 if (info->client_info.gc_disabled) {
315 We have detected that this thread is failing/dying, ignore it.
316 FIXME: can't we merge this with thread_is_dying?
318 if (info->client_info.skip) {
325 Suspending the current thread will deadlock us, bad idea.
327 if (info == mono_thread_info_current ()) {
334 We can't suspend the workers that will do all the heavy lifting.
335 FIXME Use some state bit in SgenThreadInfo for this.
337 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
344 The thread has signaled that it started to detach, ignore it.
345 FIXME: can't we merge this with skip
347 if (!mono_thread_info_is_live (info)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 *   Stop path used when unified thread management is enabled.  Begins a
 * global suspend, asks every participating thread to suspend, resumes and
 * re-suspends threads found in a critical location, and finally records the
 * context, stopped IP and stack start of each suspended thread for scanning.
 * NOTE(review): the loop enclosing the resume/re-suspend passes, the updates
 * of restart_counter and several branch boundaries are not visible here; the
 * comments below describe only what the visible lines do.
 */
357 sgen_unified_suspend_stop_world (void)
360 int sleep_duration = -1;
362 mono_threads_begin_global_suspend ();
363 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
/* First pass: reset the per-thread flags and request suspension of each participating thread. */
365 FOREACH_THREAD (info) {
367 info->client_info.skip = FALSE;
368 info->client_info.suspend_done = FALSE;
369 if (sgen_is_thread_in_current_stw (info, &reason)) {
370 info->client_info.skip = !mono_thread_info_begin_suspend (info);
371 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
373 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
/* The calling thread is never suspended; mark it as done up front. */
377 mono_thread_info_current ()->client_info.suspend_done = TRUE;
378 mono_threads_wait_pending_operations ();
/* Resume pass: threads stopped in a critical location are resumed; the rest are accepted as suspended. */
382 FOREACH_THREAD (info) {
384 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
385 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
390 All threads that reach here are pristine suspended. This means the following:
392 - We haven't accepted the previous suspend as good.
393 - We haven't gave up on it for this STW (it's either bad or asked not to)
395 if (mono_thread_info_in_critical_location (info)) {
397 gint suspend_count = mono_thread_info_suspend_count (info);
398 if (!(suspend_count == 1))
399 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
400 res = mono_thread_info_begin_resume (info);
401 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
405 info->client_info.skip = TRUE;
407 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
408 g_assert (!info->client_info.in_critical_region);
409 info->client_info.suspend_done = TRUE;
413 if (restart_counter == 0)
415 mono_threads_wait_pending_operations ();
/* Back off before suspending again: yield while sleep_duration is negative, otherwise sleep and lengthen the sleep. */
417 if (sleep_duration < 0) {
418 mono_thread_info_yield ();
421 g_usleep (sleep_duration);
422 sleep_duration += 10;
/* Re-suspend pass: request suspension again for threads that are still pending and currently running. */
425 FOREACH_THREAD (info) {
427 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
428 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
432 if (mono_thread_info_is_running (info)) {
433 gboolean res = mono_thread_info_begin_suspend (info);
434 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
436 info->client_info.skip = TRUE;
440 mono_threads_wait_pending_operations ();
/* Final pass: copy each suspended thread's state into client_info for the collector. */
443 FOREACH_THREAD (info) {
445 if (sgen_is_thread_in_current_stw (info, &reason)) {
446 MonoThreadUnwindState *state;
448 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
449 g_assert (info->client_info.suspend_done);
451 state = mono_thread_info_get_suspend_state (info);
453 info->client_info.ctx = state->ctx;
455 if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
456 /* thread is starting or detaching, nothing to scan here */
457 info->client_info.stopped_domain = NULL;
458 info->client_info.stopped_ip = NULL;
459 info->client_info.stack_start = NULL;
461 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
462 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
463 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
464 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
466 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
467 if (!info->client_info.stack_start
468 || info->client_info.stack_start < info->client_info.stack_start_limit
469 || info->client_info.stack_start >= info->client_info.stack_end) {
470 g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
471 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
475 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
477 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
478 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * sgen_unified_suspend_restart_world:
 *
 *   Restart path used when unified thread management is enabled.  Resumes
 * every thread that sgen_is_thread_in_current_stw reports as participating,
 * logs the restart through the binary protocol, waits for the pending
 * operations to finish and ends the global suspend.
 */
484 sgen_unified_suspend_restart_world (void)
486 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
487 FOREACH_THREAD (info) {
489 if (sgen_is_thread_in_current_stw (info, &reason)) {
/* NOTE(review): the resume call is made inside g_assert; if assertions were ever compiled out the thread would not be resumed -- confirm g_assert is always active in this build. */
490 g_assert (mono_thread_info_begin_resume (info));
491 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
493 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
495 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
499 mono_threads_wait_pending_operations ();
500 mono_threads_end_global_suspend ();