2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
/* Short local aliases for the SGen timing macros used throughout this file. */
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations: both helpers are defined near the end of this file. */
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
/* Timestamp of the end of the most recent STW; assigned in sgen_client_restart_world. */
36 static TV_DECLARE (end_of_last_stw);
/*
 * mono_time_since_last_stw:
 *
 * Reports how much time has passed since the previous stop-the-world ended.
 * The value is TV_ELAPSED between the file-static end_of_last_stw timestamp
 * and a freshly sampled current time.
 * NOTE(review): the statement guarded by the end_of_last_stw == 0 check is
 * not visible in this view; presumably it returns early when no STW has
 * completed yet - confirm against the full source.
 */
38 guint64 mono_time_since_last_stw ()
40 if (end_of_last_stw == 0)
43 TV_DECLARE (current_time);
44 TV_GETTIME (current_time);
45 return TV_ELAPSED (end_of_last_stw, current_time);
/* Running count of stop-the-world requests; incremented in sgen_client_stop_world. */
48 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 * @ptr: address to align
 *
 * Rounds a value up to the next multiple of sizeof (gpointer): adding
 * sizeof (gpointer) - 1 and then masking off the low bits leaves an already
 * aligned value unchanged and moves any other value up to the next boundary.
 * NOTE(review): the declaration of p and the return statement are not visible
 * in this view; presumably p holds ptr converted to an integer - confirm.
 */
51 align_pointer (void *ptr)
54 p += sizeof (gpointer) - 1;
55 p &= ~ (sizeof (gpointer) - 1);
/*
 * update_current_thread_stack:
 * @start: not referenced by the lines visible in this view
 *
 * Records, in the SgenThreadInfo of the calling thread, where its stack
 * currently starts and what its register context is, then hands that context
 * to the thread_suspend_func GC callback when one is registered.
 * NOTE(review): the declaration of stack_guard is not visible here;
 * presumably it is a local of this function - confirm.
 */
60 update_current_thread_stack (void *start)
63 SgenThreadInfo *info = mono_thread_info_current ();
/* The pointer-aligned address of stack_guard becomes the recorded stack start. */
65 info->client_info.stack_start = align_pointer (&stack_guard);
66 g_assert (info->client_info.stack_start);
/* The recorded start must lie inside [stack_start_limit, stack_end). */
67 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/*
 * Capture the current register context when MonoContext is available.
 * NOTE(review): the g_error below presumably sits in the matching #else
 * branch, which is not visible in this view.
 */
69 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
70 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
72 g_error ("Sgen STW requires a working mono-context");
/* Invoke the runtime callback, if any, with the runtime data and captured context of this thread. */
75 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
76 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * is_ip_in_managed_allocator:
 * @domain: domain whose JIT info table is searched
 * @ip: instruction pointer to classify
 *
 * Looks up the JIT info covering ip and returns the result of
 * sgen_is_critical_method for the method it belongs to. Two checks come
 * first: whether there is a current internal thread, and whether any critical
 * method has been registered at all.
 * NOTE(review): the statements taken when those checks fire, and any handling
 * of a failed lookup, are not visible in this view - confirm against the full
 * source.
 */
80 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
84 if (!mono_thread_internal_current ())
85 /* Happens during thread attach */
90 if (!sgen_has_critical_method ())
94 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
95 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
96 * to register the jit info for all GC critical methods after they are JITted/loaded.
98 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
102 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 * Resumes, and afterwards suspends again, the threads that were stopped at a
 * point the collector cannot work with, repeating until a round restarts no
 * thread. A live thread is resumed when it has no stack_start, is flagged as
 * being in a critical region, or has its stopped_ip inside a GC critical
 * method (see is_ip_in_managed_allocator). Threads marked skip, gc_disabled
 * or suspend_done are not examined. Per the in-code comment, threads that are
 * not restarted get stopped_ip set to NULL so the second pass can tell them
 * apart.
 *
 * Returns num_threads_died, which grows each round by
 * restart_count - restarted_count.
 * NOTE(review): the enclosing loop, the counter updates and the handling of
 * the sgen_resume_thread and sgen_suspend_thread results are not visible in
 * this view - confirm against the full source.
 */
106 restart_threads_until_none_in_managed_allocator (void)
108 int num_threads_died = 0;
109 int sleep_duration = -1;
112 int restart_count = 0, restarted_count = 0;
113 /* restart all threads that stopped in the
115 FOREACH_THREAD (info) {
117 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
119 if (mono_thread_info_is_live (info) &&
120 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
121 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
122 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
123 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
124 result = sgen_resume_thread (info);
128 info->client_info.skip = 1;
131 /* we set the stopped_ip to
132 NULL for threads which
133 we're not restarting so
134 that we can easily identify
136 info->client_info.stopped_ip = NULL;
137 info->client_info.stopped_domain = NULL;
138 info->client_info.suspend_done = TRUE;
141 /* if no threads were restarted, we're done */
142 if (restart_count == 0)
145 /* wait for the threads to signal their restart */
146 sgen_wait_for_suspend_ack (restart_count);
/*
 * Give the restarted threads time to run: yield while sleep_duration is
 * still negative; the g_usleep path (presumably the else branch, not visible
 * here) sleeps and then lengthens the interval by 10.
 */
148 if (sleep_duration < 0) {
149 mono_thread_info_yield ();
152 g_usleep (sleep_duration);
153 sleep_duration += 10;
156 /* stop them again */
157 FOREACH_THREAD (info) {
159 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
161 result = sgen_suspend_thread (info);
166 info->client_info.skip = 1;
169 /* some threads might have died */
170 num_threads_died += restart_count - restarted_count;
171 /* wait for the threads to signal their suspension
173 sgen_wait_for_suspend_ack (restarted_count);
176 return num_threads_died;
/*
 * acquire_gc_locks:
 *
 * Takes the thread-info suspend lock through mono_thread_info_suspend_lock.
 * release_gc_locks performs the matching unlock.
 * NOTE(review): other statements of this function may not be visible in this
 * view.
 */
180 acquire_gc_locks (void)
183 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 * Releases the thread-info suspend lock through
 * mono_thread_info_suspend_unlock, undoing acquire_gc_locks.
 * NOTE(review): other statements of this function may not be visible in this
 * view.
 */
187 release_gc_locks (void)
189 mono_thread_info_suspend_unlock ();
/* Sampled in sgen_client_stop_world just before threads are suspended. */
193 static TV_DECLARE (stop_world_time);
/* Largest pause seen so far; updated with MAX in sgen_client_restart_world. */
194 static unsigned long max_pause_usec = 0;
/* Accumulated handshake durations, exposed as the "World stop" and "World restart" counters by mono_sgen_init_stw. */
196 static guint64 time_stop_world;
197 static guint64 time_restart_world;
/*
 * sgen_client_stop_world:
 * @generation: forwarded to the profiler GC event and to
 * sgen_memgov_collection_start
 *
 * Suspends the other threads on behalf of the collector. The visible steps,
 * in order: flush pending move events to the profiler when
 * MONO_PROFILE_GC_MOVES is enabled, emit
 * MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, process togglerefs, record the stack
 * of the calling thread, increment sgen_global_stop_count, suspend the
 * threads, add the time spent suspending to time_stop_world, notify the
 * memory governor, and reset bridge data when bridge processing is needed.
 * NOTE(review): some statements and the local declarations of count and dead
 * are not visible in this view.
 */
199 /* LOCKING: assumes the GC lock is held */
201 sgen_client_stop_world (int generation)
203 TV_DECLARE (end_handshake);
205 /* notify the profiler of the leftovers */
206 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
207 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
208 mono_sgen_gc_event_moves ();
212 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
214 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
215 sgen_process_togglerefs ();
217 update_current_thread_stack (&generation);
219 sgen_global_stop_count++;
220 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
/* Start of the pause measurement; also read by sgen_client_restart_world. */
221 TV_GETTIME (stop_world_time);
/*
 * Unified thread management uses its own suspend protocol. The handshake
 * lines after it are presumably the alternative path (the else is not
 * visible in this view).
 */
223 if (mono_thread_info_unified_management_enabled ()) {
224 sgen_unified_suspend_stop_world ();
227 count = sgen_thread_handshake (TRUE);
228 dead = restart_threads_until_none_in_managed_allocator ();
/* NOTE(review): the condition guarding this g_error is not visible in this view. */
230 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
233 SGEN_LOG (3, "world stopped");
235 TV_GETTIME (end_handshake);
236 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
238 sgen_memgov_collection_start (generation);
239 if (sgen_need_bridge_processing ())
240 sgen_bridge_reset_data ();
/*
 * sgen_client_restart_world:
 * @generation: forwarded to the profiler GC event
 * @stw_time: not referenced by the lines visible in this view
 *
 * Resumes the threads suspended by sgen_client_stop_world. The visible steps,
 * in order: flush pending move events to the profiler when
 * MONO_PROFILE_GC_MOVES is enabled, clear the recorded stack start and
 * context of every thread, resume the threads, update time_restart_world,
 * max_pause_usec and end_of_last_stw, and emit
 * MONO_GC_EVENT_POST_START_WORLD_UNLOCKED.
 * NOTE(review): the declarations of end_sw and usec, the sampling of end_sw
 * and the lock release described by the trailing comment are not visible in
 * this view - confirm against the full source.
 */
243 /* LOCKING: assumes the GC lock is held */
245 sgen_client_restart_world (int generation, gint64 *stw_time)
248 TV_DECLARE (start_handshake);
251 /* notify the profiler of the leftovers */
252 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
253 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
254 mono_sgen_gc_event_moves ();
/* Forget the stack start and saved context recorded for every thread. */
256 FOREACH_THREAD (info) {
257 info->client_info.stack_start = NULL;
258 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
261 TV_GETTIME (start_handshake);
/* The same unified-management check as in sgen_client_stop_world selects how threads are resumed. */
263 if (mono_thread_info_unified_management_enabled ())
264 sgen_unified_suspend_restart_world ();
266 sgen_thread_handshake (FALSE);
/* Pause length is measured from stop_world_time, which sgen_client_stop_world sampled. */
269 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
270 usec = TV_ELAPSED (stop_world_time, end_sw);
271 max_pause_usec = MAX (usec, max_pause_usec);
272 end_of_last_stw = end_sw;
274 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
277 * We must release the thread info suspend lock after doing
278 * the thread handshake. Otherwise, if the GC stops the world
279 * and a thread is in the process of starting up, but has not
280 * yet registered (it's not in the thread_list), it is
281 * possible that the thread does register while the world is
282 * stopped. When restarting the GC will then try to restart
283 * said thread, but since it never got the suspend signal, it
284 * cannot answer the restart signal, so a deadlock results.
288 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
/*
 * mono_sgen_init_stw:
 *
 * Registers the two accumulated STW timings, time_stop_world and
 * time_restart_world, as GC time counters named "World stop" and
 * "World restart".
 */
294 mono_sgen_init_stw (void)
296 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
297 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
300 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify
 * @reason: callers in this file pass either the address of an int or NULL
 *
 * Decides whether a thread takes part in the stop-the-world in progress. The
 * visible checks single out threads that have gc_disabled set, are marked
 * skip, are the calling thread, belong to the SGen thread pool, or are no
 * longer live.
 * NOTE(review): the statements inside each check and the final return are not
 * visible in this view; presumably each match stores a reason code and
 * reports the thread as excluded - confirm against the full source.
 */
303 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
306 A thread explicitly asked to be skiped because it holds no managed state.
307 This is used by TP and finalizer threads.
308 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
310 if (info->client_info.gc_disabled) {
317 We have detected that this thread is failing/dying, ignore it.
318 FIXME: can't we merge this with thread_is_dying?
320 if (info->client_info.skip) {
327 Suspending the current thread will deadlock us, bad idea.
329 if (info == mono_thread_info_current ()) {
336 We can't suspend the workers that will do all the heavy lifting.
337 FIXME Use some state bit in SgenThreadInfo for this.
339 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
346 The thread has signaled that it started to detach, ignore it.
347 FIXME: can't we merge this with skip
349 if (!mono_thread_info_is_live (info)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 * Stop-the-world implementation used when unified thread management is
 * enabled (see sgen_client_stop_world). The visible code has these phases:
 *
 * 1. Begin a global suspend, clear skip and suspend_done on every thread and
 *    request suspension of each thread that takes part in this STW; a thread
 *    whose suspend request fails is marked skip.
 * 2. For each participating thread that is not yet done: if it stopped in a
 *    critical location, check that its suspend count is 1 and resume it;
 *    otherwise mark it suspend_done. When restart_counter is 0 the phase
 *    ends; otherwise wait, yield or sleep, and suspend again the threads
 *    that are running.
 * 3. For every participating thread, copy the context from its suspend
 *    state and derive stopped_domain, stopped_ip and stack_start from it
 *    (all NULL when the unwind data has no domain or no LMF), validate the
 *    stack range and log the suspension to the binary protocol.
 *
 * NOTE(review): the loop construct around phase 2, the restart_counter
 * updates, the reason declarations and the handling of the begin_resume and
 * begin_suspend results are not visible in this view - confirm against the
 * full source.
 */
359 sgen_unified_suspend_stop_world (void)
362 int sleep_duration = -1;
364 mono_threads_begin_global_suspend ();
365 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
/* Phase 1: reset the per-STW flags and request suspension of each participating thread. */
367 FOREACH_THREAD (info) {
369 info->client_info.skip = FALSE;
370 info->client_info.suspend_done = FALSE;
371 if (sgen_is_thread_in_current_stw (info, &reason)) {
372 info->client_info.skip = !mono_thread_info_begin_suspend (info);
373 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
375 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
/* The calling thread is never suspended; flag it done so the later passes skip it. */
379 mono_thread_info_current ()->client_info.suspend_done = TRUE;
380 mono_threads_wait_pending_operations ();
/* Phase 2: threads caught in a critical location are resumed for a while; the others are accepted as suspended. */
384 FOREACH_THREAD (info) {
386 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
387 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
392 All threads that reach here are pristine suspended. This means the following:
394 - We haven't accepted the previous suspend as good.
395 - We haven't gave up on it for this STW (it's either bad or asked not to)
397 if (mono_thread_info_in_critical_location (info)) {
399 gint suspend_count = mono_thread_info_suspend_count (info);
400 if (!(suspend_count == 1))
401 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
402 res = mono_thread_info_begin_resume (info);
403 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
407 info->client_info.skip = TRUE;
409 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
410 g_assert (!info->client_info.in_critical_region);
411 info->client_info.suspend_done = TRUE;
415 if (restart_counter == 0)
417 mono_threads_wait_pending_operations ();
419 if (sleep_duration < 0) {
420 mono_thread_info_yield ();
423 g_usleep (sleep_duration);
424 sleep_duration += 10;
427 FOREACH_THREAD (info) {
429 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
430 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
434 if (mono_thread_info_is_running (info)) {
435 gboolean res = mono_thread_info_begin_suspend (info);
436 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
438 info->client_info.skip = TRUE;
442 mono_threads_wait_pending_operations ();
445 FOREACH_THREAD (info) {
447 if (sgen_is_thread_in_current_stw (info, &reason)) {
448 MonoThreadUnwindState *state;
450 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
451 g_assert (info->client_info.suspend_done);
453 state = mono_thread_info_get_suspend_state (info);
455 info->client_info.ctx = state->ctx;
457 if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
458 /* thread is starting or detaching, nothing to scan here */
459 info->client_info.stopped_domain = NULL;
460 info->client_info.stopped_ip = NULL;
461 info->client_info.stack_start = NULL;
463 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
464 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
465 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
466 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
468 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
469 if (!info->client_info.stack_start
470 || info->client_info.stack_start < info->client_info.stack_start_limit
471 || info->client_info.stack_start >= info->client_info.stack_end) {
472 g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
473 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
477 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
479 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
480 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * sgen_unified_suspend_restart_world:
 *
 * Counterpart of sgen_unified_suspend_stop_world: requests a resume for every
 * thread that takes part in the current STW, logs each restart to the binary
 * protocol, waits for the pending operations to finish and then ends the
 * global suspend.
 * NOTE(review): the declaration of reason and the else separating the RESUME
 * and IGNORE branches are not visible in this view.
 */
486 sgen_unified_suspend_restart_world (void)
488 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
489 FOREACH_THREAD (info) {
491 if (sgen_is_thread_in_current_stw (info, &reason)) {
/*
 * NOTE(review): the resume request is issued inside g_assert, so it only runs
 * if the assertion macro always evaluates its argument - confirm that holds
 * for every build configuration, or hoist the call into a local first.
 */
492 g_assert (mono_thread_info_begin_resume (info));
493 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
495 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
497 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
501 mono_threads_wait_pending_operations ();
502 mono_threads_end_global_suspend ();