2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
/* Short local aliases for the SGEN_TV_* macros used below to time the stop and restart handshakes. */
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations; both functions are defined in the unified suspend section further down. */
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
/* Incremented by sgen_client_stop_world () every time the world is stopped. */
36 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 *
 *   Rounds an address up to the next multiple of sizeof (gpointer).
 *
 * NOTE(review): the declaration of p and the return statement are not
 * visible in this listing; presumably p starts out as the integer value
 * of @ptr and the aligned value is returned - confirm.
 */
39 align_pointer (void *ptr)
/* Add (size - 1), then clear the low bits; this rounds up as long as sizeof (gpointer) is a power of two. */
42 p += sizeof (gpointer) - 1;
43 p &= ~ (sizeof (gpointer) - 1);
/*
 * NOTE(review): nothing in the visible listing reads or writes this variable;
 * update_current_thread_stack () stores the context in info->client_info.ctx.
 * Confirm whether it is still needed.
 */
47 static MonoContext cur_thread_ctx;
/*
 * update_current_thread_stack:
 * @start: not used by any of the visible lines.
 *
 *   Records the current stack position and register context of the calling
 * thread in its SgenThreadInfo, then passes the context to the
 * thread_suspend_func GC callback when one is installed.
 */
50 update_current_thread_stack (void *start)
53 SgenThreadInfo *info = mono_thread_info_current ();
/* The aligned address of stack_guard becomes stack_start (the declaration of stack_guard is not visible here; presumably a local). */
55 info->client_info.stack_start = align_pointer (&stack_guard);
56 g_assert (info->client_info.stack_start);
/* The recorded position must fall inside [stack_start_limit, stack_end). */
57 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/* Capture the current context where supported; the g_error below is presumably the #else branch (the #else and #endif lines are not visible). */
59 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
60 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
62 g_error ("Sgen STW requires a working mono-context");
/* Hand the captured context to the thread_suspend_func callback, if one is set. */
65 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
66 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * is_ip_in_managed_allocator:
 * @domain: domain whose JIT info table is searched.
 * @ip: instruction pointer to look up.
 *
 *   Looks up the JIT info covering @ip and returns the result of
 * sgen_is_critical_method () for the method that owns it.
 *
 * NOTE(review): the early exits taken after the two guard conditions, and
 * any check of the lookup result, are not visible in this listing.
 */
70 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
74 if (!mono_thread_internal_current ())
75 /* Happens during thread attach */
80 if (!sgen_has_critical_method ())
84 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
85 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
86 * to register the jit info for all GC critical methods after they are JITted/loaded.
88 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
92 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 *   Resumes the suspended threads that stopped at an unsafe point (no
 * stack_start recorded, inside a critical region, or with the stopped IP in
 * a GC-critical method), lets them run briefly and suspends them again.
 * Threads that stopped at a safe point get stopped_ip and stopped_domain
 * cleared and are marked suspend_done. The work ends when a pass restarts
 * no thread.
 *
 * Returns: num_threads_died, which grows by restart_count - restarted_count
 * on every pass.
 *
 * NOTE(review): the loop construct, the continue statements, the counter
 * increments and the branches that test result are not visible in this
 * listing; the summary above is pieced together from the visible statements.
 */
96 restart_threads_until_none_in_managed_allocator (void)
98 int num_threads_died = 0;
99 int sleep_duration = -1;
102 int restart_count = 0, restarted_count = 0;
103 /* restart all threads that stopped in the
105 FOREACH_THREAD (info) {
107 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
109 if (mono_thread_info_is_live (info) &&
110 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
111 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
112 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
113 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
114 result = sgen_resume_thread (info);
118 info->client_info.skip = 1;
121 /* we set the stopped_ip to
122 NULL for threads which
123 we're not restarting so
124 that we can easily identify
126 info->client_info.stopped_ip = NULL;
127 info->client_info.stopped_domain = NULL;
128 info->client_info.suspend_done = TRUE;
131 /* if no threads were restarted, we're done */
132 if (restart_count == 0)
135 /* wait for the threads to signal their restart */
136 sgen_wait_for_suspend_ack (restart_count);
/* Back off before suspending again: yield while sleep_duration is negative, otherwise sleep and lengthen the next sleep by 10. */
138 if (sleep_duration < 0) {
139 mono_thread_info_yield ();
142 g_usleep (sleep_duration);
143 sleep_duration += 10;
146 /* stop them again */
147 FOREACH_THREAD (info) {
149 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
151 result = sgen_suspend_thread (info);
156 info->client_info.skip = 1;
159 /* some threads might have died */
160 num_threads_died += restart_count - restarted_count;
161 /* wait for the threads to signal their suspension
163 sgen_wait_for_suspend_ack (restarted_count);
166 return num_threads_died;
/*
 * acquire_gc_locks:
 *
 *   Takes the thread info suspend lock. Any other statement of this function
 * is not visible in this listing.
 */
170 acquire_gc_locks (void)
173 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 *   Releases the thread info suspend lock taken by acquire_gc_locks (). Any
 * other statement of this function is not visible in this listing.
 */
177 release_gc_locks (void)
179 mono_thread_info_suspend_unlock ();
/* Timestamp taken by sgen_client_stop_world (); sgen_client_restart_world () measures the whole pause from it. */
183 static TV_DECLARE (stop_world_time);
/* Longest pause seen so far, updated by sgen_client_restart_world (). */
184 static unsigned long max_pause_usec = 0;
/* Accumulated handshake times; mono_sgen_init_stw () registers them as the World stop and World restart counters. */
186 static guint64 time_stop_world;
187 static guint64 time_restart_world;
/*
 * sgen_client_stop_world:
 * @generation: forwarded to sgen_memgov_collection_start (); its address is
 * also the argument given to update_current_thread_stack ().
 *
 *   Stops the world. Records the stack and context of the calling thread,
 * bumps sgen_global_stop_count, suspends the other threads through either
 * the unified suspend path or the handshake path, and adds the elapsed
 * handshake time to time_stop_world.
 */
189 /* LOCKING: assumes the GC lock is held */
191 sgen_client_stop_world (int generation)
193 TV_DECLARE (end_handshake);
195 /* notify the profiler of the leftovers */
196 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
197 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
198 mono_sgen_gc_event_moves ();
202 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
203 sgen_process_togglerefs ();
205 update_current_thread_stack (&generation);
207 sgen_global_stop_count++;
208 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
209 TV_GETTIME (stop_world_time);
/* Two suspend implementations: the unified path, or a handshake followed by restarting the threads caught in the managed allocator. The else line is not visible in this listing. */
211 if (mono_thread_info_unified_management_enabled ()) {
212 sgen_unified_suspend_stop_world ();
215 count = sgen_thread_handshake (TRUE);
216 dead = restart_threads_until_none_in_managed_allocator ();
/* NOTE(review): the condition guarding this g_error is not visible; going by the message it compares dead against count. */
218 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
221 SGEN_LOG (3, "world stopped");
223 TV_GETTIME (end_handshake);
224 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
226 sgen_memgov_collection_start (generation);
227 if (sgen_need_bridge_processing ())
228 sgen_bridge_reset_data ();
/*
 * sgen_client_restart_world:
 * @generation: not used by any of the visible lines.
 * @timing: element 0 receives the stop-the-world time and the bridge time.
 *
 *   Restarts the world. Clears the stack position and context recorded for
 * every thread, resumes the threads through either the unified suspend path
 * or the handshake path, and updates time_restart_world and max_pause_usec.
 */
231 /* LOCKING: assumes the GC lock is held */
233 sgen_client_restart_world (int generation, GGTimingInfo *timing)
236 TV_DECLARE (start_handshake);
237 TV_DECLARE (end_bridge);
238 unsigned long usec, bridge_usec;
240 /* notify the profiler of the leftovers */
241 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
242 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
243 mono_sgen_gc_event_moves ();
/* Forget the stack position and register context recorded while the world was stopped. */
245 FOREACH_THREAD (info) {
246 info->client_info.stack_start = NULL;
247 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
250 TV_GETTIME (start_handshake);
252 if (mono_thread_info_unified_management_enabled ())
253 sgen_unified_suspend_restart_world ();
255 sgen_thread_handshake (FALSE);
/* end_sw is declared and sampled on lines that are not visible in this listing. */
258 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
259 usec = TV_ELAPSED (stop_world_time, end_sw);
260 max_pause_usec = MAX (usec, max_pause_usec);
262 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
265 * We must release the thread info suspend lock after doing
266 * the thread handshake. Otherwise, if the GC stops the world
267 * and a thread is in the process of starting up, but has not
268 * yet registered (it's not in the thread_list), it is
269 * possible that the thread does register while the world is
270 * stopped. When restarting the GC will then try to restart
271 * said thread, but since it never got the suspend signal, it
272 * cannot answer the restart signal, so a deadlock results.
276 TV_GETTIME (end_bridge);
277 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
/* NOTE(review): any NULL check on @timing is not visible in this listing - confirm before relying on it. */
280 timing [0].stw_time = usec;
281 timing [0].bridge_time = bridge_usec;
/*
 * mono_sgen_init_stw:
 *
 *   Registers time_stop_world and time_restart_world as GC time counters
 * under the names World stop and World restart.
 *
 * NOTE(review): both variables are declared guint64 but are registered with
 * MONO_COUNTER_ULONG; confirm the two sizes agree on every supported target.
 */
286 mono_sgen_init_stw (void)
288 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
289 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
292 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify.
 * @reason: callers in this file pass either the address of an int or NULL.
 *
 *   Runs @info through a series of exclusion checks, in this order:
 * gc_disabled, skip, being the current thread, being a thread pool thread,
 * and no longer being live.
 *
 * NOTE(review): the bodies of the checks and the final return are not
 * visible in this listing; presumably each match stores a code through
 * @reason and reports the thread as not taking part - confirm.
 */
295 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
298 A thread explicitly asked to be skiped because it holds no managed state.
299 This is used by TP and finalizer threads.
300 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
302 if (info->client_info.gc_disabled) {
309 We have detected that this thread is failing/dying, ignore it.
310 FIXME: can't we merge this with thread_is_dying?
312 if (info->client_info.skip) {
319 Suspending the current thread will deadlock us, bad idea.
321 if (info == mono_thread_info_current ()) {
328 We can't suspend the workers that will do all the heavy lifting.
329 FIXME Use some state bit in SgenThreadInfo for this.
331 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
338 The thread has signaled that it started to detach, ignore it.
339 FIXME: can't we merge this with skip
341 if (!mono_thread_info_is_live (info)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 *   Stops the world through the unified thread suspend API. Visible phases:
 *
 *   1. Begin a global suspend, reset skip and suspend_done on every thread
 *      and request suspension of each thread accepted by
 *      sgen_is_thread_in_current_stw (); a failed request marks the thread
 *      as skip.
 *   2. Resume the threads found in a critical location, back off, and
 *      suspend again those that are running; threads found outside a
 *      critical location are marked suspend_done.
 *   3. Copy the saved context of each stopped thread into its client_info
 *      and derive stopped_domain, stopped_ip and stack_start from it.
 *
 * NOTE(review): the loop constructs, else branches, counter updates and
 * several declarations are not visible in this listing.
 */
351 sgen_unified_suspend_stop_world (void)
354 int sleep_duration = -1;
356 mono_threads_begin_global_suspend ();
357 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
/* Phase 1: reset the per-STW flags and ask every eligible thread to suspend. */
359 FOREACH_THREAD (info) {
361 info->client_info.skip = FALSE;
362 info->client_info.suspend_done = FALSE;
363 if (sgen_is_thread_in_current_stw (info, &reason)) {
364 info->client_info.skip = !mono_thread_info_begin_suspend (info);
365 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
367 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
/* The calling thread is marked done right away; then wait for the suspend requests issued above. */
371 mono_thread_info_current ()->client_info.suspend_done = TRUE;
372 mono_threads_wait_pending_operations ();
/* Phase 2, restart pass: threads already done or excluded are only logged; the rest are inspected below. */
376 FOREACH_THREAD (info) {
378 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
379 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
384 All threads that reach here are pristine suspended. This means the following:
386 - We haven't accepted the previous suspend as good.
387 - We haven't gave up on it for this STW (it's either bad or asked not to)
389 if (mono_thread_info_in_critical_location (info)) {
391 gint suspend_count = mono_thread_info_suspend_count (info);
392 if (!(suspend_count == 1))
393 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
394 res = mono_thread_info_begin_resume (info);
395 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
399 info->client_info.skip = TRUE;
401 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
402 g_assert (!info->client_info.in_critical_region);
403 info->client_info.suspend_done = TRUE;
407 if (restart_counter == 0)
409 mono_threads_wait_pending_operations ();
411 if (sleep_duration < 0) {
412 mono_thread_info_yield ();
415 g_usleep (sleep_duration);
416 sleep_duration += 10;
419 FOREACH_THREAD (info) {
421 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
422 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
426 if (mono_thread_info_is_running (info)) {
427 gboolean res = mono_thread_info_begin_suspend (info);
428 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
430 info->client_info.skip = TRUE;
434 mono_threads_wait_pending_operations ();
437 FOREACH_THREAD (info) {
439 if (sgen_is_thread_in_current_stw (info, &reason)) {
440 MonoThreadUnwindState *state;
442 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
443 g_assert (info->client_info.suspend_done);
445 state = mono_thread_info_get_suspend_state (info);
447 info->client_info.ctx = state->ctx;
449 if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
450 /* thread is starting or detaching, nothing to scan here */
451 info->client_info.stopped_domain = NULL;
452 info->client_info.stopped_ip = NULL;
453 info->client_info.stack_start = NULL;
455 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
456 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
457 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
458 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
460 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
461 if (!info->client_info.stack_start
462 || info->client_info.stack_start < info->client_info.stack_start_limit
463 || info->client_info.stack_start >= info->client_info.stack_end) {
464 g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
465 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
469 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
471 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
/* A thread left out of this STW must not carry suspend_done, unless it is the calling thread (marked done in phase 1). */
472 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * sgen_unified_suspend_restart_world:
 *
 *   Resumes every thread accepted by sgen_is_thread_in_current_stw (), logs
 * each restart in the binary protocol, waits for the pending operations to
 * finish and ends the global suspend.
 */
478 sgen_unified_suspend_restart_world (void)
480 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
481 FOREACH_THREAD (info) {
483 if (sgen_is_thread_in_current_stw (info, &reason)) {
/*
 * NOTE(review): the resume call is made inside g_assert (). If assertions
 * can be compiled out in this build (G_DISABLE_ASSERT in stock GLib), the
 * thread would never be resumed; consider storing the result in a local
 * and asserting on that - confirm against the glib flavour in use.
 */
484 g_assert (mono_thread_info_begin_resume (info));
485 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
487 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
489 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
493 mono_threads_wait_pending_operations ();
494 mono_threads_end_global_suspend ();