2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
/* Short local aliases for the SGEN_TV_* timing macros; the rest of this file uses the TV_* names. */
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations: both functions are defined further down but are called from sgen_client_stop_world and sgen_client_restart_world. */
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
/* Incremented by sgen_client_stop_world on every call and printed in its log message. */
36 unsigned int sgen_global_stop_count = 0;
/*
 * Rounds an address up to a sizeof (gpointer) boundary: the visible lines add
 * sizeof (gpointer) - 1 to p and then clear the low-order bits covered by that
 * same mask (this relies on sizeof (gpointer) being a power of two).
 *
 * NOTE(review): the declaration of p and the return statement are not among
 * the lines shown here; presumably p starts as the integer value of ptr and
 * the rounded value is returned as a pointer -- confirm against the full file.
 */
39 align_pointer (void *ptr)
42 p += sizeof (gpointer) - 1;
43 p &= ~ (sizeof (gpointer) - 1);
/* File-level buffer that update_current_thread_stack fills with the calling thread context before copying it into the thread info. */
47 static MonoContext cur_thread_ctx;
/*
 * Records the stack position and the current MonoContext of the calling
 * thread in its SgenThreadInfo, then notifies the runtime through the
 * thread_suspend_func GC callback when one is installed.
 *
 * start: not referenced by any line visible here.
 */
50 update_current_thread_stack (void *start)
53 SgenThreadInfo *info = mono_thread_info_current ();
/* The aligned address of a local (stack_guard) is stored as stack_start of this thread. */
55 info->client_info.stack_start = align_pointer (&stack_guard);
56 g_assert (info->client_info.stack_start);
/* stack_start must fall inside [stack_start_limit, stack_end). */
57 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/* Capture the context into the file-level buffer first, then copy it into the per-thread slot. */
58 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
59 memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
/* The callback receives the runtime_data of the thread, NULL as its second argument, and the context saved above. */
60 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
61 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * Reports whether ip belongs to a method that sgen classifies as critical:
 * the JIT info covering ip is looked up in domain with
 * mono_jit_info_table_find_internal and its method is handed to
 * sgen_is_critical_method, whose result is returned.
 *
 * domain: domain passed to the JIT info lookup.
 * ip:     instruction pointer to classify.
 *
 * Two guards precede the lookup: mono_thread_internal_current () returning
 * NULL, and sgen_has_critical_method () returning false.
 * NOTE(review): the statements executed under those guards, and any handling
 * of a failed lookup, are not among the lines shown here; presumably each of
 * them returns FALSE -- confirm against the full file.
 */
65 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
69 if (!mono_thread_internal_current ())
70 /* Happens during thread attach */
75 if (!sgen_has_critical_method ())
79 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
80 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
81 * to register the jit info for all GC critical methods after they are JITted/loaded.
83 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
87 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * Resumes and re-suspends threads that were stopped at an unsafe point until
 * a pass finds nothing left to restart. Returns num_threads_died.
 *
 * What the visible lines show for each pass:
 *  - Threads with skip, gc_disabled or suspend_done set are tested first
 *    (presumably bypassed with continue; that line is not shown).
 *  - A live thread is resumed via sgen_resume_thread when it has no
 *    stack_start, is flagged as inside a critical region (either
 *    in_critical_region or info.inside_critical_region), or its stopped_ip is
 *    classified by is_ip_in_managed_allocator.
 *    NOTE(review): skip = 1 is assigned on one outcome of that resume,
 *    presumably the failure one; the branch lines are not shown -- confirm.
 *  - For the remaining threads stopped_ip and stopped_domain are cleared and
 *    suspend_done is set. The second loop relies on this: it ignores threads
 *    whose stopped_ip is NULL.
 *  - restart_count == 0 is the termination test. Otherwise the code waits for
 *    restart_count acknowledgements, yields while sleep_duration is negative
 *    (it starts at -1), and on the other visible path calls g_usleep
 *    (sleep_duration) and grows the delay by 10.
 *  - The second loop calls sgen_suspend_thread on every thread that is not
 *    skipped and still has a stopped_ip; skip = 1 is again assigned on one
 *    outcome (presumably failure -- confirm).
 *  - restart_count - restarted_count is added to num_threads_died, then the
 *    code waits for restarted_count acknowledgements.
 */
91 restart_threads_until_none_in_managed_allocator (void)
93 int num_threads_died = 0;
94 int sleep_duration = -1;
97 int restart_count = 0, restarted_count = 0;
98 /* restart all threads that stopped in the
100 FOREACH_THREAD (info) {
102 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
104 if (mono_thread_info_is_live (info) &&
105 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
106 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
107 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
108 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
109 result = sgen_resume_thread (info);
113 info->client_info.skip = 1;
116 /* we set the stopped_ip to
117 NULL for threads which
118 we're not restarting so
119 that we can easily identify
121 info->client_info.stopped_ip = NULL;
122 info->client_info.stopped_domain = NULL;
123 info->client_info.suspend_done = TRUE;
126 /* if no threads were restarted, we're done */
127 if (restart_count == 0)
130 /* wait for the threads to signal their restart */
131 sgen_wait_for_suspend_ack (restart_count);
133 if (sleep_duration < 0) {
134 mono_thread_info_yield ();
137 g_usleep (sleep_duration);
138 sleep_duration += 10;
141 /* stop them again */
142 FOREACH_THREAD (info) {
144 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
146 result = sgen_suspend_thread (info);
151 info->client_info.skip = 1;
154 /* some threads might have died */
155 num_threads_died += restart_count - restarted_count;
156 /* wait for the threads to signal their suspension
158 sgen_wait_for_suspend_ack (restarted_count);
161 return num_threads_died;
/*
 * Takes the thread-info suspend lock through mono_thread_info_suspend_lock.
 * NOTE(review): other statements of the body, if any, are not among the
 * lines shown here.
 */
165 acquire_gc_locks (void)
168 mono_thread_info_suspend_lock ();
/*
 * Releases the thread-info suspend lock through
 * mono_thread_info_suspend_unlock; counterpart of acquire_gc_locks.
 * NOTE(review): other statements of the body, if any, are not among the
 * lines shown here.
 */
172 release_gc_locks (void)
174 mono_thread_info_suspend_unlock ();
/* Timestamp taken by sgen_client_stop_world before it suspends threads; sgen_client_restart_world measures the whole pause from it. */
178 static TV_DECLARE (stop_world_time);
/* Largest pause computed so far by sgen_client_restart_world; its log message prints the value as usec. */
179 static unsigned long max_pause_usec = 0;
/* Running TV_ELAPSED totals for the stop and restart handshakes; mono_sgen_init_stw registers them as the World stop and World restart counters. */
181 static guint64 time_stop_world;
182 static guint64 time_restart_world;
/*
 * Stops the world for a collection of the given generation.
 *
 * Visible sequence: emit pending GC move events when the profiler asks for
 * them, process toggle refs, record the stack and context of the calling
 * thread, bump sgen_global_stop_count, timestamp the start, and suspend the
 * other threads. With unified thread management enabled that is
 * sgen_unified_suspend_stop_world; the other visible path is
 * sgen_thread_handshake (TRUE) followed by
 * restart_threads_until_none_in_managed_allocator. Afterwards the handshake
 * duration is added to time_stop_world, the memory governor is told that the
 * collection started, and bridge data is reset when bridge processing is
 * needed.
 *
 * generation: forwarded to sgen_memgov_collection_start; its address is also
 * handed to update_current_thread_stack.
 *
 * NOTE(review): the declarations of count and dead, the condition guarding
 * the g_error call, and the call that takes the GC locks are not among the
 * lines shown here.
 */
184 /* LOCKING: assumes the GC lock is held */
186 sgen_client_stop_world (int generation)
188 TV_DECLARE (end_handshake);
190 /* notify the profiler of the leftovers */
191 /* FIXME this is the wrong spot, as we can STW for non-collection reasons. */
192 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
193 mono_sgen_gc_event_moves ();
197 /* We start to scan after locks are taken; this ensures we won't be interrupted. */
198 sgen_process_togglerefs ();
/* The calling thread is not suspended, so its own stack and context are recorded explicitly here. */
200 update_current_thread_stack (&generation);
202 sgen_global_stop_count++;
203 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
/* Start of the pause; sgen_client_restart_world computes the total pause from this timestamp. */
204 TV_GETTIME (stop_world_time);
206 if (mono_thread_info_unified_management_enabled ()) {
207 sgen_unified_suspend_stop_world ();
210 count = sgen_thread_handshake (TRUE);
211 dead = restart_threads_until_none_in_managed_allocator ();
213 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
216 SGEN_LOG (3, "world stopped");
218 TV_GETTIME (end_handshake);
219 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
221 sgen_memgov_collection_start (generation);
222 if (sgen_need_bridge_processing ())
223 sgen_bridge_reset_data ();
/*
 * Restarts the world after a collection and reports pause timings.
 *
 * Visible sequence: emit pending GC move events when the profiler asks for
 * them, clear stack_start and the saved context of every thread, timestamp
 * the start of the restart, and resume threads through
 * sgen_unified_suspend_restart_world (unified management) or
 * sgen_thread_handshake (FALSE). The restart duration is added to
 * time_restart_world, the total pause since stop_world_time is stored in usec
 * and folded into max_pause_usec, and bridge_usec is measured between end_sw
 * and end_bridge.
 *
 * generation: not referenced by any line visible here.
 * timing:     element 0 receives stw_time (usec) and bridge_time (bridge_usec).
 *
 * NOTE(review): the declaration and sampling of end_sw, the release of the GC
 * locks that the long comment below refers to, and any NULL guard around the
 * timing writes are not among the lines shown here -- confirm in the full file.
 */
226 /* LOCKING: assumes the GC lock is held */
228 sgen_client_restart_world (int generation, GGTimingInfo *timing)
231 TV_DECLARE (start_handshake);
232 TV_DECLARE (end_bridge);
233 unsigned long usec, bridge_usec;
235 /* notify the profiler of the leftovers */
236 /* FIXME this is the wrong spot, as we can STW for non-collection reasons. */
237 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
238 mono_sgen_gc_event_moves ();
/* Drop the per-thread scan state captured while the world was stopped. */
240 FOREACH_THREAD (info) {
241 info->client_info.stack_start = NULL;
242 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
245 TV_GETTIME (start_handshake);
247 if (mono_thread_info_unified_management_enabled ())
248 sgen_unified_suspend_restart_world ();
250 sgen_thread_handshake (FALSE);
253 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
254 usec = TV_ELAPSED (stop_world_time, end_sw);
255 max_pause_usec = MAX (usec, max_pause_usec);
257 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
260 * We must release the thread info suspend lock after doing
261 * the thread handshake. Otherwise, if the GC stops the world
262 * and a thread is in the process of starting up, but has not
263 * yet registered (it's not in the thread_list), it is
264 * possible that the thread does register while the world is
265 * stopped. When restarting the GC will then try to restart
266 * said thread, but since it never got the suspend signal, it
267 * cannot answer the restart signal, so a deadlock results.
271 TV_GETTIME (end_bridge);
272 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
275 timing [0].stw_time = usec;
276 timing [0].bridge_time = bridge_usec;
/*
 * Registers the two stop-the-world timing accumulators of this file with
 * mono_counters_register, under the names World stop and World restart, with
 * the GC, ULONG and TIME counter flags.
 */
281 mono_sgen_init_stw (void)
283 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
284 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
287 /* Unified suspend code */
/*
 * Decides whether the thread described by info takes part in the current
 * stop-the-world.
 *
 * The visible checks are, in order: gc_disabled is set; skip is set; info is
 * the calling thread; the thread is a sgen thread pool thread; the thread is
 * no longer live.
 *
 * info:   thread to classify.
 * reason: output slot; callers in this file pass either the address of a
 *         local int or NULL.
 *
 * NOTE(review): the bodies of the checks and the final return are not among
 * the lines shown here. Presumably each check stores a distinct code through
 * reason (when it is not NULL) and returns FALSE, and the function returns
 * TRUE when no check matches -- confirm against the full file.
 */
290 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
293 A thread explicitly asked to be skiped because it holds no managed state.
294 This is used by TP and finalizer threads.
295 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
297 if (info->client_info.gc_disabled) {
304 We have detected that this thread is failing/dying, ignore it.
305 FIXME: can't we merge this with thread_is_dying?
307 if (info->client_info.skip) {
314 Suspending the current thread will deadlock us, bad idea.
316 if (info == mono_thread_info_current ()) {
323 We can't suspend the workers that will do all the heavy lifting.
324 FIXME Use some state bit in SgenThreadInfo for this.
326 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
333 The thread has signaled that it started to detach, ignore it.
334 FIXME: can't we merge this with skip
336 if (!mono_thread_info_is_live (info)) {
/*
 * Stop-the-world path used when unified thread management is enabled (it is
 * called from sgen_client_stop_world in that case).
 *
 * Phases visible in the body:
 *  1. mono_threads_begin_global_suspend, then for every thread: clear skip
 *     and suspend_done and, when sgen_is_thread_in_current_stw accepts the
 *     thread, call mono_thread_info_begin_suspend; skip is set when that call
 *     reports failure.
 *  2. Mark the calling thread suspend_done and wait for pending operations.
 *  3. Retry rounds over threads that participate and are not yet
 *     suspend_done. A thread found in a critical location must have a suspend
 *     count of exactly 1 (otherwise g_error) and is resumed with
 *     mono_thread_info_begin_resume; the other visible branch asserts that
 *     the thread is not in a critical region and sets suspend_done. The round
 *     tests restart_counter == 0, waits for pending operations, yields while
 *     sleep_duration is negative or sleeps with a delay that grows by 10, and
 *     then begins suspending again every such thread that is running.
 *  4. Final pass: for each participating thread, assert suspend_done, copy
 *     the context of its suspend state into client_info.ctx, and fill in
 *     stopped_domain, stopped_ip and stack_start. All three are NULL when the
 *     unwind data lacks a domain or an LMF; otherwise the domain comes from
 *     TLS, the ip from the context, and stack_start is the context stack
 *     pointer minus REDZONE_SIZE. A stack_start that is NULL or outside
 *     [stack_start_limit, stack_end) is reported with g_error. A binary
 *     protocol thread-suspend event is then emitted. Threads that do not
 *     participate must not be suspend_done unless they are the calling thread.
 *
 * NOTE(review): loop headers, else lines, the restart_counter updates and the
 * conditions under which skip = TRUE is assigned in phase 3 are not among the
 * lines shown here; the branch structure above is inferred from statement
 * order and the debug messages -- confirm against the full file.
 */
346 sgen_unified_suspend_stop_world (void)
349 int sleep_duration = -1;
351 mono_threads_begin_global_suspend ();
352 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
354 FOREACH_THREAD (info) {
356 info->client_info.skip = FALSE;
357 info->client_info.suspend_done = FALSE;
358 if (sgen_is_thread_in_current_stw (info, &reason)) {
/* A failed begin_suspend request takes the thread out of this STW by setting skip. */
359 info->client_info.skip = !mono_thread_info_begin_suspend (info);
360 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
362 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
/* The calling thread is never suspended, so it is marked done up front. */
366 mono_thread_info_current ()->client_info.suspend_done = TRUE;
367 mono_threads_wait_pending_operations ();
371 FOREACH_THREAD (info) {
373 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
374 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
379 All threads that reach here are pristine suspended. This means the following:
381 - We haven't accepted the previous suspend as good.
382 - We haven't gave up on it for this STW (it's either bad or asked not to)
384 if (mono_thread_info_in_critical_location (info)) {
386 gint suspend_count = mono_thread_info_suspend_count (info);
387 if (!(suspend_count == 1))
388 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
389 res = mono_thread_info_begin_resume (info);
390 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
394 info->client_info.skip = TRUE;
396 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
397 g_assert (!info->client_info.in_critical_region);
398 info->client_info.suspend_done = TRUE;
402 if (restart_counter == 0)
404 mono_threads_wait_pending_operations ();
406 if (sleep_duration < 0) {
407 mono_thread_info_yield ();
410 g_usleep (sleep_duration);
411 sleep_duration += 10;
414 FOREACH_THREAD (info) {
416 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
417 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
421 if (mono_thread_info_is_running (info)) {
422 gboolean res = mono_thread_info_begin_suspend (info);
423 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
425 info->client_info.skip = TRUE;
429 mono_threads_wait_pending_operations ();
432 FOREACH_THREAD (info) {
434 if (sgen_is_thread_in_current_stw (info, &reason)) {
435 MonoThreadUnwindState *state;
437 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
438 g_assert (info->client_info.suspend_done);
440 state = mono_thread_info_get_suspend_state (info);
442 info->client_info.ctx = state->ctx;
444 if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
445 /* thread is starting or detaching, nothing to scan here */
446 info->client_info.stopped_domain = NULL;
447 info->client_info.stopped_ip = NULL;
448 info->client_info.stack_start = NULL;
450 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
451 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
452 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
453 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
455 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
456 if (!info->client_info.stack_start
457 || info->client_info.stack_start < info->client_info.stack_start_limit
458 || info->client_info.stack_start >= info->client_info.stack_end) {
459 g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
460 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
464 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
466 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
467 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * Restart-the-world path used when unified thread management is enabled (it
 * is called from sgen_client_restart_world in that case).
 *
 * Every thread accepted by sgen_is_thread_in_current_stw is resumed with
 * mono_thread_info_begin_resume and a binary protocol thread-restart event is
 * emitted for it; other threads only produce an IGNORE debug message. The
 * function then waits for pending operations and ends the global suspend that
 * sgen_unified_suspend_stop_world began.
 *
 * NOTE(review): the resume call is made inside g_assert, so the resume itself
 * depends on g_assert always evaluating its argument -- confirm that holds for
 * every build configuration.
 */
473 sgen_unified_suspend_restart_world (void)
475 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
476 FOREACH_THREAD (info) {
478 if (sgen_is_thread_in_current_stw (info, &reason)) {
479 g_assert (mono_thread_info_begin_resume (info));
480 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
482 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
484 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
488 mono_threads_wait_pending_operations ();
489 mono_threads_end_global_suspend ();