2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
/* Local shorthands for the SGEN_TV_* timing macros. */
28 #define TV_DECLARE SGEN_TV_DECLARE
29 #define TV_GETTIME SGEN_TV_GETTIME
30 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations: both helpers are defined in the unified suspend section at the end of this file. */
32 static void sgen_unified_suspend_restart_world (void);
33 static void sgen_unified_suspend_stop_world (void);
/* Incremented by sgen_client_stop_world () every time it stops the world. */
35 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 *
 *   Rounds the working value p up to a multiple of sizeof (gpointer):
 * sizeof (gpointer) - 1 is added and the low bits are then masked off, so a
 * value that is already aligned is left unchanged. This relies on
 * sizeof (gpointer) being a power of two.
 *
 * NOTE(review): presumably p is initialised from ptr and the aligned value is
 * returned; neither statement is visible here - confirm.
 */
38 align_pointer (void *ptr)
41 p += sizeof (gpointer) - 1;
42 p &= ~ (sizeof (gpointer) - 1);
/*
 * Scratch storage used by update_current_thread_stack () to capture the
 * register state of the calling thread before copying it into the
 * client_info of that thread.
 * NOTE(review): presumably only one of the two is compiled in, selected by
 * USE_MONO_CTX - confirm.
 */
47 static MonoContext cur_thread_ctx;
49 static mword cur_thread_regs [ARCH_NUM_REGS];
/*
 * update_current_thread_stack:
 *
 *   Records the stack position and register state of the calling thread in
 * its SgenThreadInfo (obtained through mono_thread_info_current ()).
 *
 * The stack start is the pointer-aligned address of the local stack_guard and
 * is asserted to lie inside [stack_start_limit, stack_end). The register state
 * is captured either as a MonoContext (stored in client_info.ctx) or as a raw
 * register array (stored in client_info.regs). In both variants the
 * thread_suspend_func GC callback is invoked when one is registered; it is
 * handed the saved context in the MonoContext variant and NULL otherwise.
 *
 * The start parameter is not referenced by any of the visible statements.
 */
53 update_current_thread_stack (void *start)
56 #if !defined(USE_MONO_CTX)
57 void *reg_ptr = cur_thread_regs;
59 SgenThreadInfo *info = mono_thread_info_current ();
/* The address of a local variable serves as the current stack position of this thread. */
61 info->client_info.stack_start = align_pointer (&stack_guard);
62 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/* MonoContext variant: snapshot the context and pass it to the suspend callback. */
64 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
65 memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
66 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
67 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/* Raw register variant: store the registers; the callback gets no context. */
69 ARCH_STORE_REGS (reg_ptr);
70 memcpy (&info->client_info.regs, reg_ptr, sizeof (info->client_info.regs));
71 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
72 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, NULL);
/*
 * is_ip_in_managed_allocator:
 * @domain: domain whose JIT info table is searched
 * @ip: instruction pointer to classify
 *
 *   Looks up the JIT info covering @ip in @domain and returns the result of
 * sgen_is_critical_method () for the method that owns it.
 *
 * Two checks run before the lookup: whether the calling thread has an
 * internal thread object yet, and whether any critical method is registered
 * at all.
 * NOTE(review): both checks presumably return FALSE early, as does a failed
 * lookup; those return statements are not visible here - confirm.
 */
77 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
81 if (!mono_thread_internal_current ())
82 /* Happens during thread attach */
87 if (!sgen_has_critical_method ())
91 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
92 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
93 * to register the jit info for all GC critical methods after they are JITted/loaded.
95 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
99 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 *   Repeatedly lets threads that were stopped at an unsafe point run a little
 * further and then suspends them again, until a pass finds no thread that
 * needs restarting.
 *
 * One pass does the following:
 *  - Threads flagged skip, gc_disabled or suspend_done are filtered out.
 *  - A live thread is resumed with sgen_resume_thread () when it has no
 *    stack_start, is inside a critical region, or its stopped IP belongs to a
 *    GC critical method (see is_ip_in_managed_allocator ()).
 *  - Every other thread gets stopped_ip and stopped_domain cleared and is
 *    marked suspend_done.
 *  - If restart_count is zero the work is finished. Otherwise the function
 *    waits for the restart acknowledgements, backs off (a yield while
 *    sleep_duration is negative, otherwise g_usleep () with a delay that grows
 *    by 10 each time), and suspends the restarted threads again with
 *    sgen_suspend_thread ().
 *  - restart_count - restarted_count is added to the count of threads that
 *    died in between, and the suspend acknowledgements are awaited.
 *
 * Returns the accumulated number of threads that died.
 *
 * NOTE(review): the loop construct, the counter increments and the handling
 * of the resume/suspend results are not visible here; skip is presumably set
 * when sgen_resume_thread () or sgen_suspend_thread () fails - confirm.
 */
103 restart_threads_until_none_in_managed_allocator (void)
105 int num_threads_died = 0;
106 int sleep_duration = -1;
109 int restart_count = 0, restarted_count = 0;
110 /* restart all threads that stopped in the
112 FOREACH_THREAD (info) {
114 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
116 if (mono_thread_info_is_live (info) &&
117 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
118 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
119 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
120 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
121 result = sgen_resume_thread (info);
125 info->client_info.skip = 1;
128 /* we set the stopped_ip to
129 NULL for threads which
130 we're not restarting so
131 that we can easily identify
133 info->client_info.stopped_ip = NULL;
134 info->client_info.stopped_domain = NULL;
135 info->client_info.suspend_done = TRUE;
138 /* if no threads were restarted, we're done */
139 if (restart_count == 0)
142 /* wait for the threads to signal their restart */
143 sgen_wait_for_suspend_ack (restart_count);
145 if (sleep_duration < 0) {
146 mono_thread_info_yield ();
149 g_usleep (sleep_duration);
150 sleep_duration += 10;
153 /* stop them again */
154 FOREACH_THREAD (info) {
156 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
158 result = sgen_suspend_thread (info);
163 info->client_info.skip = 1;
166 /* some threads might have died */
167 num_threads_died += restart_count - restarted_count;
168 /* wait for the threads to signal their suspension
170 sgen_wait_for_suspend_ack (restarted_count);
173 return num_threads_died;
/*
 * acquire_gc_locks:
 *
 *   Takes the thread-info suspend lock. release_gc_locks () is the matching
 * release.
 * NOTE(review): only this one lock is visible here; confirm whether further
 * locks are taken.
 */
177 acquire_gc_locks (void)
180 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 *   Releases the thread-info suspend lock taken by acquire_gc_locks ().
 * NOTE(review): only this one lock is visible here; confirm whether further
 * locks are released.
 */
184 release_gc_locks (void)
186 mono_thread_info_suspend_unlock ();
/* Timestamp taken by sgen_client_stop_world () just before threads are suspended. */
190 static TV_DECLARE (stop_world_time);
/* Longest interval seen so far between stop_world_time and the end of a world restart. */
191 static unsigned long max_pause_usec = 0;
/* Accumulated handshake times; exported as the "World stop" and "World restart" counters by mono_sgen_init_stw (). */
193 static guint64 time_stop_world;
194 static guint64 time_restart_world;
/*
 * sgen_client_stop_world:
 * @generation: generation about to be collected; forwarded to
 *              sgen_memgov_collection_start ()
 *
 *   Suspends the managed threads so that a collection can run.
 *
 * In order, the visible steps are: flush pending GC move events to the
 * profiler when MONO_PROFILE_GC_MOVES is enabled, process togglerefs, record
 * the stack and registers of the calling thread, bump sgen_global_stop_count,
 * timestamp stop_world_time, and suspend the threads. With unified thread
 * management the suspension is done by sgen_unified_suspend_stop_world ();
 * the other path uses sgen_thread_handshake (TRUE) followed by
 * restart_threads_until_none_in_managed_allocator (). Afterwards the
 * handshake duration is added to time_stop_world, the memory governor is told
 * the collection starts, and bridge data is reset when bridge processing is
 * needed.
 *
 * NOTE(review): the lock acquisition, the else keyword and the condition
 * guarding g_error () are not visible here - confirm.
 */
196 /* LOCKING: assumes the GC lock is held */
198 sgen_client_stop_world (int generation)
200 TV_DECLARE (end_handshake);
202 /* notify the profiler of the leftovers */
203 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
204 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
205 mono_sgen_gc_event_moves ();
209 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
210 sgen_process_togglerefs ();
/* The address of the parameter is passed as a marker inside the frame of this function. */
212 update_current_thread_stack (&generation);
214 sgen_global_stop_count++;
215 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
216 TV_GETTIME (stop_world_time);
218 if (mono_thread_info_unified_management_enabled ()) {
219 sgen_unified_suspend_stop_world ();
/* Non-unified path: signal every thread, then nudge the ones stopped at unsafe points. */
222 count = sgen_thread_handshake (TRUE);
223 dead = restart_threads_until_none_in_managed_allocator ();
225 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
228 SGEN_LOG (3, "world stopped");
230 TV_GETTIME (end_handshake);
231 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
233 sgen_memgov_collection_start (generation);
234 if (sgen_need_bridge_processing ())
235 sgen_bridge_reset_data ();
/*
 * sgen_client_restart_world:
 * @generation: not referenced by any of the visible statements
 * @timing: element 0 receives the pause time (stw_time) and the bridge time
 *          (bridge_time)
 *
 *   Resumes the threads suspended by sgen_client_stop_world ().
 *
 * In order, the visible steps are: flush pending GC move events to the
 * profiler when MONO_PROFILE_GC_MOVES is enabled, clear the recorded
 * stack_start and saved context or registers of every thread, and resume the
 * threads (sgen_unified_suspend_restart_world () with unified thread
 * management, sgen_thread_handshake (FALSE) otherwise). The restart duration
 * is added to time_restart_world, the total pause since stop_world_time
 * updates max_pause_usec and is logged, and the interval from end_sw to
 * end_bridge is reported as the bridge time.
 *
 * NOTE(review): the declaration and capture of end_sw, the lock release that
 * the long comment below refers to, and any NULL check on @timing are not
 * visible here - confirm.
 */
238 /* LOCKING: assumes the GC lock is held */
240 sgen_client_restart_world (int generation, GGTimingInfo *timing)
243 TV_DECLARE (start_handshake);
244 TV_DECLARE (end_bridge);
245 unsigned long usec, bridge_usec;
247 /* notify the profiler of the leftovers */
248 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
249 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
250 mono_sgen_gc_event_moves ();
/* Drop the per-thread stack and register snapshots taken for this pause. */
252 FOREACH_THREAD (info) {
253 info->client_info.stack_start = NULL;
255 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
257 memset (&info->client_info.regs, 0, sizeof (info->client_info.regs));
261 TV_GETTIME (start_handshake);
263 if (mono_thread_info_unified_management_enabled ())
264 sgen_unified_suspend_restart_world ();
266 sgen_thread_handshake (FALSE);
269 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
270 usec = TV_ELAPSED (stop_world_time, end_sw);
271 max_pause_usec = MAX (usec, max_pause_usec);
273 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
276 * We must release the thread info suspend lock after doing
277 * the thread handshake. Otherwise, if the GC stops the world
278 * and a thread is in the process of starting up, but has not
279 * yet registered (it's not in the thread_list), it is
280 * possible that the thread does register while the world is
281 * stopped. When restarting the GC will then try to restart
282 * said thread, but since it never got the suspend signal, it
283 * cannot answer the restart signal, so a deadlock results.
287 TV_GETTIME (end_bridge);
288 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
291 timing [0].stw_time = usec;
292 timing [0].bridge_time = bridge_usec;
/*
 * mono_sgen_init_stw:
 *
 *   Registers the two stop-the-world timing counters, "World stop" and
 * "World restart", backed by time_stop_world and time_restart_world.
 */
297 mono_sgen_init_stw (void)
299 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
300 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
303 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify
 *
 *   Decides whether @info takes part in the current stop-the-world. The
 * callers in this file only suspend, inspect and resume threads for which
 * this returns true.
 *
 * Five conditions are tested in this order: the thread has GC disabled, the
 * thread is flagged skip, the thread is the calling thread, the thread
 * belongs to the sgen thread pool, and the thread is no longer live.
 *
 * NOTE(review): each condition presumably returns FALSE, with TRUE returned
 * when none applies; the return statements are not visible here - confirm.
 */
306 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
309 A thread explicitly asked to be skiped because it holds no managed state.
310 This is used by TP and finalizer threads.
311 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
313 if (info->client_info.gc_disabled) {
318 We have detected that this thread is failing/dying, ignore it.
319 FIXME: can't we merge this with thread_is_dying?
321 if (info->client_info.skip) {
326 Suspending the current thread will deadlock us, bad idea.
328 if (info == mono_thread_info_current ()) {
333 We can't suspend the workers that will do all the heavy lifting.
334 FIXME Use some state bit in SgenThreadInfo for this.
336 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
341 The thread has signaled that it started to detach, ignore it.
342 FIXME: can't we merge this with skip
344 if (!mono_thread_info_is_live (info)) {
/*
 * update_sgen_info:
 * @info: a suspended thread
 *
 *   Copies what sgen needs from the suspend state of @info into its
 * client_info: the domain stored in the TLS_KEY_DOMAIN slot, the instruction
 * pointer of the saved context, the stack start (saved stack pointer minus
 * REDZONE_SIZE) and the saved context itself.
 *
 * Aborts with g_error ("BAD STACK") when the computed stack start lies
 * outside [stack_start_limit, stack_end).
 *
 * NOTE(review): g_assert_not_reached () presumably belongs to the branch
 * compiled when USE_MONO_CTX is not defined; the preprocessor lines are not
 * visible here - confirm.
 */
352 update_sgen_info (SgenThreadInfo *info)
356 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
357 info->client_info.stopped_domain = (MonoDomain *)mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
358 info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
/* Start REDZONE_SIZE bytes below the saved stack pointer. */
359 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
361 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
362 if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
363 g_error ("BAD STACK");
365 info->client_info.stack_start = stack_start;
367 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
369 g_assert_not_reached ();
/*
 * sgen_unified_suspend_stop_world:
 *
 *   Stops the world through the unified thread-suspend machinery.
 *
 * Phase 1: start a global suspend, reset skip and suspend_done on every
 * thread, and call mono_thread_info_begin_suspend () on each thread accepted
 * by sgen_is_thread_in_current_stw (). A thread whose suspend request fails
 * is flagged skip. The calling thread is marked suspend_done and pending
 * operations are awaited.
 *
 * Phase 2: every participating thread that is not yet suspend_done is
 * examined.
 *  - It is flagged skip when mono_thread_info_check_suspend_result () fails.
 *  - It is resumed with mono_thread_info_begin_resume () when it stopped in a
 *    critical location.
 *  - Otherwise it is marked suspend_done.
 * When threads were restarted, the function waits for pending operations,
 * backs off (g_usleep () with a delay that grows by 10 each round),
 * re-suspends every participating thread that is running, and waits again.
 *
 * Phase 3: every participating thread is asserted to be suspend_done and its
 * state is copied with update_sgen_info (); every other thread is asserted
 * to be either not suspend_done or the calling thread.
 *
 * NOTE(review): the loop around phase 2, the updates of restart_counter, the
 * exit taken when it is zero, and the handling of the begin_resume and
 * begin_suspend results are not visible here; skip is presumably set when
 * either call fails - confirm.
 */
374 sgen_unified_suspend_stop_world (void)
377 int sleep_duration = -1;
379 mono_threads_begin_global_suspend ();
380 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
382 FOREACH_THREAD (info) {
383 info->client_info.skip = FALSE;
384 info->client_info.suspend_done = FALSE;
385 if (sgen_is_thread_in_current_stw (info)) {
386 info->client_info.skip = !mono_thread_info_begin_suspend (info);
387 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
389 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
393 mono_thread_info_current ()->client_info.suspend_done = TRUE;
394 mono_threads_wait_pending_operations ();
398 FOREACH_THREAD (info) {
399 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
400 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
405 All threads that reach here are pristine suspended. This means the following:
407 - We haven't accepted the previous suspend as good.
408 - We haven't gave up on it for this STW (it's either bad or asked not to)
410 if (!mono_thread_info_check_suspend_result (info)) {
411 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
412 info->client_info.skip = TRUE;
413 } else if (mono_thread_info_in_critical_location (info)) {
415 g_assert (mono_thread_info_suspend_count (info) == 1);
416 res = mono_thread_info_begin_resume (info);
417 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
421 info->client_info.skip = TRUE;
423 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
424 g_assert (!info->client_info.in_critical_region);
425 info->client_info.suspend_done = TRUE;
429 if (restart_counter == 0)
431 mono_threads_wait_pending_operations ();
433 if (sleep_duration < 0) {
441 g_usleep (sleep_duration);
442 sleep_duration += 10;
445 FOREACH_THREAD (info) {
446 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
447 gboolean res = mono_thread_info_begin_suspend (info);
448 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
450 info->client_info.skip = TRUE;
454 mono_threads_wait_pending_operations ();
457 FOREACH_THREAD (info) {
458 if (sgen_is_thread_in_current_stw (info)) {
459 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
460 g_assert (info->client_info.suspend_done);
461 update_sgen_info (info);
463 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * sgen_unified_suspend_restart_world:
 *
 *   Resumes every thread that took part in the current stop-the-world (as
 * decided by sgen_is_thread_in_current_stw ()), then waits for the pending
 * thread operations to complete and ends the global suspend.
 *
 * NOTE(review): the resume call is made inside g_assert (). If assertions can
 * be compiled out in some build configuration the threads would never be
 * resumed - confirm that g_assert () always evaluates its argument here.
 */
469 sgen_unified_suspend_restart_world (void)
471 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
472 FOREACH_THREAD (info) {
473 if (sgen_is_thread_in_current_stw (info)) {
/* A failed resume is treated as fatal. */
474 g_assert (mono_thread_info_begin_resume (info));
475 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
477 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
481 mono_threads_wait_pending_operations ();
482 mono_threads_end_global_suspend ();