2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
/* Local shorthands for the SGEN_TV_* timing macros used throughout this file. */
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations of the unified-suspend entry points defined at the end of this file. */
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
/* Incremented by sgen_client_stop_world () on every stop-the-world request. */
36 unsigned int sgen_global_stop_count = 0;
/*
 * Rounds an address up to a multiple of sizeof (gpointer): add (size - 1),
 * then clear the low bits with the inverted mask.
 * NOTE(review): the mask form assumes sizeof (gpointer) is a power of two.
 * The declaration of `p` and the return statement are not visible in this
 * listing - confirm against the full file.
 */
39 align_pointer (void *ptr)
42 p += sizeof (gpointer) - 1;
43 p &= ~ (sizeof (gpointer) - 1);
/* Scratch context filled by MONO_CONTEXT_GET_CURRENT () in update_current_thread_stack () before it is copied into the thread info. */
47 static MonoContext cur_thread_ctx;
/*
 * Records the calling thread's stack start and register context in its
 * SgenThreadInfo (obtained from mono_thread_info_current ()).
 * NOTE(review): the `start` parameter is not referenced in the lines visible
 * here, and the declaration of `stack_guard` is outside this view.
 */
50 update_current_thread_stack (void *start)
53 SgenThreadInfo *info = mono_thread_info_current ();
/* Use the aligned address of the local stack_guard as this thread's stack start. */
55 info->client_info.stack_start = align_pointer (&stack_guard);
56 g_assert (info->client_info.stack_start);
/* The recorded start must lie inside [stack_start_limit, stack_end). */
57 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/* Capture the current context into the file-level scratch, then copy it into the thread info. */
58 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
59 memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
/* If a thread_suspend_func callback is registered, pass it the runtime data and the saved context. */
60 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
61 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * Reports whether `ip` belongs to a method that sgen treats as critical:
 * the JIT info for `ip` is looked up in `domain` and the owning method is
 * passed to sgen_is_critical_method ().
 * NOTE(review): the bodies of the two early checks and the handling of a
 * failed JIT-info lookup are not visible in this listing; presumably they
 * return FALSE - confirm against the full file.
 */
65 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
69 if (!mono_thread_internal_current ())
70 /* Happens during thread attach */
/* Nothing to look up when no critical method is registered. */
75 if (!sgen_has_critical_method ())
79 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
80 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
81 * to register the jit info for all GC critical methods after they are JITted/loaded.
83 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
87 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * Legacy (non-unified) suspend path: briefly resumes suspended threads that
 * were stopped at an unsafe point and suspends them again, so that they end
 * up stopped somewhere the collector can handle.
 *
 * A thread is resumed when it is live and either has no stack_start, is
 * flagged as being in a critical region, or was stopped at an ip for which
 * is_ip_in_managed_allocator () is true. Every other thread gets its
 * stopped_ip and stopped_domain cleared and is marked suspend_done.
 *
 * Returns num_threads_died, which accumulates
 * (restart_count - restarted_count) for each round.
 *
 * NOTE(review): the enclosing retry loop, the `continue`/`break` statements,
 * the counter increments and the handling of failed sgen_resume_thread () /
 * sgen_suspend_thread () calls are not visible in this listing; the visible
 * `skip = 1` assignments presumably belong to those failure paths - confirm.
 */
91 restart_threads_until_none_in_managed_allocator (void)
93 int num_threads_died = 0;
94 int sleep_duration = -1;
97 int restart_count = 0, restarted_count = 0;
98 /* restart all threads that stopped in the
100 FOREACH_THREAD (info) {
102 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
104 if (mono_thread_info_is_live (info) &&
105 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
106 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
107 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
108 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
109 result = sgen_resume_thread (info);
113 info->client_info.skip = 1;
116 /* we set the stopped_ip to
117 NULL for threads which
118 we're not restarting so
119 that we can easily identify
121 info->client_info.stopped_ip = NULL;
122 info->client_info.stopped_domain = NULL;
123 info->client_info.suspend_done = TRUE;
126 /* if no threads were restarted, we're done */
127 if (restart_count == 0)
130 /* wait for the threads to signal their restart */
131 sgen_wait_for_suspend_ack (restart_count);
/* Give the resumed threads time to move on: yield while sleep_duration is negative, otherwise sleep; the delay grows by 10 each time it is used. */
133 if (sleep_duration < 0) {
134 mono_thread_info_yield ();
137 g_usleep (sleep_duration);
138 sleep_duration += 10;
141 /* stop them again */
142 FOREACH_THREAD (info) {
/* Threads with a NULL stopped_ip were not restarted above, so they are not suspended again. */
144 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
146 result = sgen_suspend_thread (info);
151 info->client_info.skip = 1;
154 /* some threads might have died */
155 num_threads_died += restart_count - restarted_count;
156 /* wait for the threads to signal their suspension
158 sgen_wait_for_suspend_ack (restarted_count);
161 return num_threads_died;
/*
 * Takes the thread-info suspend lock; release_gc_locks () is the counterpart.
 * NOTE(review): other statements of this function may be missing from this listing.
 */
165 acquire_gc_locks (void)
168 mono_thread_info_suspend_lock ();
/*
 * Releases the thread-info suspend lock taken by acquire_gc_locks ().
 * NOTE(review): other statements of this function may be missing from this listing.
 */
172 release_gc_locks (void)
174 mono_thread_info_suspend_unlock ();
/* Timestamp taken in sgen_client_stop_world (); sgen_client_restart_world () measures the pause from it. */
178 static TV_DECLARE (stop_world_time);
/* Longest pause seen so far, updated in sgen_client_restart_world (). */
179 static unsigned long max_pause_usec = 0;
/* Accumulated handshake times, registered as the "World stop" and "World restart" counters in mono_sgen_init_stw (). */
181 static guint64 time_stop_world;
182 static guint64 time_restart_world;
184 /* LOCKING: assumes the GC lock is held */
/*
 * Stops the world for a collection of the given generation.
 *
 * Visible steps, in order: flush pending profiler move events, process
 * togglerefs, record the calling thread's stack and context, bump
 * sgen_global_stop_count, timestamp the start of the pause, suspend the
 * other threads, add the handshake time to time_stop_world, notify the
 * memory governor and reset bridge data when bridge processing is needed.
 *
 * NOTE(review): several lines are missing from this listing, including the
 * declarations of `count` and `dead`, the `else` that selects the legacy
 * handshake path, the condition guarding g_error (), and presumably a call
 * that takes the GC locks before the toggleref processing - confirm.
 */
186 sgen_client_stop_world (int generation)
188 TV_DECLARE (end_handshake);
190 /* notify the profiler of the leftovers */
191 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
192 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
193 mono_sgen_gc_event_moves ();
197 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
198 sgen_process_togglerefs ();
/* Record this thread's own stack start and context; it is not suspended like the others. */
200 update_current_thread_stack (&generation);
202 sgen_global_stop_count++;
203 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
/* Start of the pause; sgen_client_restart_world () measures the total pause from this timestamp. */
204 TV_GETTIME (stop_world_time);
/* Unified thread management suspends via mono-threads; the remaining lines are the legacy handshake path. */
206 if (mono_thread_info_unified_management_enabled ()) {
207 sgen_unified_suspend_stop_world ();
210 count = sgen_thread_handshake (TRUE);
211 dead = restart_threads_until_none_in_managed_allocator ();
213 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
216 SGEN_LOG (3, "world stopped");
/* Account the time spent suspending threads. */
218 TV_GETTIME (end_handshake);
219 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
221 sgen_memgov_collection_start (generation);
222 if (sgen_need_bridge_processing ())
223 sgen_bridge_reset_data ();
226 /* LOCKING: assumes the GC lock is held */
/*
 * Restarts the world after a collection and reports timing.
 *
 * Visible steps, in order: flush pending profiler move events, clear every
 * thread's recorded stack_start and context, resume the threads (unified or
 * legacy handshake), update time_restart_world and max_pause_usec, and
 * store the pause and bridge durations in timing [0].
 *
 * NOTE(review): the declaration and assignment of `end_sw`, the `else`
 * before the legacy handshake, the statements between the pause log and
 * TV_GETTIME (end_bridge), and any guard around the `timing` writes are not
 * visible in this listing - confirm against the full file.
 */
228 sgen_client_restart_world (int generation, GGTimingInfo *timing)
231 TV_DECLARE (start_handshake);
232 TV_DECLARE (end_bridge);
233 unsigned long usec, bridge_usec;
235 /* notify the profiler of the leftovers */
236 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
237 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
238 mono_sgen_gc_event_moves ();
/* Drop the per-thread stack start and context recorded for this stop-the-world. */
240 FOREACH_THREAD (info) {
241 info->client_info.stack_start = NULL;
242 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
245 TV_GETTIME (start_handshake);
247 if (mono_thread_info_unified_management_enabled ())
248 sgen_unified_suspend_restart_world ();
250 sgen_thread_handshake (FALSE);
/* Restart handshake time, then the total pause measured from the timestamp taken in sgen_client_stop_world (). */
253 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
254 usec = TV_ELAPSED (stop_world_time, end_sw);
255 max_pause_usec = MAX (usec, max_pause_usec);
257 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
260 * We must release the thread info suspend lock after doing
261 * the thread handshake. Otherwise, if the GC stops the world
262 * and a thread is in the process of starting up, but has not
263 * yet registered (it's not in the thread_list), it is
264 * possible that the thread does register while the world is
265 * stopped. When restarting the GC will then try to restart
266 * said thread, but since it never got the suspend signal, it
267 * cannot answer the restart signal, so a deadlock results.
/* bridge_usec is the time elapsed between end_sw and end_bridge. */
271 TV_GETTIME (end_bridge);
272 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
275 timing [0].stw_time = usec;
276 timing [0].bridge_time = bridge_usec;
/* Registers the accumulated stop and restart handshake times as the GC time counters "World stop" and "World restart". */
281 mono_sgen_init_stw (void)
283 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
284 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
287 /* Unified suspend code */
/*
 * Decides whether `info` takes part in the current stop-the-world. The
 * unified suspend code in this file only suspends and resumes threads for
 * which this returns true.
 *
 * The visible checks cover, in order: gc_disabled, skip, being the current
 * thread, being a thread pool thread, and no longer being live.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably each matching check returns FALSE and falling through returns
 * TRUE - confirm against the full file.
 */
290 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
293 A thread explicitly asked to be skiped because it holds no managed state.
294 This is used by TP and finalizer threads.
295 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
297 if (info->client_info.gc_disabled) {
302 We have detected that this thread is failing/dying, ignore it.
303 FIXME: can't we merge this with thread_is_dying?
305 if (info->client_info.skip) {
310 Suspending the current thread will deadlock us, bad idea.
312 if (info == mono_thread_info_current ()) {
317 We can't suspend the workers that will do all the heavy lifting.
318 FIXME Use some state bit in SgenThreadInfo for this.
320 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
325 The thread has signaled that it started to detach, ignore it.
326 FIXME: can't we merge this with skip
328 if (!mono_thread_info_is_live (info)) {
/*
 * Copies a suspended thread's state from its mono-threads suspend state
 * into the sgen client info: stopped domain, stopped ip, stack start and
 * the full register context.
 * NOTE(review): the declaration of `stack_start` is not visible in this listing.
 */
336 update_sgen_info (SgenThreadInfo *info)
340 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
341 info->client_info.stopped_domain = (MonoDomain *)mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
342 info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
/* The stack start is the saved stack pointer lowered by REDZONE_SIZE. */
343 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
345 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
/* Abort if the computed start falls outside [stack_start_limit, stack_end). */
346 if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
347 g_error ("BAD STACK");
349 info->client_info.stack_start = stack_start;
350 g_assert (info->client_info.stack_start);
351 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
/*
 * Unified-suspend implementation of stop-the-world.
 *
 * Phase 1 requests suspension of every thread accepted by
 * sgen_is_thread_in_current_stw (). Phase 2 checks each thread: a failed
 * suspend marks it skip, a thread in a critical location is resumed, and
 * anything else is marked suspend_done. Resumed threads are given time to
 * run and are suspended again. Phase 3 asserts the final state and copies
 * each suspended thread's state into sgen via update_sgen_info ().
 *
 * NOTE(review): the retry loop around phase 2, the declarations of
 * `restart_counter` and `res`, the `continue`/`break`/`else` lines and the
 * conditions guarding the bare `skip = TRUE` assignments are not visible in
 * this listing - confirm against the full file.
 */
355 sgen_unified_suspend_stop_world (void)
358 int sleep_duration = -1;
360 mono_threads_begin_global_suspend ();
361 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
/* Phase 1: reset the per-thread flags and ask every participating thread to suspend; a failed request marks the thread skip. */
363 FOREACH_THREAD (info) {
364 info->client_info.skip = FALSE;
365 info->client_info.suspend_done = FALSE;
366 if (sgen_is_thread_in_current_stw (info)) {
367 info->client_info.skip = !mono_thread_info_begin_suspend (info);
368 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
370 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
/* The calling thread is never suspended, so it is marked done before waiting for the pending suspends. */
374 mono_thread_info_current ()->client_info.suspend_done = TRUE;
375 mono_threads_wait_pending_operations ();
/* Phase 2: examine every thread that is neither done nor excluded from this stop-the-world. */
379 FOREACH_THREAD (info) {
380 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
381 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
386 All threads that reach here are pristine suspended. This means the following:
388 - We haven't accepted the previous suspend as good.
389 - We haven't gave up on it for this STW (it's either bad or asked not to)
391 if (!mono_thread_info_check_suspend_result (info)) {
392 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
393 info->client_info.skip = TRUE;
394 } else if (mono_thread_info_in_critical_location (info)) {
/* Stopped in a critical location: resume it so it can leave, to be suspended again further down. */
396 g_assert (mono_thread_info_suspend_count (info) == 1);
397 res = mono_thread_info_begin_resume (info);
398 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
402 info->client_info.skip = TRUE;
404 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
405 g_assert (!info->client_info.in_critical_region);
406 info->client_info.suspend_done = TRUE;
/* No thread had to be restarted in this round. */
410 if (restart_counter == 0)
412 mono_threads_wait_pending_operations ();
/* Give the resumed threads time to move on: yield while sleep_duration is negative, otherwise sleep; the delay grows by 10 each time it is used. */
414 if (sleep_duration < 0) {
415 mono_thread_info_yield ();
418 g_usleep (sleep_duration);
419 sleep_duration += 10;
/* Suspend again every participating thread that is currently running. */
422 FOREACH_THREAD (info) {
423 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
424 gboolean res = mono_thread_info_begin_suspend (info);
425 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
427 info->client_info.skip = TRUE;
431 mono_threads_wait_pending_operations ();
/* Phase 3: every participating thread must be marked done; copy its suspend state into sgen. Of the others, only the current thread may be marked done. */
434 FOREACH_THREAD (info) {
435 if (sgen_is_thread_in_current_stw (info)) {
436 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
437 g_assert (info->client_info.suspend_done);
438 update_sgen_info (info);
440 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended\n", mono_thread_info_get_tid (info));
441 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * Unified-suspend implementation of restart-the-world: resumes every thread
 * accepted by sgen_is_thread_in_current_stw (), waits for the pending
 * resume operations and then ends the global suspend.
 * NOTE(review): the `else` separating the RESUME and IGNORE debug lines and
 * the loop terminator are not visible in this listing.
 */
447 sgen_unified_suspend_restart_world (void)
449 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
450 FOREACH_THREAD (info) {
451 if (sgen_is_thread_in_current_stw (info)) {
/* NOTE(review): the resume call itself sits inside g_assert (); if assertions can be compiled out in some build, the thread would never be resumed - confirm, or move the call out of the assertion. */
452 g_assert (mono_thread_info_begin_resume (info));
453 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
455 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
459 mono_threads_wait_pending_operations ();
460 mono_threads_end_global_suspend ();