2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Library General Public
15 * License 2.0 as published by the Free Software Foundation;
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License 2.0 along with this library; if not, write to the Free
24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "utils/mono-time.h"
36 #include "utils/dtrace.h"
37 #include "utils/mono-counters.h"
38 #include "utils/mono-threads.h"
/*
 * Whether the world is currently stopped. Set to TRUE in sgen_stop_world,
 * reset to FALSE in sgen_restart_world, reported by sgen_is_world_stopped.
 */
40 static gboolean world_is_stopped = FALSE;
/* Short local aliases for the SGEN_TV_* timing macros. */
42 #define TV_DECLARE SGEN_TV_DECLARE
43 #define TV_GETTIME SGEN_TV_GETTIME
44 #define TV_ELAPSED SGEN_TV_ELAPSED
/*
 * Forward declarations: both functions are called from sgen_stop_world /
 * sgen_restart_world before their definitions further down.
 */
46 static int sgen_unified_suspend_restart_world (void);
47 static int sgen_unified_suspend_stop_world (void);
/*
 * align_pointer:
 * @ptr: address to align
 *
 * The visible arithmetic adds sizeof (gpointer) - 1 to p and then clears the
 * low bits with the mask ~(sizeof (gpointer) - 1), which rounds p up to a
 * multiple of sizeof (gpointer) (for a power-of-two pointer size).
 * NOTE(review): p is presumably the integer value of @ptr and the rounded
 * value is presumably what is returned -- confirm.
 */
50 align_pointer (void *ptr)
53 p += sizeof (gpointer) - 1;
54 p &= ~ (sizeof (gpointer) - 1);
/*
 * File-scope scratch storage used by update_current_thread_stack: a
 * MonoContext filled by MONO_CONTEXT_GET_CURRENT, and a register array that
 * ARCH_STORE_REGS fills through reg_ptr.
 * NOTE(review): presumably only one of the two is compiled, depending on
 * USE_MONO_CTX -- confirm.
 */
59 static MonoContext cur_thread_ctx;
61 static mword cur_thread_regs [ARCH_NUM_REGS];
/*
 * update_current_thread_stack:
 * @start: NOTE(review): not referenced in the visible lines -- confirm
 *
 * Records the calling thread's own stack position and register state in its
 * SgenThreadInfo (obtained from mono_thread_info_current ()):
 *  - info->stack_start is set to the pointer-aligned address of stack_guard
 *    and asserted to lie in [stack_start_limit, stack_end).
 *  - Either the current MonoContext is captured and copied into info->ctx,
 *    or the registers are stored with ARCH_STORE_REGS and copied into
 *    info->regs.
 *  - When the GC callbacks provide thread_suspend_func, it is called with
 *    info->runtime_data and either &info->ctx or NULL as the context.
 *
 * NOTE(review): the two variants are presumably selected by USE_MONO_CTX,
 * and stack_guard is presumably a local of this function -- confirm.
 */
65 update_current_thread_stack (void *start)
68 #if !defined(USE_MONO_CTX)
69 void *reg_ptr = cur_thread_regs;
71 SgenThreadInfo *info = mono_thread_info_current ();
73 info->stack_start = align_pointer (&stack_guard);
74 g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
76 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
77 memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
78 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
79 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
81 ARCH_STORE_REGS (reg_ptr);
82 memcpy (&info->regs, reg_ptr, sizeof (info->regs));
83 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
84 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
/*
 * is_ip_in_managed_allocator:
 * @domain: domain handed to the JIT info lookup
 * @ip: instruction pointer to classify
 *
 * Looks up the JIT info covering @ip with
 * mono_jit_info_table_find_internal () and returns whether the method it
 * belongs to is a GC critical method (sgen_is_critical_method ()).
 * Two checks run before the lookup: whether the caller has an internal
 * thread object at all, and whether any critical method is registered
 * (sgen_has_critical_method ()).
 * NOTE(review): both early exits, and a failed lookup, presumably return
 * FALSE -- confirm.
 */
89 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
93 if (!mono_thread_internal_current ())
94 /* Happens during thread attach */
99 if (!sgen_has_critical_method ())
103 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
104 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
105 * to register the jit info for all GC critical methods after they are JITted/loaded.
107 ji = mono_jit_info_table_find_internal (domain, ip, FALSE, FALSE);
111 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 * Resumes, and later suspends again, threads that were stopped at a point
 * the collector cannot use: no stack_start recorded, in_critical_region or
 * info.inside_critical_region set, or the stopped ip inside a GC critical
 * method (is_ip_in_managed_allocator ()). Threads with skip, gc_disabled or
 * suspend_done set are tested first and presumably passed over. Threads
 * that are not restarted get stopped_ip and stopped_domain cleared and
 * suspend_done set.
 *
 * When restart_count is zero the work is done. Otherwise the function waits
 * for the restart acknowledgements, yields while sleep_duration is negative
 * or else sleeps with g_usleep (the duration grows by 10 each time), then
 * suspends again every thread that is not skipped and still has a
 * stopped_ip, and waits for restarted_count suspend acknowledgements.
 *
 * Returns num_threads_died, which accumulates restart_count -
 * restarted_count.
 * NOTE(review): the enclosing loop and the updates of restart_count /
 * restarted_count are not visible here -- confirm the exact iteration.
 */
115 restart_threads_until_none_in_managed_allocator (void)
117 SgenThreadInfo *info;
118 int num_threads_died = 0;
119 int sleep_duration = -1;
122 int restart_count = 0, restarted_count = 0;
123 /* restart all threads that stopped in the
125 FOREACH_THREAD_SAFE (info) {
127 if (info->skip || info->gc_disabled || info->suspend_done)
129 if (mono_thread_info_is_live (info) && (!info->stack_start || info->in_critical_region || info->info.inside_critical_region ||
130 is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) {
131 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
132 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->info.native_handle);
133 result = sgen_resume_thread (info);
140 /* we set the stopped_ip to
141 NULL for threads which
142 we're not restarting so
143 that we can easily identify
145 info->stopped_ip = NULL;
146 info->stopped_domain = NULL;
147 info->suspend_done = TRUE;
149 } END_FOREACH_THREAD_SAFE
150 /* if no threads were restarted, we're done */
151 if (restart_count == 0)
154 /* wait for the threads to signal their restart */
155 sgen_wait_for_suspend_ack (restart_count);
157 if (sleep_duration < 0) {
158 mono_thread_info_yield ();
161 g_usleep (sleep_duration);
162 sleep_duration += 10;
165 /* stop them again */
166 FOREACH_THREAD (info) {
168 if (info->skip || info->stopped_ip == NULL)
170 result = sgen_suspend_thread (info);
178 /* some threads might have died */
179 num_threads_died += restart_count - restarted_count;
180 /* wait for the threads to signal their suspension
182 sgen_wait_for_suspend_ack (restarted_count);
185 return num_threads_died;
/*
 * acquire_gc_locks:
 *
 * Takes the thread info suspend lock (mono_thread_info_suspend_lock ()).
 * Counterpart of release_gc_locks.
 */
189 acquire_gc_locks (void)
192 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 * Releases the thread info suspend lock
 * (mono_thread_info_suspend_unlock ()). Counterpart of acquire_gc_locks.
 */
196 release_gc_locks (void)
198 mono_thread_info_suspend_unlock ();
/*
 * count_cards:
 * @major_total, @major_marked: filled by the major collector's count_cards
 * callback
 * @los_total, @los_marked: filled by sgen_los_count_cards ()
 *
 * Collects card statistics from the major collector and from the LOS into
 * the four out parameters. Used for the binary protocol world stopped /
 * world restarting records.
 */
203 count_cards (long long *major_total, long long *major_marked, long long *los_total, long long *los_marked)
205 sgen_get_major_collector ()->count_cards (major_total, major_marked);
206 sgen_los_count_cards (los_total, los_marked);
/*
 * Timestamp taken in sgen_stop_world; sgen_restart_world measures the pause
 * against it.
 */
209 static TV_DECLARE (stop_world_time);
/* Largest pause computed so far, updated in sgen_restart_world. */
210 static unsigned long max_pause_usec = 0;
/*
 * Accumulated handshake times, exposed as the "World stop" and
 * "World restart" counters.
 */
212 static guint64 time_stop_world;
213 static guint64 time_restart_world;
/*
 * sgen_stop_world:
 * @generation: passed through to the profiler events and to
 * sgen_memgov_collection_start ()
 *
 * Stops the world for a collection. In order: asserts the world is not
 * already stopped, emits the pre-stop profiler / probe / binary protocol
 * events, processes toggle refs, records the current thread's stack with
 * update_current_thread_stack (), increments sgen_global_stop_count and
 * takes the stop_world_time timestamp. The threads are then stopped either
 * through sgen_unified_suspend_stop_world () (when unified management is
 * enabled) or through sgen_thread_handshake (TRUE) followed by
 * restart_threads_until_none_in_managed_allocator ().
 *
 * Afterwards world_is_stopped is set, the post-stop events are emitted
 * (with card counts when the heavy binary protocol is enabled), the
 * handshake duration is added to time_stop_world, the memory governor is
 * notified and bridge data is reset when bridge processing is needed.
 *
 * NOTE(review): the g_error about dead threads is presumably guarded by a
 * comparison of dead against count that is not visible here -- confirm.
 */
215 /* LOCKING: assumes the GC lock is held */
217 sgen_stop_world (int generation)
219 TV_DECLARE (end_handshake);
222 SGEN_ASSERT (0, !world_is_stopped, "Why are we stopping a stopped world?");
224 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
225 MONO_GC_WORLD_STOP_BEGIN ();
226 binary_protocol_world_stopping (sgen_timestamp ());
229 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
230 sgen_process_togglerefs ();
232 update_current_thread_stack (&count);
234 sgen_global_stop_count++;
235 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
236 TV_GETTIME (stop_world_time);
238 if (mono_thread_info_unified_management_enabled ()) {
239 count = sgen_unified_suspend_stop_world ();
241 count = sgen_thread_handshake (TRUE);
242 dead = restart_threads_until_none_in_managed_allocator ();
244 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
248 world_is_stopped = TRUE;
250 SGEN_LOG (3, "world stopped %d thread(s)", count);
251 mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
252 MONO_GC_WORLD_STOP_END ();
253 if (binary_protocol_is_enabled ()) {
254 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
255 if (binary_protocol_is_heavy_enabled ())
256 count_cards (&major_total, &major_marked, &los_total, &los_marked);
257 binary_protocol_world_stopped (sgen_timestamp (), major_total, major_marked, los_total, los_marked);
260 TV_GETTIME (end_handshake);
261 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
263 sgen_memgov_collection_start (generation);
264 if (sgen_need_bridge_processing ())
265 sgen_bridge_reset_data ();
/*
 * sgen_restart_world:
 * @generation: passed through to the profiler / protocol events, to
 * sgen_bridge_processing_finish () and to sgen_memgov_collection_end ()
 * @timing: may be NULL (see the timing ? 2 : 0 argument below); otherwise
 * entry 0 receives stw_time and bridge_time
 *
 * Restarts the world after a collection. In order: asserts the world is
 * stopped, writes the binary protocol restarting record, reports moves to
 * the profiler when MONO_PROFILE_GC_MOVES is set, emits the pre-start
 * events, clears stack_start and the saved context / registers of every
 * thread, then resumes the threads through
 * sgen_unified_suspend_restart_world () or sgen_thread_handshake (FALSE).
 *
 * The restart handshake time is added to time_restart_world, the pause is
 * measured from stop_world_time and folded into max_pause_usec,
 * world_is_stopped is cleared and the post-start events are emitted.
 * Finally sgen_try_free_some_memory is set, bridge processing is finished
 * when needed and the memory governor is told the collection ended.
 *
 * NOTE(review): end_sw is presumably a timestamp taken right after the
 * handshake, and the writes to timing [0] are presumably guarded by a NULL
 * check; neither is visible here -- confirm.
 */
270 /* LOCKING: assumes the GC lock is held */
272 sgen_restart_world (int generation, GGTimingInfo *timing)
275 SgenThreadInfo *info;
277 TV_DECLARE (start_handshake);
278 TV_DECLARE (end_bridge);
279 unsigned long usec, bridge_usec;
281 SGEN_ASSERT (0, world_is_stopped, "Why are we restarting a running world?");
283 if (binary_protocol_is_enabled ()) {
284 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
285 if (binary_protocol_is_heavy_enabled ())
286 count_cards (&major_total, &major_marked, &los_total, &los_marked);
287 binary_protocol_world_restarting (generation, sgen_timestamp (), major_total, major_marked, los_total, los_marked);
290 /* notify the profiler of the leftovers */
291 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
292 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
293 sgen_gc_event_moves ();
294 mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
295 MONO_GC_WORLD_RESTART_BEGIN (generation);
296 FOREACH_THREAD (info) {
297 info->stack_start = NULL;
299 memset (&info->ctx, 0, sizeof (MonoContext));
301 memset (&info->regs, 0, sizeof (info->regs));
305 TV_GETTIME (start_handshake);
307 if (mono_thread_info_unified_management_enabled ())
308 count = sgen_unified_suspend_restart_world ();
310 count = sgen_thread_handshake (FALSE);
314 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
315 usec = TV_ELAPSED (stop_world_time, end_sw);
316 max_pause_usec = MAX (usec, max_pause_usec);
318 world_is_stopped = FALSE;
320 SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
321 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
322 MONO_GC_WORLD_RESTART_END (generation);
323 binary_protocol_world_restarted (generation, sgen_timestamp ());
326 * We must release the thread info suspend lock after doing
327 * the thread handshake. Otherwise, if the GC stops the world
328 * and a thread is in the process of starting up, but has not
329 * yet registered (it's not in the thread_list), it is
330 * possible that the thread does register while the world is
331 * stopped. When restarting the GC will then try to restart
332 * said thread, but since it never got the suspend signal, it
333 * cannot answer the restart signal, so a deadlock results.
337 sgen_try_free_some_memory = TRUE;
339 if (sgen_need_bridge_processing ())
340 sgen_bridge_processing_finish (generation);
342 TV_GETTIME (end_bridge);
343 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
346 timing [0].stw_time = usec;
347 timing [0].bridge_time = bridge_usec;
350 sgen_memgov_collection_end (generation, timing, timing ? 2 : 0);
/*
 * sgen_is_world_stopped:
 *
 * Returns the current value of world_is_stopped, which sgen_stop_world sets
 * and sgen_restart_world clears.
 */
356 sgen_is_world_stopped (void)
358 return world_is_stopped;
/*
 * Registers the accumulated stop and restart handshake times
 * (time_stop_world, time_restart_world) as GC time counters named
 * "World stop" and "World restart".
 */
364 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
365 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
368 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify
 *
 * Decides whether @info takes part in the current stop-the-world. The
 * visible checks single out threads with gc_disabled set, the calling
 * thread itself, sgen thread pool threads, and threads that are no longer
 * live (mono_thread_info_is_live ()).
 * NOTE(review): each check presumably returns FALSE and the fall-through
 * TRUE; the condition for the failing/dying case is not visible here --
 * confirm.
 */
371 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
374 A thread explicitly asked to be skiped because it holds no managed state.
375 This is used by TP and finalizer threads.
376 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
378 if (info->gc_disabled) {
383 We have detected that this thread is failing/dying, ignore it.
384 FIXME: can't we merge this with thread_is_dying?
391 Suspending the current thread will deadlock us, bad idea.
393 if (info == mono_thread_info_current ()) {
398 We can't suspend the workers that will do all the heavy lifting.
399 FIXME Use some state bit in SgenThreadInfo for this.
401 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
406 The thread has signaled that it started to detach, ignore it.
407 FIXME: can't we merge this with skip
409 if (!mono_thread_info_is_live (info)) {
/*
 * update_sgen_info:
 * @info: thread whose suspend state is copied into the sgen fields
 *
 * Fills the sgen view of a thread from its mono-threads suspend state:
 * stopped_domain comes from the TLS_KEY_DOMAIN slot, stopped_ip from the
 * instruction pointer of the suspend context, and stack_start from the
 * stack pointer of that context minus REDZONE_SIZE. A stack_start outside
 * [stack_start_limit, stack_end) is fatal (g_error). The whole suspend
 * context is also copied into info->ctx.
 * NOTE(review): the g_assert_not_reached () presumably belongs to the
 * build variant without USE_MONO_CTX -- confirm.
 */
417 update_sgen_info (SgenThreadInfo *info)
421 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
422 info->stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
423 info->stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
424 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
426 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
427 if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
428 g_error ("BAD STACK");
430 info->stack_start = stack_start;
432 info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
434 g_assert_not_reached ();
/*
 * sgen_unified_suspend_stop_world:
 *
 * Stop-the-world built on the mono-threads suspend API.
 *
 * 1. Begins a global suspend, clears suspend_done on every thread and asks
 *    each thread accepted by sgen_is_thread_in_current_stw () to suspend;
 *    info->skip is set when mono_thread_info_begin_suspend () fails. The
 *    current thread is marked suspend_done, then pending operations are
 *    awaited.
 * 2. Every thread that is not yet done and still takes part is checked:
 *    a failed suspend (mono_threads_core_check_suspend_result ()) is
 *    logged as skipped, a thread in a critical location is resumed
 *    (its suspend count is asserted to be 1), and any other thread is
 *    marked suspend_done.
 * 3. When restart_counter is zero this phase ends. Otherwise pending
 *    operations are awaited, the function backs off (sleep_duration grows
 *    by 10 per g_usleep), and every participating thread that is running
 *    is asked to suspend again.
 * 4. At the end every participating thread must be suspend_done and gets
 *    its sgen fields refreshed by update_sgen_info (); a non-participating
 *    thread must not be suspend_done unless it is the current thread.
 *
 * NOTE(review): the loop around steps 2-3, the updates of restart_counter
 * and the returned value are not visible here -- confirm.
 */
439 sgen_unified_suspend_stop_world (void)
442 SgenThreadInfo *info;
444 int sleep_duration = -1;
446 mono_threads_begin_global_suspend ();
447 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
449 FOREACH_THREAD_SAFE (info) {
451 info->suspend_done = FALSE;
452 if (sgen_is_thread_in_current_stw (info)) {
453 info->skip = !mono_thread_info_begin_suspend (info, FALSE);
454 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
458 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
460 } END_FOREACH_THREAD_SAFE
462 mono_thread_info_current ()->suspend_done = TRUE;
463 mono_threads_wait_pending_operations ();
467 FOREACH_THREAD_SAFE (info) {
468 if (info->suspend_done || !sgen_is_thread_in_current_stw (info)) {
469 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->suspend_done, !sgen_is_thread_in_current_stw (info));
474 All threads that reach here are pristine suspended. This means the following:
476 - We haven't accepted the previous suspend as good.
477 - We haven't gave up on it for this STW (it's either bad or asked not to)
479 if (!mono_threads_core_check_suspend_result (info)) {
480 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
482 } else if (mono_thread_info_in_critical_location (info)) {
484 g_assert (mono_thread_info_suspend_count (info) == 1);
485 res = mono_thread_info_begin_resume (info);
486 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
492 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
493 g_assert (!info->in_critical_region);
494 info->suspend_done = TRUE;
496 } END_FOREACH_THREAD_SAFE
498 if (restart_counter == 0)
500 mono_threads_wait_pending_operations ();
502 if (sleep_duration < 0) {
510 g_usleep (sleep_duration);
511 sleep_duration += 10;
514 FOREACH_THREAD_SAFE (info) {
515 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
516 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
517 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
521 } END_FOREACH_THREAD_SAFE
523 mono_threads_wait_pending_operations ();
526 FOREACH_THREAD_SAFE (info) {
527 if (sgen_is_thread_in_current_stw (info)) {
528 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
529 g_assert (info->suspend_done);
530 update_sgen_info (info);
532 g_assert (!info->suspend_done || info == mono_thread_info_current ());
534 } END_FOREACH_THREAD_SAFE
/*
 * sgen_unified_suspend_restart_world:
 *
 * Counterpart of sgen_unified_suspend_stop_world: resumes every thread
 * accepted by sgen_is_thread_in_current_stw (), waits for the pending
 * operations and then ends the global suspend.
 *
 * NOTE(review): the resume call is made inside g_assert (). If assertions
 * can be compiled out in this build, the resume would vanish with them --
 * confirm, or move the call out of the assertion.
 * NOTE(review): the returned value is not visible here -- confirm.
 */
540 sgen_unified_suspend_restart_world (void)
542 SgenThreadInfo *info;
545 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
546 FOREACH_THREAD_SAFE (info) {
547 if (sgen_is_thread_in_current_stw (info)) {
548 g_assert (mono_thread_info_begin_resume (info));
549 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
552 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
554 } END_FOREACH_THREAD_SAFE
556 mono_threads_wait_pending_operations ();
557 mono_threads_end_global_suspend ();