2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Library General Public
15 * License 2.0 as published by the Free Software Foundation;
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License 2.0 along with this library; if not, write to the Free
24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "metadata/sgen-client.h"
36 #include "utils/mono-time.h"
/* Short local aliases for the SGEN_TV_* timing macros used throughout this file. */
38 #define TV_DECLARE SGEN_TV_DECLARE
39 #define TV_GETTIME SGEN_TV_GETTIME
40 #define TV_ELAPSED SGEN_TV_ELAPSED
/*
 * Forward declarations for the unified-suspend entry points. Both are called
 * from sgen_client_stop_world ()/sgen_client_restart_world () before their
 * definitions appear later in this file.
 */
42 static int sgen_unified_suspend_restart_world (void);
43 static int sgen_unified_suspend_stop_world (void);
/*
 * align_pointer:
 *
 *   The visible arithmetic adds sizeof (gpointer) - 1 to p and then clears the
 *   low bits with the mask ~(sizeof (gpointer) - 1), which rounds p up to a
 *   multiple of sizeof (gpointer).
 *   NOTE(review): the declaration of p and the return statement are not
 *   visible here; presumably p is PTR converted to an integer and the rounded
 *   value is returned as a pointer - confirm against the full file.
 */
46 align_pointer (void *ptr)
49 p += sizeof (gpointer) - 1;
50 p &= ~ (sizeof (gpointer) - 1);
/*
 * File-scope scratch storage used by update_current_thread_stack ():
 * cur_thread_ctx receives the MONO_CONTEXT_GET_CURRENT snapshot and
 * cur_thread_regs is the buffer handed to ARCH_STORE_REGS.
 * NOTE(review): presumably only one of the two is compiled in, depending on
 * USE_MONO_CTX - the selecting preprocessor lines are not visible here.
 */
55 static MonoContext cur_thread_ctx;
57 static mword cur_thread_regs [ARCH_NUM_REGS];
/*
 * update_current_thread_stack:
 *
 *   Records the calling thread's current stack position and register state in
 *   its SgenThreadInfo, and notifies the runtime's thread_suspend_func
 *   callback when one is installed.
 *
 *   START is not referenced by any of the visible lines.
 *
 *   NOTE(review): stack_guard is presumably a local variable whose address
 *   marks the current stack top; its declaration is not visible here.
 *   NOTE(review): the two capture sequences below look like the USE_MONO_CTX
 *   and non-USE_MONO_CTX alternatives; the preprocessor lines that select
 *   between them are not visible here.
 */
61 update_current_thread_stack (void *start)
64 #if !defined(USE_MONO_CTX)
65 void *reg_ptr = cur_thread_regs;
67 SgenThreadInfo *info = mono_thread_info_current ();
/* Publish the aligned address of stack_guard as this thread's stack start and check it lies inside the thread's recorded stack bounds. */
69 info->stack_start = align_pointer (&stack_guard);
70 g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
/* Context variant: snapshot the current context into cur_thread_ctx, copy it into info->ctx and pass that copy to the callback. */
72 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
73 memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
74 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
75 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
/* Register variant: store the registers through reg_ptr, copy them into info->regs and call the callback with a NULL context. */
77 ARCH_STORE_REGS (reg_ptr);
78 memcpy (&info->regs, reg_ptr, sizeof (info->regs));
79 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
80 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
/*
 * is_ip_in_managed_allocator:
 *
 *   Reports whether the instruction pointer IP, looked up in DOMAIN's JIT info
 *   table, belongs to a method that sgen_is_critical_method () classifies as
 *   GC critical.
 *
 *   Two guards run before the lookup: one for the case where there is no
 *   current internal thread, and one for the case where
 *   sgen_has_critical_method () is false.
 *   NOTE(review): the statements executed by those guards, and any handling of
 *   a failed lookup, are not visible here; presumably they all return FALSE -
 *   confirm against the full file.
 */
85 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
89 if (!mono_thread_internal_current ())
90 /* Happens during thread attach */
95 if (!sgen_has_critical_method ())
99 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
100 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
101 * to register the jit info for all GC critical methods after they are JITted/loaded.
103 ji = mono_jit_info_table_find_internal (domain, ip, FALSE, FALSE);
107 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 *   Used by the non-unified stop path after the initial handshake. Threads
 *   that were stopped at an unsafe point are resumed, given time to run, and
 *   then suspended again.
 *
 *   A thread is resumed when it is live and either has no stack_start, is
 *   flagged as being in a critical region, or was stopped at an ip for which
 *   is_ip_in_managed_allocator () is true. Threads already marked skip,
 *   gc_disabled or suspend_done are not examined. Threads that are not
 *   resumed get stopped_ip and stopped_domain cleared and suspend_done set.
 *
 *   When no thread was resumed the work is finished. Otherwise the function
 *   waits for the acknowledgements, backs off, re-suspends every thread that
 *   is not skipped and still has a stopped_ip, and waits again.
 *
 *   Returns num_threads_died, which accumulates restart_count -
 *   restarted_count from each round.
 *
 *   NOTE(review): the enclosing loop, the else branches, the counter
 *   increments and the handling of failed resume/suspend calls are not
 *   visible here; presumably a failed call marks the thread as skipped -
 *   confirm against the full file.
 */
111 restart_threads_until_none_in_managed_allocator (void)
113 SgenThreadInfo *info;
114 int num_threads_died = 0;
115 int sleep_duration = -1;
118 int restart_count = 0, restarted_count = 0;
119 /* restart all threads that stopped in the
121 FOREACH_THREAD_SAFE (info) {
123 if (info->client_info.skip || info->gc_disabled || info->client_info.suspend_done)
125 if (mono_thread_info_is_live (info) &&
126 (!info->stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
127 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
128 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
129 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
130 result = sgen_resume_thread (info);
134 info->client_info.skip = 1;
137 /* we set the stopped_ip to
138 NULL for threads which
139 we're not restarting so
140 that we can easily identify
142 info->client_info.stopped_ip = NULL;
143 info->client_info.stopped_domain = NULL;
144 info->client_info.suspend_done = TRUE;
146 } END_FOREACH_THREAD_SAFE
147 /* if no threads were restarted, we're done */
148 if (restart_count == 0)
151 /* wait for the threads to signal their restart */
152 sgen_wait_for_suspend_ack (restart_count);
/* Back-off: a negative sleep_duration selects a plain yield; otherwise sleep for sleep_duration and lengthen it by 10 for the next round. */
154 if (sleep_duration < 0) {
155 mono_thread_info_yield ();
158 g_usleep (sleep_duration);
159 sleep_duration += 10;
162 /* stop them again */
163 FOREACH_THREAD (info) {
165 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
167 result = sgen_suspend_thread (info);
172 info->client_info.skip = 1;
175 /* some threads might have died */
176 num_threads_died += restart_count - restarted_count;
177 /* wait for the threads to signal their suspension
179 sgen_wait_for_suspend_ack (restarted_count);
182 return num_threads_died;
/*
 * acquire_gc_locks:
 *
 *   Takes the thread-info suspend lock via mono_thread_info_suspend_lock ().
 *   NOTE(review): not every line of this function is visible here, so other
 *   locks may be acquired as well - confirm against the full file.
 */
186 acquire_gc_locks (void)
189 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 *   Releases the thread-info suspend lock via
 *   mono_thread_info_suspend_unlock (), the counterpart of the lock call made
 *   in acquire_gc_locks ().
 *   NOTE(review): not every line of this function is visible here, so other
 *   locks may be released as well - confirm against the full file.
 */
193 release_gc_locks (void)
195 mono_thread_info_suspend_unlock ();
/* Timestamp taken by sgen_client_stop_world () just before it starts suspending threads; sgen_client_restart_world () measures the pause from it. */
199 static TV_DECLARE (stop_world_time);
/* Largest pause measurement seen so far, updated in sgen_client_restart_world (). NOTE(review): the name says microseconds, but the unit returned by TV_ELAPSED is not visible here - confirm. */
200 static unsigned long max_pause_usec = 0;
/* Accumulated TV_ELAPSED totals for the stop and restart handshakes; registered as the "World stop" and "World restart" counters in mono_sgen_init_stw (). */
202 static guint64 time_stop_world;
203 static guint64 time_restart_world;
205 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_stop_world:
 *
 *   Client-side half of stopping the world for a collection of GENERATION.
 *
 *   In visible order it flushes pending profiler move events, processes
 *   toggle refs, records the calling thread's own stack/register state, bumps
 *   sgen_global_stop_count and timestamps the start of the pause. It then
 *   suspends the other threads, either through
 *   sgen_unified_suspend_stop_world () or through sgen_thread_handshake (TRUE)
 *   followed by restart_threads_until_none_in_managed_allocator (). Finally
 *   it accounts the handshake time in time_stop_world, informs the memory
 *   governor that a collection is starting, and resets bridge data when
 *   bridge processing is needed.
 *
 *   NOTE(review): the declarations of count and dead, the lock acquisition
 *   and the condition guarding the g_error () call are not visible here;
 *   presumably the error fires when dead exceeds count - confirm against the
 *   full file.
 *   NOTE(review): the g_error () message contains typos ("that been initialy");
 *   it is a runtime string and is deliberately left unchanged here.
 */
207 sgen_client_stop_world (int generation)
209 TV_DECLARE (end_handshake);
212 /* notify the profiler of the leftovers */
213 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
214 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
215 mono_sgen_gc_event_moves ();
219 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
220 sgen_process_togglerefs ();
/* Record this thread's own stack position; the address of the local count is passed as the start argument. */
222 update_current_thread_stack (&count);
224 sgen_global_stop_count++;
225 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
226 TV_GETTIME (stop_world_time);
/* Pick the suspend mechanism: unified thread management, or the signal handshake plus the managed-allocator restart loop. */
228 if (mono_thread_info_unified_management_enabled ()) {
229 count = sgen_unified_suspend_stop_world ();
231 count = sgen_thread_handshake (TRUE);
232 dead = restart_threads_until_none_in_managed_allocator ();
234 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
238 SGEN_LOG (3, "world stopped %d thread(s)", count);
240 TV_GETTIME (end_handshake);
241 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
243 sgen_memgov_collection_start (generation);
244 if (sgen_need_bridge_processing ())
245 sgen_bridge_reset_data ();
250 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_restart_world:
 *
 *   Client-side half of restarting the world after a collection.
 *
 *   In visible order it flushes pending profiler move events, clears the
 *   stack_start and saved context/registers of every thread, resumes the
 *   threads through sgen_unified_suspend_restart_world () or
 *   sgen_thread_handshake (FALSE), updates time_restart_world and
 *   max_pause_usec, logs the pause, and stores the pause and bridge durations
 *   in timing [0].
 *
 *   GENERATION is not referenced by any of the visible lines.
 *
 *   NOTE(review): end_sw and count are used but their declarations, and the
 *   TV_GETTIME call that sets end_sw, are not visible here. The same holds for
 *   the lock release and bridge call that the deadlock comment refers to, and
 *   for any NULL check on TIMING before it is written - confirm against the
 *   full file.
 *   NOTE(review): the two memset calls look like the USE_MONO_CTX and
 *   non-USE_MONO_CTX alternatives; the selecting preprocessor lines are not
 *   visible here.
 */
252 sgen_client_restart_world (int generation, GGTimingInfo *timing)
255 SgenThreadInfo *info;
257 TV_DECLARE (start_handshake);
258 TV_DECLARE (end_bridge);
259 unsigned long usec, bridge_usec;
261 /* notify the profiler of the leftovers */
262 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
263 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
264 mono_sgen_gc_event_moves ();
/* Forget the per-thread stack start and saved register state captured for this pause. */
266 FOREACH_THREAD (info) {
267 info->stack_start = NULL;
269 memset (&info->ctx, 0, sizeof (MonoContext));
271 memset (&info->regs, 0, sizeof (info->regs));
275 TV_GETTIME (start_handshake);
277 if (mono_thread_info_unified_management_enabled ())
278 count = sgen_unified_suspend_restart_world ();
280 count = sgen_thread_handshake (FALSE);
/* The restart handshake is timed from start_handshake; the whole pause is timed from the stop_world_time taken in sgen_client_stop_world (). */
284 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
285 usec = TV_ELAPSED (stop_world_time, end_sw);
286 max_pause_usec = MAX (usec, max_pause_usec);
288 SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
291 * We must release the thread info suspend lock after doing
292 * the thread handshake. Otherwise, if the GC stops the world
293 * and a thread is in the process of starting up, but has not
294 * yet registered (it's not in the thread_list), it is
295 * possible that the thread does register while the world is
296 * stopped. When restarting the GC will then try to restart
297 * said thread, but since it never got the suspend signal, it
298 * cannot answer the restart signal, so a deadlock results.
302 TV_GETTIME (end_bridge);
303 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
306 timing [0].stw_time = usec;
307 timing [0].bridge_time = bridge_usec;
/*
 * mono_sgen_init_stw:
 *
 *   Registers the two stop-the-world timing accumulators with the counters
 *   subsystem under the names "World stop" and "World restart".
 *
 *   NOTE(review): both counters are registered with MONO_COUNTER_ULONG while
 *   the variables are declared guint64; confirm the flag matches the storage
 *   width on targets where unsigned long is 32 bits.
 */
314 mono_sgen_init_stw (void)
316 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
317 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
320 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 *
 *   Decides whether INFO takes part in the current stop-the-world. Five
 *   conditions are tested in order: the thread has gc_disabled set, it has
 *   been marked skip, it is the calling thread, it is an sgen thread-pool
 *   thread, or it is no longer live.
 *
 *   NOTE(review): the return statements are not visible here; presumably each
 *   matching condition returns FALSE and a thread that passes all five tests
 *   yields TRUE - confirm against the full file.
 */
323 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
326 A thread explicitly asked to be skiped because it holds no managed state.
327 This is used by TP and finalizer threads.
328 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
330 if (info->gc_disabled) {
335 We have detected that this thread is failing/dying, ignore it.
336 FIXME: can't we merge this with thread_is_dying?
338 if (info->client_info.skip) {
343 Suspending the current thread will deadlock us, bad idea.
345 if (info == mono_thread_info_current ()) {
350 We can't suspend the workers that will do all the heavy lifting.
351 FIXME Use some state bit in SgenThreadInfo for this.
353 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
358 The thread has signaled that it started to detach, ignore it.
359 FIXME: can't we merge this with skip
361 if (!mono_thread_info_is_live (info)) {
/*
 * update_sgen_info:
 *
 *   Copies the state captured when INFO was suspended into the fields sgen
 *   reads: the stopped domain (from the thread's TLS), the stopped ip and the
 *   saved context. The stack start is the saved stack pointer minus
 *   REDZONE_SIZE.
 *
 *   Aborts with g_error ("BAD STACK") when the computed stack start lies
 *   outside the range from info->stack_start_limit (inclusive) to
 *   info->stack_end (exclusive).
 *
 *   NOTE(review): the declaration of stack_start and the preprocessor lines
 *   around the context copy are not visible here; presumably
 *   g_assert_not_reached () is the branch for builds without USE_MONO_CTX -
 *   confirm against the full file.
 */
369 update_sgen_info (SgenThreadInfo *info)
373 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
374 info->client_info.stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
375 info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
376 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
378 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
379 if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
380 g_error ("BAD STACK");
382 info->stack_start = stack_start;
384 info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
386 g_assert_not_reached ();
/*
 * sgen_unified_suspend_stop_world:
 *
 *   Stops the world using the unified thread-suspend machinery. The visible
 *   code has three phases:
 *
 *   1. Begin a global suspend, reset skip/suspend_done on every thread and
 *      request suspension of each thread accepted by
 *      sgen_is_thread_in_current_stw (). A thread whose suspend request fails
 *      is marked skip.
 *   2. Repeatedly examine the threads not yet marked suspend_done. A thread
 *      whose suspend did not complete is marked skip. A thread stopped in a
 *      critical location is resumed so it can leave it. Any other thread is
 *      marked suspend_done. Resumed threads are given time to run and are then
 *      suspended again.
 *   3. For every participating thread, assert that it reached suspend_done
 *      and copy its suspended state into sgen's fields with
 *      update_sgen_info ().
 *
 *   NOTE(review): the loop construct around phase 2, the counter increments,
 *   the exit taken when restart_counter is 0, the failure handling after
 *   begin_resume/begin_suspend and the return statement are not visible here;
 *   presumably the function returns the number of threads suspended in phase
 *   1 - confirm against the full file.
 */
391 sgen_unified_suspend_stop_world (void)
394 SgenThreadInfo *info;
396 int sleep_duration = -1;
398 mono_threads_begin_global_suspend ();
399 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
/* Phase 1: reset the per-thread flags and request suspension of every participating thread. */
401 FOREACH_THREAD_SAFE (info) {
402 info->client_info.skip = FALSE;
403 info->client_info.suspend_done = FALSE;
404 if (sgen_is_thread_in_current_stw (info)) {
405 info->client_info.skip = !mono_thread_info_begin_suspend (info, FALSE);
406 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
407 if (!info->client_info.skip)
/* NOTE(review): this debug line reads info->skip, whereas every other access in this function uses info->client_info.skip - confirm which field is intended. */
410 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
412 } END_FOREACH_THREAD_SAFE
/* The calling thread is never suspended, so mark it as done up front. */
414 mono_thread_info_current ()->client_info.suspend_done = TRUE;
415 mono_threads_wait_pending_operations ();
/* Phase 2: classify each thread that is not yet done as failed (skip), in a critical location (resume it) or fully suspended (done). */
419 FOREACH_THREAD_SAFE (info) {
420 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
421 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
426 All threads that reach here are pristine suspended. This means the following:
428 - We haven't accepted the previous suspend as good.
429 - We haven't gave up on it for this STW (it's either bad or asked not to)
431 if (!mono_threads_core_check_suspend_result (info)) {
432 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
433 info->client_info.skip = TRUE;
434 } else if (mono_thread_info_in_critical_location (info)) {
436 g_assert (mono_thread_info_suspend_count (info) == 1);
437 res = mono_thread_info_begin_resume (info);
438 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
442 info->client_info.skip = TRUE;
444 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
445 g_assert (!info->client_info.in_critical_region);
446 info->client_info.suspend_done = TRUE;
448 } END_FOREACH_THREAD_SAFE
450 if (restart_counter == 0)
452 mono_threads_wait_pending_operations ();
/* Back-off before re-suspending; on the sleeping path sleep_duration grows by 10 each round. */
454 if (sleep_duration < 0) {
462 g_usleep (sleep_duration);
463 sleep_duration += 10;
/* Suspend again every participating thread that is currently running. */
466 FOREACH_THREAD_SAFE (info) {
467 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
468 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
469 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
471 info->client_info.skip = TRUE;
473 } END_FOREACH_THREAD_SAFE
475 mono_threads_wait_pending_operations ();
/* Phase 3: every participating thread must now be done; publish its suspended state to sgen. */
478 FOREACH_THREAD_SAFE (info) {
479 if (sgen_is_thread_in_current_stw (info)) {
480 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
481 g_assert (info->client_info.suspend_done);
482 update_sgen_info (info);
484 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
486 } END_FOREACH_THREAD_SAFE
/*
 * sgen_unified_suspend_restart_world:
 *
 *   Resumes every thread accepted by sgen_is_thread_in_current_stw (), waits
 *   for the pending resume operations to complete and ends the global
 *   suspend.
 *
 *   NOTE(review): the counter update and the return statement are not
 *   visible here; presumably the function returns the number of threads
 *   resumed - confirm against the full file.
 */
492 sgen_unified_suspend_restart_world (void)
494 SgenThreadInfo *info;
497 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
498 FOREACH_THREAD_SAFE (info) {
499 if (sgen_is_thread_in_current_stw (info)) {
/* NOTE(review): the resume call is the argument of g_assert (); if the assertion macro can ever be compiled out, the thread would never be resumed - confirm g_assert is always active in this build. */
500 g_assert (mono_thread_info_begin_resume (info));
501 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
504 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
506 } END_FOREACH_THREAD_SAFE
508 mono_threads_wait_pending_operations ();
509 mono_threads_end_global_suspend ();