2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Library General Public
15 * License 2.0 as published by the Free Software Foundation;
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License 2.0 along with this library; if not, write to the Free
24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "metadata/sgen-client.h"
36 #include "utils/mono-time.h"
/* File-local short names for the SGEN_TV_* timing macros. */
38 #define TV_DECLARE SGEN_TV_DECLARE
39 #define TV_GETTIME SGEN_TV_GETTIME
40 #define TV_ELAPSED SGEN_TV_ELAPSED
/*
 * Forward declarations of the unified-suspend stop/restart routines; both are
 * defined further down in this file and called from the sgen_client_*_world
 * functions.
 */
42 static int sgen_unified_suspend_restart_world (void);
43 static int sgen_unified_suspend_stop_world (void);
/*
 * align_pointer:
 *
 * Rounds the working value p up to the next multiple of sizeof (gpointer) by
 * adding sizeof (gpointer) - 1 and then masking off the low bits.
 *
 * NOTE(review): p presumably holds the integer value of @ptr and the rounded
 * value is presumably what gets returned -- confirm against the full body.
 */
46 align_pointer (void *ptr)
49 p += sizeof (gpointer) - 1;
50 p &= ~ (sizeof (gpointer) - 1);
/*
 * File-scope scratch storage used by update_current_thread_stack (): a
 * MonoContext that receives the current context, and an array of
 * ARCH_NUM_REGS machine words that receives the raw registers.
 */
55 static MonoContext cur_thread_ctx;
57 static mword cur_thread_regs [ARCH_NUM_REGS];
/*
 * update_current_thread_stack:
 *
 * Records the calling thread's stack position and register state in its
 * SgenThreadInfo (obtained from mono_thread_info_current ()).
 *
 * info->stack_start is set to the pointer-aligned address of stack_guard and
 * asserted to lie inside [stack_start_limit, stack_end). Two capture paths
 * are visible: one snapshots a MonoContext into info->ctx, the other stores
 * the raw registers into info->regs. On either path the thread_suspend_func
 * GC callback, when one is installed, is invoked with info->runtime_data.
 *
 * NOTE(review): @start is not referenced by any statement shown here.
 */
61 update_current_thread_stack (void *start)
64 #if !defined(USE_MONO_CTX)
65 void *reg_ptr = cur_thread_regs;
67 SgenThreadInfo *info = mono_thread_info_current ();
/* Use the address of a local as the current top of stack, then sanity-check it. */
69 info->stack_start = align_pointer (&stack_guard);
70 g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
/* MonoContext path: capture the context and hand a copy to the callback. */
72 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
73 memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
74 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
75 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
/* Raw-register path: store the registers; the callback gets a NULL context. */
77 ARCH_STORE_REGS (reg_ptr);
78 memcpy (&info->regs, reg_ptr, sizeof (info->regs));
79 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
80 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
/*
 * is_ip_in_managed_allocator:
 * @domain: domain used for the JIT info lookup
 * @ip: instruction pointer to classify
 *
 * Looks up the JIT info covering @ip in @domain, passing FALSE for both
 * trailing arguments of mono_jit_info_table_find_internal (), and returns
 * whether the owning method is reported as critical by
 * sgen_is_critical_method ().
 *
 * Two early checks come first: the calling thread must have an internal
 * thread object, and sgen_has_critical_method () must be true.
 * NOTE(review): the outcome of those checks (and of a failed lookup) is not
 * shown here; presumably FALSE -- confirm.
 */
85 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
89 if (!mono_thread_internal_current ())
90 /* Happens during thread attach */
95 if (!sgen_has_critical_method ())
99 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
100 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
101 * to register the jit info for all GC critical methods after they are JITted/loaded.
103 ji = mono_jit_info_table_find_internal (domain, ip, FALSE, FALSE);
107 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 * Lets threads that were stopped at an unsafe point run a little further and
 * then stops them again. Returns num_threads_died, which accumulates
 * restart_count - restarted_count for each round.
 *
 * Visible steps of one round:
 *  - Walk the thread list, ignoring threads with skip, gc_disabled or
 *    suspend_done set. A live thread is resumed with sgen_resume_thread ()
 *    when it has no stack_start, is flagged as being in a critical region, or
 *    is_ip_in_managed_allocator () accepts its stopped_ip. Threads that are
 *    not resumed get stopped_ip and stopped_domain cleared and suspend_done
 *    set.
 *  - If restart_count is zero the work is finished.
 *  - Otherwise wait for the resume acknowledgements, back off, suspend again
 *    every thread that is not skipped and still has a stopped_ip, and wait
 *    for the suspend acknowledgements.
 *
 * NOTE(review): the enclosing loop and the updates of restart_count and
 * restarted_count are not shown here; the round is presumably repeated until
 * no thread needs restarting -- confirm.
 */
111 restart_threads_until_none_in_managed_allocator (void)
113 SgenThreadInfo *info;
114 int num_threads_died = 0;
115 int sleep_duration = -1;
118 int restart_count = 0, restarted_count = 0;
119 /* restart all threads that stopped in the
121 FOREACH_THREAD_SAFE (info) {
123 if (info->skip || info->gc_disabled || info->suspend_done)
125 if (mono_thread_info_is_live (info) && (!info->stack_start || info->in_critical_region || info->client_info.info.inside_critical_region ||
126 is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) {
127 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
128 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
129 result = sgen_resume_thread (info);
136 /* we set the stopped_ip to
137 NULL for threads which
138 we're not restarting so
139 that we can easily identify
141 info->stopped_ip = NULL;
142 info->stopped_domain = NULL;
143 info->suspend_done = TRUE;
145 } END_FOREACH_THREAD_SAFE
146 /* if no threads were restarted, we're done */
147 if (restart_count == 0)
150 /* wait for the threads to signal their restart */
151 sgen_wait_for_suspend_ack (restart_count);
/* Back off: yield while sleep_duration is negative; otherwise sleep for
   sleep_duration microseconds and lengthen the next sleep by 10. */
153 if (sleep_duration < 0) {
154 mono_thread_info_yield ();
157 g_usleep (sleep_duration);
158 sleep_duration += 10;
161 /* stop them again */
162 FOREACH_THREAD (info) {
164 if (info->skip || info->stopped_ip == NULL)
166 result = sgen_suspend_thread (info);
174 /* some threads might have died */
175 num_threads_died += restart_count - restarted_count;
176 /* wait for the threads to signal their suspension
178 sgen_wait_for_suspend_ack (restarted_count);
181 return num_threads_died;
/* acquire_gc_locks: takes the thread-info suspend lock. */
185 acquire_gc_locks (void)
188 mono_thread_info_suspend_lock ();
/* release_gc_locks: releases the thread-info suspend lock. */
192 release_gc_locks (void)
194 mono_thread_info_suspend_unlock ();
/*
 * Pause bookkeeping shared by the stop and restart paths:
 *  - stop_world_time: timestamp taken in sgen_client_stop_world (); the
 *    restart path measures the pause from it.
 *  - max_pause_usec: largest pause computed by sgen_client_restart_world ().
 *  - time_stop_world / time_restart_world: accumulated handshake times,
 *    registered as the "World stop" and "World restart" counters.
 */
198 static TV_DECLARE (stop_world_time);
199 static unsigned long max_pause_usec = 0;
201 static guint64 time_stop_world;
202 static guint64 time_restart_world;
204 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_stop_world:
 * @generation: forwarded to sgen_memgov_collection_start ()
 *
 * Suspends the other threads ahead of a collection. Visible order of work:
 * report pending GC moves to the profiler when MONO_PROFILE_GC_MOVES is
 * enabled, process toggle references, record the calling thread's own stack
 * state, bump sgen_global_stop_count and timestamp the start of the pause.
 * The world is then stopped either through the unified suspend code (when
 * mono_thread_info_unified_management_enabled ()) or through
 * sgen_thread_handshake (TRUE) followed by
 * restart_threads_until_none_in_managed_allocator (). Finally the handshake
 * time is added to time_stop_world, the memory governor is told the
 * collection is starting, and bridge data is reset if bridge processing is
 * needed.
 *
 * NOTE(review): the condition guarding the g_error call is not shown here;
 * going by its message it presumably fires when dead exceeds count.
 */
206 sgen_client_stop_world (int generation)
208 TV_DECLARE (end_handshake);
211 /* notify the profiler of the leftovers */
212 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
213 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
214 mono_sgen_gc_event_moves ();
218 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
219 sgen_process_togglerefs ();
221 update_current_thread_stack (&count);
223 sgen_global_stop_count++;
224 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
225 TV_GETTIME (stop_world_time);
227 if (mono_thread_info_unified_management_enabled ()) {
228 count = sgen_unified_suspend_stop_world ();
230 count = sgen_thread_handshake (TRUE);
231 dead = restart_threads_until_none_in_managed_allocator ();
233 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
237 SGEN_LOG (3, "world stopped %d thread(s)", count);
239 TV_GETTIME (end_handshake);
240 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
242 sgen_memgov_collection_start (generation);
243 if (sgen_need_bridge_processing ())
244 sgen_bridge_reset_data ();
249 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_restart_world:
 * @generation: collection generation (not referenced by the statements shown)
 * @timing: element 0 receives the pause time (stw_time) and the time
 *          measured between end_sw and end_bridge (bridge_time)
 *
 * Resumes the threads stopped by sgen_client_stop_world (). Visible order of
 * work: report pending GC moves to the profiler when enabled, clear every
 * thread's stack_start and saved context/registers, then resume through the
 * unified code or sgen_thread_handshake (FALSE) depending on
 * mono_thread_info_unified_management_enabled (). The handshake time is
 * added to time_restart_world, and the total pause (measured from
 * stop_world_time) updates max_pause_usec and is logged.
 *
 * NOTE(review): the point where end_sw is sampled and any guard around the
 * writes to @timing are not shown here -- confirm before relying on @timing
 * being optional.
 */
251 sgen_client_restart_world (int generation, GGTimingInfo *timing)
254 SgenThreadInfo *info;
256 TV_DECLARE (start_handshake);
257 TV_DECLARE (end_bridge);
258 unsigned long usec, bridge_usec;
260 /* notify the profiler of the leftovers */
261 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
262 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
263 mono_sgen_gc_event_moves ();
/* Drop the per-thread stack and register snapshots taken while stopped. */
265 FOREACH_THREAD (info) {
266 info->stack_start = NULL;
268 memset (&info->ctx, 0, sizeof (MonoContext));
270 memset (&info->regs, 0, sizeof (info->regs));
274 TV_GETTIME (start_handshake);
276 if (mono_thread_info_unified_management_enabled ())
277 count = sgen_unified_suspend_restart_world ();
279 count = sgen_thread_handshake (FALSE);
283 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
284 usec = TV_ELAPSED (stop_world_time, end_sw);
285 max_pause_usec = MAX (usec, max_pause_usec);
287 SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
290 * We must release the thread info suspend lock after doing
291 * the thread handshake. Otherwise, if the GC stops the world
292 * and a thread is in the process of starting up, but has not
293 * yet registered (it's not in the thread_list), it is
294 * possible that the thread does register while the world is
295 * stopped. When restarting the GC will then try to restart
296 * said thread, but since it never got the suspend signal, it
297 * cannot answer the restart signal, so a deadlock results.
301 TV_GETTIME (end_bridge);
302 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
305 timing [0].stw_time = usec;
306 timing [0].bridge_time = bridge_usec;
/*
 * mono_sgen_init_stw:
 *
 * Registers the accumulated stop and restart handshake times as the GC time
 * counters "World stop" and "World restart".
 */
313 mono_sgen_init_stw (void)
315 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
316 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
319 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify
 *
 * Decides whether @info takes part in the current stop-the-world. The checks
 * visible here single out threads that have gc_disabled set, the calling
 * thread itself, SGen thread-pool threads, and threads that are no longer
 * live.
 *
 * NOTE(review): the result of each check is not shown here; callers use the
 * return value as a boolean, so each check presumably yields FALSE with TRUE
 * as the fall-through -- confirm.
 */
322 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
325 A thread explicitly asked to be skiped because it holds no managed state.
326 This is used by TP and finalizer threads.
327 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
329 if (info->gc_disabled) {
334 We have detected that this thread is failing/dying, ignore it.
335 FIXME: can't we merge this with thread_is_dying?
342 Suspending the current thread will deadlock us, bad idea.
344 if (info == mono_thread_info_current ()) {
349 We can't suspend the workers that will do all the heavy lifting.
350 FIXME Use some state bit in SgenThreadInfo for this.
352 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
357 The thread has signaled that it started to detach, ignore it.
358 FIXME: can't we merge this with skip
360 if (!mono_thread_info_is_live (info)) {
/*
 * update_sgen_info:
 * @info: thread whose suspend state is copied into its SGen fields
 *
 * Fills stopped_domain from the thread's TLS_KEY_DOMAIN slot, and stopped_ip
 * and stack_start from the saved suspend context. stack_start is the saved
 * stack pointer lowered by REDZONE_SIZE; if it falls outside
 * [stack_start_limit, stack_end) the runtime aborts with "BAD STACK". The
 * saved context is also copied into info->ctx.
 *
 * NOTE(review): the g_assert_not_reached () is presumably the branch for
 * builds without USE_MONO_CTX; the preprocessor guards are not shown here.
 */
368 update_sgen_info (SgenThreadInfo *info)
372 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
373 info->stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
374 info->stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
375 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
377 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
378 if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
379 g_error ("BAD STACK");
381 info->stack_start = stack_start;
383 info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
385 g_assert_not_reached ();
/*
 * sgen_unified_suspend_stop_world:
 *
 * Stops the world using the mono-threads suspend machinery.
 *
 * Visible phases:
 *  1. Begin a global suspend, clear suspend_done on every thread and request
 *     suspension of each thread accepted by sgen_is_thread_in_current_stw ();
 *     info->skip records a failed request. The calling thread is marked
 *     suspend_done and pending operations are awaited.
 *  2. For every participating thread that is not yet done: a thread whose
 *     suspend did not complete is logged as skipped, a thread stopped in a
 *     critical location is resumed, and any other thread is marked
 *     suspend_done. When restart_counter is zero this phase is over;
 *     otherwise wait, back off (sleep_duration grows by 10 per sleep),
 *     request suspension of the threads that are running again, and wait for
 *     that to finish.
 *  3. For every participating thread, assert suspend_done and copy its state
 *     with update_sgen_info (); any other thread must not be marked done
 *     unless it is the calling thread.
 *
 * NOTE(review): the loop around phase 2, the updates of restart_counter and
 * the returned count are not shown here. The caller stores the result as the
 * number of stopped threads -- confirm what is actually counted.
 */
390 sgen_unified_suspend_stop_world (void)
393 SgenThreadInfo *info;
395 int sleep_duration = -1;
397 mono_threads_begin_global_suspend ();
398 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
/* Phase 1: request suspension of every participating thread. */
400 FOREACH_THREAD_SAFE (info) {
402 info->suspend_done = FALSE;
403 if (sgen_is_thread_in_current_stw (info)) {
404 info->skip = !mono_thread_info_begin_suspend (info, FALSE);
405 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
409 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
411 } END_FOREACH_THREAD_SAFE
413 mono_thread_info_current ()->suspend_done = TRUE;
414 mono_threads_wait_pending_operations ();
/* Phase 2: accept good suspends, restart threads caught in critical locations. */
418 FOREACH_THREAD_SAFE (info) {
419 if (info->suspend_done || !sgen_is_thread_in_current_stw (info)) {
420 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->suspend_done, !sgen_is_thread_in_current_stw (info));
425 All threads that reach here are pristine suspended. This means the following:
427 - We haven't accepted the previous suspend as good.
428 - We haven't gave up on it for this STW (it's either bad or asked not to)
430 if (!mono_threads_core_check_suspend_result (info)) {
431 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
433 } else if (mono_thread_info_in_critical_location (info)) {
435 g_assert (mono_thread_info_suspend_count (info) == 1);
436 res = mono_thread_info_begin_resume (info);
437 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
443 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
444 g_assert (!info->in_critical_region);
445 info->suspend_done = TRUE;
447 } END_FOREACH_THREAD_SAFE
449 if (restart_counter == 0)
451 mono_threads_wait_pending_operations ();
453 if (sleep_duration < 0) {
461 g_usleep (sleep_duration);
462 sleep_duration += 10;
465 FOREACH_THREAD_SAFE (info) {
466 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
467 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
468 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
472 } END_FOREACH_THREAD_SAFE
474 mono_threads_wait_pending_operations ();
477 FOREACH_THREAD_SAFE (info) {
478 if (sgen_is_thread_in_current_stw (info)) {
479 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
480 g_assert (info->suspend_done);
481 update_sgen_info (info);
483 g_assert (!info->suspend_done || info == mono_thread_info_current ());
485 } END_FOREACH_THREAD_SAFE
/*
 * sgen_unified_suspend_restart_world:
 *
 * Resumes the world using the mono-threads suspend machinery: every thread
 * accepted by sgen_is_thread_in_current_stw () is resumed with
 * mono_thread_info_begin_resume (); other threads are only logged as ignored.
 * Pending operations are then awaited and the global suspend is ended.
 *
 * NOTE(review): the resume call is made inside g_assert (). If assertions
 * can be compiled out in this build, the call disappears with them and the
 * threads stay suspended -- confirm, or hoist the call into a local first.
 * NOTE(review): the returned count is not shown here; the caller logs it as
 * the number of restarted threads.
 */
491 sgen_unified_suspend_restart_world (void)
493 SgenThreadInfo *info;
496 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
497 FOREACH_THREAD_SAFE (info) {
498 if (sgen_is_thread_in_current_stw (info)) {
499 g_assert (mono_thread_info_begin_resume (info));
500 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
503 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
505 } END_FOREACH_THREAD_SAFE
507 mono_threads_wait_pending_operations ();
508 mono_threads_end_global_suspend ();