2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Library General Public
15 * License 2.0 as published by the Free Software Foundation;
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License 2.0 along with this library; if not, write to the Free
24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 #include "sgen/sgen-gc.h"
31 #include "sgen/sgen-protocol.h"
32 #include "sgen/sgen-memory-governor.h"
33 #include "sgen/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "sgen/sgen-client.h"
36 #include "metadata/sgen-bridge-internals.h"
37 #include "metadata/gc-internals.h"
/* File-local shorthands for the SGEN_TV_* timing macros. */
39 #define TV_DECLARE SGEN_TV_DECLARE
40 #define TV_GETTIME SGEN_TV_GETTIME
41 #define TV_ELAPSED SGEN_TV_ELAPSED
/*
 * Forward declarations: both functions are defined at the end of this file
 * and are called from sgen_client_restart_world and sgen_client_stop_world
 * when mono_thread_info_unified_management_enabled () is true.
 */
43 static void sgen_unified_suspend_restart_world (void);
44 static void sgen_unified_suspend_stop_world (void);
/* Incremented once per sgen_client_stop_world call and printed in its log line. */
46 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 * The two visible statements round p up to a multiple of sizeof (gpointer):
 * first add sizeof (gpointer) - 1, then clear the low bits with a mask.
 * The mask form is only a correct round-up when sizeof (gpointer) is a
 * power of two.
 * NOTE(review): the declaration of p and the return statement are not
 * visible in this excerpt. Presumably p is initialised from ptr and the
 * rounded value is returned as a pointer, since the only visible caller
 * stores the result in client_info.stack_start - confirm.
 */
49 align_pointer (void *ptr)
52 p += sizeof (gpointer) - 1;
53 p &= ~ (sizeof (gpointer) - 1);
/*
 * File-scope scratch storage written by update_current_thread_stack:
 * cur_thread_ctx is the target of MONO_CONTEXT_GET_CURRENT and
 * cur_thread_regs is the buffer handed to ARCH_STORE_REGS.
 * NOTE(review): update_current_thread_stack guards its use of
 * cur_thread_regs with !defined(USE_MONO_CTX), so these two declarations are
 * presumably alternatives selected by that macro. The preprocessor lines
 * are not visible in this excerpt - confirm.
 */
58 static MonoContext cur_thread_ctx;
60 static mword cur_thread_regs [ARCH_NUM_REGS];
/*
 * update_current_thread_stack:
 * Records the stack position and register state of the calling thread in
 * the client_info of its SgenThreadInfo (from mono_thread_info_current ()).
 * The start parameter is not referenced by any line visible in this excerpt.
 * NOTE(review): the declaration of stack_guard and the preprocessor lines
 * separating the context path from the register path are not visible here.
 * Presumably the two halves below are the USE_MONO_CTX and non-USE_MONO_CTX
 * alternatives - confirm.
 */
64 update_current_thread_stack (void *start)
67 #if !defined(USE_MONO_CTX)
68 void *reg_ptr = cur_thread_regs;
70 SgenThreadInfo *info = mono_thread_info_current ();
/* Use the aligned address of a local as the current top of stack, and check it lies in [stack_start_limit, stack_end). */
72 info->client_info.stack_start = align_pointer (&stack_guard);
73 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/* Context path: capture the current MonoContext, copy it into client_info.ctx and pass that copy to the optional thread_suspend_func callback. */
75 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
76 memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
77 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/* Register path: store the registers into cur_thread_regs, copy them into client_info.regs and call the optional callback with a NULL context. */
80 ARCH_STORE_REGS (reg_ptr);
81 memcpy (&info->client_info.regs, reg_ptr, sizeof (info->client_info.regs));
82 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
83 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, NULL);
/*
 * is_ip_in_managed_allocator:
 * @domain: domain whose JIT info table is searched
 * @ip: instruction pointer to classify
 *
 * Looks up the JIT info covering ip in domain and returns the result of
 * sgen_is_critical_method for the method that owns it.
 * Before the lookup it checks that mono_thread_internal_current () is
 * non-NULL and that sgen_has_critical_method () is true.
 * NOTE(review): the statements executed when those checks fail, and any
 * check on the lookup result, are not visible in this excerpt. Presumably
 * each is an early FALSE return - confirm.
 */
88 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
92 if (!mono_thread_internal_current ())
93 /* Happens during thread attach */
98 if (!sgen_has_critical_method ())
102 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
103 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
104 * to register the jit info for all GC critical methods after they are JITted/loaded.
/* Both trailing FALSE arguments are passed to the _internal lookup variant; per the note above this is what disables the AOT fallback. */
106 ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
110 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 * For every thread that is not skipped, not gc_disabled and not already
 * suspend_done: if it is live and either has no stack_start, is flagged as
 * inside a critical region, or was stopped at an ip for which
 * is_ip_in_managed_allocator is true, the thread is resumed. Otherwise its
 * stopped_ip and stopped_domain are cleared and suspend_done is set.
 * Resumed threads are then waited for, given time to run, suspended again
 * and waited for once more.
 *
 * Returns num_threads_died, which accumulates
 * restart_count - restarted_count.
 *
 * NOTE(review): the enclosing loop construct, the increments of
 * restart_count and restarted_count, the exit taken when restart_count is
 * zero and the else branches are not visible in this excerpt. The function
 * name suggests the whole sequence repeats until no thread needs a
 * restart - confirm.
 */
114 restart_threads_until_none_in_managed_allocator (void)
116 int num_threads_died = 0;
117 int sleep_duration = -1;
120 int restart_count = 0, restarted_count = 0;
121 /* restart all threads that stopped in the
123 FOREACH_THREAD (info) {
125 if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
127 if (mono_thread_info_is_live (info) &&
128 (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
129 is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
130 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
131 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
132 result = sgen_resume_thread (info);
136 info->client_info.skip = 1;
139 /* we set the stopped_ip to
140 NULL for threads which
141 we're not restarting so
142 that we can easily identify
144 info->client_info.stopped_ip = NULL;
145 info->client_info.stopped_domain = NULL;
146 info->client_info.suspend_done = TRUE;
149 /* if no threads were restarted, we're done */
150 if (restart_count == 0)
153 /* wait for the threads to signal their restart */
154 sgen_wait_for_suspend_ack (restart_count);
/*
 * Give the resumed threads time to run. sleep_duration starts at -1, which
 * selects a plain yield. The other visible path sleeps for sleep_duration
 * and then lengthens it by 10.
 * NOTE(review): the statement that makes sleep_duration non-negative after
 * the yield is not visible here - confirm.
 */
156 if (sleep_duration < 0) {
157 mono_thread_info_yield ();
160 g_usleep (sleep_duration);
161 sleep_duration += 10;
164 /* stop them again */
165 FOREACH_THREAD (info) {
167 if (info->client_info.skip || info->client_info.stopped_ip == NULL)
169 result = sgen_suspend_thread (info);
174 info->client_info.skip = 1;
177 /* some threads might have died */
178 num_threads_died += restart_count - restarted_count;
179 /* wait for the threads to signal their suspension
181 sgen_wait_for_suspend_ack (restarted_count);
184 return num_threads_died;
/*
 * acquire_gc_locks:
 * Takes the thread-info suspend lock via mono_thread_info_suspend_lock ().
 * NOTE(review): the embedded line numbers jump from 188 to 191, so the body
 * may contain a statement before this call that is not shown here. No call
 * to this function is visible in this excerpt either - confirm both
 * against the full file.
 */
188 acquire_gc_locks (void)
191 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 * Releases the thread-info suspend lock via
 * mono_thread_info_suspend_unlock (), the counterpart of the lock call in
 * acquire_gc_locks.
 * NOTE(review): the body may continue past the visible line, because the
 * next embedded line number after 197 is 201 - confirm against the full
 * file.
 */
195 release_gc_locks (void)
197 mono_thread_info_suspend_unlock ();
/* Timestamp taken by sgen_client_stop_world just before suspending threads; sgen_client_restart_world measures the pause from it. */
201 static TV_DECLARE (stop_world_time);
/* Longest pause seen so far, updated with MAX in sgen_client_restart_world. */
202 static unsigned long max_pause_usec = 0;
/* Accumulated TV_ELAPSED totals for the stop and restart handshakes; both are registered as counters in mono_sgen_init_stw. */
204 static guint64 time_stop_world;
205 static guint64 time_restart_world;
207 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_stop_world:
 * @generation: passed on to sgen_memgov_collection_start. Its address is
 * also handed to update_current_thread_stack.
 *
 * Visible sequence: flush pending GC move events to the profiler if
 * MONO_PROFILE_GC_MOVES is enabled, process togglerefs, record the stack
 * of the current thread, bump sgen_global_stop_count, timestamp
 * stop_world_time, suspend the other threads, add the handshake duration
 * to time_stop_world, notify the memory governor and reset bridge data
 * when bridge processing is needed.
 *
 * NOTE(review): the embedded line numbers skip 217-219 and 235, so a lock
 * acquisition and the condition guarding g_error are not visible in this
 * excerpt - confirm against the full file.
 */
209 sgen_client_stop_world (int generation)
211 TV_DECLARE (end_handshake);
213 /* notify the profiler of the leftovers */
214 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
215 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
216 mono_sgen_gc_event_moves ();
220 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
221 sgen_process_togglerefs ();
223 update_current_thread_stack (&generation);
225 sgen_global_stop_count++;
226 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
227 TV_GETTIME (stop_world_time);
/*
 * Two suspend implementations. With unified thread management the work is
 * delegated to sgen_unified_suspend_stop_world. The other visible path does
 * a suspend handshake and then restarts threads stopped in critical code,
 * comparing the number that died with the number initially suspended.
 * NOTE(review): presumably the handshake path is the else branch - confirm.
 * NOTE(review): the g_error message wording has typos (that for than,
 * initialy). It is a runtime string and is left unchanged here.
 */
229 if (mono_thread_info_unified_management_enabled ()) {
230 sgen_unified_suspend_stop_world ();
233 count = sgen_thread_handshake (TRUE);
234 dead = restart_threads_until_none_in_managed_allocator ();
236 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
239 SGEN_LOG (3, "world stopped");
241 TV_GETTIME (end_handshake);
242 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
244 sgen_memgov_collection_start (generation);
245 if (sgen_need_bridge_processing ())
246 sgen_bridge_reset_data ();
249 /* LOCKING: assumes the GC lock is held */
/*
 * sgen_client_restart_world:
 * @generation: not referenced by any line visible in this excerpt
 * @timing: timing [0].stw_time receives the total pause (stop_world_time to
 * end_sw) and timing [0].bridge_time receives the span from end_sw to
 * end_bridge
 *
 * Visible sequence: flush pending GC move events to the profiler, clear the
 * saved stack_start and context or register snapshot of every thread,
 * resume the world, update time_restart_world and max_pause_usec, log the
 * pause, then take the end_bridge timestamp and fill in timing.
 *
 * NOTE(review): the declaration of end_sw, the TV_GETTIME that sets it,
 * whatever runs between the pause log and end_bridge, and any NULL check
 * on timing are not visible in this excerpt - confirm against the full
 * file.
 */
251 sgen_client_restart_world (int generation, GGTimingInfo *timing)
254 TV_DECLARE (start_handshake);
255 TV_DECLARE (end_bridge);
256 unsigned long usec, bridge_usec;
258 /* notify the profiler of the leftovers */
259 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
260 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
261 mono_sgen_gc_event_moves ();
/* Forget the per-thread stack and register snapshots taken for this pause. NOTE(review): the two memset calls are presumably USE_MONO_CTX alternatives - confirm. */
263 FOREACH_THREAD (info) {
264 info->client_info.stack_start = NULL;
266 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
268 memset (&info->client_info.regs, 0, sizeof (info->client_info.regs));
272 TV_GETTIME (start_handshake);
/* Resume with the same mechanism that sgen_client_stop_world used to suspend. */
274 if (mono_thread_info_unified_management_enabled ())
275 sgen_unified_suspend_restart_world ();
277 sgen_thread_handshake (FALSE);
/* usec is the whole pause, measured from the timestamp taken in sgen_client_stop_world. The log line narrows both values to int for its %d conversions. */
280 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
281 usec = TV_ELAPSED (stop_world_time, end_sw);
282 max_pause_usec = MAX (usec, max_pause_usec);
284 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
287 * We must release the thread info suspend lock after doing
288 * the thread handshake. Otherwise, if the GC stops the world
289 * and a thread is in the process of starting up, but has not
290 * yet registered (it's not in the thread_list), it is
291 * possible that the thread does register while the world is
292 * stopped. When restarting the GC will then try to restart
293 * said thread, but since it never got the suspend signal, it
294 * cannot answer the restart signal, so a deadlock results.
298 TV_GETTIME (end_bridge);
299 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
302 timing [0].stw_time = usec;
303 timing [0].bridge_time = bridge_usec;
/*
 * mono_sgen_init_stw:
 * Registers the accumulated stop and restart handshake times as GC time
 * counters named World stop and World restart.
 * NOTE(review): both counters are registered with MONO_COUNTER_ULONG while
 * time_stop_world and time_restart_world are declared guint64 in this
 * file. Confirm the counter type matches the variable width on every
 * supported target.
 */
308 mono_sgen_init_stw (void)
310 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
311 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
314 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify
 *
 * Decides whether info takes part in the current stop-the-world. The
 * checks run in this order: gc_disabled flag, skip flag, info is the
 * calling thread, info is a thread pool thread, info is no longer live.
 * Callers in this file suspend and resume exactly the threads for which
 * this function returns true.
 * NOTE(review): none of the return statements are visible in this excerpt.
 * Presumably every check that matches returns FALSE and the function ends
 * by returning TRUE - confirm.
 */
317 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
320 A thread explicitly asked to be skiped because it holds no managed state.
321 This is used by TP and finalizer threads.
322 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
324 if (info->client_info.gc_disabled) {
329 We have detected that this thread is failing/dying, ignore it.
330 FIXME: can't we merge this with thread_is_dying?
332 if (info->client_info.skip) {
337 Suspending the current thread will deadlock us, bad idea.
339 if (info == mono_thread_info_current ()) {
344 We can't suspend the workers that will do all the heavy lifting.
345 FIXME Use some state bit in SgenThreadInfo for this.
347 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
352 The thread has signaled that it started to detach, ignore it.
353 FIXME: can't we merge this with skip
355 if (!mono_thread_info_is_live (info)) {
/*
 * update_sgen_info:
 * @info: a thread that has been suspended
 *
 * Copies what the collector needs out of the suspend state of the thread
 * into info->client_info: the domain stored in its TLS, the ip and the
 * context it was stopped at, and a stack_start derived from the saved
 * stack pointer. Aborts with g_error when that stack_start lies outside
 * [stack_start_limit, stack_end).
 * NOTE(review): the declaration of stack_start and the preprocessor lines
 * around the last two statements are not visible in this excerpt.
 * Presumably g_assert_not_reached is the branch compiled when the context
 * copy is unavailable - confirm.
 */
363 update_sgen_info (SgenThreadInfo *info)
367 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
368 info->client_info.stopped_domain = (MonoDomain *)mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
369 info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
/* Start REDZONE_SIZE bytes below the saved stack pointer. */
370 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
372 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
373 if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
374 g_error ("BAD STACK");
376 info->client_info.stack_start = stack_start;
378 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
380 g_assert_not_reached ();
/*
 * sgen_unified_suspend_stop_world:
 *
 * Stops the world through the mono-threads suspend API, between
 * mono_threads_begin_global_suspend here and
 * mono_threads_end_global_suspend in sgen_unified_suspend_restart_world.
 *
 * Visible phases:
 *  1. Reset skip and suspend_done on every thread and begin suspending
 *     each thread accepted by sgen_is_thread_in_current_stw. A failed
 *     begin_suspend marks the thread as skipped.
 *  2. Examine each thread that is still pending. A thread whose suspend
 *     did not complete is skipped, a thread stopped in a critical location
 *     is resumed, and any other thread is marked suspend_done.
 *  3. Wait, back off, and suspend again the threads that are running.
 *  4. Assert the final state and copy each suspended thread into sgen
 *     bookkeeping with update_sgen_info.
 *
 * NOTE(review): the loop construct around phases 2 and 3, the declaration
 * and updates of restart_counter, the statements between the two
 * sleep_duration lines and several else branches are not visible in this
 * excerpt. Presumably phases 2 and 3 repeat until restart_counter is
 * zero - confirm.
 */
385 sgen_unified_suspend_stop_world (void)
388 int sleep_duration = -1;
390 mono_threads_begin_global_suspend ();
391 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
/* Phase 1: request suspension of every participating thread. */
393 FOREACH_THREAD (info) {
394 info->client_info.skip = FALSE;
395 info->client_info.suspend_done = FALSE;
396 if (sgen_is_thread_in_current_stw (info)) {
397 info->client_info.skip = !mono_thread_info_begin_suspend (info);
398 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
400 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
/* The calling thread is never suspended, so it is marked done up front before waiting for the requests above to complete. */
404 mono_thread_info_current ()->client_info.suspend_done = TRUE;
405 mono_threads_wait_pending_operations ();
/* Phase 2: classify every thread that is still pending. */
409 FOREACH_THREAD (info) {
410 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
411 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
416 All threads that reach here are pristine suspended. This means the following:
418 - We haven't accepted the previous suspend as good.
419 - We haven't gave up on it for this STW (it's either bad or asked not to)
421 if (!mono_thread_info_check_suspend_result (info)) {
422 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
423 info->client_info.skip = TRUE;
424 } else if (mono_thread_info_in_critical_location (info)) {
426 g_assert (mono_thread_info_suspend_count (info) == 1);
427 res = mono_thread_info_begin_resume (info);
428 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
432 info->client_info.skip = TRUE;
434 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
435 g_assert (!info->client_info.in_critical_region);
436 info->client_info.suspend_done = TRUE;
440 if (restart_counter == 0)
442 mono_threads_wait_pending_operations ();
444 if (sleep_duration < 0) {
452 g_usleep (sleep_duration);
453 sleep_duration += 10;
/* Phase 3: suspend again every participating thread that is currently running. */
456 FOREACH_THREAD (info) {
457 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
458 gboolean res = mono_thread_info_begin_suspend (info);
459 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
461 info->client_info.skip = TRUE;
465 mono_threads_wait_pending_operations ();
/* Phase 4: every participating thread must be suspend_done and is copied into sgen state. The last assertion requires suspend_done to be clear on any other thread except the calling one. NOTE(review): presumably that last assertion sits in the else branch - confirm. */
468 FOREACH_THREAD (info) {
469 if (sgen_is_thread_in_current_stw (info)) {
470 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
471 g_assert (info->client_info.suspend_done);
472 update_sgen_info (info);
474 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * sgen_unified_suspend_restart_world:
 *
 * Resumes every thread accepted by sgen_is_thread_in_current_stw, waits for
 * the pending resume operations to complete and then ends the global
 * suspend started in sgen_unified_suspend_stop_world.
 * NOTE(review): the resume call is made inside g_assert. If assertions were
 * ever compiled out of a build, the threads would not be resumed - confirm
 * that no supported configuration disables g_assert.
 */
480 sgen_unified_suspend_restart_world (void)
482 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
483 FOREACH_THREAD (info) {
484 if (sgen_is_thread_in_current_stw (info)) {
485 g_assert (mono_thread_info_begin_resume (info));
486 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
488 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
492 mono_threads_wait_pending_operations ();
493 mono_threads_end_global_suspend ();