/**
 * \file
 * Stop the world functionality
 *
 * Authors:
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
/* Short local aliases for the SGen timing macros. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Defined further down, in the "Unified suspend code" part of this file. */
static void sgen_unified_suspend_restart_world (void);
static void sgen_unified_suspend_stop_world (void);

/*
 * Timestamp stored by sgen_client_restart_world () once the world is running
 * again. mono_time_since_last_stw () treats the value 0 as "no pause yet".
 */
static TV_DECLARE (end_of_last_stw);
40 guint64 mono_time_since_last_stw ()
42 if (end_of_last_stw == 0)
45 TV_DECLARE (current_time);
46 TV_GETTIME (current_time);
47 return TV_ELAPSED (end_of_last_stw, current_time);
/* Incremented by sgen_client_stop_world () each time the world is stopped. */
unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 * @ptr: address to align
 *
 * Rounds @ptr up to the next multiple of the pointer size. An address that is
 * already pointer-aligned is returned unchanged.
 *
 * NOTE(review): the declaration of the integer local and the return type line
 * were dropped from the listing; size_t / sizeof (void *) are used here as the
 * pointer-width equivalents of the GLib spellings.
 */
static inline void *
align_pointer (void *ptr)
{
	const size_t align_mask = sizeof (void *) - 1;
	size_t addr = (size_t) ptr;

	addr += align_mask;
	addr &= ~align_mask;
	return (void *) addr;
}
62 update_current_thread_stack (void *start)
65 SgenThreadInfo *info = mono_thread_info_current ();
67 info->client_info.stack_start = align_pointer (&stack_guard);
68 g_assert (info->client_info.stack_start);
69 g_assert (info->client_info.stack_start >= info->client_info.info.stack_start_limit && info->client_info.stack_start < info->client_info.info.stack_end);
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
73 #elif defined (HOST_WASM)
76 g_error ("Sgen STW requires a working mono-context");
79 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
80 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
84 acquire_gc_locks (void)
87 mono_thread_info_suspend_lock ();
91 release_gc_locks (void)
93 mono_thread_info_suspend_unlock ();
97 static TV_DECLARE (stop_world_time);
98 static unsigned long max_pause_usec = 0;
100 static guint64 time_stop_world;
101 static guint64 time_restart_world;
103 /* LOCKING: assumes the GC lock is held */
105 sgen_client_stop_world (int generation)
107 TV_DECLARE (end_handshake);
109 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_STOP_WORLD, generation));
113 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation));
115 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
116 sgen_process_togglerefs ();
118 update_current_thread_stack (&generation);
120 sgen_global_stop_count++;
121 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
122 TV_GETTIME (stop_world_time);
124 sgen_unified_suspend_stop_world ();
126 SGEN_LOG (3, "world stopped");
128 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_STOP_WORLD, generation));
130 TV_GETTIME (end_handshake);
131 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
133 sgen_memgov_collection_start (generation);
134 if (sgen_need_bridge_processing ())
135 sgen_bridge_reset_data ();
138 /* LOCKING: assumes the GC lock is held */
140 sgen_client_restart_world (int generation, gint64 *stw_time)
143 TV_DECLARE (start_handshake);
146 /* notify the profiler of the leftovers */
147 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
148 if (MONO_PROFILER_ENABLED (gc_moves))
149 mono_sgen_gc_event_moves ();
151 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_START_WORLD, generation));
153 FOREACH_THREAD (info) {
154 info->client_info.stack_start = NULL;
155 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
158 TV_GETTIME (start_handshake);
160 sgen_unified_suspend_restart_world ();
163 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
164 usec = TV_ELAPSED (stop_world_time, end_sw);
165 max_pause_usec = MAX (usec, max_pause_usec);
166 end_of_last_stw = end_sw;
168 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
170 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_START_WORLD, generation));
173 * We must release the thread info suspend lock after doing
174 * the thread handshake. Otherwise, if the GC stops the world
175 * and a thread is in the process of starting up, but has not
176 * yet registered (it's not in the thread_list), it is
177 * possible that the thread does register while the world is
178 * stopped. When restarting the GC will then try to restart
179 * said thread, but since it never got the suspend signal, it
180 * cannot answer the restart signal, so a deadlock results.
184 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation));
190 mono_sgen_init_stw (void)
192 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
193 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
196 /* Unified suspend code */
199 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
202 A thread explicitly asked to be skiped because it holds no managed state.
203 This is used by TP and finalizer threads.
204 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
206 if (info->client_info.gc_disabled) {
213 We have detected that this thread is failing/dying, ignore it.
214 FIXME: can't we merge this with thread_is_dying?
216 if (info->client_info.skip) {
223 Suspending the current thread will deadlock us, bad idea.
225 if (info == mono_thread_info_current ()) {
232 We can't suspend the workers that will do all the heavy lifting.
233 FIXME Use some state bit in SgenThreadInfo for this.
235 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
242 The thread has signaled that it started to detach, ignore it.
243 FIXME: can't we merge this with skip
245 if (!mono_thread_info_is_live (info)) {
255 sgen_unified_suspend_stop_world (void)
257 int sleep_duration = -1;
259 mono_threads_begin_global_suspend ();
260 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
262 FOREACH_THREAD (info) {
263 info->client_info.skip = FALSE;
264 info->client_info.suspend_done = FALSE;
267 if (!sgen_is_thread_in_current_stw (info, &reason)) {
268 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
272 info->client_info.skip = !mono_thread_info_begin_suspend (info);
274 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
277 mono_thread_info_current ()->client_info.suspend_done = TRUE;
278 mono_threads_wait_pending_operations ();
281 gint restart_counter = 0;
283 FOREACH_THREAD (info) {
287 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
288 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
293 All threads that reach here are pristine suspended. This means the following:
295 - We haven't accepted the previous suspend as good.
296 - We haven't gave up on it for this STW (it's either bad or asked not to)
298 if (!mono_thread_info_in_critical_location (info)) {
299 info->client_info.suspend_done = TRUE;
301 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
305 suspend_count = mono_thread_info_suspend_count (info);
306 if (!(suspend_count == 1))
307 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
309 info->client_info.skip = !mono_thread_info_begin_resume (info);
310 if (!info->client_info.skip)
311 restart_counter += 1;
313 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
316 mono_threads_wait_pending_operations ();
318 if (restart_counter == 0)
321 if (sleep_duration < 0) {
322 mono_thread_info_yield ();
325 g_usleep (sleep_duration);
326 sleep_duration += 10;
329 FOREACH_THREAD (info) {
331 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
332 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
336 if (!mono_thread_info_is_running (info)) {
337 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
341 info->client_info.skip = !mono_thread_info_begin_suspend (info);
343 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
346 mono_threads_wait_pending_operations ();
349 FOREACH_THREAD (info) {
353 if (!sgen_is_thread_in_current_stw (info, &reason)) {
354 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
356 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
360 g_assert (info->client_info.suspend_done);
362 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
364 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
365 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
367 if (info->client_info.stack_start < info->client_info.info.stack_start_limit
368 || info->client_info.stack_start >= info->client_info.info.stack_end) {
370 * Thread context is in unhandled state, most likely because it is
371 * dying. We don't scan it.
372 * FIXME We should probably rework and check the valid flag instead.
374 info->client_info.stack_start = NULL;
377 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
379 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
381 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
382 mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.info.stack_end : NULL);
387 sgen_unified_suspend_restart_world (void)
389 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
390 FOREACH_THREAD (info) {
392 if (sgen_is_thread_in_current_stw (info, &reason)) {
393 g_assert (mono_thread_info_begin_resume (info));
394 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
396 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
398 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
402 mono_threads_wait_pending_operations ();
403 mono_threads_end_global_suspend ();