3 * Stop the world functionality
6 * Paolo Molaro (lupus@ximian.com)
7 * Rodrigo Kumpera (kumpera@gmail.com)
9 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11 * Copyright 2011 Xamarin, Inc.
12 * Copyright (C) 2012 Xamarin Inc
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
/* File-local shorthands for the SGEN_TV_* timing macros. */
31 #define TV_DECLARE SGEN_TV_DECLARE
32 #define TV_GETTIME SGEN_TV_GETTIME
33 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations of the unified-suspend helpers defined at the end of this file. */
35 static void sgen_unified_suspend_restart_world (void);
36 static void sgen_unified_suspend_stop_world (void);
/*
 * Timestamp of the end of the most recent world restart. It is assigned in
 * sgen_client_restart_world and read by mono_time_since_last_stw, which
 * compares it against 0 before using it.
 */
38 static TV_DECLARE (end_of_last_stw);
/*
 * Returns TV_ELAPSED (end_of_last_stw, now): the interval between the end of
 * the last recorded world restart and the current time.
 *
 * NOTE(review): embedded line 43 (the statement guarded by the check below)
 * is absent from this extract; presumably an early return for the case where
 * no stop-the-world has completed yet -- confirm against the full file.
 */
40 guint64 mono_time_since_last_stw ()
/* end_of_last_stw is only assigned in sgen_client_restart_world; 0 is treated as "never set". */
42 if (end_of_last_stw == 0)
45 TV_DECLARE (current_time);
46 TV_GETTIME (current_time);
47 return TV_ELAPSED (end_of_last_stw, current_time);
/* Incremented once per sgen_client_stop_world call and printed there as the stop number. */
50 unsigned int sgen_global_stop_count = 0;
/*
 * Rounds a value up to the next multiple of sizeof (gpointer).
 *
 * NOTE(review): the declaration of p and the return statement are absent from
 * this extract; p is presumably the integer value of ptr and the rounded
 * value is presumably returned as a pointer -- confirm against the full file.
 */
53 align_pointer (void *ptr)
/*
 * Add (alignment - 1), then mask off the low bits. Assuming sizeof (gpointer)
 * is a power of two, this yields the smallest multiple of it that is >= the
 * starting value.
 */
56 p += sizeof (gpointer) - 1;
57 p &= ~ (sizeof (gpointer) - 1);
/*
 * Records the calling thread's own stack position and register context in its
 * SgenThreadInfo, then notifies the runtime's thread_suspend_func callback if
 * one is installed.
 *
 * NOTE(review): `start` is not referenced by any line visible in this extract.
 * The declaration of `stack_guard` (a local whose address is used below) and
 * the #else / #endif lines of the preprocessor conditional are also absent.
 */
62 update_current_thread_stack (void *start)
65 SgenThreadInfo *info = mono_thread_info_current ();
/* Use the pointer-aligned address of a local as the current stack position. */
67 info->client_info.stack_start = align_pointer (&stack_guard);
68 g_assert (info->client_info.stack_start);
/* The recorded position must lie inside [stack_start_limit, stack_end). */
69 g_assert (info->client_info.stack_start >= info->client_info.info.stack_start_limit && info->client_info.stack_start < info->client_info.info.stack_end);
/* Capture the current context when a MonoContext implementation is available; the g_error below is presumably the #else branch. */
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
74 g_error ("Sgen STW requires a working mono-context");
/* Optional runtime hook: called with the thread's runtime data, a NULL second argument and the captured context. */
77 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * Takes the thread-info suspend lock.
 *
 * NOTE(review): embedded line 84 is absent from this extract, so another
 * statement may precede the call below -- confirm against the full file.
 */
82 acquire_gc_locks (void)
85 mono_thread_info_suspend_lock ();
/*
 * Releases the thread-info suspend lock taken by acquire_gc_locks.
 *
 * NOTE(review): embedded line 92 is absent from this extract, so another
 * statement may follow the call below -- confirm against the full file.
 */
89 release_gc_locks (void)
91 mono_thread_info_suspend_unlock ();
/* Timestamp taken in sgen_client_stop_world just before threads are suspended; start of the pause interval. */
95 static TV_DECLARE (stop_world_time);
/* Largest pause (stop_world_time to end of restart) seen so far; updated in sgen_client_restart_world. */
96 static unsigned long max_pause_usec = 0;
/* Accumulated handshake durations, exposed as the "World stop" and "World restart" counters by mono_sgen_init_stw. */
98 static guint64 time_stop_world;
99 static guint64 time_restart_world;
/*
 * Stops the world on behalf of a collection of the given generation.
 *
 * Visible sequence: raise the PRE_STOP_WORLD profiler events, process
 * togglerefs, record the calling thread's stack and context, bump the global
 * stop counter, suspend the other threads, raise POST_STOP_WORLD, add the
 * handshake time to time_stop_world, notify the memory governor and reset
 * bridge data when bridge processing is needed.
 *
 * generation: passed through to the profiler events and to
 * sgen_memgov_collection_start.
 *
 * NOTE(review): embedded line 109, between the two PRE_STOP_WORLD events, is
 * absent from this extract; presumably the lock acquisition
 * (acquire_gc_locks) -- confirm against the full file.
 */
101 /* LOCKING: assumes the GC lock is held */
103 sgen_client_stop_world (int generation)
105 TV_DECLARE (end_handshake);
107 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_STOP_WORLD, generation));
111 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation));
113 /* We start to scan after locks are taken; this ensures we won't be interrupted. */
114 sgen_process_togglerefs ();
/* The address of a local is passed as the stack marker argument. */
116 update_current_thread_stack (&generation);
118 sgen_global_stop_count++;
119 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
/* Start of the pause interval; sgen_client_restart_world measures the total pause from here. */
120 TV_GETTIME (stop_world_time);
122 sgen_unified_suspend_stop_world ();
124 SGEN_LOG (3, "world stopped");
126 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_STOP_WORLD, generation));
/* Account the time spent suspending threads. */
128 TV_GETTIME (end_handshake);
129 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
131 sgen_memgov_collection_start (generation);
132 if (sgen_need_bridge_processing ())
133 sgen_bridge_reset_data ();
/*
 * Restarts the world after a stop initiated by sgen_client_stop_world.
 *
 * Visible sequence: flush pending profiler move events, raise
 * PRE_START_WORLD, clear every thread's recorded stack_start and context,
 * resume the threads, update the timing statistics (time_restart_world,
 * max_pause_usec, end_of_last_stw), log the pause and raise the
 * POST_START_WORLD events.
 *
 * generation: passed through to the profiler events.
 * stw_time:   NOTE(review): not referenced by any line visible in this
 *             extract; presumably receives the pause time (usec) on an absent
 *             line -- confirm against the full file.
 *
 * NOTE(review): the declarations of end_sw and usec, the TV_GETTIME that
 * fills end_sw, the terminator of the FOREACH_THREAD loop and the statement
 * following the long comment near the end (presumably release_gc_locks) are
 * also absent from this extract.
 */
136 /* LOCKING: assumes the GC lock is held */
138 sgen_client_restart_world (int generation, gint64 *stw_time)
141 TV_DECLARE (start_handshake);
144 /* notify the profiler of the leftovers */
145 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
146 if (MONO_PROFILER_ENABLED (gc_moves))
147 mono_sgen_gc_event_moves ();
149 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_START_WORLD, generation));
/* Drop the per-thread stack position and context captured while the world was stopped. */
151 FOREACH_THREAD (info) {
152 info->client_info.stack_start = NULL;
153 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
156 TV_GETTIME (start_handshake);
158 sgen_unified_suspend_restart_world ();
/* Restart handshake time, then total pause measured from stop_world_time. */
161 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
162 usec = TV_ELAPSED (stop_world_time, end_sw);
163 max_pause_usec = MAX (usec, max_pause_usec);
/* Remembered for mono_time_since_last_stw. */
164 end_of_last_stw = end_sw;
/* Values are narrowed to int to match the %d format specifiers. */
166 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
168 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_START_WORLD, generation));
171 * We must release the thread info suspend lock after doing
172 * the thread handshake. Otherwise, if the GC stops the world
173 * and a thread is in the process of starting up, but has not
174 * yet registered (it's not in the thread_list), it is
175 * possible that the thread does register while the world is
176 * stopped. When restarting the GC will then try to restart
177 * said thread, but since it never got the suspend signal, it
178 * cannot answer the restart signal, so a deadlock results.
182 MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation));
/*
 * Registers the two stop-the-world timing accumulators with the counters
 * subsystem under the names "World stop" and "World restart".
 */
188 mono_sgen_init_stw (void)
190 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
191 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
194 /* Unified suspend code */
/*
 * Decides whether a thread takes part in the current stop-the-world. Callers
 * in this file treat a false result as "leave this thread alone".
 *
 * info:   the thread to classify.
 * reason: out-parameter whose value callers print in their debug messages;
 *         some callers in this file pass NULL.
 *
 * NOTE(review): the body of every test below (and the final return) is absent
 * from this extract. Presumably each branch stores a distinct code through
 * `reason` when it is non-NULL and returns false, and the fall-through
 * returns true -- confirm against the full file.
 */
197 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
200 A thread explicitly asked to be skiped because it holds no managed state.
201 This is used by TP and finalizer threads.
202 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
204 if (info->client_info.gc_disabled) {
211 We have detected that this thread is failing/dying, ignore it.
212 FIXME: can't we merge this with thread_is_dying?
/* skip is set by the suspend/resume loops when a begin_suspend or begin_resume request fails. */
214 if (info->client_info.skip) {
221 Suspending the current thread will deadlock us, bad idea.
223 if (info == mono_thread_info_current ()) {
230 We can't suspend the workers that will do all the heavy lifting.
231 FIXME Use some state bit in SgenThreadInfo for this.
233 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
240 The thread has signaled that it started to detach, ignore it.
241 FIXME: can't we merge this with skip
243 if (!mono_thread_info_is_live (info)) {
/*
 * Suspends every thread that takes part in the stop-the-world and records, for
 * each one, the context and stack position at which it stopped.
 *
 * Visible phases:
 *   1. Request suspension of every participating thread and wait.
 *   2. Retry rounds: a thread stopped in a critical location is resumed and
 *      then suspended again; a thread stopped elsewhere is marked
 *      suspend_done. Rounds end when no thread had to be resumed.
 *   3. Final pass: copy each suspended thread's context and derive its
 *      stack_start.
 *
 * NOTE(review): several lines are absent from this extract: local
 * declarations (reason, suspend_count, stopped_ip), the retry-loop header,
 * the continue/break statements, the FOREACH_THREAD terminators and the
 * lines between the yield and the sleep. Comments below describe only what
 * the visible lines show.
 */
253 sgen_unified_suspend_stop_world (void)
/* Negative means "yield instead of sleeping" in the back-off below. */
255 int sleep_duration = -1;
257 mono_threads_begin_global_suspend ();
258 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
/* Phase 1: reset per-thread state and request the initial suspend. */
260 FOREACH_THREAD (info) {
261 info->client_info.skip = FALSE;
262 info->client_info.suspend_done = FALSE;
265 if (!sgen_is_thread_in_current_stw (info, &reason)) {
266 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
/* A thread whose suspend request fails is marked skip, which excludes it from the rest of this STW. */
270 info->client_info.skip = !mono_thread_info_begin_suspend (info);
272 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
/* Mark the calling thread as done so the loops below pass over it. */
275 mono_thread_info_current ()->client_info.suspend_done = TRUE;
276 mono_threads_wait_pending_operations ();
/* Phase 2 (retry round): counts the threads resumed in this round. */
279 gint restart_counter = 0;
281 FOREACH_THREAD (info) {
/* Skip threads already accepted as suspended and threads outside this STW. */
285 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
286 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
291 All threads that reach here are pristine suspended. This means the following:
293 - We haven't accepted the previous suspend as good.
294 - We haven't gave up on it for this STW (it's either bad or asked not to)
/* Stopped outside a critical location: accept this suspend. */
296 if (!mono_thread_info_in_critical_location (info)) {
297 info->client_info.suspend_done = TRUE;
299 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
/* Otherwise the thread is resumed; any suspend count other than 1 is a fatal error. */
303 suspend_count = mono_thread_info_suspend_count (info);
304 if (!(suspend_count == 1))
305 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
/* A failed resume request marks the thread skip; successful ones are counted. */
307 info->client_info.skip = !mono_thread_info_begin_resume (info);
308 if (!info->client_info.skip)
309 restart_counter += 1;
311 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
314 mono_threads_wait_pending_operations ();
/* Nothing was resumed in this round. NOTE(review): the guarded statement is absent; presumably it leaves the retry loop -- confirm. */
316 if (restart_counter == 0)
/*
 * Back-off before suspending again: yield while sleep_duration is negative,
 * otherwise sleep and lengthen the next sleep by 10.
 * NOTE(review): how sleep_duration leaves its initial -1 is on absent lines.
 */
319 if (sleep_duration < 0) {
320 mono_thread_info_yield ();
323 g_usleep (sleep_duration);
324 sleep_duration += 10;
/* Suspend again the threads that were resumed above. */
327 FOREACH_THREAD (info) {
329 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
330 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
/* Threads that are not running are not sent a new suspend request. */
334 if (!mono_thread_info_is_running (info)) {
335 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
339 info->client_info.skip = !mono_thread_info_begin_suspend (info);
341 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
344 mono_threads_wait_pending_operations ();
/* Phase 3: record where each suspended thread stopped. */
347 FOREACH_THREAD (info) {
/* A thread outside this STW must not be marked done, except the calling thread, which was marked above. */
351 if (!sgen_is_thread_in_current_stw (info, &reason)) {
352 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
354 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
/* Every participating thread must have been accepted by now. */
358 g_assert (info->client_info.suspend_done);
/* Copy the context saved in the thread's suspend state. */
360 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
362 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
/* The stack position is the saved stack pointer lowered by REDZONE_SIZE. */
363 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
/* A position outside [stack_start_limit, stack_end) is discarded by setting stack_start to NULL. */
365 if (info->client_info.stack_start < info->client_info.info.stack_start_limit
366 || info->client_info.stack_start >= info->client_info.info.stack_end) {
368 * Thread context is in unhandled state, most likely because it is
369 * dying. We don't scan it.
370 * FIXME We should probably rework and check the valid flag instead.
372 info->client_info.stack_start = NULL;
/* The stopped instruction pointer is recorded in the binary protocol and the debug log. */
375 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
377 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
379 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
380 mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.info.stack_end : NULL);
/*
 * Resumes every thread that took part in the stop-the-world, waits for the
 * resume requests to complete and ends the global suspend started by
 * sgen_unified_suspend_stop_world.
 *
 * NOTE(review): the declaration of `reason`, the else line between the two
 * branches and the FOREACH_THREAD terminator are absent from this extract.
 */
385 sgen_unified_suspend_restart_world (void)
387 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
388 FOREACH_THREAD (info) {
390 if (sgen_is_thread_in_current_stw (info, &reason)) {
/*
 * NOTE(review): the resume request is issued inside g_assert. If g_assert
 * could ever be compiled out in this build, the thread would never be
 * resumed -- confirm that g_assert is always active here.
 */
391 g_assert (mono_thread_info_begin_resume (info));
392 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
394 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
/* Threads outside this STW are only logged. */
396 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
400 mono_threads_wait_pending_operations ();
401 mono_threads_end_global_suspend ();