2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28 #include "utils/mono-threads-debug.h"
/*
 * Local shorthands for the SGen timing macros, forward declarations for the
 * two unified-suspend helpers defined further down in this file, and the
 * timestamp recorded when the most recent stop-the-world finished (assigned
 * in sgen_client_restart_world, read by mono_time_since_last_stw).
 */
30 #define TV_DECLARE SGEN_TV_DECLARE
31 #define TV_GETTIME SGEN_TV_GETTIME
32 #define TV_ELAPSED SGEN_TV_ELAPSED
34 static void sgen_unified_suspend_restart_world (void);
35 static void sgen_unified_suspend_stop_world (void);
37 static TV_DECLARE (end_of_last_stw);
/*
 * mono_time_since_last_stw:
 *
 * Returns TV_ELAPSED between end_of_last_stw (set when the world was last
 * restarted) and the current time. The unit is whatever TV_ELAPSED yields;
 * it is not established in this file.
 * A zero end_of_last_stw (no restart recorded yet) is special-cased first.
 * NOTE(review): the statement executed for that case is not visible in this
 * extract, presumably an early return. Confirm against the full source.
 */
39 guint64 mono_time_since_last_stw ()
41 if (end_of_last_stw == 0)
44 TV_DECLARE (current_time);
45 TV_GETTIME (current_time);
46 return TV_ELAPSED (end_of_last_stw, current_time);
/* Number of stop-the-world requests so far; incremented once per call to sgen_client_stop_world. */
49 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 *
 * Rounds an address up to the next multiple of sizeof (gpointer): add
 * (size - 1), then clear the low bits. An already aligned value is unchanged.
 * NOTE(review): the declaration of p and the return statement are not
 * visible in this extract; presumably p is the integer form of ptr and the
 * rounded value is returned. Confirm against the full source.
 */
52 align_pointer (void *ptr)
55 p += sizeof (gpointer) - 1;
56 p &= ~ (sizeof (gpointer) - 1);
/*
 * update_current_thread_stack:
 *
 * Records the stack position and register context of the calling thread in
 * its SgenThreadInfo, then forwards the context to the runtime
 * thread_suspend_func callback when one is installed.
 * The start parameter is not referenced by any line visible here.
 * NOTE(review): the declaration of stack_guard is not visible in this
 * extract, presumably a local of this function. Confirm.
 */
61 update_current_thread_stack (void *start)
64 SgenThreadInfo *info = mono_thread_info_current ();
/* Use the pointer-aligned address of stack_guard as the stack start of this thread. */
66 info->client_info.stack_start = align_pointer (&stack_guard);
67 g_assert (info->client_info.stack_start);
/* The recorded start must lie inside [stack_start_limit, stack_end). */
68 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/*
 * Capture the current register context when the build provides MonoContext.
 * NOTE(review): the else and endif lines are not visible; the g_error below
 * is presumably the fallback branch of this conditional.
 */
70 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
71 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
73 g_error ("Sgen STW requires a working mono-context");
/* Hand the captured context to the runtime callback, if any is registered. */
76 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
77 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * acquire_gc_locks:
 *
 * Takes the thread-info suspend lock. Counterpart of release_gc_locks.
 * NOTE(review): only this one statement is visible in the extract.
 */
81 acquire_gc_locks (void)
84 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 * Releases the thread-info suspend lock. Counterpart of acquire_gc_locks.
 * NOTE(review): only this one statement is visible in the extract.
 */
88 release_gc_locks (void)
90 mono_thread_info_suspend_unlock ();
/*
 * Timing state shared by sgen_client_stop_world and sgen_client_restart_world:
 * - stop_world_time: timestamp taken just before threads are suspended.
 * - max_pause_usec: largest stop-to-restart interval observed so far.
 * - time_stop_world / time_restart_world: accumulated handshake durations,
 *   exported as the "World stop" and "World restart" counters.
 */
94 static TV_DECLARE (stop_world_time);
95 static unsigned long max_pause_usec = 0;
97 static guint64 time_stop_world;
98 static guint64 time_restart_world;
/*
 * sgen_client_stop_world:
 * @generation: passed to the profiler event and to sgen_memgov_collection_start
 *
 * Stops the world. In order, the visible code: flushes pending profiler move
 * events, emits the PRE_STOP_WORLD_LOCKED profiler event, processes toggle
 * refs, records the stack and context of the calling thread, bumps
 * sgen_global_stop_count, suspends the other threads through
 * sgen_unified_suspend_stop_world while timing the handshake into
 * time_stop_world, notifies the memory governor, and resets bridge data when
 * bridge processing is needed.
 * NOTE(review): some short lines of this function are missing from the
 * extract, so additional steps may exist between the ones listed.
 */
100 /* LOCKING: assumes the GC lock is held */
102 sgen_client_stop_world (int generation)
104 TV_DECLARE (end_handshake);
106 /* notify the profiler of the leftovers */
107 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
108 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
109 mono_sgen_gc_event_moves ();
113 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
115 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
116 sgen_process_togglerefs ();
118 update_current_thread_stack (&generation);
120 sgen_global_stop_count++;
121 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
122 TV_GETTIME (stop_world_time);
124 sgen_unified_suspend_stop_world ();
126 SGEN_LOG (3, "world stopped");
128 TV_GETTIME (end_handshake);
129 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
131 sgen_memgov_collection_start (generation);
132 if (sgen_need_bridge_processing ())
133 sgen_bridge_reset_data ();
/*
 * sgen_client_restart_world:
 * @generation: passed to the POST_START_WORLD_UNLOCKED profiler event
 * @stw_time: not referenced by any line visible in this extract
 *
 * Restarts the world. The visible code flushes pending profiler move events,
 * clears the stack_start and saved context of every thread, resumes the
 * threads through sgen_unified_suspend_restart_world, updates the restart and
 * pause timing statistics, remembers the restart time in end_of_last_stw,
 * and emits the POST_START_WORLD_UNLOCKED profiler event.
 * NOTE(review): the declarations of end_sw and usec, the point where end_sw
 * is sampled, and any use of stw_time are not visible in this extract.
 */
136 /* LOCKING: assumes the GC lock is held */
138 sgen_client_restart_world (int generation, gint64 *stw_time)
141 TV_DECLARE (start_handshake);
144 /* notify the profiler of the leftovers */
145 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
146 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
147 mono_sgen_gc_event_moves ();
/* Drop the per-thread stack bounds and contexts captured while the world was stopped. */
149 FOREACH_THREAD (info) {
150 info->client_info.stack_start = NULL;
151 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
154 TV_GETTIME (start_handshake);
156 sgen_unified_suspend_restart_world ();
/* Restart handshake cost, then the full pause measured from stop_world_time. */
159 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
160 usec = TV_ELAPSED (stop_world_time, end_sw);
161 max_pause_usec = MAX (usec, max_pause_usec);
162 end_of_last_stw = end_sw;
164 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
167 * We must release the thread info suspend lock after doing
168 * the thread handshake. Otherwise, if the GC stops the world
169 * and a thread is in the process of starting up, but has not
170 * yet registered (it's not in the thread_list), it is
171 * possible that the thread does register while the world is
172 * stopped. When restarting the GC will then try to restart
173 * said thread, but since it never got the suspend signal, it
174 * cannot answer the restart signal, so a deadlock results.
178 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
/*
 * mono_sgen_init_stw:
 *
 * Registers the accumulated stop and restart handshake times as the GC time
 * counters "World stop" and "World restart".
 */
184 mono_sgen_init_stw (void)
186 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
187 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
190 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify
 * @reason: out parameter printed by callers as "reason %d"; one caller in
 *          this file passes NULL
 *
 * Decides whether a thread takes part in the current stop-the-world. Callers
 * treat a true result as "suspend, inspect and later resume this thread".
 * Five conditions are tested in order: GC disabled for the thread, thread
 * already marked skip, thread is the caller itself, thread is an SGen thread
 * pool thread, thread is no longer live.
 * NOTE(review): the body of each branch and the final return are not visible
 * in this extract; presumably each branch stores a distinct reason code and
 * reports the thread as excluded. Confirm against the full source.
 */
193 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
196 A thread explicitly asked to be skiped because it holds no managed state.
197 This is used by TP and finalizer threads.
198 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
200 if (info->client_info.gc_disabled) {
207 We have detected that this thread is failing/dying, ignore it.
208 FIXME: can't we merge this with thread_is_dying?
210 if (info->client_info.skip) {
217 Suspending the current thread will deadlock us, bad idea.
219 if (info == mono_thread_info_current ()) {
226 We can't suspend the workers that will do all the heavy lifting.
227 FIXME Use some state bit in SgenThreadInfo for this.
229 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
236 The thread has signaled that it started to detach, ignore it.
237 FIXME: can't we merge this with skip
239 if (!mono_thread_info_is_live (info)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 * Suspends every thread that sgen_is_thread_in_current_stw accepts and
 * records where each one stopped. Visible phases:
 *  1. Begin a global suspend and request suspension of each accepted thread;
 *     a thread whose suspend request fails is marked skip.
 *  2. Retry phase: a suspended thread found in a critical location is resumed
 *     again (its suspend count is required to be exactly 1); any other thread
 *     is marked suspend_done. After waiting (a yield at first, then a
 *     g_usleep whose duration grows by 10 each time), threads that are
 *     running again get a new suspend request.
 *  3. For each accepted thread, copy the suspended context, derive
 *     stack_start from the stack pointer minus REDZONE_SIZE, abort with
 *     "BAD STACK" when that falls outside [stack_start_limit, stack_end),
 *     and log the stopped instruction pointer to the binary protocol.
 * NOTE(review): the loop construct around phase 2, the handling of
 * restart_counter, and several else/continue lines are not visible in this
 * extract; presumably phase 2 repeats until no thread had to be restarted.
 * Confirm against the full source.
 */
249 sgen_unified_suspend_stop_world (void)
252 int sleep_duration = -1;
254 mono_threads_begin_global_suspend ();
255 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
/* Phase 1: reset the per-thread STW flags and issue the initial suspend requests. */
257 FOREACH_THREAD (info) {
259 info->client_info.skip = FALSE;
260 info->client_info.suspend_done = FALSE;
261 if (sgen_is_thread_in_current_stw (info, &reason)) {
262 info->client_info.skip = !mono_thread_info_begin_suspend (info);
263 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
265 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
/* The calling thread is never suspended; flag it as done, then wait for the requests above. */
269 mono_thread_info_current ()->client_info.suspend_done = TRUE;
270 mono_threads_wait_pending_operations ();
/* Phase 2: find threads stopped in a critical location and let them run on. */
274 FOREACH_THREAD (info) {
276 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
277 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
282 All threads that reach here are pristine suspended. This means the following:
284 - We haven't accepted the previous suspend as good.
285 - We haven't gave up on it for this STW (it's either bad or asked not to)
287 if (mono_thread_info_in_critical_location (info)) {
289 gint suspend_count = mono_thread_info_suspend_count (info);
290 if (!(suspend_count == 1))
291 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
292 res = mono_thread_info_begin_resume (info);
296 info->client_info.skip = TRUE;
297 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
299 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
300 g_assert (!info->client_info.in_critical_region);
301 info->client_info.suspend_done = TRUE;
305 if (restart_counter == 0)
307 mono_threads_wait_pending_operations ();
309 if (sleep_duration < 0) {
310 mono_thread_info_yield ();
313 g_usleep (sleep_duration);
314 sleep_duration += 10;
317 FOREACH_THREAD (info) {
319 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
320 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
324 if (mono_thread_info_is_running (info)) {
325 gboolean res = mono_thread_info_begin_suspend (info);
327 info->client_info.skip = TRUE;
328 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
332 mono_threads_wait_pending_operations ();
335 FOREACH_THREAD (info) {
337 if (sgen_is_thread_in_current_stw (info, &reason)) {
340 g_assert (info->client_info.suspend_done);
342 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
344 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
345 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
347 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
348 if (!info->client_info.stack_start
349 || info->client_info.stack_start < info->client_info.stack_start_limit
350 || info->client_info.stack_start >= info->client_info.stack_end) {
351 g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
352 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
355 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
357 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
358 mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
360 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
362 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
363 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
/*
 * sgen_unified_suspend_restart_world:
 *
 * Resumes every thread that sgen_is_thread_in_current_stw accepts, logs each
 * resume to the binary protocol, waits for the pending resume operations and
 * then ends the global suspend started by sgen_unified_suspend_stop_world.
 * NOTE(review): the declaration of reason and the else line separating the
 * RESUME and IGNORE paths are not visible in this extract.
 */
369 sgen_unified_suspend_restart_world (void)
371 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
372 FOREACH_THREAD (info) {
374 if (sgen_is_thread_in_current_stw (info, &reason)) {
/*
 * NOTE(review): the resume call is an argument of g_assert. If g_assert can
 * be compiled out in some build configuration, the thread would never be
 * resumed. Confirm, or hoist the call into a local and assert on that.
 */
375 g_assert (mono_thread_info_begin_resume (info));
376 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
378 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
380 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
/* Wait for every resume request to complete before closing the global suspend. */
384 mono_threads_wait_pending_operations ();
385 mono_threads_end_global_suspend ();