3 * Stop the world functionality
6 * Paolo Molaro (lupus@ximian.com)
7 * Rodrigo Kumpera (kumpera@gmail.com)
9 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11 * Copyright 2011 Xamarin, Inc.
12 * Copyright (C) 2012 Xamarin Inc
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
/* Short local aliases for the SGen time-value helper macros. */
31 #define TV_DECLARE SGEN_TV_DECLARE
32 #define TV_GETTIME SGEN_TV_GETTIME
33 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations of the unified-suspend helpers used by the stop/restart entry points below. */
35 static void sgen_unified_suspend_restart_world (void);
36 static void sgen_unified_suspend_stop_world (void);
/*
 * Time value assigned by sgen_client_restart_world () from its end_sw value
 * and read back by mono_time_since_last_stw ().
 */
38 static TV_DECLARE (end_of_last_stw);
/*
 * mono_time_since_last_stw:
 *
 * Reads the current time with TV_GETTIME and returns
 * TV_ELAPSED (end_of_last_stw, current_time), i.e. the time elapsed since
 * the value stored in end_of_last_stw, in whatever unit TV_ELAPSED yields.
 *
 * NOTE(review): the statement guarded by the end_of_last_stw == 0 check is
 * not visible in this excerpt; presumably it returns early when no
 * stop-the-world has completed yet. Confirm against the full file.
 */
40 guint64 mono_time_since_last_stw ()
42 if (end_of_last_stw == 0)
45 TV_DECLARE (current_time);
46 TV_GETTIME (current_time);
47 return TV_ELAPSED (end_of_last_stw, current_time);
/* Counter incremented by sgen_client_stop_world () on every call, just before the world is stopped. */
50 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 *
 * Adds sizeof (gpointer) - 1 to p and then clears the low bits with the
 * mask ~(sizeof (gpointer) - 1), which rounds p up to the next multiple of
 * sizeof (gpointer) when that size is a power of two.
 *
 * NOTE(review): the declaration of p and the return statement are not
 * visible in this excerpt; presumably p is the integer value of ptr and the
 * rounded value is returned as a pointer. Confirm against the full file.
 */
53 align_pointer (void *ptr)
56 p += sizeof (gpointer) - 1;
57 p &= ~ (sizeof (gpointer) - 1);
/*
 * update_current_thread_stack:
 *
 * Records the stack start and register context of the calling thread in its
 * SgenThreadInfo, then forwards them to the runtime thread_suspend_func
 * callback when one is installed.
 *
 * NOTE(review): the start parameter is not referenced by any visible line,
 * and the declaration of stack_guard is not visible in this excerpt;
 * presumably stack_guard is a local whose address marks the current stack
 * position. Confirm against the full file.
 */
62 update_current_thread_stack (void *start)
65 SgenThreadInfo *info = mono_thread_info_current ();
/* The recorded stack start is the pointer-aligned address of stack_guard. */
67 info->client_info.stack_start = align_pointer (&stack_guard);
68 g_assert (info->client_info.stack_start);
/* The recorded start must lie inside [stack_start_limit, stack_end). */
69 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/*
 * Capture the current register context when the architecture provides
 * MonoContext support.
 * NOTE(review): the #else and #endif lines are not visible here; the g_error
 * call below presumably belongs to the #else branch.
 */
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
74 g_error ("Sgen STW requires a working mono-context");
/* Hand the runtime data and captured context to the suspend callback, if any. */
77 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/*
 * acquire_gc_locks:
 *
 * Takes the thread-info suspend lock via mono_thread_info_suspend_lock ().
 *
 * NOTE(review): the embedded line numbers show at least one statement of
 * this function missing from the excerpt, so more locking may happen here.
 */
82 acquire_gc_locks (void)
85 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 * Releases the thread-info suspend lock via
 * mono_thread_info_suspend_unlock ().
 *
 * NOTE(review): the embedded line numbers show lines of this function
 * missing from the excerpt, so more unlocking may happen here.
 */
89 release_gc_locks (void)
91 mono_thread_info_suspend_unlock ();
/* Time value sampled by sgen_client_stop_world () right before the suspend handshake starts. */
95 static TV_DECLARE (stop_world_time);
/* Largest pause observed so far, updated by sgen_client_restart_world (). */
96 static unsigned long max_pause_usec = 0;
/* Accumulated handshake durations, registered as the "World stop" and "World restart" counters. */
98 static guint64 time_stop_world;
99 static guint64 time_restart_world;
/*
 * sgen_client_stop_world:
 * @generation: value passed through to the profiler event and to
 * sgen_memgov_collection_start ().
 *
 * Client-side part of stopping the world: flushes pending profiler move
 * events, raises the PRE_STOP_WORLD_LOCKED profiler event, processes
 * toggle refs, records the stack of the calling thread, suspends the other
 * threads through sgen_unified_suspend_stop_world () and accounts the time
 * the handshake took in time_stop_world.
 *
 * NOTE(review): the embedded line numbers show statements missing between
 * the profiler move flush and the PRE_STOP_WORLD_LOCKED event; the comment
 * further down implies locks are taken there. Confirm against the full file.
 */
101 /* LOCKING: assumes the GC lock is held */
103 sgen_client_stop_world (int generation)
105 TV_DECLARE (end_handshake);
107 /* notify the profiler of the leftovers */
108 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
109 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
110 mono_sgen_gc_event_moves ();
114 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
116 /* We start to scan after locks are taken; this ensures we will not be interrupted. */
117 sgen_process_togglerefs ();
/* Record the stack start and context of the thread that drives the stop. */
119 update_current_thread_stack (&generation);
121 sgen_global_stop_count++;
122 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
/* stop_world_time is also read by sgen_client_restart_world () to compute the pause. */
123 TV_GETTIME (stop_world_time);
125 sgen_unified_suspend_stop_world ();
127 SGEN_LOG (3, "world stopped");
/* Account the duration of the suspend handshake. */
129 TV_GETTIME (end_handshake);
130 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
132 sgen_memgov_collection_start (generation);
133 if (sgen_need_bridge_processing ())
134 sgen_bridge_reset_data ();
/*
 * sgen_client_restart_world:
 * @generation: value passed through to the POST_START_WORLD_UNLOCKED
 * profiler event.
 * @stw_time: not referenced by any visible line.
 *
 * Client-side part of restarting the world: flushes pending profiler move
 * events, clears the per-thread stack start and context recorded while the
 * world was stopped, resumes the threads through
 * sgen_unified_suspend_restart_world (), updates the restart and pause
 * statistics, stores end_sw in end_of_last_stw and raises the
 * POST_START_WORLD_UNLOCKED profiler event.
 *
 * NOTE(review): the declarations of usec and end_sw, the point where end_sw
 * is sampled, the use of stw_time and the statements between the long
 * comment and the final profiler event are not visible in this excerpt.
 * Presumably stw_time receives the pause time and the locks are released
 * before the final event. Confirm against the full file.
 */
137 /* LOCKING: assumes the GC lock is held */
139 sgen_client_restart_world (int generation, gint64 *stw_time)
142 TV_DECLARE (start_handshake);
145 /* notify the profiler of the leftovers */
146 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
147 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
148 mono_sgen_gc_event_moves ();
/* Drop the stack start and saved context recorded for every thread. */
150 FOREACH_THREAD (info) {
151 info->client_info.stack_start = NULL;
152 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
155 TV_GETTIME (start_handshake);
157 sgen_unified_suspend_restart_world ();
/* Restart handshake time, then total pause measured from stop_world_time. */
160 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
161 usec = TV_ELAPSED (stop_world_time, end_sw);
162 max_pause_usec = MAX (usec, max_pause_usec);
163 end_of_last_stw = end_sw;
/* Both values are cast to int for the log message only. */
165 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
168 * We must release the thread info suspend lock after doing
169 * the thread handshake. Otherwise, if the GC stops the world
170 * and a thread is in the process of starting up, but has not
171 * yet registered (it's not in the thread_list), it is
172 * possible that the thread does register while the world is
173 * stopped. When restarting the GC will then try to restart
174 * said thread, but since it never got the suspend signal, it
175 * cannot answer the restart signal, so a deadlock results.
179 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
/*
 * mono_sgen_init_stw:
 *
 * Registers the accumulated stop and restart handshake times as the GC time
 * counters "World stop" and "World restart".
 */
185 mono_sgen_init_stw (void)
187 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
188 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
191 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to examine.
 * @reason: out parameter; some callers in this file pass NULL.
 *
 * Decides whether a thread takes part in the current stop-the-world. The
 * visible checks, in order, are:
 *
 * - client_info.gc_disabled is set;
 * - client_info.skip is set;
 * - info is the calling thread;
 * - the thread belongs to the sweep thread pool of the major collector or
 *   is an SGen worker thread;
 * - mono_thread_info_is_live () reports the thread as not live.
 *
 * The callers in this file treat a false result as "leave this thread alone".
 *
 * NOTE(review): the bodies of the checks, the values written to reason and
 * the return statements are not visible in this excerpt; presumably each
 * check stores a distinct reason code and returns FALSE, with TRUE returned
 * when no check matches. Confirm against the full file, including that a
 * NULL reason is tolerated.
 */
194 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
197 A thread explicitly asked to be skiped because it holds no managed state.
198 This is used by TP and finalizer threads.
199 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
201 if (info->client_info.gc_disabled) {
208 We have detected that this thread is failing/dying, ignore it.
209 FIXME: can't we merge this with thread_is_dying?
211 if (info->client_info.skip) {
218 Suspending the current thread will deadlock us, bad idea.
220 if (info == mono_thread_info_current ()) {
227 We can't suspend the workers that will do all the heavy lifting.
228 FIXME Use some state bit in SgenThreadInfo for this.
230 if (sgen_thread_pool_is_thread_pool_thread (major_collector.get_sweep_pool (), mono_thread_info_get_tid (info)) ||
231 sgen_workers_is_worker_thread (mono_thread_info_get_tid (info))) {
238 The thread has signaled that it started to detach, ignore it.
239 FIXME: can't we merge this with skip
241 if (!mono_thread_info_is_live (info)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 * Suspends the threads that take part in the current stop-the-world and
 * records, for each of them, the context and stack start kept in
 * client_info.
 *
 * As far as the visible lines show, the work happens in these steps:
 *
 * 1. mono_threads_begin_global_suspend () is called, then every thread has
 *    its skip and suspend_done flags cleared. Threads accepted by
 *    sgen_is_thread_in_current_stw () get mono_thread_info_begin_suspend ();
 *    a failed request is remembered in client_info.skip.
 * 2. The calling thread is marked suspend_done and the pending suspend
 *    operations are waited for.
 * 3. Retry phase: each participating thread not yet marked suspend_done is
 *    checked with mono_thread_info_in_critical_location (). A thread outside
 *    a critical location is marked suspend_done. For the others the suspend
 *    count must be exactly 1 (g_error otherwise) and the thread is resumed
 *    with mono_thread_info_begin_resume (); restart_counter counts the
 *    successful resumes. After waiting for those operations, the code
 *    yields when sleep_duration is negative; the lines that follow call
 *    g_usleep (sleep_duration) and add 10 to it. Threads that are still
 *    running are then asked to suspend again and the operations are waited
 *    for once more.
 * 4. Final pass: for every participating thread (asserted to be
 *    suspend_done) the saved suspend-state context is copied to
 *    client_info.ctx and stack_start is set to the saved stack pointer
 *    minus REDZONE_SIZE. A stack_start outside
 *    [stack_start_limit, stack_end) is replaced by NULL. The stopped IP is
 *    reported through binary_protocol_thread_suspend ().
 *
 * NOTE(review): the loop construct around step 3, the statement taken when
 * restart_counter == 0, the continue statements, the else branch of the
 * sleep logic and several declarations (reason, suspend_count, stopped_ip)
 * are not visible in this excerpt. Presumably step 3 repeats until a round
 * resumes no thread. Confirm against the full file.
 */
251 sgen_unified_suspend_stop_world (void)
/* A negative value means no retry round has slept yet. */
253 int sleep_duration = -1;
255 mono_threads_begin_global_suspend ();
256 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
/* Step 1: reset the per-thread flags and request the initial suspend. */
258 FOREACH_THREAD (info) {
259 info->client_info.skip = FALSE;
260 info->client_info.suspend_done = FALSE;
263 if (!sgen_is_thread_in_current_stw (info, &reason)) {
264 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
/* skip records that the suspend request was refused for this thread. */
268 info->client_info.skip = !mono_thread_info_begin_suspend (info);
270 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
/* Step 2: the calling thread counts as done; wait for the requests above. */
273 mono_thread_info_current ()->client_info.suspend_done = TRUE;
274 mono_threads_wait_pending_operations ();
/* Step 3: count how many threads had to be resumed in this round. */
277 gint restart_counter = 0;
279 FOREACH_THREAD (info) {
283 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
284 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
289 All threads that reach here are pristine suspended. This means the following:
291 - We haven't accepted the previous suspend as good.
292 - We haven't gave up on it for this STW (it's either bad or asked not to)
294 if (!mono_thread_info_in_critical_location (info)) {
295 info->client_info.suspend_done = TRUE;
297 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
301 suspend_count = mono_thread_info_suspend_count (info);
302 if (!(suspend_count == 1))
303 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
305 info->client_info.skip = !mono_thread_info_begin_resume (info);
306 if (!info->client_info.skip)
307 restart_counter += 1;
309 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
312 mono_threads_wait_pending_operations ();
314 if (restart_counter == 0)
317 if (sleep_duration < 0) {
318 mono_thread_info_yield ();
321 g_usleep (sleep_duration);
322 sleep_duration += 10;
325 FOREACH_THREAD (info) {
327 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
328 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
332 if (!mono_thread_info_is_running (info)) {
333 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
337 info->client_info.skip = !mono_thread_info_begin_suspend (info);
339 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
342 mono_threads_wait_pending_operations ();
345 FOREACH_THREAD (info) {
349 if (!sgen_is_thread_in_current_stw (info, &reason)) {
350 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
352 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
356 g_assert (info->client_info.suspend_done);
358 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
360 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
361 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
363 if (info->client_info.stack_start < info->client_info.stack_start_limit
364 || info->client_info.stack_start >= info->client_info.stack_end) {
366 * Thread context is in unhandled state, most likely because it is
367 * dying. We don't scan it.
368 * FIXME We should probably rework and check the valid flag instead.
370 info->client_info.stack_start = NULL;
373 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
375 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
377 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
378 mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
/*
 * sgen_unified_suspend_restart_world:
 *
 * Resumes every thread that sgen_is_thread_in_current_stw () reports as part
 * of the current stop-the-world, logs each resume to the binary protocol,
 * waits for the pending resume operations and ends the global suspend.
 *
 * NOTE(review): the declaration of reason, the else keyword in front of the
 * IGNORE debug line and the loop terminator are not visible in this excerpt.
 */
383 sgen_unified_suspend_restart_world (void)
385 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
386 FOREACH_THREAD (info) {
388 if (sgen_is_thread_in_current_stw (info, &reason)) {
/*
 * NOTE(review): the resume request is issued inside g_assert. If this build
 * can compile assertions out, the call would disappear with them; confirm
 * that g_assert is always active here.
 */
389 g_assert (mono_thread_info_begin_resume (info));
390 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
392 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
394 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
/* Wait for the resume requests, then close the global suspend opened by the stop path. */
398 mono_threads_wait_pending_operations ();
399 mono_threads_end_global_suspend ();