3 * Stop the world functionality
6 * Paolo Molaro (lupus@ximian.com)
7 * Rodrigo Kumpera (kumpera@gmail.com)
9 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11 * Copyright 2011 Xamarin, Inc.
12 * Copyright (C) 2012 Xamarin Inc
14 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
/* Short local aliases for the SGEN_TV_* timing macros used throughout this file. */
31 #define TV_DECLARE SGEN_TV_DECLARE
32 #define TV_GETTIME SGEN_TV_GETTIME
33 #define TV_ELAPSED SGEN_TV_ELAPSED
/* Forward declarations: both helpers are defined further down and called from the sgen_client_*_world entry points. */
35 static void sgen_unified_suspend_restart_world (void);
36 static void sgen_unified_suspend_stop_world (void);
/* Timestamp stored by sgen_client_restart_world () and read by mono_time_since_last_stw (). */
38 static TV_DECLARE (end_of_last_stw);
/*
 * mono_time_since_last_stw:
 *
 * Returns TV_ELAPSED between end_of_last_stw and a timestamp sampled now.
 * A zero end_of_last_stw is handled separately before the clock is read
 * (presumably meaning no restart has been recorded yet -- TODO confirm what
 * is returned in that case).
 */
40 guint64 mono_time_since_last_stw ()
42 if (end_of_last_stw == 0)
45 TV_DECLARE (current_time);
46 TV_GETTIME (current_time);
47 return TV_ELAPSED (end_of_last_stw, current_time);
/* Incremented once per sgen_client_stop_world () call; also printed in its log message. */
50 unsigned int sgen_global_stop_count = 0;
/*
 * align_pointer:
 *
 * Rounds an address up to the next multiple of sizeof (gpointer).
 * NOTE(review): `p` is presumably an integer copy of `ptr` that is cast back
 * to a pointer on return -- confirm.
 */
53 align_pointer (void *ptr)
/* Add (alignment - 1) and then clear the low bits: the usual round-up for a power-of-two alignment. */
56 p += sizeof (gpointer) - 1;
57 p &= ~ (sizeof (gpointer) - 1);
/*
 * update_current_thread_stack:
 *
 * Records the calling thread's own stack start and register context in its
 * SgenThreadInfo, then invokes the runtime's thread_suspend_func callback if
 * one is installed.
 */
62 update_current_thread_stack (void *start)
65 SgenThreadInfo *info = mono_thread_info_current ();
/* The pointer-aligned address of stack_guard is used as this thread's stack start (stack_guard is presumably a local of this function -- confirm). */
67 info->client_info.stack_start = align_pointer (&stack_guard);
68 g_assert (info->client_info.stack_start);
/* Sanity check: the recorded start must lie inside [stack_start_limit, stack_end). */
69 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
/* Capture the current register context when the architecture provides MonoContext; the g_error below is presumably the fallback branch of this #if -- confirm. */
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72 MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
74 g_error ("Sgen STW requires a working mono-context");
/* Hand the freshly captured context to the runtime callback, if any. */
77 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
/* acquire_gc_locks: takes the thread-info suspend lock; paired with release_gc_locks (). */
82 acquire_gc_locks (void)
85 mono_thread_info_suspend_lock ();
/* release_gc_locks: drops the thread-info suspend lock taken by acquire_gc_locks (). */
89 release_gc_locks (void)
91 mono_thread_info_suspend_unlock ();
/* Timestamp sampled in sgen_client_stop_world () just before threads are suspended; the pause length is measured from it. */
95 static TV_DECLARE (stop_world_time);
/* Largest pause observed so far, updated in sgen_client_restart_world (). */
96 static unsigned long max_pause_usec = 0;
/* Accumulated time spent stopping / restarting the world; both are registered as counters in mono_sgen_init_stw (). */
98 static guint64 time_stop_world;
99 static guint64 time_restart_world;
/*
 * sgen_client_stop_world:
 * @generation: passed through to the profiler events and to
 * sgen_memgov_collection_start ().
 *
 * Suspends the other threads via sgen_unified_suspend_stop_world (), emitting
 * profiler events around the handshake and adding the handshake duration to
 * time_stop_world.
 */
101 /* LOCKING: assumes the GC lock is held */
103 sgen_client_stop_world (int generation)
105 TV_DECLARE (end_handshake);
107 /* notify the profiler of the leftovers */
108 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
109 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
110 mono_sgen_gc_event_moves ();
112 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
116 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
118 /* We start to scan after the locks are taken; this ensures we won't be interrupted. */
119 sgen_process_togglerefs ();
/* Record the calling thread's own stack start and context (see update_current_thread_stack ()). */
121 update_current_thread_stack (&generation);
123 sgen_global_stop_count++;
124 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
/* Start of the pause: sgen_client_restart_world () measures the total pause from this timestamp. */
125 TV_GETTIME (stop_world_time);
127 sgen_unified_suspend_stop_world ();
129 SGEN_LOG (3, "world stopped");
131 mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
/* Account the time spent in the suspend handshake. */
133 TV_GETTIME (end_handshake);
134 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
136 sgen_memgov_collection_start (generation);
137 if (sgen_need_bridge_processing ())
138 sgen_bridge_reset_data ();
/*
 * sgen_client_restart_world:
 * @generation: passed through to the profiler events.
 * @stw_time: presumably an out-parameter receiving the pause length -- TODO
 * confirm against the full body.
 *
 * Clears the per-thread scan state recorded while the world was stopped,
 * resumes the threads via sgen_unified_suspend_restart_world (), and updates
 * the pause statistics (time_restart_world, max_pause_usec, end_of_last_stw).
 */
141 /* LOCKING: assumes the GC lock is held */
143 sgen_client_restart_world (int generation, gint64 *stw_time)
146 TV_DECLARE (start_handshake);
149 /* notify the profiler of the leftovers */
150 /* FIXME this is the wrong spot as we can STW for non-collection reasons. */
151 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
152 mono_sgen_gc_event_moves ();
154 mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
/* Forget the stack start and saved context of every thread before letting them run again. */
156 FOREACH_THREAD (info) {
157 info->client_info.stack_start = NULL;
158 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
161 TV_GETTIME (start_handshake);
163 sgen_unified_suspend_restart_world ();
/* end_sw is presumably sampled right after the restart -- confirm. The total pause is measured from stop_world_time, which sgen_client_stop_world () set. */
166 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
167 usec = TV_ELAPSED (stop_world_time, end_sw);
168 max_pause_usec = MAX (usec, max_pause_usec);
169 end_of_last_stw = end_sw;
171 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
173 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
176 * We must release the thread info suspend lock after doing
177 * the thread handshake. Otherwise, if the GC stops the world
178 * and a thread is in the process of starting up, but has not
179 * yet registered (it's not in the thread_list), it is
180 * possible that the thread does register while the world is
181 * stopped. When restarting the GC will then try to restart
182 * said thread, but since it never got the suspend signal, it
183 * cannot answer the restart signal, so a deadlock results.
187 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
/* mono_sgen_init_stw: registers the accumulated stop/restart times as GC time counters named "World stop" and "World restart". */
193 mono_sgen_init_stw (void)
195 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
196 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
199 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 * @info: thread to classify.
 * @reason: optional out-parameter (one caller passes NULL); callers print it
 * in debug logs when the thread is ignored. Presumably it is set to a code
 * identifying which check below matched -- TODO confirm.
 *
 * Tells the suspend/resume loops whether @info takes part in the current
 * stop-the-world. The checks are, in order: GC disabled for the thread, the
 * thread's skip flag, the thread being the caller itself, the thread being a
 * sweep-pool or worker thread, and the thread no longer being live.
 */
202 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
205 A thread explicitly asked to be skiped because it holds no managed state.
206 This is used by TP and finalizer threads.
207 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
209 if (info->client_info.gc_disabled) {
216 We have detected that this thread is failing/dying, ignore it.
217 FIXME: can't we merge this with thread_is_dying?
219 if (info->client_info.skip) {
226 Suspending the current thread will deadlock us, bad idea.
228 if (info == mono_thread_info_current ()) {
235 We can't suspend the workers that will do all the heavy lifting.
236 FIXME Use some state bit in SgenThreadInfo for this.
238 if (sgen_thread_pool_is_thread_pool_thread (major_collector.get_sweep_pool (), mono_thread_info_get_tid (info)) ||
239 sgen_workers_is_worker_thread (mono_thread_info_get_tid (info))) {
246 The thread has signaled that it started to detach, ignore it.
247 FIXME: can't we merge this with skip
249 if (!mono_thread_info_is_live (info)) {
/*
 * sgen_unified_suspend_stop_world:
 *
 * Suspends every thread that sgen_is_thread_in_current_stw () accepts and
 * records the stack start and context the collector will scan for it.
 *
 * Outline of the visible code:
 *  1. Request suspension of every participating thread and wait.
 *  2. For each thread not yet marked suspend_done: mark it done if it is not
 *     in a critical location, otherwise resume it so it can move on.
 *  3. If any thread was resumed, yield or sleep, re-suspend the threads that
 *     are running again, wait, and re-check (presumably by looping back to
 *     step 2 -- TODO confirm the loop structure).
 *  4. Copy each suspended thread's context and derive its stack start.
 */
259 sgen_unified_suspend_stop_world (void)
/* A negative value selects a plain yield for the first wait below instead of a timed sleep. */
261 int sleep_duration = -1;
263 mono_threads_begin_global_suspend ();
264 THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
/* Step 1: reset the per-STW flags of every thread, then request suspension of the participating ones. */
266 FOREACH_THREAD (info) {
267 info->client_info.skip = FALSE;
268 info->client_info.suspend_done = FALSE;
271 if (!sgen_is_thread_in_current_stw (info, &reason)) {
272 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
/* A failed suspend request marks the thread as skipped, which makes later sgen_is_thread_in_current_stw () calls reject it. */
276 info->client_info.skip = !mono_thread_info_begin_suspend (info);
278 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
/* The calling thread is flagged as done up front so the passes below leave it alone. */
281 mono_thread_info_current ()->client_info.suspend_done = TRUE;
282 mono_threads_wait_pending_operations ();
/* Step 2: counts the threads that had to be resumed in this round. */
285 gint restart_counter = 0;
287 FOREACH_THREAD (info) {
291 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
292 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
297 All threads that reach here are pristine suspended. This means the following:
299 - We haven't accepted the previous suspend as good.
300 - We haven't gave up on it for this STW (it's either bad or asked not to)
302 if (!mono_thread_info_in_critical_location (info)) {
303 info->client_info.suspend_done = TRUE;
305 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
/* Threads stopped in a critical location are resumed instead; they are expected to carry exactly one pending suspend at this point. */
309 suspend_count = mono_thread_info_suspend_count (info);
310 if (!(suspend_count == 1))
311 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
313 info->client_info.skip = !mono_thread_info_begin_resume (info);
314 if (!info->client_info.skip)
315 restart_counter += 1;
317 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
320 mono_threads_wait_pending_operations ();
/* Nothing was resumed, so every participating thread is settled (presumably this ends the retry loop -- confirm). */
322 if (restart_counter == 0)
/* Step 3: give the resumed threads time to leave their critical location; the sleep grows by 10 on each timed round. */
325 if (sleep_duration < 0) {
326 mono_thread_info_yield ();
329 g_usleep (sleep_duration);
330 sleep_duration += 10;
/* Re-request suspension of the threads that are still pending and currently running. */
333 FOREACH_THREAD (info) {
335 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
336 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
340 if (!mono_thread_info_is_running (info)) {
341 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
345 info->client_info.skip = !mono_thread_info_begin_suspend (info);
347 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
350 mono_threads_wait_pending_operations ();
/* Step 4: publish the scan state (context and stack start) of every suspended thread. */
353 FOREACH_THREAD (info) {
357 if (!sgen_is_thread_in_current_stw (info, &reason)) {
/* A non-participating thread must not be marked done, except for the caller, which was flagged above. */
358 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
360 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
364 g_assert (info->client_info.suspend_done);
366 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
368 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
/* The stack start is the saved stack pointer lowered by REDZONE_SIZE. */
369 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
371 if (info->client_info.stack_start < info->client_info.stack_start_limit
372 || info->client_info.stack_start >= info->client_info.stack_end) {
374 * Thread context is in unhandled state, most likely because it is
375 * dying. We don't scan it.
376 * FIXME We should probably rework and check the valid flag instead.
378 info->client_info.stack_start = NULL;
381 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
383 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
385 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
386 mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
/*
 * sgen_unified_suspend_restart_world:
 *
 * Resumes every thread that sgen_is_thread_in_current_stw () accepts, waits
 * for the pending resume operations, and ends the global suspend started by
 * sgen_unified_suspend_stop_world ().
 */
391 sgen_unified_suspend_restart_world (void)
393 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
394 FOREACH_THREAD (info) {
396 if (sgen_is_thread_in_current_stw (info, &reason)) {
/* NOTE(review): the resume call is made inside g_assert, so it only runs if the assertion expression is always evaluated -- confirm assertions cannot be compiled out in this build. */
397 g_assert (mono_thread_info_begin_resume (info));
398 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
400 binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
402 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
406 mono_threads_wait_pending_operations ();
407 mono_threads_end_global_suspend ();