/*
 * sgen-stw.c: Stop the world functionality
 *
 * Authors:
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License 2.0 as published by the Free Software Foundation;
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License 2.0 along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
30 #include "sgen/sgen-gc.h"
31 #include "sgen/sgen-protocol.h"
32 #include "sgen/sgen-memory-governor.h"
33 #include "sgen/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "sgen/sgen-client.h"
36 #include "metadata/sgen-bridge-internals.h"
37 #include "metadata/gc-internals.h"
38 #include "utils/mono-threads.h"
40 #define TV_DECLARE SGEN_TV_DECLARE
41 #define TV_GETTIME SGEN_TV_GETTIME
42 #define TV_ELAPSED SGEN_TV_ELAPSED
44 static void sgen_unified_suspend_restart_world (void);
45 static void sgen_unified_suspend_stop_world (void);
47 unsigned int sgen_global_stop_count = 0;
50 align_pointer (void *ptr)
53 p += sizeof (gpointer) - 1;
54 p &= ~ (sizeof (gpointer) - 1);
59 static MonoContext cur_thread_ctx;
61 static mword cur_thread_regs [ARCH_NUM_REGS];
65 update_current_thread_stack (void *start)
68 #if !defined(USE_MONO_CTX)
69 void *reg_ptr = cur_thread_regs;
71 SgenThreadInfo *info = mono_thread_info_current ();
73 info->client_info.stack_start = align_pointer (&stack_guard);
74 g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
76 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
77 memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
78 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
79 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
81 ARCH_STORE_REGS (reg_ptr);
82 memcpy (&info->client_info.regs, reg_ptr, sizeof (info->client_info.regs));
83 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
84 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, NULL);
89 acquire_gc_locks (void)
92 mono_thread_info_suspend_lock ();
96 release_gc_locks (void)
98 mono_thread_info_suspend_unlock ();
102 static TV_DECLARE (stop_world_time);
103 static unsigned long max_pause_usec = 0;
105 static guint64 time_stop_world;
106 static guint64 time_restart_world;
108 /* LOCKING: assumes the GC lock is held */
110 sgen_client_stop_world (int generation)
112 TV_DECLARE (end_handshake);
114 /* notify the profiler of the leftovers */
115 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
116 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
117 mono_sgen_gc_event_moves ();
121 /* We start to scan after locks are taking, this ensures we won't be interrupted. */
122 sgen_process_togglerefs ();
124 update_current_thread_stack (&generation);
126 sgen_global_stop_count++;
127 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
128 TV_GETTIME (stop_world_time);
130 sgen_unified_suspend_stop_world ();
132 SGEN_LOG (3, "world stopped");
134 TV_GETTIME (end_handshake);
135 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
137 sgen_memgov_collection_start (generation);
138 if (sgen_need_bridge_processing ())
139 sgen_bridge_reset_data ();
142 /* LOCKING: assumes the GC lock is held */
144 sgen_client_restart_world (int generation, GGTimingInfo *timing)
146 SgenThreadInfo *info;
148 TV_DECLARE (start_handshake);
149 TV_DECLARE (end_bridge);
150 unsigned long usec, bridge_usec;
152 /* notify the profiler of the leftovers */
153 /* FIXME this is the wrong spot at we can STW for non collection reasons. */
154 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
155 mono_sgen_gc_event_moves ();
157 FOREACH_THREAD (info) {
158 info->client_info.stack_start = NULL;
160 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
162 memset (&info->client_info.regs, 0, sizeof (info->client_info.regs));
166 TV_GETTIME (start_handshake);
168 sgen_unified_suspend_restart_world ();
171 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
172 usec = TV_ELAPSED (stop_world_time, end_sw);
173 max_pause_usec = MAX (usec, max_pause_usec);
175 SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
178 * We must release the thread info suspend lock after doing
179 * the thread handshake. Otherwise, if the GC stops the world
180 * and a thread is in the process of starting up, but has not
181 * yet registered (it's not in the thread_list), it is
182 * possible that the thread does register while the world is
183 * stopped. When restarting the GC will then try to restart
184 * said thread, but since it never got the suspend signal, it
185 * cannot answer the restart signal, so a deadlock results.
189 TV_GETTIME (end_bridge);
190 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
193 timing [0].stw_time = usec;
194 timing [0].bridge_time = bridge_usec;
199 mono_sgen_init_stw (void)
201 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
202 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
205 /* Unified suspend code */
208 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
211 A thread explicitly asked to be skiped because it holds no managed state.
212 This is used by TP and finalizer threads.
213 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
215 if (info->client_info.gc_disabled) {
220 We have detected that this thread is failing/dying, ignore it.
221 FIXME: can't we merge this with thread_is_dying?
223 if (info->client_info.skip) {
228 Suspending the current thread will deadlock us, bad idea.
230 if (info == mono_thread_info_current ()) {
235 We can't suspend the workers that will do all the heavy lifting.
236 FIXME Use some state bit in SgenThreadInfo for this.
238 if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
243 The thread has signaled that it started to detach, ignore it.
244 FIXME: can't we merge this with skip
246 if (!mono_thread_info_is_live (info)) {
254 update_sgen_info (SgenThreadInfo *info)
258 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
259 info->client_info.stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
260 info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
261 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
263 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
264 if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
265 g_error ("BAD STACK");
267 info->client_info.stack_start = stack_start;
269 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
271 g_assert_not_reached ();
276 sgen_unified_suspend_stop_world (void)
279 SgenThreadInfo *info;
280 int sleep_duration = -1;
282 mono_threads_begin_global_suspend ();
283 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
285 FOREACH_THREAD_SAFE (info) {
286 info->client_info.skip = FALSE;
287 info->client_info.suspend_done = FALSE;
288 if (sgen_is_thread_in_current_stw (info)) {
289 info->client_info.skip = !mono_thread_info_begin_suspend (info, FALSE);
290 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
292 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
294 } END_FOREACH_THREAD_SAFE
296 mono_thread_info_current ()->client_info.suspend_done = TRUE;
297 mono_threads_wait_pending_operations ();
301 FOREACH_THREAD_SAFE (info) {
302 if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
303 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
308 All threads that reach here are pristine suspended. This means the following:
310 - We haven't accepted the previous suspend as good.
311 - We haven't gave up on it for this STW (it's either bad or asked not to)
313 if (!mono_thread_info_check_suspend_result (info)) {
314 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
315 info->client_info.skip = TRUE;
316 } else if (mono_thread_info_in_critical_location (info)) {
318 g_assert (mono_thread_info_suspend_count (info) == 1);
319 res = mono_thread_info_begin_resume (info);
320 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
324 info->client_info.skip = TRUE;
326 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
327 g_assert (!info->client_info.in_critical_region);
328 info->client_info.suspend_done = TRUE;
330 } END_FOREACH_THREAD_SAFE
332 if (restart_counter == 0)
334 mono_threads_wait_pending_operations ();
336 if (sleep_duration < 0) {
337 mono_thread_info_yield ();
340 g_usleep (sleep_duration);
341 sleep_duration += 10;
344 FOREACH_THREAD_SAFE (info) {
345 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
346 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
347 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
349 info->client_info.skip = TRUE;
351 } END_FOREACH_THREAD_SAFE
353 mono_threads_wait_pending_operations ();
356 FOREACH_THREAD_SAFE (info) {
357 if (sgen_is_thread_in_current_stw (info)) {
358 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
359 g_assert (info->client_info.suspend_done);
360 update_sgen_info (info);
362 g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
364 } END_FOREACH_THREAD_SAFE
368 sgen_unified_suspend_restart_world (void)
370 SgenThreadInfo *info;
372 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
373 FOREACH_THREAD_SAFE (info) {
374 if (sgen_is_thread_in_current_stw (info)) {
375 g_assert (mono_thread_info_begin_resume (info));
376 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
378 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
380 } END_FOREACH_THREAD_SAFE
382 mono_threads_wait_pending_operations ();
383 mono_threads_end_global_suspend ();