2 * sgen-stw.c: Stop the world functionality
5 * Paolo Molaro (lupus@ximian.com)
6 * Rodrigo Kumpera (kumpera@gmail.com)
8 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10 * Copyright 2011 Xamarin, Inc.
11 * Copyright (C) 2012 Xamarin Inc
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Library General Public
15 * License 2.0 as published by the Free Software Foundation;
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License 2.0 along with this library; if not, write to the Free
24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/profiler-private.h"
34 #include "utils/mono-time.h"
35 #include "utils/dtrace.h"
36 #include "utils/mono-counters.h"
37 #include "utils/mono-threads.h"
/* Local shorthands for the SGEN_TV_* macros; they are used below to sample and diff timestamps. */
39 #define TV_DECLARE SGEN_TV_DECLARE
40 #define TV_GETTIME SGEN_TV_GETTIME
41 #define TV_ELAPSED SGEN_TV_ELAPSED
/*
 * Forward declarations: both functions are defined further down in this file
 * and are called from sgen_stop_world () and sgen_restart_world ().
 */
43 static int sgen_unified_suspend_restart_world (void);
44 static int sgen_unified_suspend_stop_world (void);
/*
 * align_pointer:
 *
 *   Rounds the working value `p` up to the next multiple of sizeof (gpointer):
 * (size - 1) is added and the low bits are then masked off. The masking trick
 * is only correct when sizeof (gpointer) is a power of two.
 *   NOTE(review): `p` is presumably the integer value of @ptr and is handed
 * back to the caller as a pointer -- confirm.
 */
47 align_pointer (void *ptr)
50 p += sizeof (gpointer) - 1;
51 p &= ~ (sizeof (gpointer) - 1);
/*
 * Scratch storage used by update_current_thread_stack () to capture the
 * calling thread's state: a full MonoContext, or an array of ARCH_NUM_REGS
 * register words.
 *   NOTE(review): presumably only one of the two is compiled in, depending on
 * USE_MONO_CTX -- confirm.
 */
56 static MonoContext cur_thread_ctx;
58 static mword cur_thread_regs [ARCH_NUM_REGS];
/*
 * update_current_thread_stack:
 *
 *   Records the calling thread's own stack position and register state in its
 * SgenThreadInfo (obtained through mono_thread_info_current ()).
 *
 *   info->stack_start is set to the pointer-aligned address of `stack_guard`
 * and is asserted to lie inside [info->stack_start_limit, info->stack_end).
 *
 *   Two capture variants are visible below: one snapshots a MonoContext into
 * info->ctx, the other stores the registers into info->regs. Both then invoke
 * the optional thread_suspend_func GC callback, passing &info->ctx or NULL
 * respectively as the last argument.
 *   NOTE(review): the variants are presumably selected by USE_MONO_CTX, like
 * the reg_ptr declaration -- confirm.
 *
 *   @start is not referenced by any of the statements shown here.
 */
62 update_current_thread_stack (void *start)
65 #if !defined(USE_MONO_CTX)
66 void *reg_ptr = cur_thread_regs;
68 SgenThreadInfo *info = mono_thread_info_current ();
70 info->stack_start = align_pointer (&stack_guard);
71 g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
/* Variant 1: snapshot the whole MonoContext and hand it to the callback. */
73 MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
74 memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
75 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
76 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
/* Variant 2: store the raw registers; the callback gets no context. */
78 ARCH_STORE_REGS (reg_ptr);
79 memcpy (&info->regs, reg_ptr, sizeof (info->regs));
80 if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
81 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
/*
 * is_ip_in_managed_allocator:
 *
 *   Decides whether @ip, looked up in @domain, falls inside a method that sgen
 * treats as GC critical: the final result is sgen_is_critical_method ()
 * applied to the method of the jit info found for @ip. The lookup is done
 * with mono_jit_info_table_find_internal () and FALSE as its last argument.
 *
 *   Two guard checks come before the lookup: the caller having no internal
 * MonoThread, and sgen_has_critical_method () reporting false.
 *   NOTE(review): the value returned by the guards (presumably FALSE) and the
 * handling of a failed jit info lookup are not shown in these lines -- confirm.
 */
86 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
90 if (!mono_thread_internal_current ())
91 /* Happens during thread attach */
96 if (!sgen_has_critical_method ())
100 * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
101 * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
102 * to register the jit info for all GC critical methods after they are JITted/loaded.
104 ji = mono_jit_info_table_find_internal (domain, ip, FALSE);
108 return sgen_is_critical_method (mono_jit_info_get_method (ji));
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 *   Briefly resumes suspended threads that stopped at a place the collector
 * cannot work with, lets them run, and suspends them again.
 *
 *   A thread that is not skipped, not gc_disabled and not yet suspend_done is
 * resumed when it is live and either has no stack_start, is flagged as being
 * in a critical region, or its stopped_ip is inside a managed allocator (see
 * is_ip_in_managed_allocator ()). Any other such thread is accepted as it is:
 * its stopped_ip and stopped_domain are cleared and suspend_done is set.
 *
 *   If nothing was resumed the work is finished. Otherwise the function waits
 * for the acknowledgements, yields (while sleep_duration is negative) or
 * sleeps for sleep_duration, which grows by 10 each time, and then suspends
 * every non-skipped thread that still has a stopped_ip.
 *
 *   Returns the accumulated number of threads found dead, computed as
 * restart_count - restarted_count.
 *   NOTE(review): the retry loop and the bookkeeping of restart_count,
 * restarted_count and `result` are not shown in these lines -- confirm.
 */
112 restart_threads_until_none_in_managed_allocator (void)
114 SgenThreadInfo *info;
115 int num_threads_died = 0;
116 int sleep_duration = -1;
119 int restart_count = 0, restarted_count = 0;
120 /* restart all threads that stopped in the
122 FOREACH_THREAD_SAFE (info) {
124 if (info->skip || info->gc_disabled || info->suspend_done)
126 if (mono_thread_info_is_live (info) && (!info->stack_start || info->in_critical_region || info->info.inside_critical_region ||
127 is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) {
128 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
129 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->info.native_handle);
130 result = sgen_resume_thread (info);
137 /* we set the stopped_ip to
138 NULL for threads which
139 we're not restarting so
140 that we can easily identify
142 info->stopped_ip = NULL;
143 info->stopped_domain = NULL;
144 info->suspend_done = TRUE;
146 } END_FOREACH_THREAD_SAFE
147 /* if no threads were restarted, we're done */
148 if (restart_count == 0)
151 /* wait for the threads to signal their restart */
152 sgen_wait_for_suspend_ack (restart_count);
154 if (sleep_duration < 0) {
155 mono_thread_info_yield ();
158 g_usleep (sleep_duration);
159 sleep_duration += 10;
162 /* stop them again */
163 FOREACH_THREAD (info) {
165 if (info->skip || info->stopped_ip == NULL)
167 result = sgen_suspend_thread (info);
175 /* some threads might have died */
176 num_threads_died += restart_count - restarted_count;
177 /* wait for the threads to signal their suspension
179 sgen_wait_for_suspend_ack (restarted_count);
182 return num_threads_died;
/*
 * acquire_gc_locks:
 *
 *   Takes the thread-info suspend lock; release_gc_locks () is the matching
 * unlock.
 *   NOTE(review): any further locking done here is not shown in these lines.
 */
186 acquire_gc_locks (void)
189 mono_thread_info_suspend_lock ();
/*
 * release_gc_locks:
 *
 *   Releases the thread-info suspend lock taken by acquire_gc_locks ().
 *   NOTE(review): any further unlocking done here is not shown in these lines.
 */
193 release_gc_locks (void)
195 mono_thread_info_suspend_unlock ();
/*
 * count_cards:
 *
 *   Collects card statistics for the binary protocol: the major collector's
 * count_cards hook fills @major_total and @major_marked, and
 * sgen_los_count_cards () fills @los_total and @los_marked.
 */
200 count_cards (long long *major_total, long long *major_marked, long long *los_total, long long *los_marked)
202 sgen_get_major_collector ()->count_cards (major_total, major_marked);
203 sgen_los_count_cards (los_total, los_marked);
/*
 * Timing state shared by sgen_stop_world () and sgen_restart_world ():
 * stop_world_time is sampled when a stop begins and is the start point of the
 * pause length computed on restart; max_pause_usec keeps the largest such
 * pause; time_stop_world and time_restart_world accumulate the elapsed time
 * of the stop and restart handshakes.
 */
206 static TV_DECLARE (stop_world_time);
207 static unsigned long max_pause_usec = 0;
209 static guint64 time_stop_world;
210 static guint64 time_restart_world;
/*
 * sgen_stop_world:
 * @generation: forwarded to the profiler events and to
 * sgen_memgov_collection_start ().
 *
 *   Stops the world. In order: emits the pre-stop profiler event, the
 * MONO_GC_WORLD_STOP_BEGIN () probe and the binary protocol record, processes
 * toggle refs, records the calling thread's own stack through
 * update_current_thread_stack (), bumps sgen_global_stop_count, samples
 * stop_world_time and suspends the other threads. Suspension goes through
 * sgen_unified_suspend_stop_world () when unified thread management is
 * enabled; the other visible path is sgen_thread_handshake (TRUE) followed by
 * restart_threads_until_none_in_managed_allocator ().
 *
 *   Afterwards it emits the post-stop events (with card counts when the heavy
 * binary protocol is enabled, -1 placeholders otherwise), adds the handshake
 * duration to time_stop_world, notifies the memory governor and resets the
 * bridge data if bridge processing is needed.
 *   NOTE(review): the return value and the condition guarding the g_error ()
 * call are not shown in these lines -- confirm.
 */
212 /* LOCKING: assumes the GC lock is held */
214 sgen_stop_world (int generation)
216 TV_DECLARE (end_handshake);
219 mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
220 MONO_GC_WORLD_STOP_BEGIN ();
221 binary_protocol_world_stopping (sgen_timestamp ());
224 /* We start to scan after locks are taken, this ensures we won't be interrupted. */
225 sgen_process_togglerefs ();
227 update_current_thread_stack (&count);
229 sgen_global_stop_count++;
230 SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
231 TV_GETTIME (stop_world_time);
233 if (mono_thread_info_unified_management_enabled ()) {
234 count = sgen_unified_suspend_stop_world ();
236 count = sgen_thread_handshake (TRUE);
237 dead = restart_threads_until_none_in_managed_allocator ();
239 g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
243 SGEN_LOG (3, "world stopped %d thread(s)", count);
244 mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
245 MONO_GC_WORLD_STOP_END ();
246 if (binary_protocol_is_enabled ()) {
247 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
248 if (binary_protocol_is_heavy_enabled ())
249 count_cards (&major_total, &major_marked, &los_total, &los_marked);
250 binary_protocol_world_stopped (sgen_timestamp (), major_total, major_marked, los_total, los_marked);
/* Only the time spent getting every thread stopped is charged to time_stop_world. */
253 TV_GETTIME (end_handshake);
254 time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
256 sgen_memgov_collection_start (generation);
257 if (sgen_need_bridge_processing ())
258 sgen_bridge_reset_data ();
/*
 * sgen_restart_world:
 * @generation: forwarded to the profiler, probe, binary protocol, bridge and
 * memory governor calls.
 * @timing: receives the pause and bridge times in its first element. The
 * final memory governor call tests it against NULL, so it appears to be
 * optional.
 *
 *   Restarts the world. In order: emits the binary protocol record (with
 * card counts when the heavy protocol is enabled), reports moves to the
 * profiler when MONO_PROFILE_GC_MOVES is set, emits the pre-start events,
 * clears stack_start and the saved context/registers of every thread, and
 * resumes the threads through sgen_unified_suspend_restart_world () or
 * sgen_thread_handshake (FALSE).
 *
 *   It then accounts the restart time, computes the total pause since
 * stop_world_time and updates max_pause_usec, emits the post-start events,
 * sets sgen_try_free_some_memory, finishes bridge processing if needed and
 * reports the timings to the memory governor.
 *   NOTE(review): the sampling of end_sw, the guard around the timing [0]
 * stores, the lock release mentioned in the text below and the return value
 * are not shown in these lines -- confirm.
 */
263 /* LOCKING: assumes the GC lock is held */
265 sgen_restart_world (int generation, GGTimingInfo *timing)
268 SgenThreadInfo *info;
270 TV_DECLARE (start_handshake);
271 TV_DECLARE (end_bridge);
272 unsigned long usec, bridge_usec;
274 if (binary_protocol_is_enabled ()) {
275 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
276 if (binary_protocol_is_heavy_enabled ())
277 count_cards (&major_total, &major_marked, &los_total, &los_marked);
278 binary_protocol_world_restarting (generation, sgen_timestamp (), major_total, major_marked, los_total, los_marked);
281 /* notify the profiler of the leftovers */
282 /* FIXME this is the wrong spot as we can STW for non collection reasons. */
283 if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
284 sgen_gc_event_moves ();
285 mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
286 MONO_GC_WORLD_RESTART_BEGIN (generation);
/* Drop the per-thread state captured while the world was stopped. */
287 FOREACH_THREAD (info) {
288 info->stack_start = NULL;
290 memset (&info->ctx, 0, sizeof (MonoContext));
292 memset (&info->regs, 0, sizeof (info->regs));
296 TV_GETTIME (start_handshake);
298 if (mono_thread_info_unified_management_enabled ())
299 count = sgen_unified_suspend_restart_world ();
301 count = sgen_thread_handshake (FALSE);
/* The pause is measured from the timestamp taken in sgen_stop_world (). */
305 time_restart_world += TV_ELAPSED (start_handshake, end_sw);
306 usec = TV_ELAPSED (stop_world_time, end_sw);
307 max_pause_usec = MAX (usec, max_pause_usec);
308 SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
309 mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
310 MONO_GC_WORLD_RESTART_END (generation);
311 binary_protocol_world_restarted (generation, sgen_timestamp ());
314 * We must release the thread info suspend lock after doing
315 * the thread handshake. Otherwise, if the GC stops the world
316 * and a thread is in the process of starting up, but has not
317 * yet registered (it's not in the thread_list), it is
318 * possible that the thread does register while the world is
319 * stopped. When restarting the GC will then try to restart
320 * said thread, but since it never got the suspend signal, it
321 * cannot answer the restart signal, so a deadlock results.
325 sgen_try_free_some_memory = TRUE;
327 if (sgen_need_bridge_processing ())
328 sgen_bridge_processing_finish (generation);
330 TV_GETTIME (end_bridge);
331 bridge_usec = TV_ELAPSED (end_sw, end_bridge);
334 timing [0].stw_time = usec;
335 timing [0].bridge_time = bridge_usec;
338 sgen_memgov_collection_end (generation, timing, timing ? 2 : 0);
/*
 * Publish the accumulated stop and restart times as GC time counters named
 * "World stop" and "World restart".
 * NOTE(review): the header of the enclosing function is not shown in these lines.
 */
346 mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
347 mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
350 /* Unified suspend code */
/*
 * sgen_is_thread_in_current_stw:
 *
 *   Filter used by the unified suspend code to decide whether @info takes
 * part in the current stop-the-world. The visible checks look at, in order:
 * info->gc_disabled, whether @info is the calling thread, whether it is a
 * sgen worker thread, and whether the thread is still live.
 *   NOTE(review): the value returned by each check (presumably FALSE, with
 * TRUE as the fall-through result) and the test that belongs to the
 * "failing/dying" note are not shown in these lines -- confirm.
 */
353 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
356 A thread explicitly asked to be skiped because it holds no managed state.
357 This is used by TP and finalizer threads.
358 FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
360 if (info->gc_disabled) {
365 We have detected that this thread is failing/dying, ignore it.
366 FIXME: can't we merge this with thread_is_dying?
373 Suspending the current thread will deadlock us, bad idea.
375 if (info == mono_thread_info_current ()) {
380 We can't suspend the workers that will do all the heavy lifting.
381 FIXME Use some state bit in SgenThreadInfo for this.
383 if (sgen_is_worker_thread (mono_thread_info_get_tid (info))) {
388 The thread has signaled that it started to detach, ignore it.
389 FIXME: can't we merge this with skip
391 if (!mono_thread_info_is_live (info)) {
/*
 * update_sgen_info:
 *
 *   Copies what sgen needs from the suspend state kept by mono-threads into
 * @info: the domain read from the TLS_KEY_DOMAIN slot, the instruction
 * pointer of the suspended context, and a stack start computed as the
 * context's stack pointer minus REDZONE_SIZE. The stack start must lie inside
 * [info->stack_start_limit, info->stack_end); otherwise the function aborts
 * through g_error ("BAD STACK"). The suspended context itself is stored in
 * info->ctx.
 *   NOTE(review): the g_assert_not_reached () presumably belongs to a build
 * variant without MonoContext support -- confirm.
 */
399 update_sgen_info (SgenThreadInfo *info)
403 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
404 info->stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
405 info->stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
406 stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
408 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
409 if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
410 g_error ("BAD STACK");
412 info->stack_start = stack_start;
414 info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
416 g_assert_not_reached ();
/*
 * sgen_unified_suspend_stop_world:
 *
 *   Stop-the-world implementation on top of the unified mono-threads suspend
 * machinery. The visible statements form three phases:
 *
 *   1. Begin a global suspend. For every thread, clear suspend_done and, if
 *      sgen_is_thread_in_current_stw () accepts it, request a suspend;
 *      info->skip records a failed request. The calling thread is marked
 *      suspend_done and pending operations are awaited.
 *
 *   2. Examine every participating thread that is not yet suspend_done. A
 *      thread whose suspend did not complete is given up on; a thread stopped
 *      in a critical location is resumed so it can move on; any other thread
 *      is accepted and marked suspend_done. When no thread had to be
 *      restarted this phase ends; otherwise the function waits, backs off
 *      (sleep_duration grows by 10 per round), suspends the participating
 *      threads that are running again and waits once more.
 *
 *   3. For every participating thread, assert that it is suspend_done and
 *      copy its state with update_sgen_info (). For the others, assert that
 *      only the calling thread carries suspend_done.
 *
 *   NOTE(review): the loop around phase 2, the updates of restart_counter and
 * info->skip inside it, and the returned count are not shown in these lines
 * -- confirm.
 */
421 sgen_unified_suspend_stop_world (void)
424 SgenThreadInfo *info;
426 int sleep_duration = -1;
428 mono_threads_begin_global_suspend ();
429 THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
431 FOREACH_THREAD_SAFE (info) {
433 info->suspend_done = FALSE;
434 if (sgen_is_thread_in_current_stw (info)) {
435 info->skip = !mono_thread_info_begin_suspend (info, FALSE);
436 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
440 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
442 } END_FOREACH_THREAD_SAFE
/* The calling thread never suspends itself, so it counts as done up front. */
444 mono_thread_info_current ()->suspend_done = TRUE;
445 mono_threads_wait_pending_operations ();
449 FOREACH_THREAD_SAFE (info) {
450 if (info->suspend_done || !sgen_is_thread_in_current_stw (info)) {
451 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->suspend_done, !sgen_is_thread_in_current_stw (info));
456 All threads that reach here are pristine suspended. This means the following:
458 - We haven't accepted the previous suspend as good.
459 - We haven't gave up on it for this STW (it's either bad or asked not to)
461 if (!mono_threads_core_check_suspend_result (info)) {
462 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
464 } else if (mono_thread_info_in_critical_location (info)) {
466 g_assert (mono_thread_info_suspend_count (info) == 1);
467 res = mono_thread_info_begin_resume (info);
468 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
474 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
475 g_assert (!info->in_critical_region);
476 info->suspend_done = TRUE;
478 } END_FOREACH_THREAD_SAFE
480 if (restart_counter == 0)
482 mono_threads_wait_pending_operations ();
484 if (sleep_duration < 0) {
492 g_usleep (sleep_duration);
493 sleep_duration += 10;
/* Suspend again whatever was let go above and is running at this point. */
496 FOREACH_THREAD_SAFE (info) {
497 if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
498 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
499 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
503 } END_FOREACH_THREAD_SAFE
505 mono_threads_wait_pending_operations ();
508 FOREACH_THREAD_SAFE (info) {
509 if (sgen_is_thread_in_current_stw (info)) {
510 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
511 g_assert (info->suspend_done);
512 update_sgen_info (info);
514 g_assert (!info->suspend_done || info == mono_thread_info_current ());
516 } END_FOREACH_THREAD_SAFE
/*
 * sgen_unified_suspend_restart_world:
 *
 *   Counterpart of sgen_unified_suspend_stop_world (): requests a resume for
 * every thread accepted by sgen_is_thread_in_current_stw (), waits for the
 * pending operations to finish and then ends the global suspend.
 *
 *   NOTE(review): the call to mono_thread_info_begin_resume () is made inside
 * g_assert (). GLib removes g_assert () expressions when G_DISABLE_ASSERT is
 * defined, in which case no thread would be resumed. Consider storing the
 * result in a local and asserting on that instead.
 *   NOTE(review): the returned count is not shown in these lines -- confirm.
 */
522 sgen_unified_suspend_restart_world (void)
524 SgenThreadInfo *info;
527 THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
528 FOREACH_THREAD_SAFE (info) {
529 if (sgen_is_thread_in_current_stw (info)) {
530 g_assert (mono_thread_info_begin_resume (info));
531 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
534 THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
536 } END_FOREACH_THREAD_SAFE
538 mono_threads_wait_pending_operations ();
539 mono_threads_end_global_suspend ();