Merge pull request #2987 from spouliot/ipv6only
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28
/* Short local aliases for the SGen timing macros used in this file. */
#define TV_ELAPSED SGEN_TV_ELAPSED
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_DECLARE SGEN_TV_DECLARE

/* The unified-suspend implementations live at the bottom of this file. */
static void sgen_unified_suspend_stop_world (void);
static void sgen_unified_suspend_restart_world (void);

/* Bumped by sgen_client_stop_world () every time the world is stopped. */
unsigned int sgen_global_stop_count = 0;
37
38 inline static void*
39 align_pointer (void *ptr)
40 {
41         mword p = (mword)ptr;
42         p += sizeof (gpointer) - 1;
43         p &= ~ (sizeof (gpointer) - 1);
44         return (void*)p;
45 }
46
47 static MonoContext cur_thread_ctx;
48
49 static void
50 update_current_thread_stack (void *start)
51 {
52         int stack_guard = 0;
53         SgenThreadInfo *info = mono_thread_info_current ();
54
55         info->client_info.stack_start = align_pointer (&stack_guard);
56         g_assert (info->client_info.stack_start);
57         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
58
59 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
60         MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
61 #else
62         g_error ("Sgen STW requires a working mono-context");
63 #endif
64
65         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
66                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
67 }
68
69 static gboolean
70 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
71 {
72         MonoJitInfo *ji;
73
74         if (!mono_thread_internal_current ())
75                 /* Happens during thread attach */
76                 return FALSE;
77
78         if (!ip || !domain)
79                 return FALSE;
80         if (!sgen_has_critical_method ())
81                 return FALSE;
82
83         /*
84          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
85          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
86          * to register the jit info for all GC critical methods after they are JITted/loaded.
87          */
88         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
89         if (!ji)
90                 return FALSE;
91
92         return sgen_is_critical_method (mono_jit_info_get_method (ji));
93 }
94
95 static int
96 restart_threads_until_none_in_managed_allocator (void)
97 {
98         int num_threads_died = 0;
99         int sleep_duration = -1;
100
101         for (;;) {
102                 int restart_count = 0, restarted_count = 0;
103                 /* restart all threads that stopped in the
104                    allocator */
105                 FOREACH_THREAD (info) {
106                         gboolean result;
107                         if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
108                                 continue;
109                         if (mono_thread_info_is_live (info) &&
110                                         (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
111                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
112                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
113                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
114                                 result = sgen_resume_thread (info);
115                                 if (result) {
116                                         ++restart_count;
117                                 } else {
118                                         info->client_info.skip = 1;
119                                 }
120                         } else {
121                                 /* we set the stopped_ip to
122                                    NULL for threads which
123                                    we're not restarting so
124                                    that we can easily identify
125                                    the others */
126                                 info->client_info.stopped_ip = NULL;
127                                 info->client_info.stopped_domain = NULL;
128                                 info->client_info.suspend_done = TRUE;
129                         }
130                 } FOREACH_THREAD_END
131                 /* if no threads were restarted, we're done */
132                 if (restart_count == 0)
133                         break;
134
135                 /* wait for the threads to signal their restart */
136                 sgen_wait_for_suspend_ack (restart_count);
137
138                 if (sleep_duration < 0) {
139                         mono_thread_info_yield ();
140                         sleep_duration = 0;
141                 } else {
142                         g_usleep (sleep_duration);
143                         sleep_duration += 10;
144                 }
145
146                 /* stop them again */
147                 FOREACH_THREAD (info) {
148                         gboolean result;
149                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
150                                 continue;
151                         result = sgen_suspend_thread (info);
152
153                         if (result) {
154                                 ++restarted_count;
155                         } else {
156                                 info->client_info.skip = 1;
157                         }
158                 } FOREACH_THREAD_END
159                 /* some threads might have died */
160                 num_threads_died += restart_count - restarted_count;
161                 /* wait for the threads to signal their suspension
162                    again */
163                 sgen_wait_for_suspend_ack (restarted_count);
164         }
165
166         return num_threads_died;
167 }
168
169 static void
170 acquire_gc_locks (void)
171 {
172         LOCK_INTERRUPTION;
173         mono_thread_info_suspend_lock ();
174 }
175
176 static void
177 release_gc_locks (void)
178 {
179         mono_thread_info_suspend_unlock ();
180         UNLOCK_INTERRUPTION;
181 }
182
183 static TV_DECLARE (stop_world_time);
184 static unsigned long max_pause_usec = 0;
185
186 static guint64 time_stop_world;
187 static guint64 time_restart_world;
188
189 /* LOCKING: assumes the GC lock is held */
190 void
191 sgen_client_stop_world (int generation)
192 {
193         TV_DECLARE (end_handshake);
194
195         /* notify the profiler of the leftovers */
196         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
197         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
198                 mono_sgen_gc_event_moves ();
199
200         acquire_gc_locks ();
201
202         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
203         sgen_process_togglerefs ();
204
205         update_current_thread_stack (&generation);
206
207         sgen_global_stop_count++;
208         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
209         TV_GETTIME (stop_world_time);
210
211         if (mono_thread_info_unified_management_enabled ()) {
212                 sgen_unified_suspend_stop_world ();
213         } else {
214                 int count, dead;
215                 count = sgen_thread_handshake (TRUE);
216                 dead = restart_threads_until_none_in_managed_allocator ();
217                 if (count < dead)
218                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
219         }
220
221         SGEN_LOG (3, "world stopped");
222
223         TV_GETTIME (end_handshake);
224         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
225
226         sgen_memgov_collection_start (generation);
227         if (sgen_need_bridge_processing ())
228                 sgen_bridge_reset_data ();
229 }
230
231 /* LOCKING: assumes the GC lock is held */
232 void
233 sgen_client_restart_world (int generation, GGTimingInfo *timing)
234 {
235         TV_DECLARE (end_sw);
236         TV_DECLARE (start_handshake);
237         TV_DECLARE (end_bridge);
238         unsigned long usec, bridge_usec;
239
240         /* notify the profiler of the leftovers */
241         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
242         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
243                 mono_sgen_gc_event_moves ();
244
245         FOREACH_THREAD (info) {
246                 info->client_info.stack_start = NULL;
247                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
248         } FOREACH_THREAD_END
249
250         TV_GETTIME (start_handshake);
251
252         if (mono_thread_info_unified_management_enabled ())
253                 sgen_unified_suspend_restart_world ();
254         else
255                 sgen_thread_handshake (FALSE);
256
257         TV_GETTIME (end_sw);
258         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
259         usec = TV_ELAPSED (stop_world_time, end_sw);
260         max_pause_usec = MAX (usec, max_pause_usec);
261
262         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
263
264         /*
265          * We must release the thread info suspend lock after doing
266          * the thread handshake.  Otherwise, if the GC stops the world
267          * and a thread is in the process of starting up, but has not
268          * yet registered (it's not in the thread_list), it is
269          * possible that the thread does register while the world is
270          * stopped.  When restarting the GC will then try to restart
271          * said thread, but since it never got the suspend signal, it
272          * cannot answer the restart signal, so a deadlock results.
273          */
274         release_gc_locks ();
275
276         TV_GETTIME (end_bridge);
277         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
278
279         if (timing) {
280                 timing [0].stw_time = usec;
281                 timing [0].bridge_time = bridge_usec;
282         }
283 }
284
285 void
286 mono_sgen_init_stw (void)
287 {
288         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
289         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
290 }
291
292 /* Unified suspend code */
293
294 static gboolean
295 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
296 {
297         /*
298         A thread explicitly asked to be skiped because it holds no managed state.
299         This is used by TP and finalizer threads.
300         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
301         */
302         if (info->client_info.gc_disabled) {
303                 if (reason)
304                         *reason = 1;
305                 return FALSE;
306         }
307
308         /*
309         We have detected that this thread is failing/dying, ignore it.
310         FIXME: can't we merge this with thread_is_dying?
311         */
312         if (info->client_info.skip) {
313                 if (reason)
314                         *reason = 2;
315                 return FALSE;
316         }
317
318         /*
319         Suspending the current thread will deadlock us, bad idea.
320         */
321         if (info == mono_thread_info_current ()) {
322                 if (reason)
323                         *reason = 3;
324                 return FALSE;
325         }
326
327         /*
328         We can't suspend the workers that will do all the heavy lifting.
329         FIXME Use some state bit in SgenThreadInfo for this.
330         */
331         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
332                 if (reason)
333                         *reason = 4;
334                 return FALSE;
335         }
336
337         /*
338         The thread has signaled that it started to detach, ignore it.
339         FIXME: can't we merge this with skip
340         */
341         if (!mono_thread_info_is_live (info)) {
342                 if (reason)
343                         *reason = 5;
344                 return FALSE;
345         }
346
347         return TRUE;
348 }
349
350 static void
351 sgen_unified_suspend_stop_world (void)
352 {
353         int restart_counter;
354         int sleep_duration = -1;
355
356         mono_threads_begin_global_suspend ();
357         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
358
359         FOREACH_THREAD (info) {
360                 int reason;
361                 info->client_info.skip = FALSE;
362                 info->client_info.suspend_done = FALSE;
363                 if (sgen_is_thread_in_current_stw (info, &reason)) {
364                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
365                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
366                 } else {
367                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
368                 }
369         } FOREACH_THREAD_END
370
371         mono_thread_info_current ()->client_info.suspend_done = TRUE;
372         mono_threads_wait_pending_operations ();
373
374         for (;;) {
375                 restart_counter = 0;
376                 FOREACH_THREAD (info) {
377                         int reason = 0;
378                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
379                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
380                                 continue;
381                         }
382
383                         /*
384                         All threads that reach here are pristine suspended. This means the following:
385
386                         - We haven't accepted the previous suspend as good.
387                         - We haven't gave up on it for this STW (it's either bad or asked not to)
388                         */
389                         if (mono_thread_info_in_critical_location (info)) {
390                                 gboolean res;
391                                 gint suspend_count = mono_thread_info_suspend_count (info);
392                                 if (!(suspend_count == 1))
393                                         g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
394                                 res = mono_thread_info_begin_resume (info);
395                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
396                                 if (res)
397                                         ++restart_counter;
398                                 else
399                                         info->client_info.skip = TRUE;
400                         } else {
401                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
402                                 g_assert (!info->client_info.in_critical_region);
403                                 info->client_info.suspend_done = TRUE;
404                         }
405                 } FOREACH_THREAD_END
406
407                 if (restart_counter == 0)
408                         break;
409                 mono_threads_wait_pending_operations ();
410
411                 if (sleep_duration < 0) {
412                         mono_thread_info_yield ();
413                         sleep_duration = 0;
414                 } else {
415                         g_usleep (sleep_duration);
416                         sleep_duration += 10;
417                 }
418
419                 FOREACH_THREAD (info) {
420                         int reason = 0;
421                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
422                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
423                                 continue;
424                         }
425
426                         if (mono_thread_info_is_running (info)) {
427                                 gboolean res = mono_thread_info_begin_suspend (info);
428                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
429                                 if (!res)
430                                         info->client_info.skip = TRUE;
431                         }
432                 } FOREACH_THREAD_END
433
434                 mono_threads_wait_pending_operations ();
435         }
436
437         FOREACH_THREAD (info) {
438                 int reason = 0;
439                 if (sgen_is_thread_in_current_stw (info, &reason)) {
440                         MonoThreadUnwindState *state;
441
442                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
443                         g_assert (info->client_info.suspend_done);
444
445                         state = mono_thread_info_get_suspend_state (info);
446
447                         info->client_info.ctx = state->ctx;
448
449                         if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
450                                 /* thread is starting or detaching, nothing to scan here */
451                                 info->client_info.stopped_domain = NULL;
452                                 info->client_info.stopped_ip = NULL;
453                                 info->client_info.stack_start = NULL;
454                         } else {
455                                 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
456                                 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
457                                 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
458                                 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
459
460                                 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
461                                 if (!info->client_info.stack_start
462                                          || info->client_info.stack_start < info->client_info.stack_start_limit
463                                          || info->client_info.stack_start >= info->client_info.stack_end) {
464                                         g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
465                                                 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
466                                 }
467                         }
468
469                         binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
470                 } else {
471                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
472                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
473                 }
474         } FOREACH_THREAD_END
475 }
476
477 static void
478 sgen_unified_suspend_restart_world (void)
479 {
480         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
481         FOREACH_THREAD (info) {
482                 int reason = 0;
483                 if (sgen_is_thread_in_current_stw (info, &reason)) {
484                         g_assert (mono_thread_info_begin_resume (info));
485                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
486
487                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
488                 } else {
489                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
490                 }
491         } FOREACH_THREAD_END
492
493         mono_threads_wait_pending_operations ();
494         mono_threads_end_global_suspend ();
495 }
496 #endif