Merge pull request #2998 from lateralusX/jlorenss/win-x64-full-aot-support
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28
/* Short local aliases for the SGen timing macros used throughout this file. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/*
 * Forward declarations: both functions are defined in the "Unified suspend code"
 * section at the end of this file and are called from sgen_client_stop_world ()
 * and sgen_client_restart_world ().
 */
static void sgen_unified_suspend_restart_world (void);
static void sgen_unified_suspend_stop_world (void);

/*
 * Timestamp taken by sgen_client_restart_world () right after the world has been
 * restarted. mono_time_since_last_stw () treats a value of 0 as "no stop-the-world
 * has completed yet".
 */
static TV_DECLARE (end_of_last_stw);
37
38 guint64 mono_time_since_last_stw ()
39 {
40         if (end_of_last_stw == 0)
41                 return 0;
42
43         TV_DECLARE (current_time);
44         TV_GETTIME (current_time);
45         return TV_ELAPSED (end_of_last_stw, current_time);
46 }
47
/* Number of times the world has been stopped; incremented by sgen_client_stop_world (). */
unsigned int sgen_global_stop_count = 0;
49
50 inline static void*
51 align_pointer (void *ptr)
52 {
53         mword p = (mword)ptr;
54         p += sizeof (gpointer) - 1;
55         p &= ~ (sizeof (gpointer) - 1);
56         return (void*)p;
57 }
58
59 static void
60 update_current_thread_stack (void *start)
61 {
62         int stack_guard = 0;
63         SgenThreadInfo *info = mono_thread_info_current ();
64
65         info->client_info.stack_start = align_pointer (&stack_guard);
66         g_assert (info->client_info.stack_start);
67         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
68
69 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
70         MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
71 #else
72         g_error ("Sgen STW requires a working mono-context");
73 #endif
74
75         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
76                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
77 }
78
79 static gboolean
80 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
81 {
82         MonoJitInfo *ji;
83
84         if (!mono_thread_internal_current ())
85                 /* Happens during thread attach */
86                 return FALSE;
87
88         if (!ip || !domain)
89                 return FALSE;
90         if (!sgen_has_critical_method ())
91                 return FALSE;
92
93         /*
94          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
95          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
96          * to register the jit info for all GC critical methods after they are JITted/loaded.
97          */
98         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
99         if (!ji)
100                 return FALSE;
101
102         return sgen_is_critical_method (mono_jit_info_get_method (ji));
103 }
104
105 static int
106 restart_threads_until_none_in_managed_allocator (void)
107 {
108         int num_threads_died = 0;
109         int sleep_duration = -1;
110
111         for (;;) {
112                 int restart_count = 0, restarted_count = 0;
113                 /* restart all threads that stopped in the
114                    allocator */
115                 FOREACH_THREAD (info) {
116                         gboolean result;
117                         if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
118                                 continue;
119                         if (mono_thread_info_is_live (info) &&
120                                         (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
121                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
122                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
123                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
124                                 result = sgen_resume_thread (info);
125                                 if (result) {
126                                         ++restart_count;
127                                 } else {
128                                         info->client_info.skip = 1;
129                                 }
130                         } else {
131                                 /* we set the stopped_ip to
132                                    NULL for threads which
133                                    we're not restarting so
134                                    that we can easily identify
135                                    the others */
136                                 info->client_info.stopped_ip = NULL;
137                                 info->client_info.stopped_domain = NULL;
138                                 info->client_info.suspend_done = TRUE;
139                         }
140                 } FOREACH_THREAD_END
141                 /* if no threads were restarted, we're done */
142                 if (restart_count == 0)
143                         break;
144
145                 /* wait for the threads to signal their restart */
146                 sgen_wait_for_suspend_ack (restart_count);
147
148                 if (sleep_duration < 0) {
149                         mono_thread_info_yield ();
150                         sleep_duration = 0;
151                 } else {
152                         g_usleep (sleep_duration);
153                         sleep_duration += 10;
154                 }
155
156                 /* stop them again */
157                 FOREACH_THREAD (info) {
158                         gboolean result;
159                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
160                                 continue;
161                         result = sgen_suspend_thread (info);
162
163                         if (result) {
164                                 ++restarted_count;
165                         } else {
166                                 info->client_info.skip = 1;
167                         }
168                 } FOREACH_THREAD_END
169                 /* some threads might have died */
170                 num_threads_died += restart_count - restarted_count;
171                 /* wait for the threads to signal their suspension
172                    again */
173                 sgen_wait_for_suspend_ack (restarted_count);
174         }
175
176         return num_threads_died;
177 }
178
/*
 * acquire_gc_locks:
 *
 *   Take the locks held for the duration of a stop-the-world pause: first the
 * interruption lock, then the thread-info suspend lock. release_gc_locks ()
 * releases them in the opposite order.
 */
static void
acquire_gc_locks (void)
{
	LOCK_INTERRUPTION;
	mono_thread_info_suspend_lock ();
}
185
/*
 * release_gc_locks:
 *
 *   Release the locks taken by acquire_gc_locks (), in reverse order: the
 * thread-info suspend lock first, then the interruption lock.
 */
static void
release_gc_locks (void)
{
	mono_thread_info_suspend_unlock ();
	UNLOCK_INTERRUPTION;
}
192
/* Timestamp taken by sgen_client_stop_world () just before it starts suspending threads. */
static TV_DECLARE (stop_world_time);
/* Longest pause (stop_world_time to end of restart) observed so far by sgen_client_restart_world (). */
static unsigned long max_pause_usec = 0;

/* Accumulated time spent stopping the world; registered as the "World stop" counter. */
static guint64 time_stop_world;
/* Accumulated time spent restarting the world; registered as the "World restart" counter. */
static guint64 time_restart_world;
198
199 /* LOCKING: assumes the GC lock is held */
200 void
201 sgen_client_stop_world (int generation)
202 {
203         TV_DECLARE (end_handshake);
204
205         /* notify the profiler of the leftovers */
206         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
207         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
208                 mono_sgen_gc_event_moves ();
209
210         acquire_gc_locks ();
211
212         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
213         sgen_process_togglerefs ();
214
215         update_current_thread_stack (&generation);
216
217         sgen_global_stop_count++;
218         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
219         TV_GETTIME (stop_world_time);
220
221         if (mono_thread_info_unified_management_enabled ()) {
222                 sgen_unified_suspend_stop_world ();
223         } else {
224                 int count, dead;
225                 count = sgen_thread_handshake (TRUE);
226                 dead = restart_threads_until_none_in_managed_allocator ();
227                 if (count < dead)
228                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
229         }
230
231         SGEN_LOG (3, "world stopped");
232
233         TV_GETTIME (end_handshake);
234         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
235
236         sgen_memgov_collection_start (generation);
237         if (sgen_need_bridge_processing ())
238                 sgen_bridge_reset_data ();
239 }
240
241 /* LOCKING: assumes the GC lock is held */
242 void
243 sgen_client_restart_world (int generation, GGTimingInfo *timing)
244 {
245         TV_DECLARE (end_sw);
246         TV_DECLARE (start_handshake);
247         unsigned long usec;
248
249         /* notify the profiler of the leftovers */
250         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
251         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
252                 mono_sgen_gc_event_moves ();
253
254         FOREACH_THREAD (info) {
255                 info->client_info.stack_start = NULL;
256                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
257         } FOREACH_THREAD_END
258
259         TV_GETTIME (start_handshake);
260
261         if (mono_thread_info_unified_management_enabled ())
262                 sgen_unified_suspend_restart_world ();
263         else
264                 sgen_thread_handshake (FALSE);
265
266         TV_GETTIME (end_sw);
267         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
268         usec = TV_ELAPSED (stop_world_time, end_sw);
269         max_pause_usec = MAX (usec, max_pause_usec);
270         end_of_last_stw = end_sw;
271
272         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
273
274         /*
275          * We must release the thread info suspend lock after doing
276          * the thread handshake.  Otherwise, if the GC stops the world
277          * and a thread is in the process of starting up, but has not
278          * yet registered (it's not in the thread_list), it is
279          * possible that the thread does register while the world is
280          * stopped.  When restarting the GC will then try to restart
281          * said thread, but since it never got the suspend signal, it
282          * cannot answer the restart signal, so a deadlock results.
283          */
284         release_gc_locks ();
285
286         if (timing) {
287                 timing [0].stw_time = usec;
288         }
289 }
290
291 void
292 mono_sgen_init_stw (void)
293 {
294         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
295         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
296 }
297
298 /* Unified suspend code */
299
300 static gboolean
301 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
302 {
303         /*
304         A thread explicitly asked to be skiped because it holds no managed state.
305         This is used by TP and finalizer threads.
306         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
307         */
308         if (info->client_info.gc_disabled) {
309                 if (reason)
310                         *reason = 1;
311                 return FALSE;
312         }
313
314         /*
315         We have detected that this thread is failing/dying, ignore it.
316         FIXME: can't we merge this with thread_is_dying?
317         */
318         if (info->client_info.skip) {
319                 if (reason)
320                         *reason = 2;
321                 return FALSE;
322         }
323
324         /*
325         Suspending the current thread will deadlock us, bad idea.
326         */
327         if (info == mono_thread_info_current ()) {
328                 if (reason)
329                         *reason = 3;
330                 return FALSE;
331         }
332
333         /*
334         We can't suspend the workers that will do all the heavy lifting.
335         FIXME Use some state bit in SgenThreadInfo for this.
336         */
337         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
338                 if (reason)
339                         *reason = 4;
340                 return FALSE;
341         }
342
343         /*
344         The thread has signaled that it started to detach, ignore it.
345         FIXME: can't we merge this with skip
346         */
347         if (!mono_thread_info_is_live (info)) {
348                 if (reason)
349                         *reason = 5;
350                 return FALSE;
351         }
352
353         return TRUE;
354 }
355
356 static void
357 sgen_unified_suspend_stop_world (void)
358 {
359         int restart_counter;
360         int sleep_duration = -1;
361
362         mono_threads_begin_global_suspend ();
363         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
364
365         FOREACH_THREAD (info) {
366                 int reason;
367                 info->client_info.skip = FALSE;
368                 info->client_info.suspend_done = FALSE;
369                 if (sgen_is_thread_in_current_stw (info, &reason)) {
370                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
371                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
372                 } else {
373                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
374                 }
375         } FOREACH_THREAD_END
376
377         mono_thread_info_current ()->client_info.suspend_done = TRUE;
378         mono_threads_wait_pending_operations ();
379
380         for (;;) {
381                 restart_counter = 0;
382                 FOREACH_THREAD (info) {
383                         int reason = 0;
384                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
385                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
386                                 continue;
387                         }
388
389                         /*
390                         All threads that reach here are pristine suspended. This means the following:
391
392                         - We haven't accepted the previous suspend as good.
393                         - We haven't gave up on it for this STW (it's either bad or asked not to)
394                         */
395                         if (mono_thread_info_in_critical_location (info)) {
396                                 gboolean res;
397                                 gint suspend_count = mono_thread_info_suspend_count (info);
398                                 if (!(suspend_count == 1))
399                                         g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
400                                 res = mono_thread_info_begin_resume (info);
401                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
402                                 if (res)
403                                         ++restart_counter;
404                                 else
405                                         info->client_info.skip = TRUE;
406                         } else {
407                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
408                                 g_assert (!info->client_info.in_critical_region);
409                                 info->client_info.suspend_done = TRUE;
410                         }
411                 } FOREACH_THREAD_END
412
413                 if (restart_counter == 0)
414                         break;
415                 mono_threads_wait_pending_operations ();
416
417                 if (sleep_duration < 0) {
418                         mono_thread_info_yield ();
419                         sleep_duration = 0;
420                 } else {
421                         g_usleep (sleep_duration);
422                         sleep_duration += 10;
423                 }
424
425                 FOREACH_THREAD (info) {
426                         int reason = 0;
427                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
428                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
429                                 continue;
430                         }
431
432                         if (mono_thread_info_is_running (info)) {
433                                 gboolean res = mono_thread_info_begin_suspend (info);
434                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
435                                 if (!res)
436                                         info->client_info.skip = TRUE;
437                         }
438                 } FOREACH_THREAD_END
439
440                 mono_threads_wait_pending_operations ();
441         }
442
443         FOREACH_THREAD (info) {
444                 int reason = 0;
445                 if (sgen_is_thread_in_current_stw (info, &reason)) {
446                         MonoThreadUnwindState *state;
447
448                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
449                         g_assert (info->client_info.suspend_done);
450
451                         state = mono_thread_info_get_suspend_state (info);
452
453                         info->client_info.ctx = state->ctx;
454
455                         if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
456                                 /* thread is starting or detaching, nothing to scan here */
457                                 info->client_info.stopped_domain = NULL;
458                                 info->client_info.stopped_ip = NULL;
459                                 info->client_info.stack_start = NULL;
460                         } else {
461                                 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
462                                 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
463                                 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
464                                 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
465
466                                 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
467                                 if (!info->client_info.stack_start
468                                          || info->client_info.stack_start < info->client_info.stack_start_limit
469                                          || info->client_info.stack_start >= info->client_info.stack_end) {
470                                         g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
471                                                 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
472                                 }
473                         }
474
475                         binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
476                 } else {
477                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
478                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
479                 }
480         } FOREACH_THREAD_END
481 }
482
483 static void
484 sgen_unified_suspend_restart_world (void)
485 {
486         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
487         FOREACH_THREAD (info) {
488                 int reason = 0;
489                 if (sgen_is_thread_in_current_stw (info, &reason)) {
490                         g_assert (mono_thread_info_begin_resume (info));
491                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
492
493                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
494                 } else {
495                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
496                 }
497         } FOREACH_THREAD_END
498
499         mono_threads_wait_pending_operations ();
500         mono_threads_end_global_suspend ();
501 }
502 #endif