Merge pull request #2871 from BrzVlad/feature-conc-sweep-nrs
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28
/* Short local aliases for the SGEN_TV_* timing macros used in this file. */
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
32
/*
 * Forward declarations: both functions are called by sgen_client_stop_world ()
 * and sgen_client_restart_world () before their definitions appear.
 */
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
35
36 static TV_DECLARE (end_of_last_stw);
37
38 guint64 mono_time_since_last_stw ()
39 {
40         if (end_of_last_stw == 0)
41                 return 0;
42
43         TV_DECLARE (current_time);
44         TV_GETTIME (current_time);
45         return TV_ELAPSED (end_of_last_stw, current_time);
46 }
47
/* Incremented once per call to sgen_client_stop_world (); also printed in its log line. */
48 unsigned int sgen_global_stop_count = 0;
49
50 inline static void*
51 align_pointer (void *ptr)
52 {
53         mword p = (mword)ptr;
54         p += sizeof (gpointer) - 1;
55         p &= ~ (sizeof (gpointer) - 1);
56         return (void*)p;
57 }
58
59 static void
60 update_current_thread_stack (void *start)
61 {
62         int stack_guard = 0;
63         SgenThreadInfo *info = mono_thread_info_current ();
64
65         info->client_info.stack_start = align_pointer (&stack_guard);
66         g_assert (info->client_info.stack_start);
67         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
68
69 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
70         MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
71 #else
72         g_error ("Sgen STW requires a working mono-context");
73 #endif
74
75         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
76                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
77 }
78
79 static gboolean
80 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
81 {
82         MonoJitInfo *ji;
83
84         if (!mono_thread_internal_current ())
85                 /* Happens during thread attach */
86                 return FALSE;
87
88         if (!ip || !domain)
89                 return FALSE;
90         if (!sgen_has_critical_method ())
91                 return FALSE;
92
93         /*
94          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
95          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
96          * to register the jit info for all GC critical methods after they are JITted/loaded.
97          */
98         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
99         if (!ji)
100                 return FALSE;
101
102         return sgen_is_critical_method (mono_jit_info_get_method (ji));
103 }
104
105 static int
106 restart_threads_until_none_in_managed_allocator (void)
107 {
108         int num_threads_died = 0;
109         int sleep_duration = -1;
110
111         for (;;) {
112                 int restart_count = 0, restarted_count = 0;
113                 /* restart all threads that stopped in the
114                    allocator */
115                 FOREACH_THREAD (info) {
116                         gboolean result;
117                         if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
118                                 continue;
119                         if (mono_thread_info_is_live (info) &&
120                                         (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
121                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
122                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
123                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
124                                 result = sgen_resume_thread (info);
125                                 if (result) {
126                                         ++restart_count;
127                                 } else {
128                                         info->client_info.skip = 1;
129                                 }
130                         } else {
131                                 /* we set the stopped_ip to
132                                    NULL for threads which
133                                    we're not restarting so
134                                    that we can easily identify
135                                    the others */
136                                 info->client_info.stopped_ip = NULL;
137                                 info->client_info.stopped_domain = NULL;
138                                 info->client_info.suspend_done = TRUE;
139                         }
140                 } FOREACH_THREAD_END
141                 /* if no threads were restarted, we're done */
142                 if (restart_count == 0)
143                         break;
144
145                 /* wait for the threads to signal their restart */
146                 sgen_wait_for_suspend_ack (restart_count);
147
148                 if (sleep_duration < 0) {
149                         mono_thread_info_yield ();
150                         sleep_duration = 0;
151                 } else {
152                         g_usleep (sleep_duration);
153                         sleep_duration += 10;
154                 }
155
156                 /* stop them again */
157                 FOREACH_THREAD (info) {
158                         gboolean result;
159                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
160                                 continue;
161                         result = sgen_suspend_thread (info);
162
163                         if (result) {
164                                 ++restarted_count;
165                         } else {
166                                 info->client_info.skip = 1;
167                         }
168                 } FOREACH_THREAD_END
169                 /* some threads might have died */
170                 num_threads_died += restart_count - restarted_count;
171                 /* wait for the threads to signal their suspension
172                    again */
173                 sgen_wait_for_suspend_ack (restarted_count);
174         }
175
176         return num_threads_died;
177 }
178
/*
 * Takes the two locks that sgen_client_stop_world () acquires before stopping
 * threads: LOCK_INTERRUPTION first, then the thread-info suspend lock.
 * release_gc_locks () drops them in the opposite order.
 */
179 static void
180 acquire_gc_locks (void)
181 {
182         LOCK_INTERRUPTION;
183         mono_thread_info_suspend_lock ();
184 }
185
/*
 * Releases the locks taken by acquire_gc_locks (), in reverse order: the
 * thread-info suspend lock first, then UNLOCK_INTERRUPTION.  Called at the end
 * of sgen_client_restart_world ().
 */
186 static void
187 release_gc_locks (void)
188 {
189         mono_thread_info_suspend_unlock ();
190         UNLOCK_INTERRUPTION;
191 }
192
/* Timestamp taken by sgen_client_stop_world () just before threads are suspended. */
193 static TV_DECLARE (stop_world_time);
/* Largest pause (stop_world_time to end of restart) computed so far by sgen_client_restart_world (). */
194 static unsigned long max_pause_usec = 0;
195
/* Accumulated handshake times; registered as counters by mono_sgen_init_stw (). */
196 static guint64 time_stop_world;
197 static guint64 time_restart_world;
198
199 /* LOCKING: assumes the GC lock is held */
200 void
201 sgen_client_stop_world (int generation)
202 {
203         TV_DECLARE (end_handshake);
204
205         /* notify the profiler of the leftovers */
206         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
207         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
208                 mono_sgen_gc_event_moves ();
209
210         acquire_gc_locks ();
211
212         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
213         sgen_process_togglerefs ();
214
215         update_current_thread_stack (&generation);
216
217         sgen_global_stop_count++;
218         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
219         TV_GETTIME (stop_world_time);
220
221         if (mono_thread_info_unified_management_enabled ()) {
222                 sgen_unified_suspend_stop_world ();
223         } else {
224                 int count, dead;
225                 count = sgen_thread_handshake (TRUE);
226                 dead = restart_threads_until_none_in_managed_allocator ();
227                 if (count < dead)
228                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
229         }
230
231         SGEN_LOG (3, "world stopped");
232
233         TV_GETTIME (end_handshake);
234         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
235
236         sgen_memgov_collection_start (generation);
237         if (sgen_need_bridge_processing ())
238                 sgen_bridge_reset_data ();
239 }
240
241 /* LOCKING: assumes the GC lock is held */
242 void
243 sgen_client_restart_world (int generation, gint64 *stw_time)
244 {
245         TV_DECLARE (end_sw);
246         TV_DECLARE (start_handshake);
247         unsigned long usec;
248
249         /* notify the profiler of the leftovers */
250         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
251         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
252                 mono_sgen_gc_event_moves ();
253
254         FOREACH_THREAD (info) {
255                 info->client_info.stack_start = NULL;
256                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
257         } FOREACH_THREAD_END
258
259         TV_GETTIME (start_handshake);
260
261         if (mono_thread_info_unified_management_enabled ())
262                 sgen_unified_suspend_restart_world ();
263         else
264                 sgen_thread_handshake (FALSE);
265
266         TV_GETTIME (end_sw);
267         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
268         usec = TV_ELAPSED (stop_world_time, end_sw);
269         max_pause_usec = MAX (usec, max_pause_usec);
270         end_of_last_stw = end_sw;
271
272         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
273
274         /*
275          * We must release the thread info suspend lock after doing
276          * the thread handshake.  Otherwise, if the GC stops the world
277          * and a thread is in the process of starting up, but has not
278          * yet registered (it's not in the thread_list), it is
279          * possible that the thread does register while the world is
280          * stopped.  When restarting the GC will then try to restart
281          * said thread, but since it never got the suspend signal, it
282          * cannot answer the restart signal, so a deadlock results.
283          */
284         release_gc_locks ();
285
286         *stw_time = usec;
287 }
288
289 void
290 mono_sgen_init_stw (void)
291 {
292         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
293         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
294 }
295
296 /* Unified suspend code */
297
298 static gboolean
299 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
300 {
301         /*
302         A thread explicitly asked to be skiped because it holds no managed state.
303         This is used by TP and finalizer threads.
304         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
305         */
306         if (info->client_info.gc_disabled) {
307                 if (reason)
308                         *reason = 1;
309                 return FALSE;
310         }
311
312         /*
313         We have detected that this thread is failing/dying, ignore it.
314         FIXME: can't we merge this with thread_is_dying?
315         */
316         if (info->client_info.skip) {
317                 if (reason)
318                         *reason = 2;
319                 return FALSE;
320         }
321
322         /*
323         Suspending the current thread will deadlock us, bad idea.
324         */
325         if (info == mono_thread_info_current ()) {
326                 if (reason)
327                         *reason = 3;
328                 return FALSE;
329         }
330
331         /*
332         We can't suspend the workers that will do all the heavy lifting.
333         FIXME Use some state bit in SgenThreadInfo for this.
334         */
335         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
336                 if (reason)
337                         *reason = 4;
338                 return FALSE;
339         }
340
341         /*
342         The thread has signaled that it started to detach, ignore it.
343         FIXME: can't we merge this with skip
344         */
345         if (!mono_thread_info_is_live (info)) {
346                 if (reason)
347                         *reason = 5;
348                 return FALSE;
349         }
350
351         return TRUE;
352 }
353
354 static void
355 sgen_unified_suspend_stop_world (void)
356 {
357         int restart_counter;
358         int sleep_duration = -1;
359
360         mono_threads_begin_global_suspend ();
361         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
362
363         FOREACH_THREAD (info) {
364                 int reason;
365                 info->client_info.skip = FALSE;
366                 info->client_info.suspend_done = FALSE;
367                 if (sgen_is_thread_in_current_stw (info, &reason)) {
368                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
369                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
370                 } else {
371                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
372                 }
373         } FOREACH_THREAD_END
374
375         mono_thread_info_current ()->client_info.suspend_done = TRUE;
376         mono_threads_wait_pending_operations ();
377
378         for (;;) {
379                 restart_counter = 0;
380                 FOREACH_THREAD (info) {
381                         int reason = 0;
382                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
383                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
384                                 continue;
385                         }
386
387                         /*
388                         All threads that reach here are pristine suspended. This means the following:
389
390                         - We haven't accepted the previous suspend as good.
391                         - We haven't gave up on it for this STW (it's either bad or asked not to)
392                         */
393                         if (mono_thread_info_in_critical_location (info)) {
394                                 gboolean res;
395                                 gint suspend_count = mono_thread_info_suspend_count (info);
396                                 if (!(suspend_count == 1))
397                                         g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
398                                 res = mono_thread_info_begin_resume (info);
399                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
400                                 if (res)
401                                         ++restart_counter;
402                                 else
403                                         info->client_info.skip = TRUE;
404                         } else {
405                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
406                                 g_assert (!info->client_info.in_critical_region);
407                                 info->client_info.suspend_done = TRUE;
408                         }
409                 } FOREACH_THREAD_END
410
411                 if (restart_counter == 0)
412                         break;
413                 mono_threads_wait_pending_operations ();
414
415                 if (sleep_duration < 0) {
416                         mono_thread_info_yield ();
417                         sleep_duration = 0;
418                 } else {
419                         g_usleep (sleep_duration);
420                         sleep_duration += 10;
421                 }
422
423                 FOREACH_THREAD (info) {
424                         int reason = 0;
425                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
426                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
427                                 continue;
428                         }
429
430                         if (mono_thread_info_is_running (info)) {
431                                 gboolean res = mono_thread_info_begin_suspend (info);
432                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
433                                 if (!res)
434                                         info->client_info.skip = TRUE;
435                         }
436                 } FOREACH_THREAD_END
437
438                 mono_threads_wait_pending_operations ();
439         }
440
441         FOREACH_THREAD (info) {
442                 int reason = 0;
443                 if (sgen_is_thread_in_current_stw (info, &reason)) {
444                         MonoThreadUnwindState *state;
445
446                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
447                         g_assert (info->client_info.suspend_done);
448
449                         state = mono_thread_info_get_suspend_state (info);
450
451                         info->client_info.ctx = state->ctx;
452
453                         if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
454                                 /* thread is starting or detaching, nothing to scan here */
455                                 info->client_info.stopped_domain = NULL;
456                                 info->client_info.stopped_ip = NULL;
457                                 info->client_info.stack_start = NULL;
458                         } else {
459                                 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
460                                 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
461                                 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
462                                 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
463
464                                 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
465                                 if (!info->client_info.stack_start
466                                          || info->client_info.stack_start < info->client_info.stack_start_limit
467                                          || info->client_info.stack_start >= info->client_info.stack_end) {
468                                         g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
469                                                 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
470                                 }
471                         }
472
473                         binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
474                 } else {
475                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
476                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
477                 }
478         } FOREACH_THREAD_END
479 }
480
481 static void
482 sgen_unified_suspend_restart_world (void)
483 {
484         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
485         FOREACH_THREAD (info) {
486                 int reason = 0;
487                 if (sgen_is_thread_in_current_stw (info, &reason)) {
488                         g_assert (mono_thread_info_begin_resume (info));
489                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
490
491                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
492                 } else {
493                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
494                 }
495         } FOREACH_THREAD_END
496
497         mono_threads_wait_pending_operations ();
498         mono_threads_end_global_suspend ();
499 }
500 #endif