Merge pull request #2933 from ludovic-henry/referencesource-monitor
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
32
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
35
36 unsigned int sgen_global_stop_count = 0;
37
38 inline static void*
39 align_pointer (void *ptr)
40 {
41         mword p = (mword)ptr;
42         p += sizeof (gpointer) - 1;
43         p &= ~ (sizeof (gpointer) - 1);
44         return (void*)p;
45 }
46
47 static MonoContext cur_thread_ctx;
48
49 static void
50 update_current_thread_stack (void *start)
51 {
52         int stack_guard = 0;
53         SgenThreadInfo *info = mono_thread_info_current ();
54         
55         info->client_info.stack_start = align_pointer (&stack_guard);
56         g_assert (info->client_info.stack_start);
57         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
58         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
59         memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
60         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
61                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
62 }
63
64 static gboolean
65 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
66 {
67         MonoJitInfo *ji;
68
69         if (!mono_thread_internal_current ())
70                 /* Happens during thread attach */
71                 return FALSE;
72
73         if (!ip || !domain)
74                 return FALSE;
75         if (!sgen_has_critical_method ())
76                 return FALSE;
77
78         /*
79          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
80          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
81          * to register the jit info for all GC critical methods after they are JITted/loaded.
82          */
83         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
84         if (!ji)
85                 return FALSE;
86
87         return sgen_is_critical_method (mono_jit_info_get_method (ji));
88 }
89
90 static int
91 restart_threads_until_none_in_managed_allocator (void)
92 {
93         int num_threads_died = 0;
94         int sleep_duration = -1;
95
96         for (;;) {
97                 int restart_count = 0, restarted_count = 0;
98                 /* restart all threads that stopped in the
99                    allocator */
100                 FOREACH_THREAD (info) {
101                         gboolean result;
102                         if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
103                                 continue;
104                         if (mono_thread_info_is_live (info) &&
105                                         (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
106                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
107                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
108                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
109                                 result = sgen_resume_thread (info);
110                                 if (result) {
111                                         ++restart_count;
112                                 } else {
113                                         info->client_info.skip = 1;
114                                 }
115                         } else {
116                                 /* we set the stopped_ip to
117                                    NULL for threads which
118                                    we're not restarting so
119                                    that we can easily identify
120                                    the others */
121                                 info->client_info.stopped_ip = NULL;
122                                 info->client_info.stopped_domain = NULL;
123                                 info->client_info.suspend_done = TRUE;
124                         }
125                 } FOREACH_THREAD_END
126                 /* if no threads were restarted, we're done */
127                 if (restart_count == 0)
128                         break;
129
130                 /* wait for the threads to signal their restart */
131                 sgen_wait_for_suspend_ack (restart_count);
132
133                 if (sleep_duration < 0) {
134                         mono_thread_info_yield ();
135                         sleep_duration = 0;
136                 } else {
137                         g_usleep (sleep_duration);
138                         sleep_duration += 10;
139                 }
140
141                 /* stop them again */
142                 FOREACH_THREAD (info) {
143                         gboolean result;
144                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
145                                 continue;
146                         result = sgen_suspend_thread (info);
147
148                         if (result) {
149                                 ++restarted_count;
150                         } else {
151                                 info->client_info.skip = 1;
152                         }
153                 } FOREACH_THREAD_END
154                 /* some threads might have died */
155                 num_threads_died += restart_count - restarted_count;
156                 /* wait for the threads to signal their suspension
157                    again */
158                 sgen_wait_for_suspend_ack (restarted_count);
159         }
160
161         return num_threads_died;
162 }
163
164 static void
165 acquire_gc_locks (void)
166 {
167         LOCK_INTERRUPTION;
168         mono_thread_info_suspend_lock ();
169 }
170
171 static void
172 release_gc_locks (void)
173 {
174         mono_thread_info_suspend_unlock ();
175         UNLOCK_INTERRUPTION;
176 }
177
178 static TV_DECLARE (stop_world_time);
179 static unsigned long max_pause_usec = 0;
180
181 static guint64 time_stop_world;
182 static guint64 time_restart_world;
183
184 /* LOCKING: assumes the GC lock is held */
185 void
186 sgen_client_stop_world (int generation)
187 {
188         TV_DECLARE (end_handshake);
189
190         /* notify the profiler of the leftovers */
191         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
192         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
193                 mono_sgen_gc_event_moves ();
194
195         acquire_gc_locks ();
196
197         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
198         sgen_process_togglerefs ();
199
200         update_current_thread_stack (&generation);
201
202         sgen_global_stop_count++;
203         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
204         TV_GETTIME (stop_world_time);
205
206         if (mono_thread_info_unified_management_enabled ()) {
207                 sgen_unified_suspend_stop_world ();
208         } else {
209                 int count, dead;
210                 count = sgen_thread_handshake (TRUE);
211                 dead = restart_threads_until_none_in_managed_allocator ();
212                 if (count < dead)
213                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
214         }
215
216         SGEN_LOG (3, "world stopped");
217
218         TV_GETTIME (end_handshake);
219         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
220
221         sgen_memgov_collection_start (generation);
222         if (sgen_need_bridge_processing ())
223                 sgen_bridge_reset_data ();
224 }
225
226 /* LOCKING: assumes the GC lock is held */
227 void
228 sgen_client_restart_world (int generation, GGTimingInfo *timing)
229 {
230         TV_DECLARE (end_sw);
231         TV_DECLARE (start_handshake);
232         TV_DECLARE (end_bridge);
233         unsigned long usec, bridge_usec;
234
235         /* notify the profiler of the leftovers */
236         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
237         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
238                 mono_sgen_gc_event_moves ();
239
240         FOREACH_THREAD (info) {
241                 info->client_info.stack_start = NULL;
242                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
243         } FOREACH_THREAD_END
244
245         TV_GETTIME (start_handshake);
246
247         if (mono_thread_info_unified_management_enabled ())
248                 sgen_unified_suspend_restart_world ();
249         else
250                 sgen_thread_handshake (FALSE);
251
252         TV_GETTIME (end_sw);
253         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
254         usec = TV_ELAPSED (stop_world_time, end_sw);
255         max_pause_usec = MAX (usec, max_pause_usec);
256
257         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
258
259         /*
260          * We must release the thread info suspend lock after doing
261          * the thread handshake.  Otherwise, if the GC stops the world
262          * and a thread is in the process of starting up, but has not
263          * yet registered (it's not in the thread_list), it is
264          * possible that the thread does register while the world is
265          * stopped.  When restarting the GC will then try to restart
266          * said thread, but since it never got the suspend signal, it
267          * cannot answer the restart signal, so a deadlock results.
268          */
269         release_gc_locks ();
270
271         TV_GETTIME (end_bridge);
272         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
273
274         if (timing) {
275                 timing [0].stw_time = usec;
276                 timing [0].bridge_time = bridge_usec;
277         }
278 }
279
280 void
281 mono_sgen_init_stw (void)
282 {
283         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
284         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
285 }
286
287 /* Unified suspend code */
288
289 static gboolean
290 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
291 {
292         /*
293         A thread explicitly asked to be skiped because it holds no managed state.
294         This is used by TP and finalizer threads.
295         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
296         */
297         if (info->client_info.gc_disabled) {
298                 if (reason)
299                         *reason = 1;
300                 return FALSE;
301         }
302
303         /*
304         We have detected that this thread is failing/dying, ignore it.
305         FIXME: can't we merge this with thread_is_dying?
306         */
307         if (info->client_info.skip) {
308                 if (reason)
309                         *reason = 2;
310                 return FALSE;
311         }
312
313         /*
314         Suspending the current thread will deadlock us, bad idea.
315         */
316         if (info == mono_thread_info_current ()) {
317                 if (reason)
318                         *reason = 3;
319                 return FALSE;
320         }
321
322         /*
323         We can't suspend the workers that will do all the heavy lifting.
324         FIXME Use some state bit in SgenThreadInfo for this.
325         */
326         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
327                 if (reason)
328                         *reason = 4;
329                 return FALSE;
330         }
331
332         /*
333         The thread has signaled that it started to detach, ignore it.
334         FIXME: can't we merge this with skip
335         */
336         if (!mono_thread_info_is_live (info)) {
337                 if (reason)
338                         *reason = 5;
339                 return FALSE;
340         }
341
342         return TRUE;
343 }
344
345 static void
346 sgen_unified_suspend_stop_world (void)
347 {
348         int restart_counter;
349         int sleep_duration = -1;
350
351         mono_threads_begin_global_suspend ();
352         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
353
354         FOREACH_THREAD (info) {
355                 int reason;
356                 info->client_info.skip = FALSE;
357                 info->client_info.suspend_done = FALSE;
358                 if (sgen_is_thread_in_current_stw (info, &reason)) {
359                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
360                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
361                 } else {
362                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
363                 }
364         } FOREACH_THREAD_END
365
366         mono_thread_info_current ()->client_info.suspend_done = TRUE;
367         mono_threads_wait_pending_operations ();
368
369         for (;;) {
370                 restart_counter = 0;
371                 FOREACH_THREAD (info) {
372                         int reason = 0;
373                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
374                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
375                                 continue;
376                         }
377
378                         /*
379                         All threads that reach here are pristine suspended. This means the following:
380
381                         - We haven't accepted the previous suspend as good.
382                         - We haven't gave up on it for this STW (it's either bad or asked not to)
383                         */
384                         if (mono_thread_info_in_critical_location (info)) {
385                                 gboolean res;
386                                 gint suspend_count = mono_thread_info_suspend_count (info);
387                                 if (!(suspend_count == 1))
388                                         g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
389                                 res = mono_thread_info_begin_resume (info);
390                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
391                                 if (res)
392                                         ++restart_counter;
393                                 else
394                                         info->client_info.skip = TRUE;
395                         } else {
396                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
397                                 g_assert (!info->client_info.in_critical_region);
398                                 info->client_info.suspend_done = TRUE;
399                         }
400                 } FOREACH_THREAD_END
401
402                 if (restart_counter == 0)
403                         break;
404                 mono_threads_wait_pending_operations ();
405
406                 if (sleep_duration < 0) {
407                         mono_thread_info_yield ();
408                         sleep_duration = 0;
409                 } else {
410                         g_usleep (sleep_duration);
411                         sleep_duration += 10;
412                 }
413
414                 FOREACH_THREAD (info) {
415                         int reason = 0;
416                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
417                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
418                                 continue;
419                         }
420
421                         if (mono_thread_info_is_running (info)) {
422                                 gboolean res = mono_thread_info_begin_suspend (info);
423                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
424                                 if (!res)
425                                         info->client_info.skip = TRUE;
426                         }
427                 } FOREACH_THREAD_END
428
429                 mono_threads_wait_pending_operations ();
430         }
431
432         FOREACH_THREAD (info) {
433                 int reason = 0;
434                 if (sgen_is_thread_in_current_stw (info, &reason)) {
435                         MonoThreadUnwindState *state;
436
437                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
438                         g_assert (info->client_info.suspend_done);
439
440                         state = mono_thread_info_get_suspend_state (info);
441
442                         info->client_info.ctx = state->ctx;
443
444                         if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
445                                 /* thread is starting or detaching, nothing to scan here */
446                                 info->client_info.stopped_domain = NULL;
447                                 info->client_info.stopped_ip = NULL;
448                                 info->client_info.stack_start = NULL;
449                         } else {
450                                 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
451                                 info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
452                                 info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
453                                 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
454
455                                 /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
456                                 if (!info->client_info.stack_start
457                                          || info->client_info.stack_start < info->client_info.stack_start_limit
458                                          || info->client_info.stack_start >= info->client_info.stack_end) {
459                                         g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
460                                                 info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
461                                 }
462                         }
463
464                         binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
465                 } else {
466                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
467                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
468                 }
469         } FOREACH_THREAD_END
470 }
471
472 static void
473 sgen_unified_suspend_restart_world (void)
474 {
475         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
476         FOREACH_THREAD (info) {
477                 int reason = 0;
478                 if (sgen_is_thread_in_current_stw (info, &reason)) {
479                         g_assert (mono_thread_info_begin_resume (info));
480                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
481
482                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
483                 } else {
484                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
485                 }
486         } FOREACH_THREAD_END
487
488         mono_threads_wait_pending_operations ();
489         mono_threads_end_global_suspend ();
490 }
491 #endif