Merge pull request #2803 from BrzVlad/feature-conc-pinned-scan
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27
/* Short local aliases for the sgen timing macros used in this file. */
#define TV_DECLARE	SGEN_TV_DECLARE
#define TV_GETTIME	SGEN_TV_GETTIME
#define TV_ELAPSED	SGEN_TV_ELAPSED

/* Unified-suspend entry points; their definitions are at the end of this file. */
static void sgen_unified_suspend_stop_world (void);
static void sgen_unified_suspend_restart_world (void);

/* Incremented by sgen_client_stop_world () every time the world is stopped. */
unsigned int sgen_global_stop_count = 0;
36
37 inline static void*
38 align_pointer (void *ptr)
39 {
40         mword p = (mword)ptr;
41         p += sizeof (gpointer) - 1;
42         p &= ~ (sizeof (gpointer) - 1);
43         return (void*)p;
44 }
45
46 #ifdef USE_MONO_CTX
47 static MonoContext cur_thread_ctx;
48 #else
49 static mword cur_thread_regs [ARCH_NUM_REGS];
50 #endif
51
52 static void
53 update_current_thread_stack (void *start)
54 {
55         int stack_guard = 0;
56 #if !defined(USE_MONO_CTX)
57         void *reg_ptr = cur_thread_regs;
58 #endif
59         SgenThreadInfo *info = mono_thread_info_current ();
60         
61         info->client_info.stack_start = align_pointer (&stack_guard);
62         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
63 #ifdef USE_MONO_CTX
64         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
65         memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
66         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
67                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
68 #else
69         ARCH_STORE_REGS (reg_ptr);
70         memcpy (&info->client_info.regs, reg_ptr, sizeof (info->client_info.regs));
71         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
72                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, NULL);
73 #endif
74 }
75
76 static gboolean
77 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
78 {
79         MonoJitInfo *ji;
80
81         if (!mono_thread_internal_current ())
82                 /* Happens during thread attach */
83                 return FALSE;
84
85         if (!ip || !domain)
86                 return FALSE;
87         if (!sgen_has_critical_method ())
88                 return FALSE;
89
90         /*
91          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
92          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
93          * to register the jit info for all GC critical methods after they are JITted/loaded.
94          */
95         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
96         if (!ji)
97                 return FALSE;
98
99         return sgen_is_critical_method (mono_jit_info_get_method (ji));
100 }
101
102 static int
103 restart_threads_until_none_in_managed_allocator (void)
104 {
105         int num_threads_died = 0;
106         int sleep_duration = -1;
107
108         for (;;) {
109                 int restart_count = 0, restarted_count = 0;
110                 /* restart all threads that stopped in the
111                    allocator */
112                 FOREACH_THREAD (info) {
113                         gboolean result;
114                         if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
115                                 continue;
116                         if (mono_thread_info_is_live (info) &&
117                                         (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
118                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
119                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
120                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
121                                 result = sgen_resume_thread (info);
122                                 if (result) {
123                                         ++restart_count;
124                                 } else {
125                                         info->client_info.skip = 1;
126                                 }
127                         } else {
128                                 /* we set the stopped_ip to
129                                    NULL for threads which
130                                    we're not restarting so
131                                    that we can easily identify
132                                    the others */
133                                 info->client_info.stopped_ip = NULL;
134                                 info->client_info.stopped_domain = NULL;
135                                 info->client_info.suspend_done = TRUE;
136                         }
137                 } FOREACH_THREAD_END
138                 /* if no threads were restarted, we're done */
139                 if (restart_count == 0)
140                         break;
141
142                 /* wait for the threads to signal their restart */
143                 sgen_wait_for_suspend_ack (restart_count);
144
145                 if (sleep_duration < 0) {
146                         mono_thread_info_yield ();
147                         sleep_duration = 0;
148                 } else {
149                         g_usleep (sleep_duration);
150                         sleep_duration += 10;
151                 }
152
153                 /* stop them again */
154                 FOREACH_THREAD (info) {
155                         gboolean result;
156                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
157                                 continue;
158                         result = sgen_suspend_thread (info);
159
160                         if (result) {
161                                 ++restarted_count;
162                         } else {
163                                 info->client_info.skip = 1;
164                         }
165                 } FOREACH_THREAD_END
166                 /* some threads might have died */
167                 num_threads_died += restart_count - restarted_count;
168                 /* wait for the threads to signal their suspension
169                    again */
170                 sgen_wait_for_suspend_ack (restarted_count);
171         }
172
173         return num_threads_died;
174 }
175
176 static void
177 acquire_gc_locks (void)
178 {
179         LOCK_INTERRUPTION;
180         mono_thread_info_suspend_lock ();
181 }
182
183 static void
184 release_gc_locks (void)
185 {
186         mono_thread_info_suspend_unlock ();
187         UNLOCK_INTERRUPTION;
188 }
189
190 static TV_DECLARE (stop_world_time);
191 static unsigned long max_pause_usec = 0;
192
193 static guint64 time_stop_world;
194 static guint64 time_restart_world;
195
196 /* LOCKING: assumes the GC lock is held */
197 void
198 sgen_client_stop_world (int generation)
199 {
200         TV_DECLARE (end_handshake);
201
202         /* notify the profiler of the leftovers */
203         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
204         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
205                 mono_sgen_gc_event_moves ();
206
207         acquire_gc_locks ();
208
209         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
210         sgen_process_togglerefs ();
211
212         update_current_thread_stack (&generation);
213
214         sgen_global_stop_count++;
215         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
216         TV_GETTIME (stop_world_time);
217
218         if (mono_thread_info_unified_management_enabled ()) {
219                 sgen_unified_suspend_stop_world ();
220         } else {
221                 int count, dead;
222                 count = sgen_thread_handshake (TRUE);
223                 dead = restart_threads_until_none_in_managed_allocator ();
224                 if (count < dead)
225                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
226         }
227
228         SGEN_LOG (3, "world stopped");
229
230         TV_GETTIME (end_handshake);
231         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
232
233         sgen_memgov_collection_start (generation);
234         if (sgen_need_bridge_processing ())
235                 sgen_bridge_reset_data ();
236 }
237
238 /* LOCKING: assumes the GC lock is held */
239 void
240 sgen_client_restart_world (int generation, GGTimingInfo *timing)
241 {
242         TV_DECLARE (end_sw);
243         TV_DECLARE (start_handshake);
244         TV_DECLARE (end_bridge);
245         unsigned long usec, bridge_usec;
246
247         /* notify the profiler of the leftovers */
248         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
249         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
250                 mono_sgen_gc_event_moves ();
251
252         FOREACH_THREAD (info) {
253                 info->client_info.stack_start = NULL;
254 #ifdef USE_MONO_CTX
255                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
256 #else
257                 memset (&info->client_info.regs, 0, sizeof (info->client_info.regs));
258 #endif
259         } FOREACH_THREAD_END
260
261         TV_GETTIME (start_handshake);
262
263         if (mono_thread_info_unified_management_enabled ())
264                 sgen_unified_suspend_restart_world ();
265         else
266                 sgen_thread_handshake (FALSE);
267
268         TV_GETTIME (end_sw);
269         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
270         usec = TV_ELAPSED (stop_world_time, end_sw);
271         max_pause_usec = MAX (usec, max_pause_usec);
272
273         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
274
275         /*
276          * We must release the thread info suspend lock after doing
277          * the thread handshake.  Otherwise, if the GC stops the world
278          * and a thread is in the process of starting up, but has not
279          * yet registered (it's not in the thread_list), it is
280          * possible that the thread does register while the world is
281          * stopped.  When restarting the GC will then try to restart
282          * said thread, but since it never got the suspend signal, it
283          * cannot answer the restart signal, so a deadlock results.
284          */
285         release_gc_locks ();
286
287         TV_GETTIME (end_bridge);
288         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
289
290         if (timing) {
291                 timing [0].stw_time = usec;
292                 timing [0].bridge_time = bridge_usec;
293         }
294 }
295
296 void
297 mono_sgen_init_stw (void)
298 {
299         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
300         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
301 }
302
303 /* Unified suspend code */
304
305 static gboolean
306 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
307 {
308         /*
309         A thread explicitly asked to be skiped because it holds no managed state.
310         This is used by TP and finalizer threads.
311         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
312         */
313         if (info->client_info.gc_disabled) {
314                 return FALSE;
315         }
316
317         /*
318         We have detected that this thread is failing/dying, ignore it.
319         FIXME: can't we merge this with thread_is_dying?
320         */
321         if (info->client_info.skip) {
322                 return FALSE;
323         }
324
325         /*
326         Suspending the current thread will deadlock us, bad idea.
327         */
328         if (info == mono_thread_info_current ()) {
329                 return FALSE;
330         }
331
332         /*
333         We can't suspend the workers that will do all the heavy lifting.
334         FIXME Use some state bit in SgenThreadInfo for this.
335         */
336         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
337                 return FALSE;
338         }
339
340         /*
341         The thread has signaled that it started to detach, ignore it.
342         FIXME: can't we merge this with skip
343         */
344         if (!mono_thread_info_is_live (info)) {
345                 return FALSE;
346         }
347
348         return TRUE;
349 }
350
351 static void
352 update_sgen_info (SgenThreadInfo *info)
353 {
354         char *stack_start;
355
356         /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
357         info->client_info.stopped_domain = (MonoDomain *)mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
358         info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
359         stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
360
361         /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
362         if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
363                 g_error ("BAD STACK");
364
365         info->client_info.stack_start = stack_start;
366 #ifdef USE_MONO_CTX
367         info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
368 #else
369         g_assert_not_reached ();
370 #endif
371 }
372
373 static void
374 sgen_unified_suspend_stop_world (void)
375 {
376         int restart_counter;
377         int sleep_duration = -1;
378
379         mono_threads_begin_global_suspend ();
380         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
381
382         FOREACH_THREAD (info) {
383                 info->client_info.skip = FALSE;
384                 info->client_info.suspend_done = FALSE;
385                 if (sgen_is_thread_in_current_stw (info)) {
386                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
387                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
388                 } else {
389                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
390                 }
391         } FOREACH_THREAD_END
392
393         mono_thread_info_current ()->client_info.suspend_done = TRUE;
394         mono_threads_wait_pending_operations ();
395
396         for (;;) {
397                 restart_counter = 0;
398                 FOREACH_THREAD (info) {
399                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
400                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
401                                 continue;
402                         }
403
404                         /*
405                         All threads that reach here are pristine suspended. This means the following:
406
407                         - We haven't accepted the previous suspend as good.
408                         - We haven't gave up on it for this STW (it's either bad or asked not to)
409                         */
410                         if (!mono_thread_info_check_suspend_result (info)) {
411                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
412                                 info->client_info.skip = TRUE;
413                         } else if (mono_thread_info_in_critical_location (info)) {
414                                 gboolean res;
415                                 g_assert (mono_thread_info_suspend_count (info) == 1);
416                                 res = mono_thread_info_begin_resume (info);
417                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
418                                 if (res)
419                                         ++restart_counter;
420                                 else
421                                         info->client_info.skip = TRUE;
422                         } else {
423                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
424                                 g_assert (!info->client_info.in_critical_region);
425                                 info->client_info.suspend_done = TRUE;
426                         }
427                 } FOREACH_THREAD_END
428
429                 if (restart_counter == 0)
430                         break;
431                 mono_threads_wait_pending_operations ();
432
433                 if (sleep_duration < 0) {
434 #ifdef HOST_WIN32
435                         SwitchToThread ();
436 #else
437                         sched_yield ();
438 #endif
439                         sleep_duration = 0;
440                 } else {
441                         g_usleep (sleep_duration);
442                         sleep_duration += 10;
443                 }
444
445                 FOREACH_THREAD (info) {
446                         if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
447                                 gboolean res = mono_thread_info_begin_suspend (info);
448                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
449                                 if (!res)
450                                         info->client_info.skip = TRUE;
451                         }
452                 } FOREACH_THREAD_END
453
454                 mono_threads_wait_pending_operations ();
455         }
456
457         FOREACH_THREAD (info) {
458                 if (sgen_is_thread_in_current_stw (info)) {
459                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
460                         g_assert (info->client_info.suspend_done);
461                         update_sgen_info (info);
462                 } else {
463                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
464                 }
465         } FOREACH_THREAD_END
466 }
467
468 static void
469 sgen_unified_suspend_restart_world (void)
470 {
471         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
472         FOREACH_THREAD (info) {
473                 if (sgen_is_thread_in_current_stw (info)) {
474                         g_assert (mono_thread_info_begin_resume (info));
475                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
476                 } else {
477                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
478                 }
479         } FOREACH_THREAD_END
480
481         mono_threads_wait_pending_operations ();
482         mono_threads_end_global_suspend ();
483 }
484 #endif