51575b724a22e10f6d7e8e9425b27706630f4a9f
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Library General Public
15  * License 2.0 as published by the Free Software Foundation;
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Library General Public License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License 2.0 along with this library; if not, write to the Free
24  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include "config.h"
28 #ifdef HAVE_SGEN_GC
29
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "metadata/sgen-client.h"
36 #include "utils/mono-time.h"
37
/* Short file-local aliases for the SGen timing macros. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Unified-suspend entry points; both are defined near the end of this file. */
static int sgen_unified_suspend_stop_world (void);
static int sgen_unified_suspend_restart_world (void);
44
45 inline static void*
46 align_pointer (void *ptr)
47 {
48         mword p = (mword)ptr;
49         p += sizeof (gpointer) - 1;
50         p &= ~ (sizeof (gpointer) - 1);
51         return (void*)p;
52 }
53
54 #ifdef USE_MONO_CTX
55 static MonoContext cur_thread_ctx;
56 #else
57 static mword cur_thread_regs [ARCH_NUM_REGS];
58 #endif
59
60 static void
61 update_current_thread_stack (void *start)
62 {
63         int stack_guard = 0;
64 #if !defined(USE_MONO_CTX)
65         void *reg_ptr = cur_thread_regs;
66 #endif
67         SgenThreadInfo *info = mono_thread_info_current ();
68         
69         info->stack_start = align_pointer (&stack_guard);
70         g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
71 #ifdef USE_MONO_CTX
72         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
73         memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
74         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
75                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
76 #else
77         ARCH_STORE_REGS (reg_ptr);
78         memcpy (&info->regs, reg_ptr, sizeof (info->regs));
79         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
80                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
81 #endif
82 }
83
84 static gboolean
85 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
86 {
87         MonoJitInfo *ji;
88
89         if (!mono_thread_internal_current ())
90                 /* Happens during thread attach */
91                 return FALSE;
92
93         if (!ip || !domain)
94                 return FALSE;
95         if (!sgen_has_critical_method ())
96                 return FALSE;
97
98         /*
99          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
100          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
101          * to register the jit info for all GC critical methods after they are JITted/loaded.
102          */
103         ji = mono_jit_info_table_find_internal (domain, ip, FALSE, FALSE);
104         if (!ji)
105                 return FALSE;
106
107         return sgen_is_critical_method (mono_jit_info_get_method (ji));
108 }
109
110 static int
111 restart_threads_until_none_in_managed_allocator (void)
112 {
113         SgenThreadInfo *info;
114         int num_threads_died = 0;
115         int sleep_duration = -1;
116
117         for (;;) {
118                 int restart_count = 0, restarted_count = 0;
119                 /* restart all threads that stopped in the
120                    allocator */
121                 FOREACH_THREAD_SAFE (info) {
122                         gboolean result;
123                         if (info->skip || info->gc_disabled || info->suspend_done)
124                                 continue;
125                         if (mono_thread_info_is_live (info) && (!info->stack_start || info->in_critical_region || info->client_info.info.inside_critical_region ||
126                                         is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) {
127                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
128                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
129                                 result = sgen_resume_thread (info);
130                                 if (result) {
131                                         ++restart_count;
132                                 } else {
133                                         info->skip = 1;
134                                 }
135                         } else {
136                                 /* we set the stopped_ip to
137                                    NULL for threads which
138                                    we're not restarting so
139                                    that we can easily identify
140                                    the others */
141                                 info->stopped_ip = NULL;
142                                 info->stopped_domain = NULL;
143                                 info->suspend_done = TRUE;
144                         }
145                 } END_FOREACH_THREAD_SAFE
146                 /* if no threads were restarted, we're done */
147                 if (restart_count == 0)
148                         break;
149
150                 /* wait for the threads to signal their restart */
151                 sgen_wait_for_suspend_ack (restart_count);
152
153                 if (sleep_duration < 0) {
154                         mono_thread_info_yield ();
155                         sleep_duration = 0;
156                 } else {
157                         g_usleep (sleep_duration);
158                         sleep_duration += 10;
159                 }
160
161                 /* stop them again */
162                 FOREACH_THREAD (info) {
163                         gboolean result;
164                         if (info->skip || info->stopped_ip == NULL)
165                                 continue;
166                         result = sgen_suspend_thread (info);
167
168                         if (result) {
169                                 ++restarted_count;
170                         } else {
171                                 info->skip = 1;
172                         }
173                 } END_FOREACH_THREAD
174                 /* some threads might have died */
175                 num_threads_died += restart_count - restarted_count;
176                 /* wait for the threads to signal their suspension
177                    again */
178                 sgen_wait_for_suspend_ack (restarted_count);
179         }
180
181         return num_threads_died;
182 }
183
184 static void
185 acquire_gc_locks (void)
186 {
187         LOCK_INTERRUPTION;
188         mono_thread_info_suspend_lock ();
189 }
190
191 static void
192 release_gc_locks (void)
193 {
194         mono_thread_info_suspend_unlock ();
195         UNLOCK_INTERRUPTION;
196 }
197
198 static TV_DECLARE (stop_world_time);
199 static unsigned long max_pause_usec = 0;
200
201 static guint64 time_stop_world;
202 static guint64 time_restart_world;
203
204 /* LOCKING: assumes the GC lock is held */
205 int
206 sgen_client_stop_world (int generation)
207 {
208         TV_DECLARE (end_handshake);
209         int count, dead;
210
211         /* notify the profiler of the leftovers */
212         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
213         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
214                 mono_sgen_gc_event_moves ();
215
216         acquire_gc_locks ();
217
218         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
219         sgen_process_togglerefs ();
220
221         update_current_thread_stack (&count);
222
223         sgen_global_stop_count++;
224         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
225         TV_GETTIME (stop_world_time);
226
227         if (mono_thread_info_unified_management_enabled ()) {
228                 count = sgen_unified_suspend_stop_world ();
229         } else {
230                 count = sgen_thread_handshake (TRUE);
231                 dead = restart_threads_until_none_in_managed_allocator ();
232                 if (count < dead)
233                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
234                 count -= dead;
235         }
236
237         SGEN_LOG (3, "world stopped %d thread(s)", count);
238
239         TV_GETTIME (end_handshake);
240         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
241
242         sgen_memgov_collection_start (generation);
243         if (sgen_need_bridge_processing ())
244                 sgen_bridge_reset_data ();
245
246         return count;
247 }
248
249 /* LOCKING: assumes the GC lock is held */
250 int
251 sgen_client_restart_world (int generation, GGTimingInfo *timing)
252 {
253         int count;
254         SgenThreadInfo *info;
255         TV_DECLARE (end_sw);
256         TV_DECLARE (start_handshake);
257         TV_DECLARE (end_bridge);
258         unsigned long usec, bridge_usec;
259
260         /* notify the profiler of the leftovers */
261         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
262         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
263                 mono_sgen_gc_event_moves ();
264
265         FOREACH_THREAD (info) {
266                 info->stack_start = NULL;
267 #ifdef USE_MONO_CTX
268                 memset (&info->ctx, 0, sizeof (MonoContext));
269 #else
270                 memset (&info->regs, 0, sizeof (info->regs));
271 #endif
272         } END_FOREACH_THREAD
273
274         TV_GETTIME (start_handshake);
275
276         if (mono_thread_info_unified_management_enabled ())
277                 count = sgen_unified_suspend_restart_world ();
278         else
279                 count = sgen_thread_handshake (FALSE);
280
281
282         TV_GETTIME (end_sw);
283         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
284         usec = TV_ELAPSED (stop_world_time, end_sw);
285         max_pause_usec = MAX (usec, max_pause_usec);
286
287         SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
288
289         /*
290          * We must release the thread info suspend lock after doing
291          * the thread handshake.  Otherwise, if the GC stops the world
292          * and a thread is in the process of starting up, but has not
293          * yet registered (it's not in the thread_list), it is
294          * possible that the thread does register while the world is
295          * stopped.  When restarting the GC will then try to restart
296          * said thread, but since it never got the suspend signal, it
297          * cannot answer the restart signal, so a deadlock results.
298          */
299         release_gc_locks ();
300
301         TV_GETTIME (end_bridge);
302         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
303
304         if (timing) {
305                 timing [0].stw_time = usec;
306                 timing [0].bridge_time = bridge_usec;
307         }
308
309         return count;
310 }
311
312 void
313 mono_sgen_init_stw (void)
314 {
315         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
316         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
317 }
318
319 /* Unified suspend code */
320
321 static gboolean
322 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
323 {
324         /*
325         A thread explicitly asked to be skiped because it holds no managed state.
326         This is used by TP and finalizer threads.
327         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
328         */
329         if (info->gc_disabled) {
330                 return FALSE;
331         }
332
333         /*
334         We have detected that this thread is failing/dying, ignore it.
335         FIXME: can't we merge this with thread_is_dying?
336         */
337         if (info->skip) {
338                 return FALSE;
339         }
340
341         /*
342         Suspending the current thread will deadlock us, bad idea.
343         */
344         if (info == mono_thread_info_current ()) {
345                 return FALSE;
346         }
347
348         /*
349         We can't suspend the workers that will do all the heavy lifting.
350         FIXME Use some state bit in SgenThreadInfo for this.
351         */
352         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
353                 return FALSE;
354         }
355
356         /*
357         The thread has signaled that it started to detach, ignore it.
358         FIXME: can't we merge this with skip
359         */
360         if (!mono_thread_info_is_live (info)) {
361                 return FALSE;
362         }
363
364         return TRUE;
365 }
366
367 static void
368 update_sgen_info (SgenThreadInfo *info)
369 {
370         char *stack_start;
371
372         /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
373         info->stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
374         info->stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
375         stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
376
377         /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
378         if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
379                 g_error ("BAD STACK");
380
381         info->stack_start = stack_start;
382 #ifdef USE_MONO_CTX
383         info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
384 #else
385         g_assert_not_reached ();
386 #endif
387 }
388
389 static int
390 sgen_unified_suspend_stop_world (void)
391 {
392         int restart_counter;
393         SgenThreadInfo *info;
394         int count = 0;
395         int sleep_duration = -1;
396
397         mono_threads_begin_global_suspend ();
398         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
399
400         FOREACH_THREAD_SAFE (info) {
401                 info->skip = FALSE;
402                 info->suspend_done = FALSE;
403                 if (sgen_is_thread_in_current_stw (info)) {
404                         info->skip = !mono_thread_info_begin_suspend (info, FALSE);
405                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
406                         if (!info->skip)
407                                 ++count;
408                 } else {
409                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
410                 }
411         } END_FOREACH_THREAD_SAFE
412
413         mono_thread_info_current ()->suspend_done = TRUE;
414         mono_threads_wait_pending_operations ();
415
416         for (;;) {
417                 restart_counter = 0;
418                 FOREACH_THREAD_SAFE (info) {
419                         if (info->suspend_done || !sgen_is_thread_in_current_stw (info)) {
420                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->suspend_done, !sgen_is_thread_in_current_stw (info));
421                                 continue;
422                         }
423
424                         /*
425                         All threads that reach here are pristine suspended. This means the following:
426
427                         - We haven't accepted the previous suspend as good.
428                         - We haven't gave up on it for this STW (it's either bad or asked not to)
429                         */
430                         if (!mono_threads_core_check_suspend_result (info)) {
431                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
432                                 info->skip = TRUE;
433                         } else if (mono_thread_info_in_critical_location (info)) {
434                                 gboolean res;
435                                 g_assert (mono_thread_info_suspend_count (info) == 1);
436                                 res = mono_thread_info_begin_resume (info);
437                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
438                                 if (res)
439                                         ++restart_counter;
440                                 else
441                                         info->skip = TRUE;
442                         } else {
443                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
444                                 g_assert (!info->in_critical_region);
445                                 info->suspend_done = TRUE;
446                         }
447                 } END_FOREACH_THREAD_SAFE
448
449                 if (restart_counter == 0)
450                         break;
451                 mono_threads_wait_pending_operations ();
452
453                 if (sleep_duration < 0) {
454 #ifdef HOST_WIN32
455                         SwitchToThread ();
456 #else
457                         sched_yield ();
458 #endif
459                         sleep_duration = 0;
460                 } else {
461                         g_usleep (sleep_duration);
462                         sleep_duration += 10;
463                 }
464
465                 FOREACH_THREAD_SAFE (info) {
466                         if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
467                                 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
468                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
469                                 if (!res)
470                                         info->skip = TRUE;
471                         }
472                 } END_FOREACH_THREAD_SAFE
473
474                 mono_threads_wait_pending_operations ();
475         }
476
477         FOREACH_THREAD_SAFE (info) {
478                 if (sgen_is_thread_in_current_stw (info)) {
479                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
480                         g_assert (info->suspend_done);
481                         update_sgen_info (info);
482                 } else {
483                         g_assert (!info->suspend_done || info == mono_thread_info_current ());
484                 }
485         } END_FOREACH_THREAD_SAFE
486
487         return count;
488 }
489
490 static int
491 sgen_unified_suspend_restart_world (void)
492 {
493         SgenThreadInfo *info;
494         int count = 0;
495
496         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
497         FOREACH_THREAD_SAFE (info) {
498                 if (sgen_is_thread_in_current_stw (info)) {
499                         g_assert (mono_thread_info_begin_resume (info));
500                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
501                         ++count;
502                 } else {
503                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
504                 }
505         } END_FOREACH_THREAD_SAFE
506
507         mono_threads_wait_pending_operations ();
508         mono_threads_end_global_suspend ();
509         return count;
510 }
511 #endif