Merge pull request #1222 from LogosBible/uri-trycreate
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Library General Public
15  * License 2.0 as published by the Free Software Foundation;
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Library General Public License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License 2.0 along with this library; if not, write to the Free
24  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include "config.h"
28 #ifdef HAVE_SGEN_GC
29
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "utils/mono-time.h"
36 #include "utils/dtrace.h"
37 #include "utils/mono-counters.h"
38 #include "utils/mono-threads.h"
39
40 static gboolean world_is_stopped = FALSE;
41
42 #define TV_DECLARE SGEN_TV_DECLARE
43 #define TV_GETTIME SGEN_TV_GETTIME
44 #define TV_ELAPSED SGEN_TV_ELAPSED
45
46 static int sgen_unified_suspend_restart_world (void);
47 static int sgen_unified_suspend_stop_world (void);
48
49 inline static void*
50 align_pointer (void *ptr)
51 {
52         mword p = (mword)ptr;
53         p += sizeof (gpointer) - 1;
54         p &= ~ (sizeof (gpointer) - 1);
55         return (void*)p;
56 }
57
58 #ifdef USE_MONO_CTX
59 static MonoContext cur_thread_ctx;
60 #else
61 static mword cur_thread_regs [ARCH_NUM_REGS];
62 #endif
63
64 static void
65 update_current_thread_stack (void *start)
66 {
67         int stack_guard = 0;
68 #if !defined(USE_MONO_CTX)
69         void *reg_ptr = cur_thread_regs;
70 #endif
71         SgenThreadInfo *info = mono_thread_info_current ();
72         
73         info->stack_start = align_pointer (&stack_guard);
74         g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
75 #ifdef USE_MONO_CTX
76         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
77         memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
78         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
79                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
80 #else
81         ARCH_STORE_REGS (reg_ptr);
82         memcpy (&info->regs, reg_ptr, sizeof (info->regs));
83         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
84                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
85 #endif
86 }
87
88 static gboolean
89 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
90 {
91         MonoJitInfo *ji;
92
93         if (!mono_thread_internal_current ())
94                 /* Happens during thread attach */
95                 return FALSE;
96
97         if (!ip || !domain)
98                 return FALSE;
99         if (!sgen_has_critical_method ())
100                 return FALSE;
101
102         /*
103          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
104          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
105          * to register the jit info for all GC critical methods after they are JITted/loaded.
106          */
107         ji = mono_jit_info_table_find_internal (domain, ip, FALSE, FALSE);
108         if (!ji)
109                 return FALSE;
110
111         return sgen_is_critical_method (mono_jit_info_get_method (ji));
112 }
113
114 static int
115 restart_threads_until_none_in_managed_allocator (void)
116 {
117         SgenThreadInfo *info;
118         int num_threads_died = 0;
119         int sleep_duration = -1;
120
121         for (;;) {
122                 int restart_count = 0, restarted_count = 0;
123                 /* restart all threads that stopped in the
124                    allocator */
125                 FOREACH_THREAD_SAFE (info) {
126                         gboolean result;
127                         if (info->skip || info->gc_disabled || info->suspend_done)
128                                 continue;
129                         if (mono_thread_info_is_live (info) && (!info->stack_start || info->in_critical_region || info->info.inside_critical_region ||
130                                         is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) {
131                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
132                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->info.native_handle);
133                                 result = sgen_resume_thread (info);
134                                 if (result) {
135                                         ++restart_count;
136                                 } else {
137                                         info->skip = 1;
138                                 }
139                         } else {
140                                 /* we set the stopped_ip to
141                                    NULL for threads which
142                                    we're not restarting so
143                                    that we can easily identify
144                                    the others */
145                                 info->stopped_ip = NULL;
146                                 info->stopped_domain = NULL;
147                                 info->suspend_done = TRUE;
148                         }
149                 } END_FOREACH_THREAD_SAFE
150                 /* if no threads were restarted, we're done */
151                 if (restart_count == 0)
152                         break;
153
154                 /* wait for the threads to signal their restart */
155                 sgen_wait_for_suspend_ack (restart_count);
156
157                 if (sleep_duration < 0) {
158                         mono_thread_info_yield ();
159                         sleep_duration = 0;
160                 } else {
161                         g_usleep (sleep_duration);
162                         sleep_duration += 10;
163                 }
164
165                 /* stop them again */
166                 FOREACH_THREAD (info) {
167                         gboolean result;
168                         if (info->skip || info->stopped_ip == NULL)
169                                 continue;
170                         result = sgen_suspend_thread (info);
171
172                         if (result) {
173                                 ++restarted_count;
174                         } else {
175                                 info->skip = 1;
176                         }
177                 } END_FOREACH_THREAD
178                 /* some threads might have died */
179                 num_threads_died += restart_count - restarted_count;
180                 /* wait for the threads to signal their suspension
181                    again */
182                 sgen_wait_for_suspend_ack (restarted_count);
183         }
184
185         return num_threads_died;
186 }
187
188 static void
189 acquire_gc_locks (void)
190 {
191         LOCK_INTERRUPTION;
192         mono_thread_info_suspend_lock ();
193 }
194
195 static void
196 release_gc_locks (void)
197 {
198         mono_thread_info_suspend_unlock ();
199         UNLOCK_INTERRUPTION;
200 }
201
202 static void
203 count_cards (long long *major_total, long long *major_marked, long long *los_total, long long *los_marked)
204 {
205         sgen_get_major_collector ()->count_cards (major_total, major_marked);
206         sgen_los_count_cards (los_total, los_marked);
207 }
208
209 static TV_DECLARE (stop_world_time);
210 static unsigned long max_pause_usec = 0;
211
212 static guint64 time_stop_world;
213 static guint64 time_restart_world;
214
215 /* LOCKING: assumes the GC lock is held */
216 int
217 sgen_stop_world (int generation)
218 {
219         TV_DECLARE (end_handshake);
220         int count, dead;
221
222         SGEN_ASSERT (0, !world_is_stopped, "Why are we stopping a stopped world?");
223
224         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
225         MONO_GC_WORLD_STOP_BEGIN ();
226         binary_protocol_world_stopping (sgen_timestamp ());
227         acquire_gc_locks ();
228
229         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
230         sgen_process_togglerefs ();
231
232         update_current_thread_stack (&count);
233
234         sgen_global_stop_count++;
235         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
236         TV_GETTIME (stop_world_time);
237
238         if (mono_thread_info_unified_management_enabled ()) {
239                 count = sgen_unified_suspend_stop_world ();
240         } else {
241                 count = sgen_thread_handshake (TRUE);
242                 dead = restart_threads_until_none_in_managed_allocator ();
243                 if (count < dead)
244                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
245                 count -= dead;
246         }
247
248         world_is_stopped = TRUE;
249
250         SGEN_LOG (3, "world stopped %d thread(s)", count);
251         mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
252         MONO_GC_WORLD_STOP_END ();
253         if (binary_protocol_is_enabled ()) {
254                 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
255                 if (binary_protocol_is_heavy_enabled ())
256                         count_cards (&major_total, &major_marked, &los_total, &los_marked);
257                 binary_protocol_world_stopped (sgen_timestamp (), major_total, major_marked, los_total, los_marked);
258         }
259
260         TV_GETTIME (end_handshake);
261         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
262
263         sgen_memgov_collection_start (generation);
264         if (sgen_need_bridge_processing ())
265                 sgen_bridge_reset_data ();
266
267         return count;
268 }
269
270 /* LOCKING: assumes the GC lock is held */
271 int
272 sgen_restart_world (int generation, GGTimingInfo *timing)
273 {
274         int count;
275         SgenThreadInfo *info;
276         TV_DECLARE (end_sw);
277         TV_DECLARE (start_handshake);
278         TV_DECLARE (end_bridge);
279         unsigned long usec, bridge_usec;
280
281         SGEN_ASSERT (0, world_is_stopped, "Why are we restarting a running world?");
282
283         if (binary_protocol_is_enabled ()) {
284                 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
285                 if (binary_protocol_is_heavy_enabled ())
286                         count_cards (&major_total, &major_marked, &los_total, &los_marked);
287                 binary_protocol_world_restarting (generation, sgen_timestamp (), major_total, major_marked, los_total, los_marked);
288         }
289
290         /* notify the profiler of the leftovers */
291         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
292         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
293                 sgen_gc_event_moves ();
294         mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
295         MONO_GC_WORLD_RESTART_BEGIN (generation);
296         FOREACH_THREAD (info) {
297                 info->stack_start = NULL;
298 #ifdef USE_MONO_CTX
299                 memset (&info->ctx, 0, sizeof (MonoContext));
300 #else
301                 memset (&info->regs, 0, sizeof (info->regs));
302 #endif
303         } END_FOREACH_THREAD
304
305         TV_GETTIME (start_handshake);
306
307         if (mono_thread_info_unified_management_enabled ())
308                 count = sgen_unified_suspend_restart_world ();
309         else
310                 count = sgen_thread_handshake (FALSE);
311
312
313         TV_GETTIME (end_sw);
314         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
315         usec = TV_ELAPSED (stop_world_time, end_sw);
316         max_pause_usec = MAX (usec, max_pause_usec);
317
318         world_is_stopped = FALSE;
319
320         SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
321         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
322         MONO_GC_WORLD_RESTART_END (generation);
323         binary_protocol_world_restarted (generation, sgen_timestamp ());
324
325         /*
326          * We must release the thread info suspend lock after doing
327          * the thread handshake.  Otherwise, if the GC stops the world
328          * and a thread is in the process of starting up, but has not
329          * yet registered (it's not in the thread_list), it is
330          * possible that the thread does register while the world is
331          * stopped.  When restarting the GC will then try to restart
332          * said thread, but since it never got the suspend signal, it
333          * cannot answer the restart signal, so a deadlock results.
334          */
335         release_gc_locks ();
336
337         sgen_try_free_some_memory = TRUE;
338
339         if (sgen_need_bridge_processing ())
340                 sgen_bridge_processing_finish (generation);
341
342         TV_GETTIME (end_bridge);
343         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
344
345         if (timing) {
346                 timing [0].stw_time = usec;
347                 timing [0].bridge_time = bridge_usec;
348         }
349         
350         sgen_memgov_collection_end (generation, timing, timing ? 2 : 0);
351
352         return count;
353 }
354
355 gboolean
356 sgen_is_world_stopped (void)
357 {
358         return world_is_stopped;
359 }
360
361 void
362 sgen_init_stw (void)
363 {
364         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
365         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
366 }
367
368 /* Unified suspend code */
369
370 static gboolean
371 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
372 {
373         /*
374         A thread explicitly asked to be skiped because it holds no managed state.
375         This is used by TP and finalizer threads.
376         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
377         */
378         if (info->gc_disabled) {
379                 return FALSE;
380         }
381
382         /*
383         We have detected that this thread is failing/dying, ignore it.
384         FIXME: can't we merge this with thread_is_dying?
385         */
386         if (info->skip) {
387                 return FALSE;
388         }
389
390         /*
391         Suspending the current thread will deadlock us, bad idea.
392         */
393         if (info == mono_thread_info_current ()) {
394                 return FALSE;
395         }
396
397         /*
398         We can't suspend the workers that will do all the heavy lifting.
399         FIXME Use some state bit in SgenThreadInfo for this.
400         */
401         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
402                 return FALSE;
403         }
404
405         /*
406         The thread has signaled that it started to detach, ignore it.
407         FIXME: can't we merge this with skip
408         */
409         if (!mono_thread_info_is_live (info)) {
410                 return FALSE;
411         }
412
413         return TRUE;
414 }
415
416 static void
417 update_sgen_info (SgenThreadInfo *info)
418 {
419         char *stack_start;
420
421         /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
422         info->stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
423         info->stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
424         stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
425
426         /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
427         if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
428                 g_error ("BAD STACK");
429
430         info->stack_start = stack_start;
431 #ifdef USE_MONO_CTX
432         info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
433 #else
434         g_assert_not_reached ();
435 #endif
436 }
437
438 static int
439 sgen_unified_suspend_stop_world (void)
440 {
441         int restart_counter;
442         SgenThreadInfo *info;
443         int count = 0;
444         int sleep_duration = -1;
445
446         mono_threads_begin_global_suspend ();
447         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
448
449         FOREACH_THREAD_SAFE (info) {
450                 info->skip = FALSE;
451                 info->suspend_done = FALSE;
452                 if (sgen_is_thread_in_current_stw (info)) {
453                         info->skip = !mono_thread_info_begin_suspend (info, FALSE);
454                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
455                         if (!info->skip)
456                                 ++count;
457                 } else {
458                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
459                 }
460         } END_FOREACH_THREAD_SAFE
461
462         mono_thread_info_current ()->suspend_done = TRUE;
463         mono_threads_wait_pending_operations ();
464
465         for (;;) {
466                 restart_counter = 0;
467                 FOREACH_THREAD_SAFE (info) {
468                         if (info->suspend_done || !sgen_is_thread_in_current_stw (info)) {
469                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->suspend_done, !sgen_is_thread_in_current_stw (info));
470                                 continue;
471                         }
472
473                         /*
474                         All threads that reach here are pristine suspended. This means the following:
475
476                         - We haven't accepted the previous suspend as good.
477                         - We haven't gave up on it for this STW (it's either bad or asked not to)
478                         */
479                         if (!mono_threads_core_check_suspend_result (info)) {
480                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
481                                 info->skip = TRUE;
482                         } else if (mono_thread_info_in_critical_location (info)) {
483                                 gboolean res;
484                                 g_assert (mono_thread_info_suspend_count (info) == 1);
485                                 res = mono_thread_info_begin_resume (info);
486                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
487                                 if (res)
488                                         ++restart_counter;
489                                 else
490                                         info->skip = TRUE;
491                         } else {
492                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
493                                 g_assert (!info->in_critical_region);
494                                 info->suspend_done = TRUE;
495                         }
496                 } END_FOREACH_THREAD_SAFE
497
498                 if (restart_counter == 0)
499                         break;
500                 mono_threads_wait_pending_operations ();
501
502                 if (sleep_duration < 0) {
503 #ifdef HOST_WIN32
504                         SwitchToThread ();
505 #else
506                         sched_yield ();
507 #endif
508                         sleep_duration = 0;
509                 } else {
510                         g_usleep (sleep_duration);
511                         sleep_duration += 10;
512                 }
513
514                 FOREACH_THREAD_SAFE (info) {
515                         if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
516                                 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
517                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
518                                 if (!res)
519                                         info->skip = TRUE;
520                         }
521                 } END_FOREACH_THREAD_SAFE
522
523                 mono_threads_wait_pending_operations ();
524         }
525
526         FOREACH_THREAD_SAFE (info) {
527                 if (sgen_is_thread_in_current_stw (info)) {
528                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
529                         g_assert (info->suspend_done);
530                         update_sgen_info (info);
531                 } else {
532                         g_assert (!info->suspend_done || info == mono_thread_info_current ());
533                 }
534         } END_FOREACH_THREAD_SAFE
535
536         return count;
537 }
538
539 static int
540 sgen_unified_suspend_restart_world (void)
541 {
542         SgenThreadInfo *info;
543         int count = 0;
544
545         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
546         FOREACH_THREAD_SAFE (info) {
547                 if (sgen_is_thread_in_current_stw (info)) {
548                         g_assert (mono_thread_info_begin_resume (info));
549                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
550                         ++count;
551                 } else {
552                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
553                 }
554         } END_FOREACH_THREAD_SAFE
555
556         mono_threads_wait_pending_operations ();
557         mono_threads_end_global_suspend ();
558         return count;
559 }
560 #endif