Merge pull request #1624 from esdrubal/getprocesstimes
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Library General Public
15  * License 2.0 as published by the Free Software Foundation;
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Library General Public License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License 2.0 along with this library; if not, write to the Free
24  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include "config.h"
28 #ifdef HAVE_SGEN_GC
29
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/profiler-private.h"
34 #include "utils/mono-time.h"
35 #include "utils/dtrace.h"
36 #include "utils/mono-counters.h"
37 #include "utils/mono-threads.h"
38
/* File-local short names for the SGen timing macros. */
#define TV_DECLARE      SGEN_TV_DECLARE
#define TV_GETTIME      SGEN_TV_GETTIME
#define TV_ELAPSED      SGEN_TV_ELAPSED

/* Unified suspend entry points; both are defined at the end of this file. */
static int sgen_unified_suspend_stop_world (void);
static int sgen_unified_suspend_restart_world (void);
45
46 inline static void*
47 align_pointer (void *ptr)
48 {
49         mword p = (mword)ptr;
50         p += sizeof (gpointer) - 1;
51         p &= ~ (sizeof (gpointer) - 1);
52         return (void*)p;
53 }
54
55 #ifdef USE_MONO_CTX
56 static MonoContext cur_thread_ctx;
57 #else
58 static mword cur_thread_regs [ARCH_NUM_REGS];
59 #endif
60
61 static void
62 update_current_thread_stack (void *start)
63 {
64         int stack_guard = 0;
65 #if !defined(USE_MONO_CTX)
66         void *reg_ptr = cur_thread_regs;
67 #endif
68         SgenThreadInfo *info = mono_thread_info_current ();
69         
70         info->stack_start = align_pointer (&stack_guard);
71         g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
72 #ifdef USE_MONO_CTX
73         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
74         memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
75         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
76                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
77 #else
78         ARCH_STORE_REGS (reg_ptr);
79         memcpy (&info->regs, reg_ptr, sizeof (info->regs));
80         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
81                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
82 #endif
83 }
84
85 static gboolean
86 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
87 {
88         MonoJitInfo *ji;
89
90         if (!mono_thread_internal_current ())
91                 /* Happens during thread attach */
92                 return FALSE;
93
94         if (!ip || !domain)
95                 return FALSE;
96         if (!sgen_has_critical_method ())
97                 return FALSE;
98
99         /*
100          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
101          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
102          * to register the jit info for all GC critical methods after they are JITted/loaded.
103          */
104         ji = mono_jit_info_table_find_internal (domain, ip, FALSE);
105         if (!ji)
106                 return FALSE;
107
108         return sgen_is_critical_method (mono_jit_info_get_method (ji));
109 }
110
111 static int
112 restart_threads_until_none_in_managed_allocator (void)
113 {
114         SgenThreadInfo *info;
115         int num_threads_died = 0;
116         int sleep_duration = -1;
117
118         for (;;) {
119                 int restart_count = 0, restarted_count = 0;
120                 /* restart all threads that stopped in the
121                    allocator */
122                 FOREACH_THREAD_SAFE (info) {
123                         gboolean result;
124                         if (info->skip || info->gc_disabled || info->suspend_done)
125                                 continue;
126                         if (mono_thread_info_is_live (info) && (!info->stack_start || info->in_critical_region || info->info.inside_critical_region ||
127                                         is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) {
128                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
129                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->info.native_handle);
130                                 result = sgen_resume_thread (info);
131                                 if (result) {
132                                         ++restart_count;
133                                 } else {
134                                         info->skip = 1;
135                                 }
136                         } else {
137                                 /* we set the stopped_ip to
138                                    NULL for threads which
139                                    we're not restarting so
140                                    that we can easily identify
141                                    the others */
142                                 info->stopped_ip = NULL;
143                                 info->stopped_domain = NULL;
144                                 info->suspend_done = TRUE;
145                         }
146                 } END_FOREACH_THREAD_SAFE
147                 /* if no threads were restarted, we're done */
148                 if (restart_count == 0)
149                         break;
150
151                 /* wait for the threads to signal their restart */
152                 sgen_wait_for_suspend_ack (restart_count);
153
154                 if (sleep_duration < 0) {
155                         mono_thread_info_yield ();
156                         sleep_duration = 0;
157                 } else {
158                         g_usleep (sleep_duration);
159                         sleep_duration += 10;
160                 }
161
162                 /* stop them again */
163                 FOREACH_THREAD (info) {
164                         gboolean result;
165                         if (info->skip || info->stopped_ip == NULL)
166                                 continue;
167                         result = sgen_suspend_thread (info);
168
169                         if (result) {
170                                 ++restarted_count;
171                         } else {
172                                 info->skip = 1;
173                         }
174                 } END_FOREACH_THREAD
175                 /* some threads might have died */
176                 num_threads_died += restart_count - restarted_count;
177                 /* wait for the threads to signal their suspension
178                    again */
179                 sgen_wait_for_suspend_ack (restarted_count);
180         }
181
182         return num_threads_died;
183 }
184
185 static void
186 acquire_gc_locks (void)
187 {
188         LOCK_INTERRUPTION;
189         mono_thread_info_suspend_lock ();
190 }
191
192 static void
193 release_gc_locks (void)
194 {
195         mono_thread_info_suspend_unlock ();
196         UNLOCK_INTERRUPTION;
197 }
198
199 static void
200 count_cards (long long *major_total, long long *major_marked, long long *los_total, long long *los_marked)
201 {
202         sgen_get_major_collector ()->count_cards (major_total, major_marked);
203         sgen_los_count_cards (los_total, los_marked);
204 }
205
206 static TV_DECLARE (stop_world_time);
207 static unsigned long max_pause_usec = 0;
208
209 static guint64 time_stop_world;
210 static guint64 time_restart_world;
211
212 /* LOCKING: assumes the GC lock is held */
213 int
214 sgen_stop_world (int generation)
215 {
216         TV_DECLARE (end_handshake);
217         int count, dead;
218
219         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
220         MONO_GC_WORLD_STOP_BEGIN ();
221         binary_protocol_world_stopping (sgen_timestamp ());
222         acquire_gc_locks ();
223
224         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
225         sgen_process_togglerefs ();
226
227         update_current_thread_stack (&count);
228
229         sgen_global_stop_count++;
230         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
231         TV_GETTIME (stop_world_time);
232
233         if (mono_thread_info_unified_management_enabled ()) {
234                 count = sgen_unified_suspend_stop_world ();
235         } else {
236                 count = sgen_thread_handshake (TRUE);
237                 dead = restart_threads_until_none_in_managed_allocator ();
238                 if (count < dead)
239                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
240                 count -= dead;
241         }
242
243         SGEN_LOG (3, "world stopped %d thread(s)", count);
244         mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
245         MONO_GC_WORLD_STOP_END ();
246         if (binary_protocol_is_enabled ()) {
247                 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
248                 if (binary_protocol_is_heavy_enabled ())
249                         count_cards (&major_total, &major_marked, &los_total, &los_marked);
250                 binary_protocol_world_stopped (sgen_timestamp (), major_total, major_marked, los_total, los_marked);
251         }
252
253         TV_GETTIME (end_handshake);
254         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
255
256         sgen_memgov_collection_start (generation);
257         if (sgen_need_bridge_processing ())
258                 sgen_bridge_reset_data ();
259
260         return count;
261 }
262
263 /* LOCKING: assumes the GC lock is held */
264 int
265 sgen_restart_world (int generation, GGTimingInfo *timing)
266 {
267         int count;
268         SgenThreadInfo *info;
269         TV_DECLARE (end_sw);
270         TV_DECLARE (start_handshake);
271         TV_DECLARE (end_bridge);
272         unsigned long usec, bridge_usec;
273
274         if (binary_protocol_is_enabled ()) {
275                 long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
276                 if (binary_protocol_is_heavy_enabled ())
277                         count_cards (&major_total, &major_marked, &los_total, &los_marked);
278                 binary_protocol_world_restarting (generation, sgen_timestamp (), major_total, major_marked, los_total, los_marked);
279         }
280
281         /* notify the profiler of the leftovers */
282         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
283         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
284                 sgen_gc_event_moves ();
285         mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
286         MONO_GC_WORLD_RESTART_BEGIN (generation);
287         FOREACH_THREAD (info) {
288                 info->stack_start = NULL;
289 #ifdef USE_MONO_CTX
290                 memset (&info->ctx, 0, sizeof (MonoContext));
291 #else
292                 memset (&info->regs, 0, sizeof (info->regs));
293 #endif
294         } END_FOREACH_THREAD
295
296         TV_GETTIME (start_handshake);
297
298         if (mono_thread_info_unified_management_enabled ())
299                 count = sgen_unified_suspend_restart_world ();
300         else
301                 count = sgen_thread_handshake (FALSE);
302
303
304         TV_GETTIME (end_sw);
305         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
306         usec = TV_ELAPSED (stop_world_time, end_sw);
307         max_pause_usec = MAX (usec, max_pause_usec);
308         SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
309         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
310         MONO_GC_WORLD_RESTART_END (generation);
311         binary_protocol_world_restarted (generation, sgen_timestamp ());
312
313         /*
314          * We must release the thread info suspend lock after doing
315          * the thread handshake.  Otherwise, if the GC stops the world
316          * and a thread is in the process of starting up, but has not
317          * yet registered (it's not in the thread_list), it is
318          * possible that the thread does register while the world is
319          * stopped.  When restarting the GC will then try to restart
320          * said thread, but since it never got the suspend signal, it
321          * cannot answer the restart signal, so a deadlock results.
322          */
323         release_gc_locks ();
324
325         sgen_try_free_some_memory = TRUE;
326
327         if (sgen_need_bridge_processing ())
328                 sgen_bridge_processing_finish (generation);
329
330         TV_GETTIME (end_bridge);
331         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
332
333         if (timing) {
334                 timing [0].stw_time = usec;
335                 timing [0].bridge_time = bridge_usec;
336         }
337         
338         sgen_memgov_collection_end (generation, timing, timing ? 2 : 0);
339
340         return count;
341 }
342
343 void
344 sgen_init_stw (void)
345 {
346         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
347         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
348 }
349
350 /* Unified suspend code */
351
352 static gboolean
353 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
354 {
355         /*
356         A thread explicitly asked to be skiped because it holds no managed state.
357         This is used by TP and finalizer threads.
358         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
359         */
360         if (info->gc_disabled) {
361                 return FALSE;
362         }
363
364         /*
365         We have detected that this thread is failing/dying, ignore it.
366         FIXME: can't we merge this with thread_is_dying?
367         */
368         if (info->skip) {
369                 return FALSE;
370         }
371
372         /*
373         Suspending the current thread will deadlock us, bad idea.
374         */
375         if (info == mono_thread_info_current ()) {
376                 return FALSE;
377         }
378
379         /*
380         We can't suspend the workers that will do all the heavy lifting.
381         FIXME Use some state bit in SgenThreadInfo for this.
382         */
383         if (sgen_is_worker_thread (mono_thread_info_get_tid (info))) {
384                 return FALSE;
385         }
386
387         /*
388         The thread has signaled that it started to detach, ignore it.
389         FIXME: can't we merge this with skip
390         */
391         if (!mono_thread_info_is_live (info)) {
392                 return FALSE;
393         }
394
395         return TRUE;
396 }
397
398 static void
399 update_sgen_info (SgenThreadInfo *info)
400 {
401         char *stack_start;
402
403         /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
404         info->stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
405         info->stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
406         stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
407
408         /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
409         if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
410                 g_error ("BAD STACK");
411
412         info->stack_start = stack_start;
413 #ifdef USE_MONO_CTX
414         info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
415 #else
416         g_assert_not_reached ();
417 #endif
418 }
419
420 static int
421 sgen_unified_suspend_stop_world (void)
422 {
423         int restart_counter;
424         SgenThreadInfo *info;
425         int count = 0;
426         int sleep_duration = -1;
427
428         mono_threads_begin_global_suspend ();
429         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
430
431         FOREACH_THREAD_SAFE (info) {
432                 info->skip = FALSE;
433                 info->suspend_done = FALSE;
434                 if (sgen_is_thread_in_current_stw (info)) {
435                         info->skip = !mono_thread_info_begin_suspend (info, FALSE);
436                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
437                         if (!info->skip)
438                                 ++count;
439                 } else {
440                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
441                 }
442         } END_FOREACH_THREAD_SAFE
443
444         mono_thread_info_current ()->suspend_done = TRUE;
445         mono_threads_wait_pending_operations ();
446
447         for (;;) {
448                 restart_counter = 0;
449                 FOREACH_THREAD_SAFE (info) {
450                         if (info->suspend_done || !sgen_is_thread_in_current_stw (info)) {
451                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->suspend_done, !sgen_is_thread_in_current_stw (info));
452                                 continue;
453                         }
454
455                         /*
456                         All threads that reach here are pristine suspended. This means the following:
457
458                         - We haven't accepted the previous suspend as good.
459                         - We haven't gave up on it for this STW (it's either bad or asked not to)
460                         */
461                         if (!mono_threads_core_check_suspend_result (info)) {
462                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
463                                 info->skip = TRUE;
464                         } else if (mono_thread_info_in_critical_location (info)) {
465                                 gboolean res;
466                                 g_assert (mono_thread_info_suspend_count (info) == 1);
467                                 res = mono_thread_info_begin_resume (info);
468                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
469                                 if (res)
470                                         ++restart_counter;
471                                 else
472                                         info->skip = TRUE;
473                         } else {
474                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
475                                 g_assert (!info->in_critical_region);
476                                 info->suspend_done = TRUE;
477                         }
478                 } END_FOREACH_THREAD_SAFE
479
480                 if (restart_counter == 0)
481                         break;
482                 mono_threads_wait_pending_operations ();
483
484                 if (sleep_duration < 0) {
485 #ifdef HOST_WIN32
486                         SwitchToThread ();
487 #else
488                         sched_yield ();
489 #endif
490                         sleep_duration = 0;
491                 } else {
492                         g_usleep (sleep_duration);
493                         sleep_duration += 10;
494                 }
495
496                 FOREACH_THREAD_SAFE (info) {
497                         if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
498                                 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
499                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
500                                 if (!res)
501                                         info->skip = TRUE;
502                         }
503                 } END_FOREACH_THREAD_SAFE
504
505                 mono_threads_wait_pending_operations ();
506         }
507
508         FOREACH_THREAD_SAFE (info) {
509                 if (sgen_is_thread_in_current_stw (info)) {
510                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
511                         g_assert (info->suspend_done);
512                         update_sgen_info (info);
513                 } else {
514                         g_assert (!info->suspend_done || info == mono_thread_info_current ());
515                 }
516         } END_FOREACH_THREAD_SAFE
517
518         return count;
519 }
520
521 static int
522 sgen_unified_suspend_restart_world (void)
523 {
524         SgenThreadInfo *info;
525         int count = 0;
526
527         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
528         FOREACH_THREAD_SAFE (info) {
529                 if (sgen_is_thread_in_current_stw (info)) {
530                         g_assert (mono_thread_info_begin_resume (info));
531                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
532                         ++count;
533                 } else {
534                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
535                 }
536         } END_FOREACH_THREAD_SAFE
537
538         mono_threads_wait_pending_operations ();
539         mono_threads_end_global_suspend ();
540         return count;
541 }
542 #endif