[sgen] Add some assertion for stack start and end
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28
29 #define TV_DECLARE SGEN_TV_DECLARE
30 #define TV_GETTIME SGEN_TV_GETTIME
31 #define TV_ELAPSED SGEN_TV_ELAPSED
32
33 static void sgen_unified_suspend_restart_world (void);
34 static void sgen_unified_suspend_stop_world (void);
35
36 unsigned int sgen_global_stop_count = 0;
37
38 inline static void*
39 align_pointer (void *ptr)
40 {
41         mword p = (mword)ptr;
42         p += sizeof (gpointer) - 1;
43         p &= ~ (sizeof (gpointer) - 1);
44         return (void*)p;
45 }
46
47 static MonoContext cur_thread_ctx;
48
49 static void
50 update_current_thread_stack (void *start)
51 {
52         int stack_guard = 0;
53         SgenThreadInfo *info = mono_thread_info_current ();
54         
55         info->client_info.stack_start = align_pointer (&stack_guard);
56         g_assert (info->client_info.stack_start);
57         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
58         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
59         memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
60         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
61                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
62 }
63
64 static gboolean
65 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
66 {
67         MonoJitInfo *ji;
68
69         if (!mono_thread_internal_current ())
70                 /* Happens during thread attach */
71                 return FALSE;
72
73         if (!ip || !domain)
74                 return FALSE;
75         if (!sgen_has_critical_method ())
76                 return FALSE;
77
78         /*
79          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
80          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
81          * to register the jit info for all GC critical methods after they are JITted/loaded.
82          */
83         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
84         if (!ji)
85                 return FALSE;
86
87         return sgen_is_critical_method (mono_jit_info_get_method (ji));
88 }
89
90 static int
91 restart_threads_until_none_in_managed_allocator (void)
92 {
93         int num_threads_died = 0;
94         int sleep_duration = -1;
95
96         for (;;) {
97                 int restart_count = 0, restarted_count = 0;
98                 /* restart all threads that stopped in the
99                    allocator */
100                 FOREACH_THREAD (info) {
101                         gboolean result;
102                         if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
103                                 continue;
104                         if (mono_thread_info_is_live (info) &&
105                                         (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
106                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
107                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
108                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
109                                 result = sgen_resume_thread (info);
110                                 if (result) {
111                                         ++restart_count;
112                                 } else {
113                                         info->client_info.skip = 1;
114                                 }
115                         } else {
116                                 /* we set the stopped_ip to
117                                    NULL for threads which
118                                    we're not restarting so
119                                    that we can easily identify
120                                    the others */
121                                 info->client_info.stopped_ip = NULL;
122                                 info->client_info.stopped_domain = NULL;
123                                 info->client_info.suspend_done = TRUE;
124                         }
125                 } FOREACH_THREAD_END
126                 /* if no threads were restarted, we're done */
127                 if (restart_count == 0)
128                         break;
129
130                 /* wait for the threads to signal their restart */
131                 sgen_wait_for_suspend_ack (restart_count);
132
133                 if (sleep_duration < 0) {
134                         mono_thread_info_yield ();
135                         sleep_duration = 0;
136                 } else {
137                         g_usleep (sleep_duration);
138                         sleep_duration += 10;
139                 }
140
141                 /* stop them again */
142                 FOREACH_THREAD (info) {
143                         gboolean result;
144                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
145                                 continue;
146                         result = sgen_suspend_thread (info);
147
148                         if (result) {
149                                 ++restarted_count;
150                         } else {
151                                 info->client_info.skip = 1;
152                         }
153                 } FOREACH_THREAD_END
154                 /* some threads might have died */
155                 num_threads_died += restart_count - restarted_count;
156                 /* wait for the threads to signal their suspension
157                    again */
158                 sgen_wait_for_suspend_ack (restarted_count);
159         }
160
161         return num_threads_died;
162 }
163
164 static void
165 acquire_gc_locks (void)
166 {
167         LOCK_INTERRUPTION;
168         mono_thread_info_suspend_lock ();
169 }
170
171 static void
172 release_gc_locks (void)
173 {
174         mono_thread_info_suspend_unlock ();
175         UNLOCK_INTERRUPTION;
176 }
177
178 static TV_DECLARE (stop_world_time);
179 static unsigned long max_pause_usec = 0;
180
181 static guint64 time_stop_world;
182 static guint64 time_restart_world;
183
184 /* LOCKING: assumes the GC lock is held */
185 void
186 sgen_client_stop_world (int generation)
187 {
188         TV_DECLARE (end_handshake);
189
190         /* notify the profiler of the leftovers */
191         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
192         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
193                 mono_sgen_gc_event_moves ();
194
195         acquire_gc_locks ();
196
197         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
198         sgen_process_togglerefs ();
199
200         update_current_thread_stack (&generation);
201
202         sgen_global_stop_count++;
203         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
204         TV_GETTIME (stop_world_time);
205
206         if (mono_thread_info_unified_management_enabled ()) {
207                 sgen_unified_suspend_stop_world ();
208         } else {
209                 int count, dead;
210                 count = sgen_thread_handshake (TRUE);
211                 dead = restart_threads_until_none_in_managed_allocator ();
212                 if (count < dead)
213                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
214         }
215
216         SGEN_LOG (3, "world stopped");
217
218         TV_GETTIME (end_handshake);
219         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
220
221         sgen_memgov_collection_start (generation);
222         if (sgen_need_bridge_processing ())
223                 sgen_bridge_reset_data ();
224 }
225
226 /* LOCKING: assumes the GC lock is held */
227 void
228 sgen_client_restart_world (int generation, GGTimingInfo *timing)
229 {
230         TV_DECLARE (end_sw);
231         TV_DECLARE (start_handshake);
232         TV_DECLARE (end_bridge);
233         unsigned long usec, bridge_usec;
234
235         /* notify the profiler of the leftovers */
236         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
237         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
238                 mono_sgen_gc_event_moves ();
239
240         FOREACH_THREAD (info) {
241                 info->client_info.stack_start = NULL;
242                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
243         } FOREACH_THREAD_END
244
245         TV_GETTIME (start_handshake);
246
247         if (mono_thread_info_unified_management_enabled ())
248                 sgen_unified_suspend_restart_world ();
249         else
250                 sgen_thread_handshake (FALSE);
251
252         TV_GETTIME (end_sw);
253         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
254         usec = TV_ELAPSED (stop_world_time, end_sw);
255         max_pause_usec = MAX (usec, max_pause_usec);
256
257         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
258
259         /*
260          * We must release the thread info suspend lock after doing
261          * the thread handshake.  Otherwise, if the GC stops the world
262          * and a thread is in the process of starting up, but has not
263          * yet registered (it's not in the thread_list), it is
264          * possible that the thread does register while the world is
265          * stopped.  When restarting the GC will then try to restart
266          * said thread, but since it never got the suspend signal, it
267          * cannot answer the restart signal, so a deadlock results.
268          */
269         release_gc_locks ();
270
271         TV_GETTIME (end_bridge);
272         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
273
274         if (timing) {
275                 timing [0].stw_time = usec;
276                 timing [0].bridge_time = bridge_usec;
277         }
278 }
279
280 void
281 mono_sgen_init_stw (void)
282 {
283         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
284         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
285 }
286
287 /* Unified suspend code */
288
289 static gboolean
290 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
291 {
292         /*
293         A thread explicitly asked to be skiped because it holds no managed state.
294         This is used by TP and finalizer threads.
295         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
296         */
297         if (info->client_info.gc_disabled) {
298                 return FALSE;
299         }
300
301         /*
302         We have detected that this thread is failing/dying, ignore it.
303         FIXME: can't we merge this with thread_is_dying?
304         */
305         if (info->client_info.skip) {
306                 return FALSE;
307         }
308
309         /*
310         Suspending the current thread will deadlock us, bad idea.
311         */
312         if (info == mono_thread_info_current ()) {
313                 return FALSE;
314         }
315
316         /*
317         We can't suspend the workers that will do all the heavy lifting.
318         FIXME Use some state bit in SgenThreadInfo for this.
319         */
320         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
321                 return FALSE;
322         }
323
324         /*
325         The thread has signaled that it started to detach, ignore it.
326         FIXME: can't we merge this with skip
327         */
328         if (!mono_thread_info_is_live (info)) {
329                 return FALSE;
330         }
331
332         return TRUE;
333 }
334
335 static void
336 update_sgen_info (SgenThreadInfo *info)
337 {
338         char *stack_start;
339
340         /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
341         info->client_info.stopped_domain = (MonoDomain *)mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
342         info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
343         stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
344
345         /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
346         if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
347                 g_error ("BAD STACK");
348
349         info->client_info.stack_start = stack_start;
350         g_assert (info->client_info.stack_start);
351         info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
352 }
353
354 static void
355 sgen_unified_suspend_stop_world (void)
356 {
357         int restart_counter;
358         int sleep_duration = -1;
359
360         mono_threads_begin_global_suspend ();
361         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
362
363         FOREACH_THREAD (info) {
364                 info->client_info.skip = FALSE;
365                 info->client_info.suspend_done = FALSE;
366                 if (sgen_is_thread_in_current_stw (info)) {
367                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
368                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
369                 } else {
370                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
371                 }
372         } FOREACH_THREAD_END
373
374         mono_thread_info_current ()->client_info.suspend_done = TRUE;
375         mono_threads_wait_pending_operations ();
376
377         for (;;) {
378                 restart_counter = 0;
379                 FOREACH_THREAD (info) {
380                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
381                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
382                                 continue;
383                         }
384
385                         /*
386                         All threads that reach here are pristine suspended. This means the following:
387
388                         - We haven't accepted the previous suspend as good.
389                         - We haven't gave up on it for this STW (it's either bad or asked not to)
390                         */
391                         if (!mono_thread_info_check_suspend_result (info)) {
392                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
393                                 info->client_info.skip = TRUE;
394                         } else if (mono_thread_info_in_critical_location (info)) {
395                                 gboolean res;
396                                 g_assert (mono_thread_info_suspend_count (info) == 1);
397                                 res = mono_thread_info_begin_resume (info);
398                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
399                                 if (res)
400                                         ++restart_counter;
401                                 else
402                                         info->client_info.skip = TRUE;
403                         } else {
404                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
405                                 g_assert (!info->client_info.in_critical_region);
406                                 info->client_info.suspend_done = TRUE;
407                         }
408                 } FOREACH_THREAD_END
409
410                 if (restart_counter == 0)
411                         break;
412                 mono_threads_wait_pending_operations ();
413
414                 if (sleep_duration < 0) {
415                         mono_thread_info_yield ();
416                         sleep_duration = 0;
417                 } else {
418                         g_usleep (sleep_duration);
419                         sleep_duration += 10;
420                 }
421
422                 FOREACH_THREAD (info) {
423                         if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
424                                 gboolean res = mono_thread_info_begin_suspend (info);
425                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
426                                 if (!res)
427                                         info->client_info.skip = TRUE;
428                         }
429                 } FOREACH_THREAD_END
430
431                 mono_threads_wait_pending_operations ();
432         }
433
434         FOREACH_THREAD (info) {
435                 if (sgen_is_thread_in_current_stw (info)) {
436                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
437                         g_assert (info->client_info.suspend_done);
438                         update_sgen_info (info);
439                 } else {
440                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended\n", mono_thread_info_get_tid (info));
441                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
442                 }
443         } FOREACH_THREAD_END
444 }
445
446 static void
447 sgen_unified_suspend_restart_world (void)
448 {
449         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
450         FOREACH_THREAD (info) {
451                 if (sgen_is_thread_in_current_stw (info)) {
452                         g_assert (mono_thread_info_begin_resume (info));
453                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
454                 } else {
455                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
456                 }
457         } FOREACH_THREAD_END
458
459         mono_threads_wait_pending_operations ();
460         mono_threads_end_global_suspend ();
461 }
462 #endif