[sgen] Move `skip` and `in_critical_region` to Mono-specific code.
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Library General Public
15  * License 2.0 as published by the Free Software Foundation;
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Library General Public License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License 2.0 along with this library; if not, write to the Free
24  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include "config.h"
28 #ifdef HAVE_SGEN_GC
29
30 #include "metadata/sgen-gc.h"
31 #include "metadata/sgen-protocol.h"
32 #include "metadata/sgen-memory-governor.h"
33 #include "metadata/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "metadata/sgen-client.h"
36 #include "utils/mono-time.h"
37
/* Short local aliases for the SGen timing macros used in this file. */
#define TV_DECLARE	SGEN_TV_DECLARE
#define TV_GETTIME	SGEN_TV_GETTIME
#define TV_ELAPSED	SGEN_TV_ELAPSED

/* Unified-suspend implementations of stop/restart; defined further down in this file. */
static int sgen_unified_suspend_stop_world (void);
static int sgen_unified_suspend_restart_world (void);
44
45 inline static void*
46 align_pointer (void *ptr)
47 {
48         mword p = (mword)ptr;
49         p += sizeof (gpointer) - 1;
50         p &= ~ (sizeof (gpointer) - 1);
51         return (void*)p;
52 }
53
54 #ifdef USE_MONO_CTX
55 static MonoContext cur_thread_ctx;
56 #else
57 static mword cur_thread_regs [ARCH_NUM_REGS];
58 #endif
59
60 static void
61 update_current_thread_stack (void *start)
62 {
63         int stack_guard = 0;
64 #if !defined(USE_MONO_CTX)
65         void *reg_ptr = cur_thread_regs;
66 #endif
67         SgenThreadInfo *info = mono_thread_info_current ();
68         
69         info->stack_start = align_pointer (&stack_guard);
70         g_assert (info->stack_start >= info->stack_start_limit && info->stack_start < info->stack_end);
71 #ifdef USE_MONO_CTX
72         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
73         memcpy (&info->ctx, &cur_thread_ctx, sizeof (MonoContext));
74         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
75                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, &info->ctx);
76 #else
77         ARCH_STORE_REGS (reg_ptr);
78         memcpy (&info->regs, reg_ptr, sizeof (info->regs));
79         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
80                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->runtime_data, NULL, NULL);
81 #endif
82 }
83
84 static gboolean
85 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
86 {
87         MonoJitInfo *ji;
88
89         if (!mono_thread_internal_current ())
90                 /* Happens during thread attach */
91                 return FALSE;
92
93         if (!ip || !domain)
94                 return FALSE;
95         if (!sgen_has_critical_method ())
96                 return FALSE;
97
98         /*
99          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
100          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
101          * to register the jit info for all GC critical methods after they are JITted/loaded.
102          */
103         ji = mono_jit_info_table_find_internal (domain, ip, FALSE, FALSE);
104         if (!ji)
105                 return FALSE;
106
107         return sgen_is_critical_method (mono_jit_info_get_method (ji));
108 }
109
110 static int
111 restart_threads_until_none_in_managed_allocator (void)
112 {
113         SgenThreadInfo *info;
114         int num_threads_died = 0;
115         int sleep_duration = -1;
116
117         for (;;) {
118                 int restart_count = 0, restarted_count = 0;
119                 /* restart all threads that stopped in the
120                    allocator */
121                 FOREACH_THREAD_SAFE (info) {
122                         gboolean result;
123                         if (info->client_info.skip || info->gc_disabled || info->client_info.suspend_done)
124                                 continue;
125                         if (mono_thread_info_is_live (info) &&
126                                         (!info->stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
127                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
128                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
129                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
130                                 result = sgen_resume_thread (info);
131                                 if (result) {
132                                         ++restart_count;
133                                 } else {
134                                         info->client_info.skip = 1;
135                                 }
136                         } else {
137                                 /* we set the stopped_ip to
138                                    NULL for threads which
139                                    we're not restarting so
140                                    that we can easily identify
141                                    the others */
142                                 info->client_info.stopped_ip = NULL;
143                                 info->client_info.stopped_domain = NULL;
144                                 info->client_info.suspend_done = TRUE;
145                         }
146                 } END_FOREACH_THREAD_SAFE
147                 /* if no threads were restarted, we're done */
148                 if (restart_count == 0)
149                         break;
150
151                 /* wait for the threads to signal their restart */
152                 sgen_wait_for_suspend_ack (restart_count);
153
154                 if (sleep_duration < 0) {
155                         mono_thread_info_yield ();
156                         sleep_duration = 0;
157                 } else {
158                         g_usleep (sleep_duration);
159                         sleep_duration += 10;
160                 }
161
162                 /* stop them again */
163                 FOREACH_THREAD (info) {
164                         gboolean result;
165                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
166                                 continue;
167                         result = sgen_suspend_thread (info);
168
169                         if (result) {
170                                 ++restarted_count;
171                         } else {
172                                 info->client_info.skip = 1;
173                         }
174                 } END_FOREACH_THREAD
175                 /* some threads might have died */
176                 num_threads_died += restart_count - restarted_count;
177                 /* wait for the threads to signal their suspension
178                    again */
179                 sgen_wait_for_suspend_ack (restarted_count);
180         }
181
182         return num_threads_died;
183 }
184
185 static void
186 acquire_gc_locks (void)
187 {
188         LOCK_INTERRUPTION;
189         mono_thread_info_suspend_lock ();
190 }
191
192 static void
193 release_gc_locks (void)
194 {
195         mono_thread_info_suspend_unlock ();
196         UNLOCK_INTERRUPTION;
197 }
198
199 static TV_DECLARE (stop_world_time);
200 static unsigned long max_pause_usec = 0;
201
202 static guint64 time_stop_world;
203 static guint64 time_restart_world;
204
205 /* LOCKING: assumes the GC lock is held */
206 int
207 sgen_client_stop_world (int generation)
208 {
209         TV_DECLARE (end_handshake);
210         int count, dead;
211
212         /* notify the profiler of the leftovers */
213         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
214         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
215                 mono_sgen_gc_event_moves ();
216
217         acquire_gc_locks ();
218
219         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
220         sgen_process_togglerefs ();
221
222         update_current_thread_stack (&count);
223
224         sgen_global_stop_count++;
225         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer)mono_native_thread_id_get ());
226         TV_GETTIME (stop_world_time);
227
228         if (mono_thread_info_unified_management_enabled ()) {
229                 count = sgen_unified_suspend_stop_world ();
230         } else {
231                 count = sgen_thread_handshake (TRUE);
232                 dead = restart_threads_until_none_in_managed_allocator ();
233                 if (count < dead)
234                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
235                 count -= dead;
236         }
237
238         SGEN_LOG (3, "world stopped %d thread(s)", count);
239
240         TV_GETTIME (end_handshake);
241         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
242
243         sgen_memgov_collection_start (generation);
244         if (sgen_need_bridge_processing ())
245                 sgen_bridge_reset_data ();
246
247         return count;
248 }
249
250 /* LOCKING: assumes the GC lock is held */
251 int
252 sgen_client_restart_world (int generation, GGTimingInfo *timing)
253 {
254         int count;
255         SgenThreadInfo *info;
256         TV_DECLARE (end_sw);
257         TV_DECLARE (start_handshake);
258         TV_DECLARE (end_bridge);
259         unsigned long usec, bridge_usec;
260
261         /* notify the profiler of the leftovers */
262         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
263         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
264                 mono_sgen_gc_event_moves ();
265
266         FOREACH_THREAD (info) {
267                 info->stack_start = NULL;
268 #ifdef USE_MONO_CTX
269                 memset (&info->ctx, 0, sizeof (MonoContext));
270 #else
271                 memset (&info->regs, 0, sizeof (info->regs));
272 #endif
273         } END_FOREACH_THREAD
274
275         TV_GETTIME (start_handshake);
276
277         if (mono_thread_info_unified_management_enabled ())
278                 count = sgen_unified_suspend_restart_world ();
279         else
280                 count = sgen_thread_handshake (FALSE);
281
282
283         TV_GETTIME (end_sw);
284         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
285         usec = TV_ELAPSED (stop_world_time, end_sw);
286         max_pause_usec = MAX (usec, max_pause_usec);
287
288         SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
289
290         /*
291          * We must release the thread info suspend lock after doing
292          * the thread handshake.  Otherwise, if the GC stops the world
293          * and a thread is in the process of starting up, but has not
294          * yet registered (it's not in the thread_list), it is
295          * possible that the thread does register while the world is
296          * stopped.  When restarting the GC will then try to restart
297          * said thread, but since it never got the suspend signal, it
298          * cannot answer the restart signal, so a deadlock results.
299          */
300         release_gc_locks ();
301
302         TV_GETTIME (end_bridge);
303         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
304
305         if (timing) {
306                 timing [0].stw_time = usec;
307                 timing [0].bridge_time = bridge_usec;
308         }
309
310         return count;
311 }
312
313 void
314 mono_sgen_init_stw (void)
315 {
316         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
317         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
318 }
319
320 /* Unified suspend code */
321
322 static gboolean
323 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
324 {
325         /*
326         A thread explicitly asked to be skiped because it holds no managed state.
327         This is used by TP and finalizer threads.
328         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
329         */
330         if (info->gc_disabled) {
331                 return FALSE;
332         }
333
334         /*
335         We have detected that this thread is failing/dying, ignore it.
336         FIXME: can't we merge this with thread_is_dying?
337         */
338         if (info->client_info.skip) {
339                 return FALSE;
340         }
341
342         /*
343         Suspending the current thread will deadlock us, bad idea.
344         */
345         if (info == mono_thread_info_current ()) {
346                 return FALSE;
347         }
348
349         /*
350         We can't suspend the workers that will do all the heavy lifting.
351         FIXME Use some state bit in SgenThreadInfo for this.
352         */
353         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
354                 return FALSE;
355         }
356
357         /*
358         The thread has signaled that it started to detach, ignore it.
359         FIXME: can't we merge this with skip
360         */
361         if (!mono_thread_info_is_live (info)) {
362                 return FALSE;
363         }
364
365         return TRUE;
366 }
367
368 static void
369 update_sgen_info (SgenThreadInfo *info)
370 {
371         char *stack_start;
372
373         /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
374         info->client_info.stopped_domain = mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
375         info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
376         stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
377
378         /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
379         if (stack_start < (char*)info->stack_start_limit || stack_start >= (char*)info->stack_end)
380                 g_error ("BAD STACK");
381
382         info->stack_start = stack_start;
383 #ifdef USE_MONO_CTX
384         info->ctx = mono_thread_info_get_suspend_state (info)->ctx;
385 #else
386         g_assert_not_reached ();
387 #endif
388 }
389
390 static int
391 sgen_unified_suspend_stop_world (void)
392 {
393         int restart_counter;
394         SgenThreadInfo *info;
395         int count = 0;
396         int sleep_duration = -1;
397
398         mono_threads_begin_global_suspend ();
399         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
400
401         FOREACH_THREAD_SAFE (info) {
402                 info->client_info.skip = FALSE;
403                 info->client_info.suspend_done = FALSE;
404                 if (sgen_is_thread_in_current_stw (info)) {
405                         info->client_info.skip = !mono_thread_info_begin_suspend (info, FALSE);
406                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
407                         if (!info->client_info.skip)
408                                 ++count;
409                 } else {
410                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->skip);
411                 }
412         } END_FOREACH_THREAD_SAFE
413
414         mono_thread_info_current ()->client_info.suspend_done = TRUE;
415         mono_threads_wait_pending_operations ();
416
417         for (;;) {
418                 restart_counter = 0;
419                 FOREACH_THREAD_SAFE (info) {
420                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
421                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
422                                 continue;
423                         }
424
425                         /*
426                         All threads that reach here are pristine suspended. This means the following:
427
428                         - We haven't accepted the previous suspend as good.
429                         - We haven't gave up on it for this STW (it's either bad or asked not to)
430                         */
431                         if (!mono_threads_core_check_suspend_result (info)) {
432                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
433                                 info->client_info.skip = TRUE;
434                         } else if (mono_thread_info_in_critical_location (info)) {
435                                 gboolean res;
436                                 g_assert (mono_thread_info_suspend_count (info) == 1);
437                                 res = mono_thread_info_begin_resume (info);
438                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
439                                 if (res)
440                                         ++restart_counter;
441                                 else
442                                         info->client_info.skip = TRUE;
443                         } else {
444                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
445                                 g_assert (!info->client_info.in_critical_region);
446                                 info->client_info.suspend_done = TRUE;
447                         }
448                 } END_FOREACH_THREAD_SAFE
449
450                 if (restart_counter == 0)
451                         break;
452                 mono_threads_wait_pending_operations ();
453
454                 if (sleep_duration < 0) {
455 #ifdef HOST_WIN32
456                         SwitchToThread ();
457 #else
458                         sched_yield ();
459 #endif
460                         sleep_duration = 0;
461                 } else {
462                         g_usleep (sleep_duration);
463                         sleep_duration += 10;
464                 }
465
466                 FOREACH_THREAD_SAFE (info) {
467                         if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
468                                 gboolean res = mono_thread_info_begin_suspend (info, FALSE);
469                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
470                                 if (!res)
471                                         info->client_info.skip = TRUE;
472                         }
473                 } END_FOREACH_THREAD_SAFE
474
475                 mono_threads_wait_pending_operations ();
476         }
477
478         FOREACH_THREAD_SAFE (info) {
479                 if (sgen_is_thread_in_current_stw (info)) {
480                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
481                         g_assert (info->client_info.suspend_done);
482                         update_sgen_info (info);
483                 } else {
484                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
485                 }
486         } END_FOREACH_THREAD_SAFE
487
488         return count;
489 }
490
491 static int
492 sgen_unified_suspend_restart_world (void)
493 {
494         SgenThreadInfo *info;
495         int count = 0;
496
497         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
498         FOREACH_THREAD_SAFE (info) {
499                 if (sgen_is_thread_in_current_stw (info)) {
500                         g_assert (mono_thread_info_begin_resume (info));
501                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
502                         ++count;
503                 } else {
504                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
505                 }
506         } END_FOREACH_THREAD_SAFE
507
508         mono_threads_wait_pending_operations ();
509         mono_threads_end_global_suspend ();
510         return count;
511 }
512 #endif