Merge pull request #2698 from esdrubal/iosxmlarray
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Library General Public
15  * License 2.0 as published by the Free Software Foundation;
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Library General Public License for more details.
21  *
22  * You should have received a copy of the GNU Library General Public
23  * License 2.0 along with this library; if not, write to the Free
24  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include "config.h"
28 #ifdef HAVE_SGEN_GC
29
30 #include "sgen/sgen-gc.h"
31 #include "sgen/sgen-protocol.h"
32 #include "sgen/sgen-memory-governor.h"
33 #include "sgen/sgen-thread-pool.h"
34 #include "metadata/profiler-private.h"
35 #include "sgen/sgen-client.h"
36 #include "metadata/sgen-bridge-internals.h"
37 #include "metadata/gc-internals.h"
38
/* Short local aliases for the SGen timing macros used in this file. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Unified-suspend entry points; both are defined near the end of this file. */
static void sgen_unified_suspend_stop_world (void);
static void sgen_unified_suspend_restart_world (void);

/* Incremented by sgen_client_stop_world () every time it stops the world. */
unsigned int sgen_global_stop_count = 0;
47
48 inline static void*
49 align_pointer (void *ptr)
50 {
51         mword p = (mword)ptr;
52         p += sizeof (gpointer) - 1;
53         p &= ~ (sizeof (gpointer) - 1);
54         return (void*)p;
55 }
56
57 #ifdef USE_MONO_CTX
58 static MonoContext cur_thread_ctx;
59 #else
60 static mword cur_thread_regs [ARCH_NUM_REGS];
61 #endif
62
63 static void
64 update_current_thread_stack (void *start)
65 {
66         int stack_guard = 0;
67 #if !defined(USE_MONO_CTX)
68         void *reg_ptr = cur_thread_regs;
69 #endif
70         SgenThreadInfo *info = mono_thread_info_current ();
71         
72         info->client_info.stack_start = align_pointer (&stack_guard);
73         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
74 #ifdef USE_MONO_CTX
75         MONO_CONTEXT_GET_CURRENT (cur_thread_ctx);
76         memcpy (&info->client_info.ctx, &cur_thread_ctx, sizeof (MonoContext));
77         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
79 #else
80         ARCH_STORE_REGS (reg_ptr);
81         memcpy (&info->client_info.regs, reg_ptr, sizeof (info->client_info.regs));
82         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
83                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, NULL);
84 #endif
85 }
86
87 static gboolean
88 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
89 {
90         MonoJitInfo *ji;
91
92         if (!mono_thread_internal_current ())
93                 /* Happens during thread attach */
94                 return FALSE;
95
96         if (!ip || !domain)
97                 return FALSE;
98         if (!sgen_has_critical_method ())
99                 return FALSE;
100
101         /*
102          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
103          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
104          * to register the jit info for all GC critical methods after they are JITted/loaded.
105          */
106         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
107         if (!ji)
108                 return FALSE;
109
110         return sgen_is_critical_method (mono_jit_info_get_method (ji));
111 }
112
113 static int
114 restart_threads_until_none_in_managed_allocator (void)
115 {
116         int num_threads_died = 0;
117         int sleep_duration = -1;
118
119         for (;;) {
120                 int restart_count = 0, restarted_count = 0;
121                 /* restart all threads that stopped in the
122                    allocator */
123                 FOREACH_THREAD (info) {
124                         gboolean result;
125                         if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
126                                 continue;
127                         if (mono_thread_info_is_live (info) &&
128                                         (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
129                                         is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
130                                 binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
131                                 SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
132                                 result = sgen_resume_thread (info);
133                                 if (result) {
134                                         ++restart_count;
135                                 } else {
136                                         info->client_info.skip = 1;
137                                 }
138                         } else {
139                                 /* we set the stopped_ip to
140                                    NULL for threads which
141                                    we're not restarting so
142                                    that we can easily identify
143                                    the others */
144                                 info->client_info.stopped_ip = NULL;
145                                 info->client_info.stopped_domain = NULL;
146                                 info->client_info.suspend_done = TRUE;
147                         }
148                 } FOREACH_THREAD_END
149                 /* if no threads were restarted, we're done */
150                 if (restart_count == 0)
151                         break;
152
153                 /* wait for the threads to signal their restart */
154                 sgen_wait_for_suspend_ack (restart_count);
155
156                 if (sleep_duration < 0) {
157                         mono_thread_info_yield ();
158                         sleep_duration = 0;
159                 } else {
160                         g_usleep (sleep_duration);
161                         sleep_duration += 10;
162                 }
163
164                 /* stop them again */
165                 FOREACH_THREAD (info) {
166                         gboolean result;
167                         if (info->client_info.skip || info->client_info.stopped_ip == NULL)
168                                 continue;
169                         result = sgen_suspend_thread (info);
170
171                         if (result) {
172                                 ++restarted_count;
173                         } else {
174                                 info->client_info.skip = 1;
175                         }
176                 } FOREACH_THREAD_END
177                 /* some threads might have died */
178                 num_threads_died += restart_count - restarted_count;
179                 /* wait for the threads to signal their suspension
180                    again */
181                 sgen_wait_for_suspend_ack (restarted_count);
182         }
183
184         return num_threads_died;
185 }
186
187 static void
188 acquire_gc_locks (void)
189 {
190         LOCK_INTERRUPTION;
191         mono_thread_info_suspend_lock ();
192 }
193
194 static void
195 release_gc_locks (void)
196 {
197         mono_thread_info_suspend_unlock ();
198         UNLOCK_INTERRUPTION;
199 }
200
201 static TV_DECLARE (stop_world_time);
202 static unsigned long max_pause_usec = 0;
203
204 static guint64 time_stop_world;
205 static guint64 time_restart_world;
206
207 /* LOCKING: assumes the GC lock is held */
208 void
209 sgen_client_stop_world (int generation)
210 {
211         TV_DECLARE (end_handshake);
212
213         /* notify the profiler of the leftovers */
214         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
215         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
216                 mono_sgen_gc_event_moves ();
217
218         acquire_gc_locks ();
219
220         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
221         sgen_process_togglerefs ();
222
223         update_current_thread_stack (&generation);
224
225         sgen_global_stop_count++;
226         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
227         TV_GETTIME (stop_world_time);
228
229         if (mono_thread_info_unified_management_enabled ()) {
230                 sgen_unified_suspend_stop_world ();
231         } else {
232                 int count, dead;
233                 count = sgen_thread_handshake (TRUE);
234                 dead = restart_threads_until_none_in_managed_allocator ();
235                 if (count < dead)
236                         g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
237         }
238
239         SGEN_LOG (3, "world stopped");
240
241         TV_GETTIME (end_handshake);
242         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
243
244         sgen_memgov_collection_start (generation);
245         if (sgen_need_bridge_processing ())
246                 sgen_bridge_reset_data ();
247 }
248
249 /* LOCKING: assumes the GC lock is held */
250 void
251 sgen_client_restart_world (int generation, GGTimingInfo *timing)
252 {
253         TV_DECLARE (end_sw);
254         TV_DECLARE (start_handshake);
255         TV_DECLARE (end_bridge);
256         unsigned long usec, bridge_usec;
257
258         /* notify the profiler of the leftovers */
259         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
260         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
261                 mono_sgen_gc_event_moves ();
262
263         FOREACH_THREAD (info) {
264                 info->client_info.stack_start = NULL;
265 #ifdef USE_MONO_CTX
266                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
267 #else
268                 memset (&info->client_info.regs, 0, sizeof (info->client_info.regs));
269 #endif
270         } FOREACH_THREAD_END
271
272         TV_GETTIME (start_handshake);
273
274         if (mono_thread_info_unified_management_enabled ())
275                 sgen_unified_suspend_restart_world ();
276         else
277                 sgen_thread_handshake (FALSE);
278
279         TV_GETTIME (end_sw);
280         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
281         usec = TV_ELAPSED (stop_world_time, end_sw);
282         max_pause_usec = MAX (usec, max_pause_usec);
283
284         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
285
286         /*
287          * We must release the thread info suspend lock after doing
288          * the thread handshake.  Otherwise, if the GC stops the world
289          * and a thread is in the process of starting up, but has not
290          * yet registered (it's not in the thread_list), it is
291          * possible that the thread does register while the world is
292          * stopped.  When restarting the GC will then try to restart
293          * said thread, but since it never got the suspend signal, it
294          * cannot answer the restart signal, so a deadlock results.
295          */
296         release_gc_locks ();
297
298         TV_GETTIME (end_bridge);
299         bridge_usec = TV_ELAPSED (end_sw, end_bridge);
300
301         if (timing) {
302                 timing [0].stw_time = usec;
303                 timing [0].bridge_time = bridge_usec;
304         }
305 }
306
307 void
308 mono_sgen_init_stw (void)
309 {
310         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
311         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
312 }
313
314 /* Unified suspend code */
315
316 static gboolean
317 sgen_is_thread_in_current_stw (SgenThreadInfo *info)
318 {
319         /*
320         A thread explicitly asked to be skiped because it holds no managed state.
321         This is used by TP and finalizer threads.
322         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
323         */
324         if (info->client_info.gc_disabled) {
325                 return FALSE;
326         }
327
328         /*
329         We have detected that this thread is failing/dying, ignore it.
330         FIXME: can't we merge this with thread_is_dying?
331         */
332         if (info->client_info.skip) {
333                 return FALSE;
334         }
335
336         /*
337         Suspending the current thread will deadlock us, bad idea.
338         */
339         if (info == mono_thread_info_current ()) {
340                 return FALSE;
341         }
342
343         /*
344         We can't suspend the workers that will do all the heavy lifting.
345         FIXME Use some state bit in SgenThreadInfo for this.
346         */
347         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
348                 return FALSE;
349         }
350
351         /*
352         The thread has signaled that it started to detach, ignore it.
353         FIXME: can't we merge this with skip
354         */
355         if (!mono_thread_info_is_live (info)) {
356                 return FALSE;
357         }
358
359         return TRUE;
360 }
361
362 static void
363 update_sgen_info (SgenThreadInfo *info)
364 {
365         char *stack_start;
366
367         /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
368         info->client_info.stopped_domain = (MonoDomain *)mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
369         info->client_info.stopped_ip = (gpointer) MONO_CONTEXT_GET_IP (&mono_thread_info_get_suspend_state (info)->ctx);
370         stack_start = (char*)MONO_CONTEXT_GET_SP (&mono_thread_info_get_suspend_state (info)->ctx) - REDZONE_SIZE;
371
372         /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
373         if (stack_start < (char*)info->client_info.stack_start_limit || stack_start >= (char*)info->client_info.stack_end)
374                 g_error ("BAD STACK");
375
376         info->client_info.stack_start = stack_start;
377 #ifdef USE_MONO_CTX
378         info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
379 #else
380         g_assert_not_reached ();
381 #endif
382 }
383
384 static void
385 sgen_unified_suspend_stop_world (void)
386 {
387         int restart_counter;
388         int sleep_duration = -1;
389
390         mono_threads_begin_global_suspend ();
391         THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");
392
393         FOREACH_THREAD (info) {
394                 info->client_info.skip = FALSE;
395                 info->client_info.suspend_done = FALSE;
396                 if (sgen_is_thread_in_current_stw (info)) {
397                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
398                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
399                 } else {
400                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
401                 }
402         } FOREACH_THREAD_END
403
404         mono_thread_info_current ()->client_info.suspend_done = TRUE;
405         mono_threads_wait_pending_operations ();
406
407         for (;;) {
408                 restart_counter = 0;
409                 FOREACH_THREAD (info) {
410                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info)) {
411                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE thread %p not been processed done %d current %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info));
412                                 continue;
413                         }
414
415                         /*
416                         All threads that reach here are pristine suspended. This means the following:
417
418                         - We haven't accepted the previous suspend as good.
419                         - We haven't gave up on it for this STW (it's either bad or asked not to)
420                         */
421                         if (!mono_thread_info_check_suspend_result (info)) {
422                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SKIP thread %p failed to finish to suspend\n", mono_thread_info_get_tid (info));
423                                 info->client_info.skip = TRUE;
424                         } else if (mono_thread_info_in_critical_location (info)) {
425                                 gboolean res;
426                                 g_assert (mono_thread_info_suspend_count (info) == 1);
427                                 res = mono_thread_info_begin_resume (info);
428                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
429                                 if (res)
430                                         ++restart_counter;
431                                 else
432                                         info->client_info.skip = TRUE;
433                         } else {
434                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
435                                 g_assert (!info->client_info.in_critical_region);
436                                 info->client_info.suspend_done = TRUE;
437                         }
438                 } FOREACH_THREAD_END
439
440                 if (restart_counter == 0)
441                         break;
442                 mono_threads_wait_pending_operations ();
443
444                 if (sleep_duration < 0) {
445 #ifdef HOST_WIN32
446                         SwitchToThread ();
447 #else
448                         sched_yield ();
449 #endif
450                         sleep_duration = 0;
451                 } else {
452                         g_usleep (sleep_duration);
453                         sleep_duration += 10;
454                 }
455
456                 FOREACH_THREAD (info) {
457                         if (sgen_is_thread_in_current_stw (info) && mono_thread_info_is_running (info)) {
458                                 gboolean res = mono_thread_info_begin_suspend (info);
459                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
460                                 if (!res)
461                                         info->client_info.skip = TRUE;
462                         }
463                 } FOREACH_THREAD_END
464
465                 mono_threads_wait_pending_operations ();
466         }
467
468         FOREACH_THREAD (info) {
469                 if (sgen_is_thread_in_current_stw (info)) {
470                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
471                         g_assert (info->client_info.suspend_done);
472                         update_sgen_info (info);
473                 } else {
474                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
475                 }
476         } FOREACH_THREAD_END
477 }
478
479 static void
480 sgen_unified_suspend_restart_world (void)
481 {
482         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
483         FOREACH_THREAD (info) {
484                 if (sgen_is_thread_in_current_stw (info)) {
485                         g_assert (mono_thread_info_begin_resume (info));
486                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
487                 } else {
488                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p\n", mono_thread_info_get_tid (info));
489                 }
490         } FOREACH_THREAD_END
491
492         mono_threads_wait_pending_operations ();
493         mono_threads_end_global_suspend ();
494 }
495 #endif