Merge pull request #3489 from akoeplinger/fix-latin1-surrogatepair
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28
/* Short local aliases for the SGEN_TV_* timing macros used in this file. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED
32
/*
 * Forward declarations: both functions are defined in the "Unified suspend code"
 * section further down, but are called earlier by sgen_client_stop_world () and
 * sgen_client_restart_world ().
 */
static void sgen_unified_suspend_restart_world (void);
static void sgen_unified_suspend_stop_world (void);
35
/*
 * Timestamp recorded by sgen_client_restart_world () once the world has been
 * restarted. mono_time_since_last_stw () treats a value of 0 as "no STW yet".
 */
static TV_DECLARE (end_of_last_stw);
37
38 guint64 mono_time_since_last_stw ()
39 {
40         if (end_of_last_stw == 0)
41                 return 0;
42
43         TV_DECLARE (current_time);
44         TV_GETTIME (current_time);
45         return TV_ELAPSED (end_of_last_stw, current_time);
46 }
47
/* Incremented by sgen_client_stop_world () each time it stops the world. */
unsigned int sgen_global_stop_count = 0;
49
50 inline static void*
51 align_pointer (void *ptr)
52 {
53         mword p = (mword)ptr;
54         p += sizeof (gpointer) - 1;
55         p &= ~ (sizeof (gpointer) - 1);
56         return (void*)p;
57 }
58
/*
 * update_current_thread_stack:
 *
 * Records the calling thread's own stack position and register context in its
 * SgenThreadInfo, then notifies the runtime through the thread_suspend_func GC
 * callback if one is installed.
 *
 * The START argument is not used by the body.
 */
static void
update_current_thread_stack (void *start)
{
        /* Local whose (aligned) address is stored as this thread's stack_start. */
        int stack_guard = 0;
        SgenThreadInfo *info = mono_thread_info_current ();

        info->client_info.stack_start = align_pointer (&stack_guard);
        g_assert (info->client_info.stack_start);
        /* The recorded position must fall inside [stack_start_limit, stack_end). */
        g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);

#if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
        /* Capture the current register state into the thread's saved context. */
        MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
#else
        g_error ("Sgen STW requires a working mono-context");
#endif

        /* Hand the captured context to the runtime's suspend callback, when registered. */
        if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
                mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
}
78
79 static gboolean
80 is_ip_in_managed_allocator (MonoDomain *domain, gpointer ip)
81 {
82         MonoJitInfo *ji;
83
84         if (!mono_thread_internal_current ())
85                 /* Happens during thread attach */
86                 return FALSE;
87
88         if (!ip || !domain)
89                 return FALSE;
90         if (!sgen_has_critical_method ())
91                 return FALSE;
92
93         /*
94          * mono_jit_info_table_find is not async safe since it calls into the AOT runtime to load information for
95          * missing methods (#13951). To work around this, we disable the AOT fallback. For this to work, the JIT needs
96          * to register the jit info for all GC critical methods after they are JITted/loaded.
97          */
98         ji = mono_jit_info_table_find_internal (domain, (char *)ip, FALSE, FALSE);
99         if (!ji)
100                 return FALSE;
101
102         return sgen_is_critical_method (mono_jit_info_get_method (ji));
103 }
104
/*
 * restart_threads_until_none_in_managed_allocator:
 *
 * Called from sgen_client_stop_world () on the non-unified suspend path, after
 * the initial thread handshake. Each round resumes every live thread that was
 * stopped at an unacceptable point (no stack_start recorded, inside a critical
 * region, or with its stopped IP inside a critical method), lets those threads
 * run for a moment, and then suspends them again. The loop ends when a round
 * resumes no thread at all.
 *
 * Returns the accumulated difference between the number of threads resumed and
 * the number successfully suspended again, which the code treats as the number
 * of threads that died in the meantime.
 */
static int
restart_threads_until_none_in_managed_allocator (void)
{
        int num_threads_died = 0;
        /* Negative means "first round": yield once instead of sleeping. */
        int sleep_duration = -1;

        for (;;) {
                int restart_count = 0, restarted_count = 0;
                /* restart all threads that stopped in the
                   allocator */
                FOREACH_THREAD (info) {
                        gboolean result;
                        /* Threads already given up on, opted out, or accepted need no work. */
                        if (info->client_info.skip || info->client_info.gc_disabled || info->client_info.suspend_done)
                                continue;
                        if (mono_thread_info_is_live (info) &&
                                        (!info->client_info.stack_start || info->client_info.in_critical_region || info->client_info.info.inside_critical_region ||
                                        is_ip_in_managed_allocator (info->client_info.stopped_domain, info->client_info.stopped_ip))) {
                                binary_protocol_thread_restart ((gpointer)mono_thread_info_get_tid (info));
                                SGEN_LOG (3, "thread %p resumed.", (void*) (size_t) info->client_info.info.native_handle);
                                result = sgen_resume_thread (info);
                                if (result) {
                                        ++restart_count;
                                } else {
                                        /* Resume failed: ignore this thread from now on. */
                                        info->client_info.skip = 1;
                                }
                        } else {
                                /* we set the stopped_ip to
                                   NULL for threads which
                                   we're not restarting so
                                   that we can easily identify
                                   the others */
                                info->client_info.stopped_ip = NULL;
                                info->client_info.stopped_domain = NULL;
                                info->client_info.suspend_done = TRUE;
                        }
                } FOREACH_THREAD_END
                /* if no threads were restarted, we're done */
                if (restart_count == 0)
                        break;

                /* wait for the threads to signal their restart */
                sgen_wait_for_suspend_ack (restart_count);

                /* Back off: yield on the first round, then sleep 10 units longer each round. */
                if (sleep_duration < 0) {
                        mono_thread_info_yield ();
                        sleep_duration = 0;
                } else {
                        g_usleep (sleep_duration);
                        sleep_duration += 10;
                }

                /* stop them again */
                FOREACH_THREAD (info) {
                        gboolean result;
                        /* A NULL stopped_ip marks threads that were not restarted above. */
                        if (info->client_info.skip || info->client_info.stopped_ip == NULL)
                                continue;
                        result = sgen_suspend_thread (info);

                        if (result) {
                                ++restarted_count;
                        } else {
                                info->client_info.skip = 1;
                        }
                } FOREACH_THREAD_END
                /* some threads might have died */
                num_threads_died += restart_count - restarted_count;
                /* wait for the threads to signal their suspension
                   again */
                sgen_wait_for_suspend_ack (restarted_count);
        }

        return num_threads_died;
}
178
/*
 * Take the locks held for the duration of a stop-the-world: first the
 * interruption lock, then the thread-info suspend lock.
 */
static void
acquire_gc_locks (void)
{
        LOCK_INTERRUPTION;
        mono_thread_info_suspend_lock ();
}
185
/*
 * Release the locks taken by acquire_gc_locks (), in the opposite order:
 * the thread-info suspend lock first, then the interruption lock.
 */
static void
release_gc_locks (void)
{
        mono_thread_info_suspend_unlock ();
        UNLOCK_INTERRUPTION;
}
192
/* Timestamp taken in sgen_client_stop_world () just before threads are suspended. */
static TV_DECLARE (stop_world_time);
/*
 * Largest pause seen so far, i.e. the maximum TV_ELAPSED between stop_world_time
 * and the end of a restart.
 * NOTE(review): the name says usec; the actual unit is whatever TV_ELAPSED yields - confirm.
 */
static unsigned long max_pause_usec = 0;

/*
 * Accumulated time spent suspending and resuming threads. Both are exported as
 * the "World stop" and "World restart" counters by mono_sgen_init_stw ().
 */
static guint64 time_stop_world;
static guint64 time_restart_world;
198
/*
 * sgen_client_stop_world:
 * @generation: passed to the profiler event and to sgen_memgov_collection_start ().
 *
 * Suspends the other threads for a stop-the-world pause. Takes the GC locks
 * (released again by sgen_client_restart_world ()), records the calling
 * thread's stack and context, and then suspends threads through either the
 * unified suspend code or the thread handshake followed by
 * restart_threads_until_none_in_managed_allocator (). The time spent suspending
 * is added to time_stop_world.
 *
 * LOCKING: assumes the GC lock is held
 */
void
sgen_client_stop_world (int generation)
{
        TV_DECLARE (end_handshake);

        /* notify the profiler of the leftovers */
        /* FIXME this is the wrong spot as we can STW for non collection reasons. */
        if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
                mono_sgen_gc_event_moves ();

        acquire_gc_locks ();

        mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);

        /* We start to scan after locks are taken, this ensures we won't be interrupted. */
        sgen_process_togglerefs ();

        update_current_thread_stack (&generation);

        sgen_global_stop_count++;
        SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
        /* Start of the pause; also used by sgen_client_restart_world () to compute the total pause. */
        TV_GETTIME (stop_world_time);

        if (mono_thread_info_unified_management_enabled ()) {
                sgen_unified_suspend_stop_world ();
        } else {
                int count, dead;
                count = sgen_thread_handshake (TRUE);
                dead = restart_threads_until_none_in_managed_allocator ();
                /* More deaths than initially suspended threads means the bookkeeping is broken. */
                if (count < dead)
                        g_error ("More threads have died (%d) that been initialy suspended %d", dead, count);
        }

        SGEN_LOG (3, "world stopped");

        TV_GETTIME (end_handshake);
        time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);

        sgen_memgov_collection_start (generation);
        if (sgen_need_bridge_processing ())
                sgen_bridge_reset_data ();
}
242
243 /* LOCKING: assumes the GC lock is held */
244 void
245 sgen_client_restart_world (int generation, gint64 *stw_time)
246 {
247         TV_DECLARE (end_sw);
248         TV_DECLARE (start_handshake);
249         unsigned long usec;
250
251         /* notify the profiler of the leftovers */
252         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
253         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
254                 mono_sgen_gc_event_moves ();
255
256         FOREACH_THREAD (info) {
257                 info->client_info.stack_start = NULL;
258                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
259         } FOREACH_THREAD_END
260
261         TV_GETTIME (start_handshake);
262
263         if (mono_thread_info_unified_management_enabled ())
264                 sgen_unified_suspend_restart_world ();
265         else
266                 sgen_thread_handshake (FALSE);
267
268         TV_GETTIME (end_sw);
269         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
270         usec = TV_ELAPSED (stop_world_time, end_sw);
271         max_pause_usec = MAX (usec, max_pause_usec);
272         end_of_last_stw = end_sw;
273
274         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
275
276         /*
277          * We must release the thread info suspend lock after doing
278          * the thread handshake.  Otherwise, if the GC stops the world
279          * and a thread is in the process of starting up, but has not
280          * yet registered (it's not in the thread_list), it is
281          * possible that the thread does register while the world is
282          * stopped.  When restarting the GC will then try to restart
283          * said thread, but since it never got the suspend signal, it
284          * cannot answer the restart signal, so a deadlock results.
285          */
286         release_gc_locks ();
287
288         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
289
290         *stw_time = usec;
291 }
292
/*
 * mono_sgen_init_stw:
 *
 * Registers the accumulated stop and restart times as the GC time counters
 * "World stop" and "World restart".
 */
void
mono_sgen_init_stw (void)
{
        mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
        mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
}
299
300 /* Unified suspend code */
301
302 static gboolean
303 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
304 {
305         /*
306         A thread explicitly asked to be skiped because it holds no managed state.
307         This is used by TP and finalizer threads.
308         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
309         */
310         if (info->client_info.gc_disabled) {
311                 if (reason)
312                         *reason = 1;
313                 return FALSE;
314         }
315
316         /*
317         We have detected that this thread is failing/dying, ignore it.
318         FIXME: can't we merge this with thread_is_dying?
319         */
320         if (info->client_info.skip) {
321                 if (reason)
322                         *reason = 2;
323                 return FALSE;
324         }
325
326         /*
327         Suspending the current thread will deadlock us, bad idea.
328         */
329         if (info == mono_thread_info_current ()) {
330                 if (reason)
331                         *reason = 3;
332                 return FALSE;
333         }
334
335         /*
336         We can't suspend the workers that will do all the heavy lifting.
337         FIXME Use some state bit in SgenThreadInfo for this.
338         */
339         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
340                 if (reason)
341                         *reason = 4;
342                 return FALSE;
343         }
344
345         /*
346         The thread has signaled that it started to detach, ignore it.
347         FIXME: can't we merge this with skip
348         */
349         if (!mono_thread_info_is_live (info)) {
350                 if (reason)
351                         *reason = 5;
352                 return FALSE;
353         }
354
355         return TRUE;
356 }
357
/*
 * sgen_unified_suspend_stop_world:
 *
 * Stop-the-world implementation used when unified thread management is enabled.
 * It proceeds in three phases:
 *
 *  1. Request suspension of every thread that takes part in this STW (see
 *     sgen_is_thread_in_current_stw ()) and wait for the pending operations.
 *  2. Loop: any suspended thread found in a critical location is resumed,
 *     given time to run, and suspended again; threads stopped elsewhere are
 *     marked suspend_done. The loop ends when a pass resumes no thread.
 *  3. For every participating thread, copy its suspend state into
 *     client_info (ctx, stopped_domain, stopped_ip, stack_start) and validate
 *     the resulting stack_start against the thread's stack bounds.
 */
static void
sgen_unified_suspend_stop_world (void)
{
        int restart_counter;
        /* Negative means "first round": yield once instead of sleeping. */
        int sleep_duration = -1;

        mono_threads_begin_global_suspend ();
        THREADS_STW_DEBUG ("[GC-STW-BEGIN] *** BEGIN SUSPEND *** \n");

        /* Phase 1: reset the per-STW flags and request the initial suspension. */
        FOREACH_THREAD (info) {
                int reason;
                info->client_info.skip = FALSE;
                info->client_info.suspend_done = FALSE;
                if (sgen_is_thread_in_current_stw (info, &reason)) {
                        /* A thread whose suspend request fails is skipped for this STW. */
                        info->client_info.skip = !mono_thread_info_begin_suspend (info);
                        THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), info->client_info.skip);
                } else {
                        THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip, reason);
                }
        } FOREACH_THREAD_END

        /* The current thread is never suspended; mark it as already handled. */
        mono_thread_info_current ()->client_info.suspend_done = TRUE;
        mono_threads_wait_pending_operations ();

        /* Phase 2: restart and re-suspend threads stopped in critical locations. */
        for (;;) {
                restart_counter = 0;
                FOREACH_THREAD (info) {
                        int reason = 0;
                        if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
                                continue;
                        }

                        /*
                        All threads that reach here are pristine suspended. This means the following:

                        - We haven't accepted the previous suspend as good.
                        - We haven't given up on it for this STW (it's either bad or asked not to)
                        */
                        if (mono_thread_info_in_critical_location (info)) {
                                gboolean res;
                                gint suspend_count = mono_thread_info_suspend_count (info);
                                if (!(suspend_count == 1))
                                        g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
                                res = mono_thread_info_begin_resume (info);
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %d\n", mono_thread_info_get_tid (info), res);
                                if (res)
                                        ++restart_counter;
                                else
                                        info->client_info.skip = TRUE;
                        } else {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
                                g_assert (!info->client_info.in_critical_region);
                                info->client_info.suspend_done = TRUE;
                        }
                } FOREACH_THREAD_END

                /* No thread had to be restarted: every participant is accepted. */
                if (restart_counter == 0)
                        break;
                mono_threads_wait_pending_operations ();

                /* Back off: yield on the first round, then sleep 10 units longer each round. */
                if (sleep_duration < 0) {
                        mono_thread_info_yield ();
                        sleep_duration = 0;
                } else {
                        g_usleep (sleep_duration);
                        sleep_duration += 10;
                }

                /* Suspend again the threads that were resumed above and are running. */
                FOREACH_THREAD (info) {
                        int reason = 0;
                        if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
                                continue;
                        }

                        if (mono_thread_info_is_running (info)) {
                                gboolean res = mono_thread_info_begin_suspend (info);
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %d\n", mono_thread_info_get_tid (info), res);
                                if (!res)
                                        info->client_info.skip = TRUE;
                        }
                } FOREACH_THREAD_END

                mono_threads_wait_pending_operations ();
        }

        /* Phase 3: publish each suspended thread's state for the collector. */
        FOREACH_THREAD (info) {
                int reason = 0;
                if (sgen_is_thread_in_current_stw (info, &reason)) {
                        MonoThreadUnwindState *state;

                        THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended\n", mono_thread_info_get_tid (info));
                        g_assert (info->client_info.suspend_done);

                        state = mono_thread_info_get_suspend_state (info);

                        info->client_info.ctx = state->ctx;

                        if (!state->unwind_data [MONO_UNWIND_DATA_DOMAIN] || !state->unwind_data [MONO_UNWIND_DATA_LMF]) {
                                /* thread is starting or detaching, nothing to scan here */
                                info->client_info.stopped_domain = NULL;
                                info->client_info.stopped_ip = NULL;
                                info->client_info.stack_start = NULL;
                        } else {
                                /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
                                info->client_info.stopped_domain = (MonoDomain*) mono_thread_info_tls_get (info, TLS_KEY_DOMAIN);
                                info->client_info.stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
                                /* stack_start is placed REDZONE_SIZE bytes below the saved stack pointer. */
                                info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);

                                /* altstack signal handler, sgen can't handle them, mono-threads should have handled this. */
                                if (!info->client_info.stack_start
                                         || info->client_info.stack_start < info->client_info.stack_start_limit
                                         || info->client_info.stack_start >= info->client_info.stack_end) {
                                        g_error ("BAD STACK: stack_start = %p, stack_start_limit = %p, stack_end = %p",
                                                info->client_info.stack_start, info->client_info.stack_start_limit, info->client_info.stack_end);
                                }
                        }

                        binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), info->client_info.stopped_ip);
                } else {
                        THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
                        /* Only the current thread may be marked done without taking part in the STW. */
                        g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
                }
        } FOREACH_THREAD_END
}
484
485 static void
486 sgen_unified_suspend_restart_world (void)
487 {
488         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
489         FOREACH_THREAD (info) {
490                 int reason = 0;
491                 if (sgen_is_thread_in_current_stw (info, &reason)) {
492                         g_assert (mono_thread_info_begin_resume (info));
493                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
494
495                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
496                 } else {
497                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
498                 }
499         } FOREACH_THREAD_END
500
501         mono_threads_wait_pending_operations ();
502         mono_threads_end_global_suspend ();
503 }
504 #endif