Merge pull request #4248 from Unity-Technologies/boehm-gc-alloc-fixed
[mono.git] / mono / metadata / sgen-stw.c
1 /*
2  * sgen-stw.c: Stop the world functionality
3  *
4  * Author:
5  *      Paolo Molaro (lupus@ximian.com)
6  *  Rodrigo Kumpera (kumpera@gmail.com)
7  *
8  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
9  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
10  * Copyright 2011 Xamarin, Inc.
11  * Copyright (C) 2012 Xamarin Inc
12  *
13  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
14  */
15
16 #include "config.h"
17 #ifdef HAVE_SGEN_GC
18
19 #include "sgen/sgen-gc.h"
20 #include "sgen/sgen-protocol.h"
21 #include "sgen/sgen-memory-governor.h"
22 #include "sgen/sgen-thread-pool.h"
23 #include "metadata/profiler-private.h"
24 #include "sgen/sgen-client.h"
25 #include "metadata/sgen-bridge-internals.h"
26 #include "metadata/gc-internals.h"
27 #include "utils/mono-threads.h"
28 #include "utils/mono-threads-debug.h"
29
30 #define TV_DECLARE SGEN_TV_DECLARE
31 #define TV_GETTIME SGEN_TV_GETTIME
32 #define TV_ELAPSED SGEN_TV_ELAPSED
33
34 static void sgen_unified_suspend_restart_world (void);
35 static void sgen_unified_suspend_stop_world (void);
36
37 static TV_DECLARE (end_of_last_stw);
38
39 guint64 mono_time_since_last_stw ()
40 {
41         if (end_of_last_stw == 0)
42                 return 0;
43
44         TV_DECLARE (current_time);
45         TV_GETTIME (current_time);
46         return TV_ELAPSED (end_of_last_stw, current_time);
47 }
48
/* Running count of stop-the-world requests; incremented once per sgen_client_stop_world () call. */
49 unsigned int sgen_global_stop_count = 0;
50
51 inline static void*
52 align_pointer (void *ptr)
53 {
54         mword p = (mword)ptr;
55         p += sizeof (gpointer) - 1;
56         p &= ~ (sizeof (gpointer) - 1);
57         return (void*)p;
58 }
59
60 static void
61 update_current_thread_stack (void *start)
62 {
63         int stack_guard = 0;
64         SgenThreadInfo *info = mono_thread_info_current ();
65
66         info->client_info.stack_start = align_pointer (&stack_guard);
67         g_assert (info->client_info.stack_start);
68         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
69
70 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
71         MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
72 #else
73         g_error ("Sgen STW requires a working mono-context");
74 #endif
75
76         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
77                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
78 }
79
80 static void
81 acquire_gc_locks (void)
82 {
83         LOCK_INTERRUPTION;
84         mono_thread_info_suspend_lock ();
85 }
86
87 static void
88 release_gc_locks (void)
89 {
90         mono_thread_info_suspend_unlock ();
91         UNLOCK_INTERRUPTION;
92 }
93
94 static TV_DECLARE (stop_world_time);
95 static unsigned long max_pause_usec = 0;
96
97 static guint64 time_stop_world;
98 static guint64 time_restart_world;
99
100 /* LOCKING: assumes the GC lock is held */
101 void
102 sgen_client_stop_world (int generation)
103 {
104         TV_DECLARE (end_handshake);
105
106         /* notify the profiler of the leftovers */
107         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
108         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
109                 mono_sgen_gc_event_moves ();
110
111         acquire_gc_locks ();
112
113         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
114
115         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
116         sgen_process_togglerefs ();
117
118         update_current_thread_stack (&generation);
119
120         sgen_global_stop_count++;
121         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
122         TV_GETTIME (stop_world_time);
123
124         sgen_unified_suspend_stop_world ();
125
126         SGEN_LOG (3, "world stopped");
127
128         TV_GETTIME (end_handshake);
129         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
130
131         sgen_memgov_collection_start (generation);
132         if (sgen_need_bridge_processing ())
133                 sgen_bridge_reset_data ();
134 }
135
136 /* LOCKING: assumes the GC lock is held */
137 void
138 sgen_client_restart_world (int generation, gint64 *stw_time)
139 {
140         TV_DECLARE (end_sw);
141         TV_DECLARE (start_handshake);
142         unsigned long usec;
143
144         /* notify the profiler of the leftovers */
145         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
146         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
147                 mono_sgen_gc_event_moves ();
148
149         FOREACH_THREAD (info) {
150                 info->client_info.stack_start = NULL;
151                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
152         } FOREACH_THREAD_END
153
154         TV_GETTIME (start_handshake);
155
156         sgen_unified_suspend_restart_world ();
157
158         TV_GETTIME (end_sw);
159         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
160         usec = TV_ELAPSED (stop_world_time, end_sw);
161         max_pause_usec = MAX (usec, max_pause_usec);
162         end_of_last_stw = end_sw;
163
164         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
165
166         /*
167          * We must release the thread info suspend lock after doing
168          * the thread handshake.  Otherwise, if the GC stops the world
169          * and a thread is in the process of starting up, but has not
170          * yet registered (it's not in the thread_list), it is
171          * possible that the thread does register while the world is
172          * stopped.  When restarting the GC will then try to restart
173          * said thread, but since it never got the suspend signal, it
174          * cannot answer the restart signal, so a deadlock results.
175          */
176         release_gc_locks ();
177
178         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
179
180         *stw_time = usec;
181 }
182
183 void
184 mono_sgen_init_stw (void)
185 {
186         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
187         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
188 }
189
190 /* Unified suspend code */
191
192 static gboolean
193 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
194 {
195         /*
196         A thread explicitly asked to be skiped because it holds no managed state.
197         This is used by TP and finalizer threads.
198         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
199         */
200         if (info->client_info.gc_disabled) {
201                 if (reason)
202                         *reason = 1;
203                 return FALSE;
204         }
205
206         /*
207         We have detected that this thread is failing/dying, ignore it.
208         FIXME: can't we merge this with thread_is_dying?
209         */
210         if (info->client_info.skip) {
211                 if (reason)
212                         *reason = 2;
213                 return FALSE;
214         }
215
216         /*
217         Suspending the current thread will deadlock us, bad idea.
218         */
219         if (info == mono_thread_info_current ()) {
220                 if (reason)
221                         *reason = 3;
222                 return FALSE;
223         }
224
225         /*
226         We can't suspend the workers that will do all the heavy lifting.
227         FIXME Use some state bit in SgenThreadInfo for this.
228         */
229         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
230                 if (reason)
231                         *reason = 4;
232                 return FALSE;
233         }
234
235         /*
236         The thread has signaled that it started to detach, ignore it.
237         FIXME: can't we merge this with skip
238         */
239         if (!mono_thread_info_is_live (info)) {
240                 if (reason)
241                         *reason = 5;
242                 return FALSE;
243         }
244
245         return TRUE;
246 }
247
248 static void
249 sgen_unified_suspend_stop_world (void)
250 {
251         int sleep_duration = -1;
252
253         mono_threads_begin_global_suspend ();
254         THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
255
256         FOREACH_THREAD (info) {
257                 info->client_info.skip = FALSE;
258                 info->client_info.suspend_done = FALSE;
259
260                 int reason;
261                 if (!sgen_is_thread_in_current_stw (info, &reason)) {
262                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
263                         continue;
264                 }
265
266                 info->client_info.skip = !mono_thread_info_begin_suspend (info);
267
268                 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
269         } FOREACH_THREAD_END
270
271         mono_thread_info_current ()->client_info.suspend_done = TRUE;
272         mono_threads_wait_pending_operations ();
273
274         for (;;) {
275                 gint restart_counter = 0;
276
277                 FOREACH_THREAD (info) {
278                         gint suspend_count;
279
280                         int reason = 0;
281                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
282                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
283                                 continue;
284                         }
285
286                         /*
287                         All threads that reach here are pristine suspended. This means the following:
288
289                         - We haven't accepted the previous suspend as good.
290                         - We haven't gave up on it for this STW (it's either bad or asked not to)
291                         */
292                         if (!mono_thread_info_in_critical_location (info)) {
293                                 info->client_info.suspend_done = TRUE;
294
295                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
296                                 continue;
297                         }
298
299                         suspend_count = mono_thread_info_suspend_count (info);
300                         if (!(suspend_count == 1))
301                                 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
302
303                         info->client_info.skip = !mono_thread_info_begin_resume (info);
304                         if (!info->client_info.skip)
305                                 restart_counter += 1;
306
307                         THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
308                 } FOREACH_THREAD_END
309
310                 mono_threads_wait_pending_operations ();
311
312                 if (restart_counter == 0)
313                         break;
314
315                 if (sleep_duration < 0) {
316                         mono_thread_info_yield ();
317                         sleep_duration = 0;
318                 } else {
319                         g_usleep (sleep_duration);
320                         sleep_duration += 10;
321                 }
322
323                 FOREACH_THREAD (info) {
324                         int reason = 0;
325                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
326                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
327                                 continue;
328                         }
329
330                         if (!mono_thread_info_is_running (info)) {
331                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
332                                 continue;
333                         }
334
335                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
336
337                         THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
338                 } FOREACH_THREAD_END
339
340                 mono_threads_wait_pending_operations ();
341         }
342
343         FOREACH_THREAD (info) {
344                 gpointer stopped_ip;
345
346                 int reason = 0;
347                 if (!sgen_is_thread_in_current_stw (info, &reason)) {
348                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
349
350                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
351                         continue;
352                 }
353
354                 g_assert (info->client_info.suspend_done);
355
356                 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
357
358                 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
359                 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
360
361                 if (info->client_info.stack_start < info->client_info.stack_start_limit
362                          || info->client_info.stack_start >= info->client_info.stack_end) {
363                         /*
364                          * Thread context is in unhandled state, most likely because it is
365                          * dying. We don't scan it.
366                          * FIXME We should probably rework and check the valid flag instead.
367                          */
368                         info->client_info.stack_start = NULL;
369                 }
370
371                 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
372
373                 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
374
375                 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
376                         mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
377         } FOREACH_THREAD_END
378 }
379
380 static void
381 sgen_unified_suspend_restart_world (void)
382 {
383         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
384         FOREACH_THREAD (info) {
385                 int reason = 0;
386                 if (sgen_is_thread_in_current_stw (info, &reason)) {
387                         g_assert (mono_thread_info_begin_resume (info));
388                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
389
390                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
391                 } else {
392                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
393                 }
394         } FOREACH_THREAD_END
395
396         mono_threads_wait_pending_operations ();
397         mono_threads_end_global_suspend ();
398 }
399 #endif