[sgen] Move STW GC events to sgen-stw.c so they are properly reported.
[mono.git] / mono / metadata / sgen-stw.c
1 /**
2  * \file
3  * Stop the world functionality
4  *
5  * Author:
6  *      Paolo Molaro (lupus@ximian.com)
7  *  Rodrigo Kumpera (kumpera@gmail.com)
8  *
9  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11  * Copyright 2011 Xamarin, Inc.
12  * Copyright (C) 2012 Xamarin Inc
13  *
14  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
15  */
16
17 #include "config.h"
18 #ifdef HAVE_SGEN_GC
19
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
30
/* Short local spellings for the sgen timing macros used throughout this file. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Unified-suspend helpers; both are defined further down in this file. */
static void sgen_unified_suspend_stop_world (void);
static void sgen_unified_suspend_restart_world (void);
37
38 static TV_DECLARE (end_of_last_stw);
39
40 guint64 mono_time_since_last_stw ()
41 {
42         if (end_of_last_stw == 0)
43                 return 0;
44
45         TV_DECLARE (current_time);
46         TV_GETTIME (current_time);
47         return TV_ELAPSED (end_of_last_stw, current_time);
48 }
49
/* Running count of stop-the-world requests; bumped by sgen_client_stop_world (). */
unsigned int sgen_global_stop_count = 0;
51
52 inline static void*
53 align_pointer (void *ptr)
54 {
55         mword p = (mword)ptr;
56         p += sizeof (gpointer) - 1;
57         p &= ~ (sizeof (gpointer) - 1);
58         return (void*)p;
59 }
60
61 static void
62 update_current_thread_stack (void *start)
63 {
64         int stack_guard = 0;
65         SgenThreadInfo *info = mono_thread_info_current ();
66
67         info->client_info.stack_start = align_pointer (&stack_guard);
68         g_assert (info->client_info.stack_start);
69         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
70
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72         MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
73 #else
74         g_error ("Sgen STW requires a working mono-context");
75 #endif
76
77         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
79 }
80
81 static void
82 acquire_gc_locks (void)
83 {
84         LOCK_INTERRUPTION;
85         mono_thread_info_suspend_lock ();
86 }
87
88 static void
89 release_gc_locks (void)
90 {
91         mono_thread_info_suspend_unlock ();
92         UNLOCK_INTERRUPTION;
93 }
94
95 static TV_DECLARE (stop_world_time);
96 static unsigned long max_pause_usec = 0;
97
98 static guint64 time_stop_world;
99 static guint64 time_restart_world;
100
101 /* LOCKING: assumes the GC lock is held */
102 void
103 sgen_client_stop_world (int generation)
104 {
105         TV_DECLARE (end_handshake);
106
107         /* notify the profiler of the leftovers */
108         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
109         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
110                 mono_sgen_gc_event_moves ();
111
112         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
113
114         acquire_gc_locks ();
115
116         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
117
118         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
119         sgen_process_togglerefs ();
120
121         update_current_thread_stack (&generation);
122
123         sgen_global_stop_count++;
124         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
125         TV_GETTIME (stop_world_time);
126
127         sgen_unified_suspend_stop_world ();
128
129         SGEN_LOG (3, "world stopped");
130
131         mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
132
133         TV_GETTIME (end_handshake);
134         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
135
136         sgen_memgov_collection_start (generation);
137         if (sgen_need_bridge_processing ())
138                 sgen_bridge_reset_data ();
139 }
140
141 /* LOCKING: assumes the GC lock is held */
142 void
143 sgen_client_restart_world (int generation, gint64 *stw_time)
144 {
145         TV_DECLARE (end_sw);
146         TV_DECLARE (start_handshake);
147         unsigned long usec;
148
149         /* notify the profiler of the leftovers */
150         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
151         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
152                 mono_sgen_gc_event_moves ();
153
154         mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
155
156         FOREACH_THREAD (info) {
157                 info->client_info.stack_start = NULL;
158                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
159         } FOREACH_THREAD_END
160
161         TV_GETTIME (start_handshake);
162
163         sgen_unified_suspend_restart_world ();
164
165         TV_GETTIME (end_sw);
166         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
167         usec = TV_ELAPSED (stop_world_time, end_sw);
168         max_pause_usec = MAX (usec, max_pause_usec);
169         end_of_last_stw = end_sw;
170
171         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
172
173         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
174
175         /*
176          * We must release the thread info suspend lock after doing
177          * the thread handshake.  Otherwise, if the GC stops the world
178          * and a thread is in the process of starting up, but has not
179          * yet registered (it's not in the thread_list), it is
180          * possible that the thread does register while the world is
181          * stopped.  When restarting the GC will then try to restart
182          * said thread, but since it never got the suspend signal, it
183          * cannot answer the restart signal, so a deadlock results.
184          */
185         release_gc_locks ();
186
187         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
188
189         *stw_time = usec;
190 }
191
192 void
193 mono_sgen_init_stw (void)
194 {
195         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
196         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
197 }
198
199 /* Unified suspend code */
200
201 static gboolean
202 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
203 {
204         /*
205         A thread explicitly asked to be skiped because it holds no managed state.
206         This is used by TP and finalizer threads.
207         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
208         */
209         if (info->client_info.gc_disabled) {
210                 if (reason)
211                         *reason = 1;
212                 return FALSE;
213         }
214
215         /*
216         We have detected that this thread is failing/dying, ignore it.
217         FIXME: can't we merge this with thread_is_dying?
218         */
219         if (info->client_info.skip) {
220                 if (reason)
221                         *reason = 2;
222                 return FALSE;
223         }
224
225         /*
226         Suspending the current thread will deadlock us, bad idea.
227         */
228         if (info == mono_thread_info_current ()) {
229                 if (reason)
230                         *reason = 3;
231                 return FALSE;
232         }
233
234         /*
235         We can't suspend the workers that will do all the heavy lifting.
236         FIXME Use some state bit in SgenThreadInfo for this.
237         */
238         if (sgen_thread_pool_is_thread_pool_thread (major_collector.get_sweep_pool (), mono_thread_info_get_tid (info)) ||
239                         sgen_workers_is_worker_thread (mono_thread_info_get_tid (info))) {
240                 if (reason)
241                         *reason = 4;
242                 return FALSE;
243         }
244
245         /*
246         The thread has signaled that it started to detach, ignore it.
247         FIXME: can't we merge this with skip
248         */
249         if (!mono_thread_info_is_live (info)) {
250                 if (reason)
251                         *reason = 5;
252                 return FALSE;
253         }
254
255         return TRUE;
256 }
257
258 static void
259 sgen_unified_suspend_stop_world (void)
260 {
261         int sleep_duration = -1;
262
263         mono_threads_begin_global_suspend ();
264         THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
265
266         FOREACH_THREAD (info) {
267                 info->client_info.skip = FALSE;
268                 info->client_info.suspend_done = FALSE;
269
270                 int reason;
271                 if (!sgen_is_thread_in_current_stw (info, &reason)) {
272                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
273                         continue;
274                 }
275
276                 info->client_info.skip = !mono_thread_info_begin_suspend (info);
277
278                 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
279         } FOREACH_THREAD_END
280
281         mono_thread_info_current ()->client_info.suspend_done = TRUE;
282         mono_threads_wait_pending_operations ();
283
284         for (;;) {
285                 gint restart_counter = 0;
286
287                 FOREACH_THREAD (info) {
288                         gint suspend_count;
289
290                         int reason = 0;
291                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
292                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
293                                 continue;
294                         }
295
296                         /*
297                         All threads that reach here are pristine suspended. This means the following:
298
299                         - We haven't accepted the previous suspend as good.
300                         - We haven't gave up on it for this STW (it's either bad or asked not to)
301                         */
302                         if (!mono_thread_info_in_critical_location (info)) {
303                                 info->client_info.suspend_done = TRUE;
304
305                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
306                                 continue;
307                         }
308
309                         suspend_count = mono_thread_info_suspend_count (info);
310                         if (!(suspend_count == 1))
311                                 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
312
313                         info->client_info.skip = !mono_thread_info_begin_resume (info);
314                         if (!info->client_info.skip)
315                                 restart_counter += 1;
316
317                         THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
318                 } FOREACH_THREAD_END
319
320                 mono_threads_wait_pending_operations ();
321
322                 if (restart_counter == 0)
323                         break;
324
325                 if (sleep_duration < 0) {
326                         mono_thread_info_yield ();
327                         sleep_duration = 0;
328                 } else {
329                         g_usleep (sleep_duration);
330                         sleep_duration += 10;
331                 }
332
333                 FOREACH_THREAD (info) {
334                         int reason = 0;
335                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
336                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
337                                 continue;
338                         }
339
340                         if (!mono_thread_info_is_running (info)) {
341                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
342                                 continue;
343                         }
344
345                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
346
347                         THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
348                 } FOREACH_THREAD_END
349
350                 mono_threads_wait_pending_operations ();
351         }
352
353         FOREACH_THREAD (info) {
354                 gpointer stopped_ip;
355
356                 int reason = 0;
357                 if (!sgen_is_thread_in_current_stw (info, &reason)) {
358                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
359
360                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
361                         continue;
362                 }
363
364                 g_assert (info->client_info.suspend_done);
365
366                 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
367
368                 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
369                 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
370
371                 if (info->client_info.stack_start < info->client_info.stack_start_limit
372                          || info->client_info.stack_start >= info->client_info.stack_end) {
373                         /*
374                          * Thread context is in unhandled state, most likely because it is
375                          * dying. We don't scan it.
376                          * FIXME We should probably rework and check the valid flag instead.
377                          */
378                         info->client_info.stack_start = NULL;
379                 }
380
381                 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
382
383                 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
384
385                 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
386                         mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
387         } FOREACH_THREAD_END
388 }
389
390 static void
391 sgen_unified_suspend_restart_world (void)
392 {
393         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
394         FOREACH_THREAD (info) {
395                 int reason = 0;
396                 if (sgen_is_thread_in_current_stw (info, &reason)) {
397                         g_assert (mono_thread_info_begin_resume (info));
398                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
399
400                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
401                 } else {
402                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
403                 }
404         } FOREACH_THREAD_END
405
406         mono_threads_wait_pending_operations ();
407         mono_threads_end_global_suspend ();
408 }
409 #endif