48ce7f963ed23cfab6cc0eb68b6b42aa4f37eebf
[mono.git] / mono / metadata / sgen-stw.c
1 /**
2  * \file
3  * Stop the world functionality
4  *
5  * Author:
6  *      Paolo Molaro (lupus@ximian.com)
7  *  Rodrigo Kumpera (kumpera@gmail.com)
8  *
9  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11  * Copyright 2011 Xamarin, Inc.
12  * Copyright (C) 2012 Xamarin Inc
13  *
14  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
15  */
16
17 #include "config.h"
18 #ifdef HAVE_SGEN_GC
19
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
30
/* Short local aliases for the SGen time-value macros used throughout this file. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Forward declarations; both are defined in the "Unified suspend code" section further down. */
static void sgen_unified_suspend_restart_world (void);
static void sgen_unified_suspend_stop_world (void);

/*
 * Timestamp of the end of the most recent world restart. It is assigned in
 * sgen_client_restart_world () and read by mono_time_since_last_stw (), which
 * treats a value of 0 as "no stop-the-world has completed yet".
 */
static TV_DECLARE (end_of_last_stw);
39
40 guint64 mono_time_since_last_stw ()
41 {
42         if (end_of_last_stw == 0)
43                 return 0;
44
45         TV_DECLARE (current_time);
46         TV_GETTIME (current_time);
47         return TV_ELAPSED (end_of_last_stw, current_time);
48 }
49
/* Incremented once per sgen_client_stop_world () call, right before the other threads are suspended. */
unsigned int sgen_global_stop_count = 0;
51
52 inline static void*
53 align_pointer (void *ptr)
54 {
55         mword p = (mword)ptr;
56         p += sizeof (gpointer) - 1;
57         p &= ~ (sizeof (gpointer) - 1);
58         return (void*)p;
59 }
60
/*
 * Record the calling thread's current stack position and register context in
 * its own SgenThreadInfo. sgen_client_stop_world () calls this after taking
 * the GC locks and before suspending the other threads.
 *
 * START is not used by this function.
 */
static void
update_current_thread_stack (void *start)
{
        /* Only the address of this local is used: it marks the stack position of this frame. */
        int stack_guard = 0;
        SgenThreadInfo *info = mono_thread_info_current ();

        info->client_info.stack_start = align_pointer (&stack_guard);
        g_assert (info->client_info.stack_start);
        /* The recorded address must lie within [stack_start_limit, stack_end) of this thread. */
        g_assert (info->client_info.stack_start >= info->client_info.info.stack_start_limit && info->client_info.stack_start < info->client_info.info.stack_end);

#if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
        /* Capture the current register context into the thread info. */
        MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
#else
        g_error ("Sgen STW requires a working mono-context");
#endif

        /* If the runtime installed a thread_suspend_func callback, hand it the captured context (second argument is NULL here). */
        if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
                mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
}
80
/*
 * Take the locks held across a stop-the-world: LOCK_INTERRUPTION first, then
 * the thread-info suspend lock. release_gc_locks () drops them in the
 * opposite order.
 */
static void
acquire_gc_locks (void)
{
        LOCK_INTERRUPTION;
        mono_thread_info_suspend_lock ();
}
87
/*
 * Release the locks taken by acquire_gc_locks (), in reverse acquisition
 * order: the thread-info suspend lock first, then UNLOCK_INTERRUPTION.
 */
static void
release_gc_locks (void)
{
        mono_thread_info_suspend_unlock ();
        UNLOCK_INTERRUPTION;
}
94
/*
 * Timestamp taken in sgen_client_stop_world () just before the other threads
 * are suspended; sgen_client_restart_world () measures the pause from it.
 */
static TV_DECLARE (stop_world_time);
/* Longest pause seen so far, updated in sgen_client_restart_world (). */
static unsigned long max_pause_usec = 0;

/*
 * Accumulated time spent stopping and restarting the world. Both are exposed
 * as the "World stop" / "World restart" counters by mono_sgen_init_stw ().
 */
static guint64 time_stop_world;
static guint64 time_restart_world;
100
/*
 * Stop the world for a collection of GENERATION.
 *
 * Takes the GC locks, processes togglerefs, records the calling thread's own
 * stack and context, suspends the other threads, then tells the memory
 * governor that a collection is starting and resets the bridge data when
 * bridge processing is needed. Profiler gc_event notifications are raised
 * around each step. The locks taken here stay held on return; they are
 * released by sgen_client_restart_world ().
 *
 * GENERATION is passed through to the profiler events and the memory governor.
 */
/* LOCKING: assumes the GC lock is held */
void
sgen_client_stop_world (int generation)
{
        TV_DECLARE (end_handshake);

        MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_STOP_WORLD, generation));

        acquire_gc_locks ();

        MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation));

        /* We start to scan after locks are taken, this ensures we won't be interrupted. */
        sgen_process_togglerefs ();

        /* The argument is ignored by update_current_thread_stack (); it records this thread's own stack/context. */
        update_current_thread_stack (&generation);

        sgen_global_stop_count++;
        SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
        /* Start of the pause; also the reference point for the pause time computed at restart. */
        TV_GETTIME (stop_world_time);

        sgen_unified_suspend_stop_world ();

        SGEN_LOG (3, "world stopped");

        MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_STOP_WORLD, generation));

        /* Account the time from stop_world_time up to here in the "World stop" counter. */
        TV_GETTIME (end_handshake);
        time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);

        sgen_memgov_collection_start (generation);
        if (sgen_need_bridge_processing ())
                sgen_bridge_reset_data ();
}
135
/*
 * Restart the world after a stop started by sgen_client_stop_world ().
 *
 * Flushes pending profiler move events, clears the stack/context recorded for
 * every thread, resumes the suspended threads, updates the pause statistics
 * and finally releases the GC locks.
 *
 * GENERATION is passed through to the profiler events.
 * STW_TIME is an out parameter: it receives the pause length measured from
 * stop_world_time to the end of the restart handshake. It is dereferenced
 * unconditionally, so it must not be NULL.
 */
/* LOCKING: assumes the GC lock is held */
void
sgen_client_restart_world (int generation, gint64 *stw_time)
{
        TV_DECLARE (end_sw);
        TV_DECLARE (start_handshake);
        unsigned long usec;

        /* notify the profiler of the leftovers */
        /* FIXME this is the wrong spot as we can STW for non collection reasons. */
        if (MONO_PROFILER_ENABLED (gc_moves))
                mono_sgen_gc_event_moves ();

        MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_PRE_START_WORLD, generation));

        /* Drop the per-thread stack start and context that were recorded while the world was stopped. */
        FOREACH_THREAD (info) {
                info->client_info.stack_start = NULL;
                memset (&info->client_info.ctx, 0, sizeof (MonoContext));
        } FOREACH_THREAD_END

        TV_GETTIME (start_handshake);

        sgen_unified_suspend_restart_world ();

        /* Update the restart counter, the pause length, the maximum pause and the end-of-last-STW timestamp. */
        TV_GETTIME (end_sw);
        time_restart_world += TV_ELAPSED (start_handshake, end_sw);
        usec = TV_ELAPSED (stop_world_time, end_sw);
        max_pause_usec = MAX (usec, max_pause_usec);
        end_of_last_stw = end_sw;

        SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);

        MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_START_WORLD, generation));

        /*
         * We must release the thread info suspend lock after doing
         * the thread handshake.  Otherwise, if the GC stops the world
         * and a thread is in the process of starting up, but has not
         * yet registered (it's not in the thread_list), it is
         * possible that the thread does register while the world is
         * stopped.  When restarting the GC will then try to restart
         * said thread, but since it never got the suspend signal, it
         * cannot answer the restart signal, so a deadlock results.
         */
        release_gc_locks ();

        MONO_PROFILER_RAISE (gc_event, (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation));

        *stw_time = usec;
}
186
187 void
188 mono_sgen_init_stw (void)
189 {
190         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
191         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
192 }
193
194 /* Unified suspend code */
195
196 static gboolean
197 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
198 {
199         /*
200         A thread explicitly asked to be skiped because it holds no managed state.
201         This is used by TP and finalizer threads.
202         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
203         */
204         if (info->client_info.gc_disabled) {
205                 if (reason)
206                         *reason = 1;
207                 return FALSE;
208         }
209
210         /*
211         We have detected that this thread is failing/dying, ignore it.
212         FIXME: can't we merge this with thread_is_dying?
213         */
214         if (info->client_info.skip) {
215                 if (reason)
216                         *reason = 2;
217                 return FALSE;
218         }
219
220         /*
221         Suspending the current thread will deadlock us, bad idea.
222         */
223         if (info == mono_thread_info_current ()) {
224                 if (reason)
225                         *reason = 3;
226                 return FALSE;
227         }
228
229         /*
230         We can't suspend the workers that will do all the heavy lifting.
231         FIXME Use some state bit in SgenThreadInfo for this.
232         */
233         if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
234                 if (reason)
235                         *reason = 4;
236                 return FALSE;
237         }
238
239         /*
240         The thread has signaled that it started to detach, ignore it.
241         FIXME: can't we merge this with skip
242         */
243         if (!mono_thread_info_is_live (info)) {
244                 if (reason)
245                         *reason = 5;
246                 return FALSE;
247         }
248
249         return TRUE;
250 }
251
/*
 * Suspend every thread that takes part in this stop-the-world and record the
 * context and stack start the collector will scan for each of them.
 *
 * The work is done in three phases:
 *   1. Reset the per-thread flags and request suspension of every
 *      participating thread.
 *   2. Repeatedly resume and re-suspend the threads that were caught in a
 *      critical location, until no thread needs to be restarted.
 *   3. For each suspended thread, copy its suspend-state context and derive
 *      its stack start from the saved stack pointer.
 */
static void
sgen_unified_suspend_stop_world (void)
{
        /* -1 means "first retry": yield instead of sleeping. Afterwards it is the g_usleep () argument. */
        int sleep_duration = -1;

        mono_threads_begin_global_suspend ();
        THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));

        /* Phase 1: clear the flags left from a previous STW and ask each participating thread to suspend. */
        FOREACH_THREAD (info) {
                info->client_info.skip = FALSE;
                info->client_info.suspend_done = FALSE;

                /* Only read below when the check returns FALSE, in which case it has been set. */
                int reason;
                if (!sgen_is_thread_in_current_stw (info, &reason)) {
                        THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
                        continue;
                }

                /* A failed suspend request marks the thread as skipped for the rest of this STW. */
                info->client_info.skip = !mono_thread_info_begin_suspend (info);

                THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
        } FOREACH_THREAD_END

        /* The current thread is never suspended; flag it as done so the loops below leave it alone. */
        mono_thread_info_current ()->client_info.suspend_done = TRUE;
        mono_threads_wait_pending_operations ();

        /* Phase 2: loop until no suspended thread had to be restarted. */
        for (;;) {
                /* Number of threads resumed in this round because they were in a critical location. */
                gint restart_counter = 0;

                FOREACH_THREAD (info) {
                        gint suspend_count;

                        int reason = 0;
                        if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
                                continue;
                        }

                        /*
                        All threads that reach here are pristine suspended. This means the following:

                        - We haven't accepted the previous suspend as good.
                        - We haven't given up on it for this STW (it's either bad or asked not to)
                        */
                        if (!mono_thread_info_in_critical_location (info)) {
                                info->client_info.suspend_done = TRUE;

                                THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
                                continue;
                        }

                        /* The thread is in a critical location: it must be resumed and suspended again later. */
                        suspend_count = mono_thread_info_suspend_count (info);
                        if (!(suspend_count == 1))
                                g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);

                        info->client_info.skip = !mono_thread_info_begin_resume (info);
                        if (!info->client_info.skip)
                                restart_counter += 1;

                        THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
                } FOREACH_THREAD_END

                mono_threads_wait_pending_operations ();

                if (restart_counter == 0)
                        break;

                /* Give the resumed threads time to move on: yield the first time, then sleep 0, 10, 20, ... via g_usleep (). */
                if (sleep_duration < 0) {
                        mono_thread_info_yield ();
                        sleep_duration = 0;
                } else {
                        g_usleep (sleep_duration);
                        sleep_duration += 10;
                }

                /* Suspend again the threads that are still pending and currently running. */
                FOREACH_THREAD (info) {
                        int reason = 0;
                        if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
                                continue;
                        }

                        if (!mono_thread_info_is_running (info)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
                                continue;
                        }

                        info->client_info.skip = !mono_thread_info_begin_suspend (info);

                        THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
                } FOREACH_THREAD_END

                mono_threads_wait_pending_operations ();
        }

        /* Phase 3: record the context and stack start of every thread that ended up suspended. */
        FOREACH_THREAD (info) {
                gpointer stopped_ip;

                int reason = 0;
                if (!sgen_is_thread_in_current_stw (info, &reason)) {
                        /* Of the non-participating threads, only the current one may have suspend_done set. */
                        g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());

                        THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
                        continue;
                }

                g_assert (info->client_info.suspend_done);

                info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;

                /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
                /* The stack start is the saved stack pointer lowered by REDZONE_SIZE. */
                info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);

                if (info->client_info.stack_start < info->client_info.info.stack_start_limit
                         || info->client_info.stack_start >= info->client_info.info.stack_end) {
                        /*
                         * Thread context is in unhandled state, most likely because it is
                         * dying. We don't scan it.
                         * FIXME We should probably rework and check the valid flag instead.
                         */
                        info->client_info.stack_start = NULL;
                }

                stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));

                binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);

                THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
                        mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.info.stack_end : NULL);
        } FOREACH_THREAD_END
}
383
384 static void
385 sgen_unified_suspend_restart_world (void)
386 {
387         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
388         FOREACH_THREAD (info) {
389                 int reason = 0;
390                 if (sgen_is_thread_in_current_stw (info, &reason)) {
391                         g_assert (mono_thread_info_begin_resume (info));
392                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
393
394                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
395                 } else {
396                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
397                 }
398         } FOREACH_THREAD_END
399
400         mono_threads_wait_pending_operations ();
401         mono_threads_end_global_suspend ();
402 }
403 #endif