[sgen] Split up concurrent sweep from worker logic
[mono.git] / mono / metadata / sgen-stw.c
1 /**
2  * \file
3  * Stop the world functionality
4  *
5  * Author:
6  *      Paolo Molaro (lupus@ximian.com)
7  *  Rodrigo Kumpera (kumpera@gmail.com)
8  *
9  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11  * Copyright 2011 Xamarin, Inc.
12  * Copyright (C) 2012 Xamarin Inc
13  *
14  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
15  */
16
17 #include "config.h"
18 #ifdef HAVE_SGEN_GC
19
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
30
/* Short local aliases for the SGen timing macros used throughout this file. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Forward declarations; both are defined in the "Unified suspend code" section of this file. */
static void sgen_unified_suspend_restart_world (void);
static void sgen_unified_suspend_stop_world (void);

/*
 * Timestamp stored by sgen_client_restart_world () once the world has been
 * restarted. mono_time_since_last_stw () treats a value of 0 as "no
 * stop-the-world has finished yet".
 */
static TV_DECLARE (end_of_last_stw);
39
40 guint64 mono_time_since_last_stw ()
41 {
42         if (end_of_last_stw == 0)
43                 return 0;
44
45         TV_DECLARE (current_time);
46         TV_GETTIME (current_time);
47         return TV_ELAPSED (end_of_last_stw, current_time);
48 }
49
/* Incremented by sgen_client_stop_world () each time the world is stopped. */
unsigned int sgen_global_stop_count = 0;
51
52 inline static void*
53 align_pointer (void *ptr)
54 {
55         mword p = (mword)ptr;
56         p += sizeof (gpointer) - 1;
57         p &= ~ (sizeof (gpointer) - 1);
58         return (void*)p;
59 }
60
61 static void
62 update_current_thread_stack (void *start)
63 {
64         int stack_guard = 0;
65         SgenThreadInfo *info = mono_thread_info_current ();
66
67         info->client_info.stack_start = align_pointer (&stack_guard);
68         g_assert (info->client_info.stack_start);
69         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
70
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72         MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
73 #else
74         g_error ("Sgen STW requires a working mono-context");
75 #endif
76
77         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
79 }
80
81 static void
82 acquire_gc_locks (void)
83 {
84         LOCK_INTERRUPTION;
85         mono_thread_info_suspend_lock ();
86 }
87
88 static void
89 release_gc_locks (void)
90 {
91         mono_thread_info_suspend_unlock ();
92         UNLOCK_INTERRUPTION;
93 }
94
/*
 * Timestamp taken by sgen_client_stop_world () right before threads are
 * suspended; sgen_client_restart_world () measures the pause from it.
 */
static TV_DECLARE (stop_world_time);
/* Largest pause computed so far by sgen_client_restart_world (). */
static unsigned long max_pause_usec = 0;

/*
 * Accumulated durations of the suspend and resume handshakes. They are
 * exposed as the "World stop" and "World restart" counters by
 * mono_sgen_init_stw ().
 */
static guint64 time_stop_world;
static guint64 time_restart_world;
100
101 /* LOCKING: assumes the GC lock is held */
102 void
103 sgen_client_stop_world (int generation)
104 {
105         TV_DECLARE (end_handshake);
106
107         /* notify the profiler of the leftovers */
108         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
109         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
110                 mono_sgen_gc_event_moves ();
111
112         acquire_gc_locks ();
113
114         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
115
116         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
117         sgen_process_togglerefs ();
118
119         update_current_thread_stack (&generation);
120
121         sgen_global_stop_count++;
122         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
123         TV_GETTIME (stop_world_time);
124
125         sgen_unified_suspend_stop_world ();
126
127         SGEN_LOG (3, "world stopped");
128
129         TV_GETTIME (end_handshake);
130         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
131
132         sgen_memgov_collection_start (generation);
133         if (sgen_need_bridge_processing ())
134                 sgen_bridge_reset_data ();
135 }
136
137 /* LOCKING: assumes the GC lock is held */
138 void
139 sgen_client_restart_world (int generation, gint64 *stw_time)
140 {
141         TV_DECLARE (end_sw);
142         TV_DECLARE (start_handshake);
143         unsigned long usec;
144
145         /* notify the profiler of the leftovers */
146         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
147         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
148                 mono_sgen_gc_event_moves ();
149
150         FOREACH_THREAD (info) {
151                 info->client_info.stack_start = NULL;
152                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
153         } FOREACH_THREAD_END
154
155         TV_GETTIME (start_handshake);
156
157         sgen_unified_suspend_restart_world ();
158
159         TV_GETTIME (end_sw);
160         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
161         usec = TV_ELAPSED (stop_world_time, end_sw);
162         max_pause_usec = MAX (usec, max_pause_usec);
163         end_of_last_stw = end_sw;
164
165         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
166
167         /*
168          * We must release the thread info suspend lock after doing
169          * the thread handshake.  Otherwise, if the GC stops the world
170          * and a thread is in the process of starting up, but has not
171          * yet registered (it's not in the thread_list), it is
172          * possible that the thread does register while the world is
173          * stopped.  When restarting the GC will then try to restart
174          * said thread, but since it never got the suspend signal, it
175          * cannot answer the restart signal, so a deadlock results.
176          */
177         release_gc_locks ();
178
179         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
180
181         *stw_time = usec;
182 }
183
184 void
185 mono_sgen_init_stw (void)
186 {
187         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
188         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
189 }
190
191 /* Unified suspend code */
192
193 static gboolean
194 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
195 {
196         /*
197         A thread explicitly asked to be skiped because it holds no managed state.
198         This is used by TP and finalizer threads.
199         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
200         */
201         if (info->client_info.gc_disabled) {
202                 if (reason)
203                         *reason = 1;
204                 return FALSE;
205         }
206
207         /*
208         We have detected that this thread is failing/dying, ignore it.
209         FIXME: can't we merge this with thread_is_dying?
210         */
211         if (info->client_info.skip) {
212                 if (reason)
213                         *reason = 2;
214                 return FALSE;
215         }
216
217         /*
218         Suspending the current thread will deadlock us, bad idea.
219         */
220         if (info == mono_thread_info_current ()) {
221                 if (reason)
222                         *reason = 3;
223                 return FALSE;
224         }
225
226         /*
227         We can't suspend the workers that will do all the heavy lifting.
228         FIXME Use some state bit in SgenThreadInfo for this.
229         */
230         if (sgen_thread_pool_is_thread_pool_thread (major_collector.get_sweep_pool (), mono_thread_info_get_tid (info)) ||
231                         sgen_workers_is_worker_thread (mono_thread_info_get_tid (info))) {
232                 if (reason)
233                         *reason = 4;
234                 return FALSE;
235         }
236
237         /*
238         The thread has signaled that it started to detach, ignore it.
239         FIXME: can't we merge this with skip
240         */
241         if (!mono_thread_info_is_live (info)) {
242                 if (reason)
243                         *reason = 5;
244                 return FALSE;
245         }
246
247         return TRUE;
248 }
249
250 static void
251 sgen_unified_suspend_stop_world (void)
252 {
253         int sleep_duration = -1;
254
255         mono_threads_begin_global_suspend ();
256         THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));
257
258         FOREACH_THREAD (info) {
259                 info->client_info.skip = FALSE;
260                 info->client_info.suspend_done = FALSE;
261
262                 int reason;
263                 if (!sgen_is_thread_in_current_stw (info, &reason)) {
264                         THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
265                         continue;
266                 }
267
268                 info->client_info.skip = !mono_thread_info_begin_suspend (info);
269
270                 THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
271         } FOREACH_THREAD_END
272
273         mono_thread_info_current ()->client_info.suspend_done = TRUE;
274         mono_threads_wait_pending_operations ();
275
276         for (;;) {
277                 gint restart_counter = 0;
278
279                 FOREACH_THREAD (info) {
280                         gint suspend_count;
281
282                         int reason = 0;
283                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
284                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
285                                 continue;
286                         }
287
288                         /*
289                         All threads that reach here are pristine suspended. This means the following:
290
291                         - We haven't accepted the previous suspend as good.
292                         - We haven't gave up on it for this STW (it's either bad or asked not to)
293                         */
294                         if (!mono_thread_info_in_critical_location (info)) {
295                                 info->client_info.suspend_done = TRUE;
296
297                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
298                                 continue;
299                         }
300
301                         suspend_count = mono_thread_info_suspend_count (info);
302                         if (!(suspend_count == 1))
303                                 g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);
304
305                         info->client_info.skip = !mono_thread_info_begin_resume (info);
306                         if (!info->client_info.skip)
307                                 restart_counter += 1;
308
309                         THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
310                 } FOREACH_THREAD_END
311
312                 mono_threads_wait_pending_operations ();
313
314                 if (restart_counter == 0)
315                         break;
316
317                 if (sleep_duration < 0) {
318                         mono_thread_info_yield ();
319                         sleep_duration = 0;
320                 } else {
321                         g_usleep (sleep_duration);
322                         sleep_duration += 10;
323                 }
324
325                 FOREACH_THREAD (info) {
326                         int reason = 0;
327                         if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
328                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
329                                 continue;
330                         }
331
332                         if (!mono_thread_info_is_running (info)) {
333                                 THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
334                                 continue;
335                         }
336
337                         info->client_info.skip = !mono_thread_info_begin_suspend (info);
338
339                         THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
340                 } FOREACH_THREAD_END
341
342                 mono_threads_wait_pending_operations ();
343         }
344
345         FOREACH_THREAD (info) {
346                 gpointer stopped_ip;
347
348                 int reason = 0;
349                 if (!sgen_is_thread_in_current_stw (info, &reason)) {
350                         g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());
351
352                         THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
353                         continue;
354                 }
355
356                 g_assert (info->client_info.suspend_done);
357
358                 info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;
359
360                 /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
361                 info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);
362
363                 if (info->client_info.stack_start < info->client_info.stack_start_limit
364                          || info->client_info.stack_start >= info->client_info.stack_end) {
365                         /*
366                          * Thread context is in unhandled state, most likely because it is
367                          * dying. We don't scan it.
368                          * FIXME We should probably rework and check the valid flag instead.
369                          */
370                         info->client_info.stack_start = NULL;
371                 }
372
373                 stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));
374
375                 binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);
376
377                 THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
378                         mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
379         } FOREACH_THREAD_END
380 }
381
382 static void
383 sgen_unified_suspend_restart_world (void)
384 {
385         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
386         FOREACH_THREAD (info) {
387                 int reason = 0;
388                 if (sgen_is_thread_in_current_stw (info, &reason)) {
389                         g_assert (mono_thread_info_begin_resume (info));
390                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
391
392                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
393                 } else {
394                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
395                 }
396         } FOREACH_THREAD_END
397
398         mono_threads_wait_pending_operations ();
399         mono_threads_end_global_suspend ();
400 }
401 #endif