Merge pull request #5014 from vkargov/vk-kasha
[mono.git] / mono / metadata / sgen-stw.c
1 /**
2  * \file
3  * Stop the world functionality
4  *
5  * Author:
6  *      Paolo Molaro (lupus@ximian.com)
7  *  Rodrigo Kumpera (kumpera@gmail.com)
8  *
9  * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
10  * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
11  * Copyright 2011 Xamarin, Inc.
12  * Copyright (C) 2012 Xamarin Inc
13  *
14  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
15  */
16
17 #include "config.h"
18 #ifdef HAVE_SGEN_GC
19
20 #include "sgen/sgen-gc.h"
21 #include "sgen/sgen-protocol.h"
22 #include "sgen/sgen-memory-governor.h"
23 #include "sgen/sgen-workers.h"
24 #include "metadata/profiler-private.h"
25 #include "sgen/sgen-client.h"
26 #include "metadata/sgen-bridge-internals.h"
27 #include "metadata/gc-internals.h"
28 #include "utils/mono-threads.h"
29 #include "utils/mono-threads-debug.h"
30
/* Short local aliases for the SGen timing macros used throughout this file. */
#define TV_DECLARE SGEN_TV_DECLARE
#define TV_GETTIME SGEN_TV_GETTIME
#define TV_ELAPSED SGEN_TV_ELAPSED

/* Forward declarations of the unified-suspend helpers defined later in this file. */
static void sgen_unified_suspend_restart_world (void);
static void sgen_unified_suspend_stop_world (void);

/*
 * Timestamp recorded by sgen_client_restart_world () once the world has been
 * restarted. It is zero-initialized, and mono_time_since_last_stw () treats 0
 * as "no stop-the-world has completed yet".
 */
static TV_DECLARE (end_of_last_stw);
39
40 guint64 mono_time_since_last_stw ()
41 {
42         if (end_of_last_stw == 0)
43                 return 0;
44
45         TV_DECLARE (current_time);
46         TV_GETTIME (current_time);
47         return TV_ELAPSED (end_of_last_stw, current_time);
48 }
49
/* Number of times the world has been stopped; incremented by sgen_client_stop_world (). */
unsigned int sgen_global_stop_count = 0;
51
52 inline static void*
53 align_pointer (void *ptr)
54 {
55         mword p = (mword)ptr;
56         p += sizeof (gpointer) - 1;
57         p &= ~ (sizeof (gpointer) - 1);
58         return (void*)p;
59 }
60
61 static void
62 update_current_thread_stack (void *start)
63 {
64         int stack_guard = 0;
65         SgenThreadInfo *info = mono_thread_info_current ();
66
67         info->client_info.stack_start = align_pointer (&stack_guard);
68         g_assert (info->client_info.stack_start);
69         g_assert (info->client_info.stack_start >= info->client_info.stack_start_limit && info->client_info.stack_start < info->client_info.stack_end);
70
71 #if !defined(MONO_CROSS_COMPILE) && MONO_ARCH_HAS_MONO_CONTEXT
72         MONO_CONTEXT_GET_CURRENT (info->client_info.ctx);
73 #else
74         g_error ("Sgen STW requires a working mono-context");
75 #endif
76
77         if (mono_gc_get_gc_callbacks ()->thread_suspend_func)
78                 mono_gc_get_gc_callbacks ()->thread_suspend_func (info->client_info.runtime_data, NULL, &info->client_info.ctx);
79 }
80
/*
 * Take the two locks held across a stop-the-world: LOCK_INTERRUPTION first,
 * then the thread-info suspend lock. release_gc_locks () drops them in the
 * opposite order.
 */
static void
acquire_gc_locks (void)
{
        LOCK_INTERRUPTION;
        mono_thread_info_suspend_lock ();
}
87
/*
 * Release the locks taken by acquire_gc_locks (), in reverse acquisition
 * order: the thread-info suspend lock first, then UNLOCK_INTERRUPTION.
 */
static void
release_gc_locks (void)
{
        mono_thread_info_suspend_unlock ();
        UNLOCK_INTERRUPTION;
}
94
/* Timestamp taken by sgen_client_stop_world () right before the suspend handshake starts. */
static TV_DECLARE (stop_world_time);
/* Largest stop-to-restart interval seen so far (TV_ELAPSED value; logged as usec). */
static unsigned long max_pause_usec = 0;

/* Accumulated duration of the suspend handshakes; registered as the "World stop" counter. */
static guint64 time_stop_world;
/* Accumulated duration of the resume handshakes; registered as the "World restart" counter. */
static guint64 time_restart_world;
100
101 /* LOCKING: assumes the GC lock is held */
102 void
103 sgen_client_stop_world (int generation)
104 {
105         TV_DECLARE (end_handshake);
106
107         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
108
109         acquire_gc_locks ();
110
111         mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD_LOCKED, generation);
112
113         /* We start to scan after locks are taking, this ensures we won't be interrupted. */
114         sgen_process_togglerefs ();
115
116         update_current_thread_stack (&generation);
117
118         sgen_global_stop_count++;
119         SGEN_LOG (3, "stopping world n %d from %p %p", sgen_global_stop_count, mono_thread_info_current (), (gpointer) (gsize) mono_native_thread_id_get ());
120         TV_GETTIME (stop_world_time);
121
122         sgen_unified_suspend_stop_world ();
123
124         SGEN_LOG (3, "world stopped");
125
126         mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
127
128         TV_GETTIME (end_handshake);
129         time_stop_world += TV_ELAPSED (stop_world_time, end_handshake);
130
131         sgen_memgov_collection_start (generation);
132         if (sgen_need_bridge_processing ())
133                 sgen_bridge_reset_data ();
134 }
135
136 /* LOCKING: assumes the GC lock is held */
137 void
138 sgen_client_restart_world (int generation, gint64 *stw_time)
139 {
140         TV_DECLARE (end_sw);
141         TV_DECLARE (start_handshake);
142         unsigned long usec;
143
144         /* notify the profiler of the leftovers */
145         /* FIXME this is the wrong spot at we can STW for non collection reasons. */
146         if (G_UNLIKELY (mono_profiler_events & MONO_PROFILE_GC_MOVES))
147                 mono_sgen_gc_event_moves ();
148
149         mono_profiler_gc_event (MONO_GC_EVENT_PRE_START_WORLD, generation);
150
151         FOREACH_THREAD (info) {
152                 info->client_info.stack_start = NULL;
153                 memset (&info->client_info.ctx, 0, sizeof (MonoContext));
154         } FOREACH_THREAD_END
155
156         TV_GETTIME (start_handshake);
157
158         sgen_unified_suspend_restart_world ();
159
160         TV_GETTIME (end_sw);
161         time_restart_world += TV_ELAPSED (start_handshake, end_sw);
162         usec = TV_ELAPSED (stop_world_time, end_sw);
163         max_pause_usec = MAX (usec, max_pause_usec);
164         end_of_last_stw = end_sw;
165
166         SGEN_LOG (2, "restarted (pause time: %d usec, max: %d)", (int)usec, (int)max_pause_usec);
167
168         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
169
170         /*
171          * We must release the thread info suspend lock after doing
172          * the thread handshake.  Otherwise, if the GC stops the world
173          * and a thread is in the process of starting up, but has not
174          * yet registered (it's not in the thread_list), it is
175          * possible that the thread does register while the world is
176          * stopped.  When restarting the GC will then try to restart
177          * said thread, but since it never got the suspend signal, it
178          * cannot answer the restart signal, so a deadlock results.
179          */
180         release_gc_locks ();
181
182         mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD_UNLOCKED, generation);
183
184         *stw_time = usec;
185 }
186
187 void
188 mono_sgen_init_stw (void)
189 {
190         mono_counters_register ("World stop", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_stop_world);
191         mono_counters_register ("World restart", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_restart_world);
192 }
193
194 /* Unified suspend code */
195
196 static gboolean
197 sgen_is_thread_in_current_stw (SgenThreadInfo *info, int *reason)
198 {
199         /*
200         A thread explicitly asked to be skiped because it holds no managed state.
201         This is used by TP and finalizer threads.
202         FIXME Use an atomic variable for this to avoid everyone taking the GC LOCK.
203         */
204         if (info->client_info.gc_disabled) {
205                 if (reason)
206                         *reason = 1;
207                 return FALSE;
208         }
209
210         /*
211         We have detected that this thread is failing/dying, ignore it.
212         FIXME: can't we merge this with thread_is_dying?
213         */
214         if (info->client_info.skip) {
215                 if (reason)
216                         *reason = 2;
217                 return FALSE;
218         }
219
220         /*
221         Suspending the current thread will deadlock us, bad idea.
222         */
223         if (info == mono_thread_info_current ()) {
224                 if (reason)
225                         *reason = 3;
226                 return FALSE;
227         }
228
229         /*
230         We can't suspend the workers that will do all the heavy lifting.
231         FIXME Use some state bit in SgenThreadInfo for this.
232         */
233         if (sgen_thread_pool_is_thread_pool_thread (major_collector.get_sweep_pool (), mono_thread_info_get_tid (info)) ||
234                         sgen_workers_is_worker_thread (mono_thread_info_get_tid (info))) {
235                 if (reason)
236                         *reason = 4;
237                 return FALSE;
238         }
239
240         /*
241         The thread has signaled that it started to detach, ignore it.
242         FIXME: can't we merge this with skip
243         */
244         if (!mono_thread_info_is_live (info)) {
245                 if (reason)
246                         *reason = 5;
247                 return FALSE;
248         }
249
250         return TRUE;
251 }
252
/*
 * sgen_unified_suspend_stop_world:
 *
 * Suspends every thread that takes part in the current stop-the-world (see
 * sgen_is_thread_in_current_stw ()) and records each one's register context
 * and stack start in its client_info.
 *
 * The work is done in three phases:
 *   1. Request suspension of every participating thread.
 *   2. Repeatedly resume and re-suspend the threads that stopped in a
 *      critical location, until none of them had to be resumed.
 *   3. Copy the suspend-time context of each participating thread and derive
 *      its stack start from it.
 */
static void
sgen_unified_suspend_stop_world (void)
{
        /* Negative until the first retry: the first retry only yields, later ones sleep. */
        int sleep_duration = -1;

        mono_threads_begin_global_suspend ();
        THREADS_STW_DEBUG ("[GC-STW-BEGIN][%p] *** BEGIN SUSPEND *** \n", mono_thread_info_get_tid (mono_thread_info_current ()));

        /* Phase 1: reset the per-thread STW flags and ask every participating thread to suspend. */
        FOREACH_THREAD (info) {
                /* skip is cleared before the participation check below, so a value left over from a previous STW cannot exclude the thread. */
                info->client_info.skip = FALSE;
                info->client_info.suspend_done = FALSE;

                /* Written by sgen_is_thread_in_current_stw () whenever it returns FALSE, which is the only case in which it is read. */
                int reason;
                if (!sgen_is_thread_in_current_stw (info, &reason)) {
                        THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] IGNORE thread %p skip %s reason %d\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false", reason);
                        continue;
                }

                /* A thread whose suspend request fails is flagged skip and is excluded from the rest of this STW. */
                info->client_info.skip = !mono_thread_info_begin_suspend (info);

                THREADS_STW_DEBUG ("[GC-STW-BEGIN-SUSPEND] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
        } FOREACH_THREAD_END

        /* The calling thread counts as done; it is never suspended (see reason 3 in sgen_is_thread_in_current_stw ()). */
        mono_thread_info_current ()->client_info.suspend_done = TRUE;
        mono_threads_wait_pending_operations ();

        /* Phase 2: loop until a full pass over the threads resumes none of them. */
        for (;;) {
                /* Number of threads resumed in this pass. */
                gint restart_counter = 0;

                FOREACH_THREAD (info) {
                        gint suspend_count;

                        int reason = 0;
                        if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE RESUME thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
                                continue;
                        }

                        /*
                        All threads that reach here are pristine suspended. This means the following:

                        - We haven't accepted the previous suspend as good.
                        - We haven't given up on it for this STW (it's either bad or asked not to)
                        */
                        if (!mono_thread_info_in_critical_location (info)) {
                                info->client_info.suspend_done = TRUE;

                                THREADS_STW_DEBUG ("[GC-STW-RESTART] DONE thread %p deemed fully suspended\n", mono_thread_info_get_tid (info));
                                continue;
                        }

                        /* The thread stopped in a critical location: it must hold exactly our one suspend request before we let it run on. */
                        suspend_count = mono_thread_info_suspend_count (info);
                        if (!(suspend_count == 1))
                                g_error ("[%p] suspend_count = %d, but should be 1", mono_thread_info_get_tid (info), suspend_count);

                        /* Only successfully resumed threads count towards another pass; a failed resume flags the thread skip. */
                        info->client_info.skip = !mono_thread_info_begin_resume (info);
                        if (!info->client_info.skip)
                                restart_counter += 1;

                        THREADS_STW_DEBUG ("[GC-STW-RESTART] RESTART thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
                } FOREACH_THREAD_END

                mono_threads_wait_pending_operations ();

                /* Nothing was resumed, so every participating thread is either done or skipped. */
                if (restart_counter == 0)
                        break;

                /* Back off before re-suspending: yield on the first retry, then sleep 0, 10, 20, ... on the following ones. */
                if (sleep_duration < 0) {
                        mono_thread_info_yield ();
                        sleep_duration = 0;
                } else {
                        g_usleep (sleep_duration);
                        sleep_duration += 10;
                }

                /* Re-suspend the threads that were resumed above and are running again. */
                FOREACH_THREAD (info) {
                        int reason = 0;
                        if (info->client_info.suspend_done || !sgen_is_thread_in_current_stw (info, &reason)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not been processed done %d current %d reason %d\n", mono_thread_info_get_tid (info), info->client_info.suspend_done, !sgen_is_thread_in_current_stw (info, NULL), reason);
                                continue;
                        }

                        if (!mono_thread_info_is_running (info)) {
                                THREADS_STW_DEBUG ("[GC-STW-RESTART] IGNORE SUSPEND thread %p not running\n", mono_thread_info_get_tid (info));
                                continue;
                        }

                        info->client_info.skip = !mono_thread_info_begin_suspend (info);

                        THREADS_STW_DEBUG ("[GC-STW-RESTART] SUSPEND thread %p skip %s\n", mono_thread_info_get_tid (info), info->client_info.skip ? "true" : "false");
                } FOREACH_THREAD_END

                mono_threads_wait_pending_operations ();
        }

        /* Phase 3: capture the context and stack start of every thread that is now suspended. */
        FOREACH_THREAD (info) {
                gpointer stopped_ip;

                int reason = 0;
                if (!sgen_is_thread_in_current_stw (info, &reason)) {
                        /* Of the excluded threads, only the calling thread may carry suspend_done. */
                        g_assert (!info->client_info.suspend_done || info == mono_thread_info_current ());

                        THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is NOT suspended, reason %d\n", mono_thread_info_get_tid (info), reason);
                        continue;
                }

                g_assert (info->client_info.suspend_done);

                info->client_info.ctx = mono_thread_info_get_suspend_state (info)->ctx;

                /* Once we remove the old suspend code, we should move sgen to directly access the state in MonoThread */
                /* The stack start is placed REDZONE_SIZE bytes below the saved stack pointer. */
                info->client_info.stack_start = (gpointer) ((char*)MONO_CONTEXT_GET_SP (&info->client_info.ctx) - REDZONE_SIZE);

                if (info->client_info.stack_start < info->client_info.stack_start_limit
                         || info->client_info.stack_start >= info->client_info.stack_end) {
                        /*
                         * Thread context is in unhandled state, most likely because it is
                         * dying. We don't scan it.
                         * FIXME We should probably rework and check the valid flag instead.
                         */
                        info->client_info.stack_start = NULL;
                }

                stopped_ip = (gpointer) (MONO_CONTEXT_GET_IP (&info->client_info.ctx));

                binary_protocol_thread_suspend ((gpointer) mono_thread_info_get_tid (info), stopped_ip);

                THREADS_STW_DEBUG ("[GC-STW-SUSPEND-END] thread %p is suspended, stopped_ip = %p, stack = %p -> %p\n",
                        mono_thread_info_get_tid (info), stopped_ip, info->client_info.stack_start, info->client_info.stack_start ? info->client_info.stack_end : NULL);
        } FOREACH_THREAD_END
}
384
385 static void
386 sgen_unified_suspend_restart_world (void)
387 {
388         THREADS_STW_DEBUG ("[GC-STW-END] *** BEGIN RESUME ***\n");
389         FOREACH_THREAD (info) {
390                 int reason = 0;
391                 if (sgen_is_thread_in_current_stw (info, &reason)) {
392                         g_assert (mono_thread_info_begin_resume (info));
393                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] RESUME thread %p\n", mono_thread_info_get_tid (info));
394
395                         binary_protocol_thread_restart ((gpointer) mono_thread_info_get_tid (info));
396                 } else {
397                         THREADS_STW_DEBUG ("[GC-STW-RESUME-WORLD] IGNORE thread %p, reason %d\n", mono_thread_info_get_tid (info), reason);
398                 }
399         } FOREACH_THREAD_END
400
401         mono_threads_wait_pending_operations ();
402         mono_threads_end_global_suspend ();
403 }
404 #endif