2 * mini-gc.c: GC interface for the mono JIT
5 * Zoltan Varga (vargaz@gmail.com)
7 * Copyright 2009 Novell, Inc (http://www.novell.com)
12 #include <mono/metadata/gc-internal.h>
15 * The code below does not work yet, and probably needs to be thrown out if we move
22 #include <mono/metadata/gc-internal.h>
23 #include <mono/utils/mono-counters.h>
/* Round VAL up to the next multiple of ALIGN. The mask form only gives the right answer when ALIGN is a power of two. */
25 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
/* General debug hook: as defined here it simply executes the statement(s) S. */
28 #define DEBUG(s) do { s; } while (0)
/*
 * GC map debug hook. Two alternative definitions follow; the preprocessor
 * conditionals which choose between them are not visible in this excerpt.
 * This variant executes S and then flushes stdout.
 */
34 #define DEBUG_GC_MAP(s) do { s; fflush (stdout); } while (0)
/* This variant expands to nothing. */
36 #define DEBUG_GC_MAP(s)
/* Number of bits in a gsize; used to address words and bits of a GC descriptor bitmap. */
39 #define GC_BITS_PER_WORD (sizeof (gsize) * 8)
42 * Per-thread data kept by this module. This is stored in the GC and passed to us as
43 * parameters, instead of being stored in a TLS variable, since during a collection,
44 * only the collection thread is active.
50 MonoJitTlsData *jit_tls;
54 /* Stack slot doesn't contain a reference */
56 /* Stack slot contains a reference */
58 /* No info, slot needs to be scanned conservatively */
63 * Contains information needed to mark a stack frame.
64 * FIXME: Optimize the memory usage.
67 /* The frame pointer register */
69 /* The offset of the local variable area in the stack frame relative to the frame pointer */
71 /* The size of the locals area. Can't use nslots as it includes padding */
73 /* The number of stack slots */
79 /* A pair of low pc offset-high pc offset for each SLOT_REF value in gc_refs */
80 guint32 live_ranges [MONO_ZERO_LEN_ARRAY];
/* Running total, in bytes, of the memory allocated for GC map data; registered as the "GC Maps size" counter. */
84 static guint32 gc_maps_size;
/*
 * thread_attach_func:
 *
 *   Installed as the GC's thread_attach_func callback. Returns a newly
 * allocated, zero-filled TlsData record.
 * NOTE(review): presumably the GC hands this pointer back as USER_DATA to the
 * suspend/mark callbacks - confirm against the GC side.
 */
87 thread_attach_func (void)
89 return g_new0 (TlsData, 1);
/*
 * thread_suspend_func:
 *
 *   Installed as the GC's thread_suspend_func callback. Records, in the
 * TlsData passed as USER_DATA, the state later used to walk the stack:
 * the current LMF, the register context converted from SIGCTX, and the
 * JIT TLS data.
 * NOTE(review): the conditions selecting between has_context = TRUE and
 * has_context = FALSE are not visible in this excerpt; presumably the context
 * is only converted when a usable SIGCTX is supplied.
 */
93 thread_suspend_func (gpointer user_data, void *sigctx)
95 TlsData *tls = user_data;
97 tls->lmf = mono_get_lmf ();
99 mono_arch_sigctx_to_monoctx (sigctx, &tls->ctx);
100 tls->has_context = TRUE;
102 tls->has_context = FALSE;
/* Fetch the JIT TLS data through the mono_jit_tls_id slot. */
104 tls->jit_tls = TlsGetValue (mono_jit_tls_id);
/*
 * Debugging aid state used by thread_mark_func ():
 * - precise_frame_count is indexed by the PRECISE flag of the marking pass.
 * - precise_frame_limit is read once from the MONO_PRECISE_COUNT environment
 *   variable; -1 means no limit is applied.
 */
107 static int precise_frame_count [2], precise_frame_limit = -1;
/* Set once MONO_PRECISE_COUNT has been looked up. */
108 static gboolean precise_frame_limit_inited;
/*
 * Marker value compared against the contents of reference slots during precise
 * marking; a slot holding it is not passed to the GC.
 * NOTE(review): presumably this value is stored into slots found to be dead;
 * the store itself is not visible in this excerpt.
 */
110 #define DEAD_REF ((gpointer)(gssize)0x2a2a2a2a2a2a2a2aULL)
/*
 * thread_mark_func:
 *
 *   Installed as the GC's thread_mark_func callback. Marks the stack range
 * STACK_START - STACK_END of the thread whose per-thread data is USER_DATA.
 * The stack is walked frame by frame with mono_find_jit_info (). For frames
 * which have GC map data, the locals area is handled slot by slot: SLOT_PIN
 * slots are scanned conservatively, SLOT_REF slots which are live at the
 * current pc are passed to mono_gc_scan_object (). Stack areas not covered by
 * a map are scanned conservatively.
 * NOTE(review): many lines of this function (braces, branch conditions, loop
 * headers, early exits) are not visible in this excerpt. Presumably the
 * conservative/pinning work is done when PRECISE is FALSE and the SLOT_REF
 * work when it is TRUE - confirm against the full source.
 */
113 thread_mark_func (gpointer user_data, guint8 *stack_start, guint8 *stack_end, gboolean precise)
115 TlsData *tls = user_data;
116 MonoJitInfo *ji, res;
117 MonoContext ctx, new_ctx;
118 MonoLMF *lmf = tls->lmf;
120 gboolean last = TRUE, managed;
122 guint8* fp, *locals_start, *locals_end;
124 int scanned = 0, scanned_precisely, scanned_conservatively;
126 if (mono_thread_internal_current () == NULL) {
128 mono_gc_conservatively_scan_area (stack_start, stack_end);
132 /* Number of bytes scanned based on GC map data */
134 /* Number of bytes scanned precisely based on GC map data */
135 scanned_precisely = 0;
136 /* Number of bytes scanned conservatively based on GC map data */
137 scanned_conservatively = 0;
139 /* FIXME: sgen-gc.c calls this multiple times for each major collection from pin_from_roots */
141 /* FIXME: Use real gc descriptors instead of bitmaps */
143 /* This is one past the last address which we have scanned */
144 stack_limit = stack_start;
146 DEBUG (printf ("*** %s stack marking %p-%p ***\n", precise ? "Precise" : "Conservative", stack_start, stack_end));
148 if (!tls->has_context) {
/* No saved context: zero new_ctx. ctx and new_ctx are both MonoContext, so sizeof (ctx) matches. */
149 memset (&new_ctx, 0, sizeof (ctx));
/* Make new_ctx the current context; mono_find_jit_info () below is given ctx and new_ctx. */
152 memcpy (&ctx, &new_ctx, sizeof (ctx));
154 g_assert ((guint64)stack_limit % sizeof (gpointer) == 0);
156 // FIXME: This doesn't work with appdomain transitions
157 ji = mono_find_jit_info (mono_domain_get (), tls->jit_tls, &res, NULL,
158 &ctx, &new_ctx, NULL, &lmf, NULL, &managed);
/* NOTE(review): (gpointer)-1 is presumably the end-of-walk result of mono_find_jit_info () - confirm. */
159 if (ji == (gpointer)-1)
162 /* The last frame can be in any state so mark conservatively */
168 /* These frames are returned by mono_find_jit_info () two times */
172 /* Scan the frame of this method */
175 * A frame contains the following:
180 * - localloc-ed memory
181 * Currently, only the locals are scanned precisely.
187 DEBUG (char *fname = mono_method_full_name (ji->method, TRUE); printf ("Mark(%d): No GC map for %s\n", precise, fname); g_free (fname));
192 * Debugging aid to control the number of frames scanned precisely
/* The MONO_PRECISE_COUNT environment variable is looked up only once. */
194 if (!precise_frame_limit_inited) {
195 if (getenv ("MONO_PRECISE_COUNT"))
196 precise_frame_limit = atoi (getenv ("MONO_PRECISE_COUNT"));
197 precise_frame_limit_inited = TRUE;
200 if (precise_frame_limit != -1) {
201 if (precise_frame_count [precise] == precise_frame_limit)
/* NOTE(review): the string returned by mono_method_full_name () is not freed here, unlike in the DEBUG statements of this function. */
202 printf ("LAST PRECISE FRAME: %s\n", mono_method_full_name (ji->method, TRUE));
203 if (precise_frame_count [precise] > precise_frame_limit)
206 precise_frame_count [precise] ++;
/* Take the frame pointer value from the saved context according to the register recorded in the map; only RSP and RBP are handled. */
209 if (map->frame_reg == AMD64_RSP)
210 fp = (guint8*)ctx.rsp;
211 else if (map->frame_reg == AMD64_RBP)
212 fp = (guint8*)ctx.rbp;
214 g_assert_not_reached ();
217 g_assert_not_reached ();
220 locals_start = fp + map->locals_offset;
221 locals_end = locals_start + map->locals_size;
/* Offset of the frame's current IP from the start of the method's code; compared against the live ranges below. */
223 pc_offset = (guint8*)MONO_CONTEXT_GET_IP (&ctx) - (guint8*)ji->code_start;
224 g_assert (pc_offset >= 0);
226 DEBUG (char *fname = mono_method_full_name (ji->method, TRUE); printf ("Mark(%d): %s+0x%x (%p) limit=%p fp=%p locals=%p-%p (%d)\n", precise, fname, pc_offset, (gpointer)MONO_CONTEXT_GET_IP (&ctx), stack_limit, fp, locals_start, locals_end, (int)(locals_end - locals_start)); g_free (fname));
229 * FIXME: Add a function to mark using a bitmap, to avoid doing a
230 * call for each object.
233 scanned += locals_end - locals_start;
235 /* Pinning needs to be done first, then the precise scan later */
238 g_assert (locals_start >= stack_limit);
240 if (locals_start > stack_limit) {
241 /* This scans the previously skipped frames as well */
242 DEBUG (printf ("\tscan area %p-%p.\n", stack_limit, locals_start));
243 mono_gc_conservatively_scan_area (stack_limit, locals_start);
/* Conservatively scan each slot which the map marks as SLOT_PIN. */
250 for (i = 0; i < map->nslots; ++i) {
251 if (map->slots [i] == SLOT_PIN) {
252 DEBUG (printf ("\tscan slot %s0x%x(fp)=%p.\n", (guint8*)p > (guint8*)fp ? "" : "-", ABS ((int)((gssize)p - (gssize)fp)), p));
253 mono_gc_conservatively_scan_area (p, p + sizeof (gpointer));
254 scanned_conservatively += sizeof (gpointer);
256 p += sizeof (gpointer);
/* The locals area of this frame has been handled; advance the scanned limit past it. */
260 stack_limit = locals_end;
/* Visit the slots which the map marks as SLOT_REF. */
265 for (i = 0; i < map->nslots; ++i) {
266 if (map->slots [i] == SLOT_REF) {
267 MonoObject **ptr = (MonoObject**)(locals_start + (i * sizeof (gpointer)));
268 MonoObject *obj = *ptr;
/*
 * The slot is treated as live only if the pc offset lies inside the
 * (start, end) pair of its live range, start inclusive and end exclusive,
 * and it does not hold DEAD_REF.
 * NOTE(review): how loffset is initialized and advanced is not visible in this excerpt.
 */
270 if (pc_offset >= map->live_ranges [loffset] && pc_offset < map->live_ranges [loffset + 1] && obj != DEAD_REF) {
272 DEBUG (printf ("\tref %s0x%x(fp)=%p: %p ->", (guint8*)ptr >= (guint8*)fp ? "" : "-", ABS ((int)((gssize)ptr - (gssize)fp)), ptr, obj));
/* Store the pointer returned by mono_gc_scan_object () back into the slot. */
273 *ptr = mono_gc_scan_object (obj);
274 DEBUG (printf (" %p.\n", *ptr));
276 DEBUG (printf ("\tref %s0x%x(fp)=%p: %p.\n", (guint8*)ptr >= (guint8*)fp ? "" : "-", ABS ((int)((gssize)ptr - (gssize)fp)), ptr, obj));
279 DEBUG (printf ("\tref %s0x%x(fp)=%p: dead (%p)\n", (guint8*)ptr >= (guint8*)fp ? "" : "-", ABS ((int)((gssize)ptr - (gssize)fp)), ptr, obj));
281 * This serves two purposes:
282 * - fail fast if the live range is incorrect, and
283 * the JITted code tries to access this object
284 * - it avoids problems when a dead slot becomes live
285 * again due to a backward branch
286 * (see test_0_liveness_6).
292 scanned_precisely += sizeof (gpointer);
293 } else if (map->slots [i] == SLOT_NOREF) {
294 scanned_precisely += sizeof (gpointer);
/* In the non-precise pass, conservatively scan whatever lies between the scanned limit and the end of the range. */
301 if (stack_limit < stack_end && !precise) {
302 DEBUG (printf ("\tscan area %p-%p.\n", stack_limit, stack_end));
303 mono_gc_conservatively_scan_area (stack_limit, stack_end);
308 DEBUG (printf ("\tno context, scan area %p-%p.\n", stack_start, stack_end));
309 mono_gc_conservatively_scan_area (stack_start, stack_end);
313 DEBUG (printf ("Marked %d bytes, p=%d,c=%d out of %d.\n", scanned, scanned_precisely, scanned_conservatively, (int)(stack_end - stack_start)));
315 //mono_gc_conservatively_scan_area (stack_start, stack_end);
/*
 * set_slot:
 *
 *   Store VAL into SLOTS [POS], asserting first that POS is below NSLOTS.
 * POS is expanded twice, so it should not have side effects.
 */
318 #define set_slot(slots, nslots, pos, val) do { \
319 g_assert ((pos) < (nslots)); \
320 (slots) [(pos)] = (val); \
/*
 * mini_gc_init_gc_map:
 *
 *   Set the CFG flags which GC map creation depends on: precise live range
 * computation, no reuse of stack slots holding references, null
 * initialization of all reference variables, and keeping those
 * initializations from being optimized away.
 * NOTE(review): the statement executed when COMPILE_LLVM (cfg) is true is not
 * visible in this excerpt; presumably the function returns early for LLVM
 * compiles.
 */
324 mini_gc_init_gc_map (MonoCompile *cfg)
326 if (COMPILE_LLVM (cfg))
329 /* See mini_gc_create_gc_map () for comments as to why these are needed */
331 /* Extend the live ranges using the liveness information */
332 cfg->compute_precise_live_ranges = TRUE;
333 /* Is this still needed ? */
334 cfg->disable_reuse_ref_stack_slots = TRUE;
336 * Initialize all variables holding refs to null in the initlocals bblock, not just
337 * variables representing IL locals.
339 cfg->init_ref_vars = TRUE;
340 /* Prevent these initializations from being optimized away */
341 cfg->disable_initlocals_opt_refs = TRUE;
/*
 * mini_gc_create_gc_map:
 *
 *   Build the GC map for the method compiled in CFG and attach it to
 * cfg->jit_info->gc_info. The locals area between the minimum and maximum
 * local stack offsets is divided into pointer sized slots, each classified as
 * SLOT_NOREF, SLOT_REF or SLOT_PIN; for SLOT_REF slots the (start, end) live
 * range of the variable is recorded as well. The map is allocated from the
 * domain of CFG and its size is added to gc_maps_size.
 * The cfg flags this relies on are the ones set by mini_gc_init_gc_map ().
 * NOTE(review): many lines of this function (braces, early exits, some
 * declarations and assignments) are not visible in this excerpt.
 */
345 mini_gc_create_gc_map (MonoCompile *cfg)
348 int i, nslots, alloc_size, loffset, min_offset, max_offset;
349 StackSlotType *slots = NULL;
350 gboolean norefs = FALSE;
351 guint32 *live_range_start, *live_range_end;
354 * Since we currently don't use GC safe points, we need to create GC maps which
355 * are precise at every instruction within a method. We use the live ranges
356 * calculated by the JIT in mono_spill_global_vars () for this. Unfortunately by
357 * default these are not precise enought for several reasons:
358 * - the current calculation of MonoMethodVar->live_range_start/end is incorrect,
359 * it doesn't take into account loops etc. It needs to use the results of the
360 * liveness analysis pass.
361 * - the current liveness analysis pass is too conservative, ie. the live_in/out
362 * sets computed by it are sometimes include too many variables, for example because
363 * of the bogus links between bblocks. This means the live_in/out sets cannot be
364 * used to reliably compute precise live ranges.
365 * - stack slots are shared, which means the live ranges of stack slots have holes
367 * - the live ranges of variables used in out-of-line bblocks also have holes in
369 * - the live ranges of variables used for handling stack args also have holes in
376 * Here x is not live between the first and the second assignment.
378 * To work around these problems, we set a few cfg flags in mini_init_gc_maps ()
379 * which guarantee that the live range of stack slots have no holes, i.e. they hold
380 * a valid value (or null) during their entire live range.
381 * FIXME: This doesn't completely work yet, see test_0_liveness_6 (), where
382 * a variable becomes dead, then alive again.
386 if (!(cfg->comp_done & MONO_COMP_LIVENESS))
387 /* Without liveness info, the live ranges are not precise enough */
/* The slot area starts at the minimum local offset, rounded up to pointer size. */
391 min_offset = ALIGN_TO (cfg->locals_min_stack_offset, sizeof (gpointer));
392 max_offset = cfg->locals_max_stack_offset;
394 /* min/max stack offset needs to be computed in mono_arch_allocate_vars () */
/*
 * First pass over the variables.
 * NOTE(review): the statements guarded by these tests and by the check after
 * the loop are not visible in this excerpt; presumably the loop stops at the
 * first variable which can matter to the GC, and norefs is set when there is
 * none. Also note that ins is tested for NULL only after it has already been
 * dereferenced.
 */
398 for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
399 MonoInst *ins = cfg->varinfo [i];
400 MonoType *t = ins->inst_vtype;
402 if ((MONO_TYPE_ISSTRUCT (t) && ins->klass->has_references))
404 if (MONO_TYPE_ISSTRUCT (t))
406 if (t->byref || t->type == MONO_TYPE_PTR)
408 if (ins && ins->opcode == OP_REGOFFSET && MONO_TYPE_IS_REFERENCE (ins->inst_vtype))
412 if (i == cfg->num_varinfo)
/* NOTE(review): the string returned by mono_method_full_name () is not freed here. */
415 if (cfg->verbose_level > 1)
416 printf ("GC Map for %s: 0x%x-0x%x\n", mono_method_full_name (cfg->method, TRUE), min_offset, max_offset);
/* One StackSlotType per pointer sized word of the locals area, all starting out as SLOT_NOREF. */
418 nslots = (max_offset - min_offset) / sizeof (gpointer);
420 alloc_size = nslots * sizeof (StackSlotType);
421 slots = mono_domain_alloc0 (cfg->domain, alloc_size);
422 for (i = 0; i < nslots; ++i)
423 slots [i] = SLOT_NOREF;
424 gc_maps_size += alloc_size;
/*
 * Temporary per-slot live ranges, freed at the end of the function.
 * start = (guint32)-1 and end = 0 mean that no range has been recorded
 * for the slot yet.
 */
426 live_range_start = g_new (guint32, nslots);
427 live_range_end = g_new (guint32, nslots);
430 for (i = 0; i < nslots; ++i) {
431 live_range_start [i] = (guint32)-1;
432 live_range_end [i] = 0;
/* Second pass: classify the slot(s) occupied by each variable. */
435 for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
436 MonoInst *ins = cfg->varinfo [i];
437 MonoType *t = ins->inst_vtype;
444 vmv = MONO_VARINFO (cfg, i);
446 if (ins->opcode != OP_REGOFFSET)
449 if (ins->inst_offset % sizeof (gpointer) != 0)
/* Index of the first slot occupied by the variable. */
452 pos = (ins->inst_offset - min_offset) / sizeof (gpointer);
454 if ((MONO_TYPE_ISSTRUCT (t) && !ins->klass->has_references))
457 if ((MONO_TYPE_ISSTRUCT (t) && ins->klass->has_references)) {
462 if (ins->klass->generic_container || mono_class_is_open_constructed_type (t)) {
463 /* FIXME: Generic sharing */
466 mono_class_compute_gc_descriptor (ins->klass);
468 bitmap = mono_gc_get_bitmap_for_descr (ins->klass->gc_descr, &numbits);
/* Every bit set in the descriptor bitmap marks a word of the value type which holds a reference. */
471 for (j = 0; j < numbits; ++j) {
472 if (bitmap [j / GC_BITS_PER_WORD] & ((gsize)1 << (j % GC_BITS_PER_WORD))) {
473 /* The descriptor is for the boxed object */
474 set_slot (slots, nslots, (pos + j - (sizeof (MonoObject) / sizeof (gpointer))), SLOT_REF);
479 if (cfg->verbose_level > 1)
480 printf ("\tvtype at fp+0x%x: %s -> 0x%x\n", (int)ins->inst_offset, mono_type_full_name (ins->inst_vtype), (int)ins->inst_offset);
482 // FIXME: These have no live range
490 if (ins->backend.is_pinvoke)
496 if (ins->backend.is_pinvoke)
497 size = mono_class_native_size (ins->klass, NULL);
499 size = mono_class_value_size (ins->klass, NULL);
/* Mark every pointer sized word covered by size as SLOT_PIN. */
500 for (j = 0; j < size / sizeof (gpointer); ++j)
501 set_slot (slots, nslots, pos + j, SLOT_PIN);
506 if (ins->inst_offset < min_offset || ins->inst_offset >= max_offset)
/* Byref, pointer and native int/uint typed variables get SLOT_PIN. */
510 if (t->byref || t->type == MONO_TYPE_PTR || t->type == MONO_TYPE_I || t->type == MONO_TYPE_U) {
511 set_slot (slots, nslots, pos, SLOT_PIN);
515 if (MONO_TYPE_IS_REFERENCE (ins->inst_vtype)) {
/* A reference variable whose live_range_start is 0 gets SLOT_PIN; presumably no live range was computed for it. */
516 if (vmv && !vmv->live_range_start) {
517 set_slot (slots, nslots, pos, SLOT_PIN);
/* Volatile or indirectly accessed reference variables get SLOT_PIN as well. */
521 if (ins->flags & (MONO_INST_VOLATILE | MONO_INST_INDIRECT)) {
522 set_slot (slots, nslots, pos, SLOT_PIN);
526 set_slot (slots, nslots, pos, SLOT_REF);
528 /* Stack slots holding refs shouldn't be shared */
529 g_assert (!live_range_end [pos]);
530 live_range_start [pos] = vmv->live_range_start;
531 live_range_end [pos] = vmv->live_range_end;
533 if (cfg->verbose_level > 1)
534 printf ("\tref at %s0x%x(fp) (slot=%d): %s [0x%x - 0x%x]\n", ins->inst_offset < 0 ? "-" : "", (ins->inst_offset < 0) ? -(int)ins->inst_offset : (int)ins->inst_offset, pos, mono_type_full_name (ins->inst_vtype), vmv->live_range_start, vmv->live_range_end);
/* The map is sized for a (start, end) pair of guint32 per slot, adjusted by MONO_ZERO_LEN_ARRAY; no live range storage is reserved when norefs is set. */
538 alloc_size = sizeof (GCMap) + (norefs ? 0 : (nslots - MONO_ZERO_LEN_ARRAY) * sizeof (guint32) * 2);
539 map = mono_domain_alloc0 (cfg->domain, alloc_size);
540 gc_maps_size += alloc_size;
542 map->frame_reg = cfg->frame_reg;
543 map->locals_offset = min_offset;
544 map->locals_size = ALIGN_TO (max_offset - min_offset, sizeof (gpointer));
545 map->nslots = nslots;
/* Pack the live range pairs of the SLOT_REF slots, in slot order. */
549 for (i = 0; i < nslots; ++i) {
550 if (map->slots [i] == SLOT_REF) {
551 map->live_ranges [loffset ++] = live_range_start [i];
552 map->live_ranges [loffset ++] = live_range_end [i];
/*
 * Debugging aid driven by the MONO_GCMAP_COUNT environment variable: once
 * precise_count exceeds the given value, every slot of the map is turned
 * into SLOT_PIN.
 * NOTE(review): how precise_count advances is not visible in this excerpt.
 * getenv () and atoi () are evaluated again for each test, and the string
 * returned by mono_method_full_name () is not freed.
 */
559 static int precise_count;
563 if (getenv ("MONO_GCMAP_COUNT")) {
564 if (precise_count == atoi (getenv ("MONO_GCMAP_COUNT")))
565 printf ("LAST: %s\n", mono_method_full_name (cfg->method, TRUE));
566 if (precise_count > atoi (getenv ("MONO_GCMAP_COUNT"))) {
567 for (i = 0; i < nslots; ++i)
568 map->slots [i] = SLOT_PIN;
/* Publish the map on the JIT info of the method. */
575 cfg->jit_info->gc_info = map;
577 g_free (live_range_start);
578 g_free (live_range_end);
/*
 * Register this module's thread attach/suspend/mark callbacks with the GC and
 * publish the GC map size counter.
 * NOTE(review): the header of the enclosing function and the declaration of
 * cb are not visible in this excerpt.
 */
586 memset (&cb, 0, sizeof (cb));
587 cb.thread_attach_func = thread_attach_func;
588 cb.thread_suspend_func = thread_suspend_func;
589 /* Comment this out to disable precise stack marking */
590 cb.thread_mark_func = thread_mark_func;
591 mono_gc_set_gc_callbacks (&cb);
/* Expose gc_maps_size as an integer GC counter. */
593 mono_counters_register ("GC Maps size",
594 MONO_COUNTER_GC | MONO_COUNTER_INT, &gc_maps_size);
/*
 * Alternative definitions of mini_gc_init_gc_map () and mini_gc_create_gc_map ().
 * NOTE(review): their bodies and the preprocessor conditionals separating them
 * from the earlier definitions are not visible in this excerpt; presumably
 * these are the variants used when precise stack marking is not compiled in.
 */
605 mini_gc_init_gc_map (MonoCompile *cfg)
610 mini_gc_create_gc_map (MonoCompile *cfg)
619 * Set GC specific options in CFG.
622 mini_gc_init_cfg (MonoCompile *cfg)
/*
 * When the GC reports itself as moving, stop reference and non-reference
 * values from sharing a stack slot and enable the generation of write barriers.
 */
624 if (mono_gc_is_moving ()) {
625 cfg->disable_ref_noref_stack_slot_share = TRUE;
626 cfg->gen_write_barriers = TRUE;
/* Let mini_gc_init_gc_map () set the flags which GC map creation relies on. */
629 mini_gc_init_gc_map (cfg);
633 * Problems with the precise stack scanning code:
634 * - it makes two passes over the stack
635 * - the stack walk is slow
636 * - only the locals are scanned precisely
637 * - using a pc range for live ranges is not good, have to use a list of intervals
638 * - if the code is finished, less pinning will be done, causing problems because
639 * we promote all surviving objects to old-gen.