2 * mini-gc.c: GC interface for the mono JIT
5 * Zoltan Varga (vargaz@gmail.com)
7 * Copyright 2009 Novell, Inc (http://www.novell.com)
12 #include <mono/metadata/gc-internal.h>
15 * The code below does not work yet, and probably needs to be thrown out if we move
22 #include <mono/metadata/gc-internal.h>
23 #include <mono/utils/mono-counters.h>
/*
 * Round VAL up to the next multiple of ALIGN, which must be a power of two.
 * The result has type guint64.
 * VAL is parenthesized before the cast so that a compound argument is
 * converted as a whole, and ALIGN is widened to guint64 so that the mask
 * is never computed in a narrower unsigned type.
 */
#define ALIGN_TO(val,align) ((((guint64)(val)) + ((guint64)(align) - 1)) & ~((guint64)(align) - 1))
28 #define DEBUG(s) do { s; } while (0)
34 #define DEBUG_GC_MAP(s) do { s; fflush (stdout); } while (0)
36 #define DEBUG_GC_MAP(s)
39 #define GC_BITS_PER_WORD (sizeof (gsize) * 8)
42 * Per-thread data kept by this module. This is stored in the GC and passed to us as
43 * parameters, instead of being stored in a TLS variable, since during a collection,
44 * only the collection thread is active.
50 MonoJitTlsData *jit_tls;
54 /* Stack slot doesn't contain a reference */
56 /* Stack slot contains a reference */
58 /* No info, slot needs to be scanned conservatively */
63 * Contains information needed to mark a stack frame.
64 * FIXME: Optimize the memory usage.
67 /* The frame pointer register */
69 /* The offset of the local variable area in the stack frame relative to the frame pointer */
71 /* The size of the locals area. Can't use nslots as it includes padding */
73 /* The number of stack slots */
75 /* The width of the liveness bitmap in bytes */
82 * A bitmap whose width is equal to the number of SLOT_REF values in gc_refs, and whose
83 * height is equal to the number of possible PC offsets.
84 * This needs to be compressed later.
86 guint8 bitmap [MONO_ZERO_LEN_ARRAY];
90 static guint32 gc_maps_size;
/*
 * Allocates a zero-initialized TlsData with g_new0 () and returns it.
 * Installed as cb.thread_attach_func elsewhere in this file.
 */
93 thread_attach_func (void)
95 return g_new0 (TlsData, 1);
/*
 * GC callback, installed as cb.thread_suspend_func elsewhere in this file.
 * Records into the TlsData passed as USER_DATA: the LMF returned by
 * mono_get_lmf (), a MonoContext converted from SIGCTX together with the
 * has_context flag, and the value of the mono_jit_tls_id TLS slot.
 * NOTE(review): the conditions selecting between the two has_context
 * assignments are not visible here; presumably they test whether SIGCTX
 * is non-NULL - TODO confirm.
 */
99 thread_suspend_func (gpointer user_data, void *sigctx)
101 TlsData *tls = user_data;
104 /* Happens during startup */
107 tls->lmf = mono_get_lmf ();
/* thread_mark_func copies tls->ctx as the starting context of its stack walk */
109 mono_arch_sigctx_to_monoctx (sigctx, &tls->ctx);
110 tls->has_context = TRUE;
112 tls->has_context = FALSE;
114 tls->jit_tls = TlsGetValue (mono_jit_tls_id);
117 static int precise_frame_count [2], precise_frame_limit = -1;
118 static gboolean precise_frame_limit_inited;
120 static int scanned_stacks_stat;
121 static int scanned_stat, scanned_precisely_stat, scanned_conservatively_stat;
123 #define DEAD_REF ((gpointer)(gssize)0x2a2a2a2a2a2a2a2aULL)
/*
 * GC callback, installed as cb.thread_mark_func elsewhere in this file.
 * Marks the stack area STACK_START..STACK_END of one thread.
 *   user_data: the TlsData whose ctx, has_context and jit_tls fields are read here
 *   precise:   selects the pass; it indexes precise_frame_count and picks
 *              which statistics are updated
 * Frames are found with mono_find_jit_info (). For a frame with a GC map,
 * the locals area is located from the frame register and map->locals_offset,
 * the gap below it is scanned conservatively, SLOT_PIN slots are scanned
 * conservatively one word at a time, and SLOT_REF slots are checked against
 * the liveness bitmap row for the current pc offset before being passed to
 * mono_gc_scan_object ().
 * When there is no current internal thread or no TlsData, the whole range
 * is scanned conservatively.
 * NOTE(review): several control-flow lines of this function are not visible
 * here, so the exact nesting of the statements below must be confirmed.
 */
126 thread_mark_func (gpointer user_data, guint8 *stack_start, guint8 *stack_end, gboolean precise)
128 TlsData *tls = user_data;
129 MonoJitInfo *ji, res;
130 MonoContext ctx, new_ctx;
133 gboolean last = TRUE, managed;
135 guint8* fp, *locals_start, *locals_end;
137 int scanned = 0, scanned_precisely, scanned_conservatively;
139 /* tls == NULL can happen during startup */
140 if (mono_thread_internal_current () == NULL || !tls) {
142 mono_gc_conservatively_scan_area (stack_start, stack_end);
143 scanned_stacks_stat += stack_end - stack_start;
150 /* Number of bytes scanned based on GC map data */
152 /* Number of bytes scanned precisely based on GC map data */
153 scanned_precisely = 0;
154 /* Number of bytes scanned conservatively based on GC map data */
155 scanned_conservatively = 0;
157 /* FIXME: sgen-gc.c calls this multiple times for each major collection from pin_from_roots */
159 /* FIXME: Use real gc descriptors instead of bitmaps */
161 /* This is one past the last address which we have scanned */
162 stack_limit = stack_start;
164 DEBUG (printf ("*** %s stack marking %p-%p ***\n", precise ? "Precise" : "Conservative", stack_start, stack_end));
/* NOTE(review): sizeof (ctx) is applied to new_ctx below; both are declared as MonoContext, so the sizes match */
166 if (!tls->has_context)
167 memset (&new_ctx, 0, sizeof (ctx));
169 memcpy (&new_ctx, &tls->ctx, sizeof (MonoContext));
/* The context of the frame being examined is the one produced by the previous unwind step */
172 memcpy (&ctx, &new_ctx, sizeof (ctx));
174 g_assert ((guint64)stack_limit % sizeof (gpointer) == 0);
176 // FIXME: This doesn't work with appdomain transitions
177 ji = mono_find_jit_info (mono_domain_get (), tls->jit_tls, &res, NULL,
178 &ctx, &new_ctx, NULL, &lmf, NULL, &managed);
179 if (ji == (gpointer)-1)
182 /* The last frame can be in any state so mark conservatively */
188 /* These frames are returned by mono_find_jit_info () two times */
192 /* Scan the frame of this method */
195 * A frame contains the following:
200 * - localloc-ed memory
201 * Currently, only the locals are scanned precisely.
207 DEBUG (char *fname = mono_method_full_name (ji->method, TRUE); printf ("Mark(%d): No GC map for %s\n", precise, fname); g_free (fname));
212 * Debugging aid to control the number of frames scanned precisely
/* NOTE(review): getenv () is called twice and the value is parsed with atoi (), which cannot report malformed input */
214 if (!precise_frame_limit_inited) {
215 if (getenv ("MONO_PRECISE_COUNT"))
216 precise_frame_limit = atoi (getenv ("MONO_PRECISE_COUNT"));
217 precise_frame_limit_inited = TRUE;
220 if (precise_frame_limit != -1) {
/* NOTE(review): the string returned by mono_method_full_name () is not freed here, unlike in the DEBUG () statements */
221 if (precise_frame_count [precise] == precise_frame_limit)
222 printf ("LAST PRECISE FRAME: %s\n", mono_method_full_name (ji->method, TRUE));
223 if (precise_frame_count [precise] > precise_frame_limit)
226 precise_frame_count [precise] ++;
/* Pick the frame pointer value from the register recorded in the GC map */
229 if (map->frame_reg == AMD64_RSP)
230 fp = (guint8*)ctx.rsp;
231 else if (map->frame_reg == AMD64_RBP)
232 fp = (guint8*)ctx.rbp;
234 g_assert_not_reached ();
237 g_assert_not_reached ();
240 locals_start = fp + map->locals_offset;
241 locals_end = locals_start + map->locals_size;
/* Offset of the current IP from the start of the method code; it selects the liveness bitmap row below */
243 pc_offset = (guint8*)MONO_CONTEXT_GET_IP (&ctx) - (guint8*)ji->code_start;
244 g_assert (pc_offset >= 0);
246 DEBUG (char *fname = mono_method_full_name (ji->method, TRUE); printf ("Mark(%d): %s+0x%x (%p) limit=%p fp=%p locals=%p-%p (%d)\n", precise, fname, pc_offset, (gpointer)MONO_CONTEXT_GET_IP (&ctx), stack_limit, fp, locals_start, locals_end, (int)(locals_end - locals_start)); g_free (fname));
249 * FIXME: Add a function to mark using a bitmap, to avoid doing a
250 * call for each object.
253 scanned += locals_end - locals_start;
255 /* Pinning needs to be done first, then the precise scan later */
258 g_assert (locals_start >= stack_limit);
260 if (locals_start > stack_limit) {
261 /* This scans the previously skipped frames as well */
262 DEBUG (printf ("\tscan area %p-%p.\n", stack_limit, locals_start));
263 mono_gc_conservatively_scan_area (stack_limit, locals_start);
/* Slots marked SLOT_PIN are scanned conservatively, one pointer-sized word at a time */
270 for (i = 0; i < map->nslots; ++i) {
271 if (map->slots [i] == SLOT_PIN) {
272 DEBUG (printf ("\tscan slot %s0x%x(fp)=%p.\n", (guint8*)p > (guint8*)fp ? "" : "-", ABS ((int)((gssize)p - (gssize)fp)), p));
273 mono_gc_conservatively_scan_area (p, p + sizeof (gpointer));
274 scanned_conservatively += sizeof (gpointer);
276 p += sizeof (gpointer);
280 stack_limit = locals_end;
/* Row of the liveness bitmap for this pc offset; each row is bitmap_width bytes wide */
284 guint8 *bitmap = &map->bitmap [(map->bitmap_width * pc_offset)];
287 for (i = 0; i < map->nslots; ++i) {
288 if (map->slots [i] == SLOT_REF) {
289 MonoObject **ptr = (MonoObject**)(locals_start + (i * sizeof (gpointer)));
290 MonoObject *obj = *ptr;
/* Test bit number loffset of the row; presumably loffset counts the SLOT_REF slots seen so far - TODO confirm */
292 live = bitmap [loffset / 8] & (1 << (loffset % 8));
296 DEBUG (printf ("\tref %s0x%x(fp)=%p: %p ->", (guint8*)ptr >= (guint8*)fp ? "" : "-", ABS ((int)((gssize)ptr - (gssize)fp)), ptr, obj));
/* The slot is overwritten with the value returned by mono_gc_scan_object () */
297 *ptr = mono_gc_scan_object (obj);
298 DEBUG (printf (" %p.\n", *ptr));
300 DEBUG (printf ("\tref %s0x%x(fp)=%p: %p.\n", (guint8*)ptr >= (guint8*)fp ? "" : "-", ABS ((int)((gssize)ptr - (gssize)fp)), ptr, obj));
303 DEBUG (printf ("\tref %s0x%x(fp)=%p: dead (%p)\n", (guint8*)ptr >= (guint8*)fp ? "" : "-", ABS ((int)((gssize)ptr - (gssize)fp)), ptr, obj));
305 * Fail fast if the live range is incorrect, and
306 * the JITted code tries to access this object
312 scanned_precisely += sizeof (gpointer);
313 } else if (map->slots [i] == SLOT_NOREF) {
314 scanned_precisely += sizeof (gpointer);
/* In the non-precise pass, whatever remains above the last handled frame is scanned conservatively */
321 if (stack_limit < stack_end && !precise) {
322 DEBUG (printf ("\tscan area %p-%p.\n", stack_limit, stack_end));
323 mono_gc_conservatively_scan_area (stack_limit, stack_end);
326 DEBUG (printf ("Marked %d bytes, p=%d,c=%d out of %d.\n", scanned, scanned_precisely, scanned_conservatively, (int)(stack_end - stack_start)));
/* Fold the per-call byte counts into the global statistics registered as counters elsewhere in this file */
329 scanned_precisely_stat += scanned_precisely;
331 scanned_stacks_stat += stack_end - stack_start;
332 scanned_stat += scanned;
333 scanned_conservatively_stat += scanned_conservatively;
336 //mono_gc_conservatively_scan_area (stack_start, stack_end);
/*
 * Stores VAL into SLOTS [POS] after asserting that POS is smaller than NSLOTS.
 * NOTE: POS is evaluated twice, so it must not have side effects.
 */
339 #define set_slot(slots, nslots, pos, val) do { \
340 g_assert ((pos) < (nslots)); \
341 (slots) [(pos)] = (val); \
/*
 * Requests GC map computation for CFG by setting cfg->compute_gc_maps.
 * NOTE(review): the statement guarded by COMPILE_LLVM (cfg) is not visible
 * here; presumably it returns early so that LLVM compiled methods get no
 * GC map - TODO confirm.
 */
345 mini_gc_init_gc_map (MonoCompile *cfg)
347 if (COMPILE_LLVM (cfg))
350 cfg->compute_gc_maps = TRUE;
/*
 * Builds the GCMap for the method compiled in CFG and stores it in
 * cfg->jit_info->gc_info.
 * Steps visible below:
 *   - run mono_analyze_liveness_gc () and take the locals area bounds from
 *     cfg->locals_min_stack_offset and cfg->locals_max_stack_offset
 *   - allocate one StackSlotType per pointer-sized word of that area,
 *     initialized to SLOT_NOREF
 *   - classify the slot of each variable in cfg->varinfo as SLOT_REF or
 *     SLOT_PIN, and remember the gc_interval of each SLOT_REF variable
 *   - allocate the GCMap and fill its liveness bitmap, one row per byte of
 *     generated code, from the remembered live intervals
 * The slots array and the map are allocated with mono_domain_alloc0 () and
 * accounted in gc_maps_size; the temporary live_intervals array is released
 * with g_free () at the end.
 * NOTE(review): many short lines (early exits, continue statements, braces)
 * are not visible here, so the exact control flow must be confirmed.
 */
354 mini_gc_create_gc_map (MonoCompile *cfg)
357 int i, nslots, alloc_size, loffset, min_offset, max_offset;
358 StackSlotType *slots = NULL;
359 gboolean norefs = FALSE;
360 GSList **live_intervals;
361 int bitmap_width, bitmap_size;
364 * Since we currently don't use GC safe points, we need to create GC maps which
365 * are precise at every instruction within a method. The live ranges calculated by
366 * the liveness pass are not usable for this, since they contain abstract positions, not
367 * pc offsets. The live ranges calculated by mono_spill_global_vars () are not usable
368 * either, since they can't model holes. Instead of these, we implement our own
369 * liveness analysis which is precise, and works with PC offsets. It calculates live
370 * intervals, which are unions of live ranges.
372 * - arguments (these are not scanned precisely currently).
373 * - it would simplify things if we extended live ranges to the end of basic blocks
374 * instead of computing them precisely.
375 * - maybe mark loads+stores as needing GC tracking, instead of using DEF/USE
379 if (!(cfg->comp_done & MONO_COMP_LIVENESS))
380 /* Without liveness info, the live ranges are not precise enough */
383 if (cfg->header->num_clauses)
385 * The calls to the finally clauses don't show up in the cfg. See
386 * test_0_liveness_8 ().
390 mono_analyze_liveness_gc (cfg);
393 min_offset = ALIGN_TO (cfg->locals_min_stack_offset, sizeof (gpointer));
394 max_offset = cfg->locals_max_stack_offset;
396 /* min/max stack offset needs to be computed in mono_arch_allocate_vars () */
400 for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
401 MonoInst *ins = cfg->varinfo [i];
402 MonoType *t = ins->inst_vtype;
404 if ((MONO_TYPE_ISSTRUCT (t) && ins->klass->has_references))
406 if (MONO_TYPE_ISSTRUCT (t))
408 if (t->byref || t->type == MONO_TYPE_PTR)
/* NOTE(review): ins was already dereferenced above through ins->inst_vtype, so the NULL test below cannot protect that access */
410 if (ins && ins->opcode == OP_REGOFFSET && MONO_TYPE_IS_REFERENCE (ins->inst_vtype))
414 if (i == cfg->num_varinfo)
/* NOTE(review): the string returned by mono_method_full_name () is not freed here */
417 if (cfg->verbose_level > 1)
418 printf ("GC Map for %s: 0x%x-0x%x\n", mono_method_full_name (cfg->method, TRUE), min_offset, max_offset);
/* One slot per pointer-sized word of the locals area */
420 nslots = (max_offset - min_offset) / sizeof (gpointer);
422 alloc_size = nslots * sizeof (StackSlotType);
423 slots = mono_domain_alloc0 (cfg->domain, alloc_size);
424 for (i = 0; i < nslots; ++i)
425 slots [i] = SLOT_NOREF;
426 gc_maps_size += alloc_size;
/* Per-slot lists of live intervals, filled for SLOT_REF slots and consumed when the bitmap is built */
428 live_intervals = g_new0 (GSList*, nslots);
431 for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
432 MonoInst *ins = cfg->varinfo [i];
433 MonoType *t = ins->inst_vtype;
440 vmv = MONO_VARINFO (cfg, i);
442 if (ins->opcode != OP_REGOFFSET)
445 if (ins->inst_offset % sizeof (gpointer) != 0)
/* Slot index of the variable relative to the start of the locals area */
448 pos = (ins->inst_offset - min_offset) / sizeof (gpointer);
450 if ((MONO_TYPE_ISSTRUCT (t) && !ins->klass->has_references))
453 if ((MONO_TYPE_ISSTRUCT (t) && ins->klass->has_references)) {
458 if (ins->klass->generic_container || mono_class_is_open_constructed_type (t)) {
459 /* FIXME: Generic sharing */
462 mono_class_compute_gc_descriptor (ins->klass);
464 bitmap = mono_gc_get_bitmap_for_descr (ins->klass->gc_descr, &numbits);
467 for (j = 0; j < numbits; ++j) {
468 if (bitmap [j / GC_BITS_PER_WORD] & ((gsize)1 << (j % GC_BITS_PER_WORD))) {
469 /* The descriptor is for the boxed object */
/* Bit j counts words from the start of the boxed object, so the MonoObject header words are subtracted to get the slot */
470 set_slot (slots, nslots, (pos + j - (sizeof (MonoObject) / sizeof (gpointer))), SLOT_REF);
475 if (cfg->verbose_level > 1)
476 printf ("\tvtype at fp+0x%x: %s -> 0x%x\n", (int)ins->inst_offset, mono_type_full_name (ins->inst_vtype), (int)ins->inst_offset);
478 // FIXME: These have no live range
486 if (ins->backend.is_pinvoke)
492 if (ins->backend.is_pinvoke)
493 size = mono_class_native_size (ins->klass, NULL);
495 size = mono_class_value_size (ins->klass, NULL);
/* Every word covered by the value type is treated conservatively */
496 for (j = 0; j < size / sizeof (gpointer); ++j)
497 set_slot (slots, nslots, pos + j, SLOT_PIN);
502 if (ins->inst_offset < min_offset || ins->inst_offset >= max_offset)
506 if (t->byref || t->type == MONO_TYPE_I) {
507 // FIXME: JIT temporaries have type I
508 set_slot (slots, nslots, pos, SLOT_PIN);
512 if (MONO_TYPE_IS_REFERENCE (ins->inst_vtype)) {
/* A reference without a computed gc_interval cannot be tracked precisely, so its slot is pinned */
513 if (vmv && !vmv->gc_interval) {
514 set_slot (slots, nslots, pos, SLOT_PIN);
518 if (ins->flags & (MONO_INST_VOLATILE | MONO_INST_INDIRECT)) {
519 set_slot (slots, nslots, pos, SLOT_PIN);
523 set_slot (slots, nslots, pos, SLOT_REF);
525 live_intervals [pos] = g_slist_prepend_mempool (cfg->mempool, live_intervals [pos], vmv->gc_interval);
527 if (cfg->verbose_level > 1) {
528 printf ("\tref at %s0x%x(fp) (slot=%d): %s ", ins->inst_offset < 0 ? "-" : "", (ins->inst_offset < 0) ? -(int)ins->inst_offset : (int)ins->inst_offset, pos, mono_type_full_name (ins->inst_vtype));
529 mono_linterval_print (vmv->gc_interval);
537 for (i = 0; i < nslots; ++i) {
538 if (slots [i] == SLOT_REF)
/* Row width in bytes: loffset bits rounded up to whole bytes; presumably loffset counts the SLOT_REF slots here - TODO confirm */
543 bitmap_width = ALIGN_TO (loffset, 8) / 8;
/* One row per byte of generated code */
544 bitmap_size = bitmap_width * cfg->code_len;
545 alloc_size = sizeof (GCMap) + (norefs ? 0 : bitmap_size);
546 map = mono_domain_alloc0 (cfg->domain, alloc_size);
547 gc_maps_size += alloc_size;
549 map->frame_reg = cfg->frame_reg;
550 map->locals_offset = min_offset;
551 map->locals_size = ALIGN_TO (max_offset - min_offset, sizeof (gpointer));
552 map->nslots = nslots;
554 map->bitmap_width = bitmap_width;
556 /* Create liveness bitmap */
559 for (i = 0; i < nslots; ++i) {
560 if (map->slots [i] == SLOT_REF) {
561 MonoLiveInterval *iv;
566 for (l = live_intervals [i]; l; l = l->next) {
568 for (r = iv->range; r; r = r->next) {
/* Set the bit of this slot in the row of every pc offset from r->from up to, but not including, r->to */
569 for (pc_offset = r->from; pc_offset < r->to; ++pc_offset)
570 map->bitmap [(map->bitmap_width * pc_offset) + loffset / 8] |= (1 << (loffset % 8));
580 static int precise_count;
/* Debugging aid driven by MONO_GCMAP_COUNT: past the given count every slot of the map is forced to SLOT_PIN. NOTE(review): getenv () and atoi () are evaluated repeatedly and the name string printed below is not freed */
584 if (getenv ("MONO_GCMAP_COUNT")) {
585 if (precise_count == atoi (getenv ("MONO_GCMAP_COUNT")))
586 printf ("LAST: %s\n", mono_method_full_name (cfg->method, TRUE));
587 if (precise_count > atoi (getenv ("MONO_GCMAP_COUNT"))) {
588 for (i = 0; i < nslots; ++i)
589 map->slots [i] = SLOT_PIN;
596 cfg->jit_info->gc_info = map;
598 g_free (live_intervals);
/*
 * Registers the thread attach, suspend and mark callbacks of this file with
 * the GC through mono_gc_set_gc_callbacks (), then registers the GC map size
 * and the stack scanning statistics as GC counters.
 * NOTE(review): the header of the enclosing function is not visible here.
 */
606 memset (&cb, 0, sizeof (cb));
607 cb.thread_attach_func = thread_attach_func;
608 cb.thread_suspend_func = thread_suspend_func;
609 /* Comment this out to disable precise stack marking */
610 cb.thread_mark_func = thread_mark_func;
611 mono_gc_set_gc_callbacks (&cb);
/* Each counter is registered as an int GC counter backed by one of the static variables of this file */
613 mono_counters_register ("GC Maps size",
614 MONO_COUNTER_GC | MONO_COUNTER_INT, &gc_maps_size);
616 mono_counters_register ("Stack space scanned (all)",
617 MONO_COUNTER_GC | MONO_COUNTER_INT, &scanned_stacks_stat);
618 mono_counters_register ("Stack space scanned (using GC Maps)",
619 MONO_COUNTER_GC | MONO_COUNTER_INT, &scanned_stat);
620 mono_counters_register ("Stack space scanned (precise)",
621 MONO_COUNTER_GC | MONO_COUNTER_INT, &scanned_precisely_stat);
622 mono_counters_register ("Stack space scanned (conservative)",
623 MONO_COUNTER_GC | MONO_COUNTER_INT, &scanned_conservatively_stat);
634 mini_gc_init_gc_map (MonoCompile *cfg)
639 mini_gc_create_gc_map (MonoCompile *cfg)
648 * Set GC specific options in CFG.
/*
 * Applies GC specific settings to CFG. When mono_gc_is_moving () reports a
 * moving collector, sharing of stack slots between ref and non-ref values is
 * disabled and write barrier generation is enabled. mini_gc_init_gc_map ()
 * is also called on CFG.
 * NOTE(review): the closing brace of the if is not visible here, so whether
 * the mini_gc_init_gc_map () call is conditional must be confirmed.
 */
651 mini_gc_init_cfg (MonoCompile *cfg)
653 if (mono_gc_is_moving ()) {
654 cfg->disable_ref_noref_stack_slot_share = TRUE;
655 cfg->gen_write_barriers = TRUE;
658 mini_gc_init_gc_map (cfg);
662 * Problems with the current code:
663 * - it makes two passes over the stack
664 * - the stack walk is slow
665 * - only the locals are scanned precisely
666 * - vtypes/refs used in EH regions are treated conservatively
667 * - the computation of the GC maps is slow since it involves a liveness analysis pass
668 * - the GC maps are uncompressed and take up a lot of memory.
669 * - if the code is finished, less pinning will be done, causing problems because
670 * we promote all surviving objects to old-gen.