2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright 2005-2009 Novell, Inc (http://www.novell.com)
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation always provides zeroed memory, having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issues more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianness issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list and critical_fin_list are part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
130 #include <semaphore.h>
134 #include <sys/types.h>
135 #include <sys/stat.h>
136 #include <sys/mman.h>
137 #include <sys/time.h>
140 #include "metadata/metadata-internals.h"
141 #include "metadata/class-internals.h"
142 #include "metadata/gc-internal.h"
143 #include "metadata/object-internals.h"
144 #include "metadata/threads.h"
145 #include "metadata/sgen-gc.h"
146 #include "metadata/mono-gc.h"
147 #include "metadata/method-builder.h"
148 #include "metadata/profiler-private.h"
149 #include "utils/mono-mmap.h"
151 #ifdef HAVE_VALGRIND_MEMCHECK_H
152 #include <valgrind/memcheck.h>
155 #define OPDEF(a,b,c,d,e,f,g,h,i,j) \
159 #include "mono/cil/opcode.def"
166 * ######################################################################
167 * ######## Types and constants used by the GC.
168 * ######################################################################
170 #if SIZEOF_VOID_P == 4
171 typedef guint32 mword;
173 typedef guint64 mword;
176 static int gc_initialized = 0;
177 static int gc_debug_level = 0;
178 static FILE* gc_debug_file;
179 /* If set, do a minor collection before every allocation */
180 static gboolean collect_before_allocs = FALSE;
181 /* If set, do a heap consistency check before each minor collection */
182 static gboolean consistency_check_at_minor_collection = FALSE;
186 mono_gc_flush_info (void)
188 fflush (gc_debug_file);
192 #define MAX_DEBUG_LEVEL 8
193 #define DEBUG(level,a) do {if (G_UNLIKELY ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level)) a;} while (0)
195 #define TV_DECLARE(name) struct timeval name
196 #define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
197 #define TV_ELAPSED(start,end) (int)((((end).tv_sec - (start).tv_sec) * 1000000) + end.tv_usec - start.tv_usec)
199 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
209 /* each request from the OS ends up in a GCMemSection */
210 typedef struct _GCMemSection GCMemSection;
211 struct _GCMemSection {
215 /* pointer where more data could be allocated if it fits */
219 * scan starts is an array of pointers to objects equally spaced in the allocation area
220 * They let us quickly find pinned objects from pinning pointers.
223 /* in major collections indexes in the pin_queue for objects that pin this section */
226 unsigned short num_scan_start;
230 /* large object space struct: 64+ KB */
231 /* we could make this limit much smaller to avoid memcpy copy
232 * and potentially have more room in the GC descriptor: need to measure
233 * This also means that such small OS objects will need to be
234 * allocated in a different way (using pinned chunks).
235 * We may want to put large but smaller than 64k objects in the fixed space
236 * when we move the object from one generation to another (to limit the
237 * pig in the snake effect).
238 * Note: it may be worth to have an optimized copy function, since we can
239 * assume that objects are aligned and have a multiple of 8 size.
240 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
241 * true if MONO_ZERO_LEN_ARRAY is nonzero.
243 typedef struct _LOSObject LOSObject;
246 mword size; /* this is the object size */
247 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
250 char data [MONO_ZERO_LEN_ARRAY];
253 /* Pinned objects are allocated in the LOS space if bigger than half a page
254 * or from freelists otherwise. We assume that pinned objects are relatively few
255 * and they have a slow dying speed (like interned strings, thread objects).
256 * As such they will be collected only at major collections.
257 * free lists are not global: when we need memory we allocate a PinnedChunk.
258 * Each pinned chunk is made of several pages, the first of which is used
259 * internally for bookkeeping (here think of a page as 4KB). The bookkeeping
260 * includes the freelists vectors and info about the object size of each page
261 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
262 * a size is assigned to it, the page is divided in the proper chunks and each
263 * chunk is added to the freelist. To not waste space, the remaining space in the
264 * first page is used as objects of size 16 or 32 (need to measure which are more
266 * We use this same structure to allocate memory used internally by the GC, so
267 * we never use malloc/free if we need to alloc during collection: the world is stopped
268 * and malloc/free will deadlock.
269 * When we want to iterate over pinned objects, we just scan a page at a time
270 * linearly according to the size of objects in the page: the next pointer used to link
271 * the items in the freelist uses the same word as the vtable. Since we keep freelists
272 * for each pinned chunk, if the word points outside the pinned chunk it means
274 * We could avoid this expensive scanning in creative ways. We could have a policy
275 * of putting in the pinned space only objects we know about that have no struct fields
276 * with references and we can easily use an even expensive write barrier for them,
277 * since pointer writes on such objects should be rare.
278 * The best compromise is to just alloc interned strings and System.MonoType in them.
279 * It would be nice to allocate MonoThread in it, too: must check that we properly
280 * use write barriers so we don't have to do any expensive scanning of the whole pinned
281 * chunk list during minor collections. We can avoid it now because we alloc in it only
282 * reference-free objects.
284 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
285 #define MAX_FREELIST_SIZE 2048
286 #define PINNED_PAGE_SIZE (4096)
287 #define PINNED_CHUNK_MIN_SIZE (4096*8)
288 typedef struct _PinnedChunk PinnedChunk;
289 struct _PinnedChunk {
292 int *page_sizes; /* a 0 means the page is still unused */
295 void *data [1]; /* page sizes and free lists are stored here */
298 /* The method used to clear the nursery */
299 /* Clearing at nursery collections is the safest, but has bad interactions with caches.
300 * Clearing at TLAB creation is much faster, but more complex and it might expose hard
305 CLEAR_AT_TLAB_CREATION
306 } NurseryClearPolicy;
308 static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION;
311 * If this is set, the nursery is aligned to an address aligned to its size, ie.
312 * a 1MB nursery will be aligned to an address divisible by 1MB. This allows us to
313 * speed up ptr_in_nursery () checks which are very frequent. This requires the
314 * nursery size to be a compile time constant.
316 #define ALIGN_NURSERY 1
319 * The young generation is divided into fragments. This is because
320 * we can hand one fragment to a thread for lock-less fast alloc and
321 * because the young generation ends up fragmented anyway by pinned objects.
322 * Once a collection is done, a list of fragments is created. When doing
323 * thread local alloc we use smallish nurseries so we allow new threads to
324 * allocate memory from gen0 without triggering a collection. Threads that
325 * are found to allocate lots of memory are given bigger fragments. This
326 * should make the finalizer thread use little nursery memory after a while.
327 * We should start assigning threads very small fragments: if there are many
328 * threads the nursery will be full of reserved space that the threads may not
329 * use at all, slowing down allocation speed.
330 * Thread local allocation is done from areas of memory Hotspot calls Thread Local
331 * Allocation Buffers (TLABs).
333 typedef struct _Fragment Fragment;
337 char *fragment_start;
338 char *fragment_limit; /* the current soft limit for allocation */
342 /* the runtime can register areas of memory as roots: we keep two lists of roots,
343 * a pinned root set for conservatively scanned roots and a normal one for
344 * precisely scanned roots (currently implemented as a single list).
346 typedef struct _RootRecord RootRecord;
354 /* for use with write barriers */
355 typedef struct _RememberedSet RememberedSet;
356 struct _RememberedSet {
360 mword data [MONO_ZERO_LEN_ARRAY];
363 /* we have 4 possible values in the low 2 bits */
365 REMSET_LOCATION, /* just a pointer to the exact location */
366 REMSET_RANGE, /* range of pointer fields */
367 REMSET_OBJECT, /* mark all the object for scanning */
368 REMSET_OTHER, /* all others */
369 REMSET_TYPE_MASK = 0x3
372 /* Subtypes of REMSET_OTHER */
374 REMSET_VTYPE, /* a valuetype described by a gc descriptor */
375 REMSET_ROOT_LOCATION, /* a location inside a root */
378 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
379 static pthread_key_t remembered_set_key;
380 static RememberedSet *global_remset;
381 //static int store_to_global_remset = 0;
383 /* FIXME: later choose a size that takes into account the RememberedSet struct
384 * and doesn't waste any alloc padding space.
386 #define DEFAULT_REMSET_SIZE 1024
387 static RememberedSet* alloc_remset (int size, gpointer id);
389 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
390 * no cast from a pointer to an integer
397 /* these bits are set in the object vtable: we could merge them since an object can be
398 * either pinned or forwarded but not both.
399 * We store them in the vtable slot because the bits are used in the sync block for
400 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
401 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
402 * would be an invalid combination for the monitor and hash code).
403 * The values are already shifted.
404 * The forwarding address is stored in the sync block.
406 #define FORWARDED_BIT 1
408 #define VTABLE_BITS_MASK 0x3
410 /* returns NULL if not forwarded, or the forwarded address */
411 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
412 /* set the forwarded address fw_addr for object obj */
413 #define forward_object(obj,fw_addr) do { \
414 ((mword*)(obj))[0] |= FORWARDED_BIT; \
415 ((mword*)(obj))[1] = (mword)(fw_addr); \
418 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
419 #define pin_object(obj) do { \
420 ((mword*)(obj))[0] |= PINNED_BIT; \
422 #define unpin_object(obj) do { \
423 ((mword*)(obj))[0] &= ~PINNED_BIT; \
427 #define ptr_in_nursery(ptr) (((mword)(ptr) & ~((1 << DEFAULT_NURSERY_BITS) - 1)) == (mword)nursery_start)
429 #define ptr_in_nursery(ptr) ((char*)(ptr) >= nursery_start && (char*)(ptr) < nursery_real_end)
433 * Since we set bits in the vtable, use the macro to load it from the pointer to
434 * an object that is potentially pinned.
436 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
439 safe_name (void* obj)
441 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
442 return vt->klass->name;
446 safe_object_get_size (MonoObject* o)
448 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
449 if (klass == mono_defaults.string_class) {
450 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
451 } else if (klass->rank) {
452 MonoArray *array = (MonoArray*)o;
453 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
454 if (G_UNLIKELY (array->bounds)) {
457 size += sizeof (MonoArrayBounds) * klass->rank;
461 /* from a created object: the class must be inited already */
462 return klass->instance_size;
466 static inline gboolean
467 is_maybe_half_constructed (MonoObject *o)
471 klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
472 if ((klass == mono_defaults.string_class && mono_string_length ((MonoString*)o) == 0) ||
473 (klass->rank && mono_array_length ((MonoArray*)o) == 0))
480 * ######################################################################
481 * ######## Global data.
482 * ######################################################################
484 static LOCK_DECLARE (gc_mutex);
485 static int gc_disabled = 0;
486 static int num_minor_gcs = 0;
487 static int num_major_gcs = 0;
489 /* good sizes are 512KB-1MB: larger ones greatly increase memzeroing time */
490 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
491 #define DEFAULT_NURSERY_SIZE (1024*512*2)
492 /* The number of trailing 0 bits in DEFAULT_NURSERY_SIZE */
493 #define DEFAULT_NURSERY_BITS 20
494 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
495 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
496 /* to quickly find the head of an object pinned by a conservative address
497 * we keep track of the objects allocated for each SCAN_START_SIZE memory
498 * chunk in the nursery or other memory sections. Larger values have less
499 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
501 #define SCAN_START_SIZE (4096*2)
502 /* the minimum size of a fragment that we consider useful for allocation */
503 #define FRAGMENT_MIN_SIZE (512)
504 /* This is a fixed value used for pinned chunks, not the system pagesize */
505 #define FREELIST_PAGESIZE 4096
507 static mword pagesize = 4096;
508 static mword nursery_size = DEFAULT_NURSERY_SIZE;
509 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
510 static mword max_section_size = DEFAULT_MAX_SECTION;
511 static int section_size_used = 0;
512 static int degraded_mode = 0;
514 static LOSObject *los_object_list = NULL;
515 static mword los_memory_usage = 0;
516 static mword los_num_objects = 0;
517 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
518 static mword total_alloc = 0;
519 /* use this to tune when to do a major/minor collection */
520 static mword memory_pressure = 0;
522 static GCMemSection *section_list = NULL;
523 static GCMemSection *nursery_section = NULL;
524 static mword lowest_heap_address = ~(mword)0;
525 static mword highest_heap_address = 0;
527 typedef struct _FinalizeEntry FinalizeEntry;
528 struct _FinalizeEntry {
533 typedef struct _DisappearingLink DisappearingLink;
534 struct _DisappearingLink {
535 DisappearingLink *next;
539 #define HIDE_POINTER(p) ((gpointer)(~(gulong)(p)))
540 #define REVEAL_POINTER(p) HIDE_POINTER ((p))
542 #define DISLINK_OBJECT(d) (REVEAL_POINTER (*(d)->link))
545 * The finalizable hash has the object as the key, the
546 * disappearing_link hash has the link address as key.
548 static FinalizeEntry **finalizable_hash = NULL;
549 /* objects that are ready to be finalized */
550 static FinalizeEntry *fin_ready_list = NULL;
551 static FinalizeEntry *critical_fin_list = NULL;
552 static DisappearingLink **disappearing_link_hash = NULL;
553 static mword disappearing_link_hash_size = 0;
554 static mword finalizable_hash_size = 0;
556 static int num_registered_finalizers = 0;
557 static int num_ready_finalizers = 0;
558 static int num_disappearing_links = 0;
559 static int no_finalize = 0;
561 /* keep each size a multiple of ALLOC_ALIGN */
562 /* on 64 bit systems 8 is likely completely unused. */
563 static const int freelist_sizes [] = {
564 8, 16, 24, 32, 40, 48, 64, 80,
565 96, 128, 160, 192, 224, 256, 320, 384,
566 448, 512, 584, 680, 816, 1024, 1360, 2048};
567 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
569 static char* max_pinned_chunk_addr = NULL;
570 static char* min_pinned_chunk_addr = (char*)-1;
571 /* pinned_chunk_list is used for allocations of objects that are never moved */
572 static PinnedChunk *pinned_chunk_list = NULL;
573 /* internal_chunk_list is used for allocating structures needed by the GC */
574 static PinnedChunk *internal_chunk_list = NULL;
577 obj_is_from_pinned_alloc (char *p)
579 PinnedChunk *chunk = pinned_chunk_list;
580 for (; chunk; chunk = chunk->next) {
581 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
588 ROOT_TYPE_NORMAL = 0, /* "normal" roots */
589 ROOT_TYPE_PINNED = 1, /* roots without a GC descriptor */
590 ROOT_TYPE_WBARRIER = 2, /* roots with a write barrier */
594 /* registered roots: the key to the hash is the root start address */
596 * Different kinds of roots are kept separate to speed up pin_from_roots () for example.
598 static RootRecord **roots_hash [ROOT_TYPE_NUM] = { NULL, NULL };
599 static int roots_hash_size [ROOT_TYPE_NUM] = { 0, 0, 0 };
600 static mword roots_size = 0; /* amount of memory in the root set */
601 static int num_roots_entries [ROOT_TYPE_NUM] = { 0, 0, 0 };
604 * The current allocation cursors
605 * We allocate objects in the nursery.
606 * The nursery is the area between nursery_start and nursery_real_end.
607 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
608 * from nursery fragments.
609 * tlab_next is the pointer to the space inside the TLAB where the next object will
611 * tlab_temp_end is the pointer to the end of the temporary space reserved for
612 * the allocation: it allows us to set the scan starts at reasonable intervals.
613 * tlab_real_end points to the end of the TLAB.
614 * nursery_frag_real_end points to the end of the currently used nursery fragment.
615 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
616 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
617 * At the next allocation, the area of the nursery where objects can be present is
618 * between MIN(nursery_first_pinned_start, first_fragment_start) and
619 * MAX(nursery_last_pinned_end, nursery_frag_real_end)
621 static char *nursery_start = NULL;
624 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
625 * variables for next+temp_end ?
627 static __thread char *tlab_start;
628 static __thread char *tlab_next;
629 static __thread char *tlab_temp_end;
630 static __thread char *tlab_real_end;
631 /* Used by the managed allocator */
632 static __thread char **tlab_next_addr;
633 static char *nursery_next = NULL;
634 static char *nursery_frag_real_end = NULL;
635 static char *nursery_real_end = NULL;
636 //static char *nursery_first_pinned_start = NULL;
637 static char *nursery_last_pinned_end = NULL;
639 /* The size of a TLAB */
640 /* The bigger the value, the less often we have to go to the slow path to allocate a new
641 * one, but the more space is wasted by threads not allocating much memory.
643 * FIXME: Make this self-tuning for each thread.
645 static guint32 tlab_size = (1024 * 4);
647 /* fragments that are free and ready to be used for allocation */
648 static Fragment *nursery_fragments = NULL;
649 /* freelist of fragment structures */
650 static Fragment *fragment_freelist = NULL;
653 * used when moving the objects
654 * When the nursery is collected, objects are copied to to_space.
655 * The area between gray_first and gray_objects is used as a stack
656 * of objects that need their fields checked for more references
658 * We should optimize somehow this mechanism to avoid rescanning
659 * ptr-free objects. The order is also probably not optimal: need to
660 * test cache misses and other graph traversal orders.
662 static char *to_space = NULL;
663 static char *gray_first = NULL;
664 static char *gray_objects = NULL;
665 static char *to_space_end = NULL;
666 static GCMemSection *to_space_section = NULL;
668 /* objects bigger than this go into the large object space */
669 #define MAX_SMALL_OBJ_SIZE 0xffff
671 /* Functions supplied by the runtime to be called by the GC */
672 static MonoGCCallbacks gc_callbacks;
675 * ######################################################################
676 * ######## Macros and function declarations.
677 * ######################################################################
680 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
681 if ((mword)(low) < lowest_heap_address) \
682 lowest_heap_address = (mword)(low); \
683 if ((mword)(high) > highest_heap_address) \
684 highest_heap_address = (mword)(high); \
688 align_pointer (void *ptr)
690 mword p = (mword)ptr;
691 p += sizeof (gpointer) - 1;
692 p &= ~ (sizeof (gpointer) - 1);
696 /* forward declarations */
697 static void* get_internal_mem (size_t size);
698 static void free_internal_mem (void *addr);
699 static void* get_os_memory (size_t size, int activate);
700 static void free_os_memory (void *addr, size_t size);
701 static G_GNUC_UNUSED void report_internal_mem_usage (void);
703 static int stop_world (void);
704 static int restart_world (void);
705 static void scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise);
706 static void scan_from_remsets (void *start_nursery, void *end_nursery);
707 static void find_pinning_ref_from_thread (char *obj, size_t size);
708 static void update_current_thread_stack (void *start);
709 static GCMemSection* alloc_section (size_t size);
710 static void finalize_in_range (char *start, char *end);
711 static void null_link_in_range (char *start, char *end);
712 static gboolean search_fragment_for_size (size_t size);
713 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
714 static void clear_remsets (void);
715 static void clear_tlabs (void);
716 static char *find_tlab_next_from_address (char *addr);
717 static void sweep_pinned_objects (void);
718 static void scan_from_pinned_objects (char *addr_start, char *addr_end);
719 static void free_large_object (LOSObject *obj);
720 static void free_mem_section (GCMemSection *section);
722 void describe_ptr (char *ptr);
723 void check_consistency (void);
724 char* check_object (char *start);
727 * ######################################################################
728 * ######## GC descriptors
729 * ######################################################################
730 * Used to quickly get the info the GC needs about an object: size and
731 * where the references are held.
733 /* objects are aligned to 8 bytes boundaries
734 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
735 * The low 3 bits define the type of the descriptor. The other bits
736 * depend on the type.
737 * As a general rule the 13 remaining low bits define the size, either
738 * of the whole object or of the elements in the arrays. While for objects
739 * the size is already in bytes, for arrays we need to shift, because
740 * array elements might be smaller than 8 bytes. In case of arrays, we
741 * use two bits to describe what the additional high bits represent,
742 * so the default behaviour can handle element sizes less than 2048 bytes.
743 * If the high 16 bits are 0, the object is pointer-free.
744 * This design should make it easy and fast to skip over ptr-free data.
745 * The first 4 types should cover >95% of the objects.
746 * Note that since the size of objects is limited to 64K, larger objects
747 * will be allocated in the large object heap.
748 * If we want 4-bytes alignment, we need to put vector and small bitmap
752 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
753 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
754 DESC_TYPE_STRING, /* nothing */
755 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
756 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
757 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
758 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
759 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
760 /* subtypes for arrays and vectors */
761 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
762 DESC_TYPE_V_REFS, /* all the array elements are refs */
763 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
764 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
767 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
768 #define LOW_TYPE_BITS 3
769 #define SMALL_BITMAP_SHIFT 16
770 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
771 #define VECTOR_INFO_SHIFT 14
772 #define VECTOR_ELSIZE_SHIFT 3
773 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
774 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
775 #define SMALL_SIZE_MASK 0xfff8
776 #define MAX_ELEMENT_SIZE 0x3ff
777 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
778 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
779 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
780 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
781 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
783 #define ALLOC_ALIGN 8
786 /* Root bitmap descriptors are simpler: the lower three bits describe the type
787 * and we either have 29/61 bitmap bits or nibble-based run-length,
788 * or a complex descriptor, or a user defined marker function.
791 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
796 ROOT_DESC_TYPE_MASK = 0x7,
797 ROOT_DESC_TYPE_SHIFT = 3,
800 #define MAKE_ROOT_DESC(type,val) ((type) | ((val) << ROOT_DESC_TYPE_SHIFT))
802 #define MAX_USER_DESCRIPTORS 16
804 static gsize* complex_descriptors = NULL;
805 static int complex_descriptors_size = 0;
806 static int complex_descriptors_next = 0;
807 static MonoGCMarkFunc user_descriptors [MAX_USER_DESCRIPTORS];
808 static int user_descriptors_next = 0;
// alloc_complex_descriptor: record `bitmap` in the complex_descriptors table, reusing an
// identical entry when one is already stored. Each entry is a length word (nwords, which
// counts itself) followed by nwords - 1 bitmap words.
// NOTE(review): the return statements are not visible in this listing; callers shift the
// result by LOW_TYPE_BITS and the scanning macros use it as an index into complex_descriptors.
811 alloc_complex_descriptor (gsize *bitmap, int numbits)
// numbits/GC_BITS_PER_WORD + 1 bitmap words, plus the leading length word
813 int nwords = numbits/GC_BITS_PER_WORD + 2;
// a new entry, if one turns out to be needed, starts at the first unused slot
818 res = complex_descriptors_next;
819 /* linear search, so we don't have duplicates with domain load/unload
820 * this should not be performance critical or we'd have bigger issues
821 * (the number and size of complex descriptors should be small).
// walk the table entry by entry: the first word of each entry is its total length
823 for (i = 0; i < complex_descriptors_next; ) {
// only an entry of the same length can match; compare its bitmap words one by one
824 if (complex_descriptors [i] == nwords) {
826 for (j = 0; j < nwords - 1; ++j) {
827 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
// hop to the next entry
837 i += complex_descriptors [i];
// grow the table when the new entry does not fit in the remaining slots
839 if (complex_descriptors_next + nwords > complex_descriptors_size) {
840 int new_size = complex_descriptors_size * 2 + nwords;
841 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
842 complex_descriptors_size = new_size;
844 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
// reserve the slots, then write the length word followed by the bitmap words
845 complex_descriptors_next += nwords;
846 complex_descriptors [res] = nwords;
847 for (i = 0; i < nwords - 1; ++i) {
848 complex_descriptors [res + 1 + i] = bitmap [i];
849 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
856 * Descriptor builders.
// Strings all share one descriptor that carries only the DESC_TYPE_STRING tag;
// neither bitmap nor numbits is consulted on the visible lines.
859 mono_gc_make_descr_for_string (gsize *bitmap, int numbits)
861 return (void*) DESC_TYPE_STRING;
// mono_gc_make_descr_for_object: build the descriptor for an object of obj_size bytes.
// Bit i of bitmap is tested for every i below numbits; the scanning macros treat a set
// bit i as "word i of the object holds a reference". The encodings are tried from the
// most compact (run-length) to the most general (complex descriptor table).
865 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
867 int first_set = -1, num_set = 0, last_set = -1, i;
// the size stored in the descriptor is the object size rounded up to ALLOC_ALIGN
869 size_t stored_size = obj_size;
870 stored_size += ALLOC_ALIGN - 1;
871 stored_size &= ~(ALLOC_ALIGN - 1);
// gather first/last/count of the set bits (the statements doing so are elided in this listing)
872 for (i = 0; i < numbits; ++i) {
873 if (bitmap [i / GC_BITS_PER_WORD] & ((gsize)1 << (i % GC_BITS_PER_WORD))) {
// NOTE(review): the encodings below OR stored_size into the low 16 bits, which are read
// back with the 0xfff8 mask; this assumes MAX_SMALL_OBJ_SIZE fits that mask -- confirm.
880 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
881 /* check run-length encoding first: one byte offset, one byte number of pointers
882 * on 64 bit archs, we can have 3 runs, just one on 32.
883 * It may be better to use nibbles.
// no pointer bits at all: a run-length descriptor with an empty run
886 desc = DESC_TYPE_RUN_LENGTH | stored_size;
887 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %zd\n", (void*)desc, stored_size));
// the set bits form one contiguous run whose offset and length each fit in a byte
889 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
// NOTE(review): first_set and num_set are int, so num_set << 24 overflows a signed int
// once num_set >= 128; casting to mword before shifting would avoid that.
890 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
891 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
894 /* we know the 2-word header is ptr-free */
// header bits are dropped from the bitmap, so the usable range grows by OBJECT_HEADER_WORDS
895 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
896 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
897 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
901 /* we know the 2-word header is ptr-free */
// large bitmap: no size is stored, every bit above the type tag holds bitmap
902 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
903 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
904 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
907 /* it's a complex object ... */
// fall back to the out-of-line table: the descriptor carries the table index
908 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
912 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
914 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
916 int first_set = -1, num_set = 0, last_set = -1, i;
917 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
918 for (i = 0; i < numbits; ++i) {
919 if (elem_bitmap [i / GC_BITS_PER_WORD] & ((gsize)1 << (i % GC_BITS_PER_WORD))) {
926 if (elem_size <= MAX_ELEMENT_SIZE) {
927 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
929 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
931 /* Note: we also handle structs with just ref fields */
932 if (num_set * sizeof (gpointer) == elem_size) {
933 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((gssize)(-1) << 16));
935 /* FIXME: try run-len first */
936 /* Note: we can't skip the object header here, because it's not present */
937 if (last_set <= SMALL_BITMAP_SIZE) {
938 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
941 /* it's am array of complex structs ... */
942 desc = DESC_TYPE_COMPLEX_ARR;
943 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
947 /* Return the bitmap encoded by a descriptor */
949 mono_gc_get_bitmap_for_descr (void *descr, int *numbits)
951 mword d = (mword)descr;
955 case DESC_TYPE_RUN_LENGTH: {
956 int first_set = (d >> 16) & 0xff;
957 int num_set = (d >> 16) & 0xff;
960 bitmap = g_new0 (gsize, (first_set + num_set + 7) / 8);
962 for (i = first_set; i < first_set + num_set; ++i)
963 bitmap [i / GC_BITS_PER_WORD] |= ((gsize)1 << (i % GC_BITS_PER_WORD));
965 *numbits = first_set + num_set;
969 case DESC_TYPE_SMALL_BITMAP:
970 bitmap = g_new0 (gsize, 1);
972 bitmap [0] = (d >> SMALL_BITMAP_SHIFT) << OBJECT_HEADER_WORDS;
974 *numbits = GC_BITS_PER_WORD;
978 g_assert_not_reached ();
982 /* helper macros to scan and traverse objects, macros because we reuse them in many functions */
983 #define STRING_SIZE(size,str) do { \
984 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
985 (size) += (ALLOC_ALIGN - 1); \
986 (size) &= ~(ALLOC_ALIGN - 1); \
989 #define OBJ_RUN_LEN_SIZE(size,desc,obj) do { \
990 (size) = (desc) & 0xfff8; \
993 #define OBJ_BITMAP_SIZE(size,desc,obj) do { \
994 (size) = (desc) & 0xfff8; \
997 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
998 #define PREFETCH(addr)
1000 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
1001 #define OBJ_RUN_LEN_FOREACH_PTR(desc,obj) do { \
1002 if ((desc) & 0xffff0000) { \
1003 /* there are pointers */ \
1004 void **_objptr_end; \
1005 void **_objptr = (void**)(obj); \
1006 _objptr += ((desc) >> 16) & 0xff; \
1007 _objptr_end = _objptr + (((desc) >> 24) & 0xff); \
1008 while (_objptr < _objptr_end) { \
1009 HANDLE_PTR (_objptr, (obj)); \
1015 /* a bitmap desc means that there are pointer references or we'd have
1016 * chosen run-length instead: add an assert to check.
1018 #define OBJ_BITMAP_FOREACH_PTR(desc,obj) do { \
1019 /* there are pointers */ \
1020 void **_objptr = (void**)(obj); \
1021 gsize _bmap = (desc) >> 16; \
1022 _objptr += OBJECT_HEADER_WORDS; \
1024 if ((_bmap & 1)) { \
1025 HANDLE_PTR (_objptr, (obj)); \
1032 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
1033 /* there are pointers */ \
1034 void **_objptr = (void**)(obj); \
1035 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
1036 _objptr += OBJECT_HEADER_WORDS; \
1038 if ((_bmap & 1)) { \
1039 HANDLE_PTR (_objptr, (obj)); \
1046 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
1047 /* there are pointers */ \
1048 void **_objptr = (void**)(obj); \
1049 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
1050 int bwords = (*bitmap_data) - 1; \
1051 void **start_run = _objptr; \
1054 MonoObject *myobj = (MonoObject*)obj; \
1055 g_print ("found %d at %p (0x%zx): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
1057 while (bwords-- > 0) { \
1058 gsize _bmap = *bitmap_data++; \
1059 _objptr = start_run; \
1060 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
1062 if ((_bmap & 1)) { \
1063 HANDLE_PTR (_objptr, (obj)); \
1068 start_run += GC_BITS_PER_WORD; \
1072 /* this one is untested */
1073 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
1074 /* there are pointers */ \
1075 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
1076 int mbwords = (*mbitmap_data++) - 1; \
1077 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
1078 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1079 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1081 MonoObject *myobj = (MonoObject*)start; \
1082 g_print ("found %d at %p (0x%zx): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
1084 while (e_start < e_end) { \
1085 void **_objptr = (void**)e_start; \
1086 gsize *bitmap_data = mbitmap_data; \
1087 unsigned int bwords = mbwords; \
1088 while (bwords-- > 0) { \
1089 gsize _bmap = *bitmap_data++; \
1090 void **start_run = _objptr; \
1091 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
1093 if ((_bmap & 1)) { \
1094 HANDLE_PTR (_objptr, (obj)); \
1099 _objptr = start_run + GC_BITS_PER_WORD; \
1101 e_start += el_size; \
1105 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
1106 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
1107 if ((vt)->desc & 0xffffc000) { \
1108 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
1109 /* there are pointers */ \
1110 int etype = (vt)->desc & 0xc000; \
1111 if (etype == (DESC_TYPE_V_REFS << 14)) { \
1112 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
1113 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
1114 /* Note: this code can handle also arrays of struct with only references in them */ \
1115 while (p < end_refs) { \
1116 HANDLE_PTR (p, (obj)); \
1119 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
1120 int offset = ((vt)->desc >> 16) & 0xff; \
1121 int num_refs = ((vt)->desc >> 24) & 0xff; \
1122 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1123 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1124 while (e_start < e_end) { \
1125 void **p = (void**)e_start; \
1128 for (i = 0; i < num_refs; ++i) { \
1129 HANDLE_PTR (p + i, (obj)); \
1131 e_start += el_size; \
1133 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
1134 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1135 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1136 while (e_start < e_end) { \
1137 void **p = (void**)e_start; \
1138 gsize _bmap = (vt)->desc >> 16; \
1139 /* Note: there is no object header here to skip */ \
1141 if ((_bmap & 1)) { \
1142 HANDLE_PTR (p, (obj)); \
1147 e_start += el_size; \
1153 static mword new_obj_references = 0;
1154 static mword obj_references_checked = 0;
1157 #define HANDLE_PTR(ptr,obj) do { \
1158 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1159 new_obj_references++; \
1160 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1162 obj_references_checked++; \
1167 * ######################################################################
1168 * ######## Detecting and removing garbage.
1169 * ######################################################################
1170 * This section of code deals with detecting the objects no longer in use
1171 * and reclaiming the memory.
1174 static void __attribute__((noinline))
1175 scan_area (char *start, char *end)
1180 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1182 new_obj_references = 0;
1183 obj_references_checked = 0;
1184 while (start < end) {
1185 if (!*(void**)start) {
1186 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1189 vt = (GCVTable*)LOAD_VTABLE (start);
1190 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1192 MonoObject *obj = (MonoObject*)start;
1193 g_print ("found at %p (0x%zx): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1197 if (type == DESC_TYPE_STRING) {
1198 STRING_SIZE (skip_size, start);
1202 } else if (type == DESC_TYPE_RUN_LENGTH) {
1203 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1204 g_assert (skip_size);
1205 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1209 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1210 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1211 skip_size *= mono_array_length ((MonoArray*)start);
1212 skip_size += sizeof (MonoArray);
1213 skip_size += (ALLOC_ALIGN - 1);
1214 skip_size &= ~(ALLOC_ALIGN - 1);
1215 OBJ_VECTOR_FOREACH_PTR (vt, start);
1216 if (type == DESC_TYPE_ARRAY) {
1217 /* account for the bounds */
1222 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1223 OBJ_BITMAP_SIZE (skip_size, desc, start);
1224 g_assert (skip_size);
1225 OBJ_BITMAP_FOREACH_PTR (desc,start);
1229 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1230 skip_size = safe_object_get_size ((MonoObject*)start);
1231 skip_size += (ALLOC_ALIGN - 1);
1232 skip_size &= ~(ALLOC_ALIGN - 1);
1233 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1237 } else if (type == DESC_TYPE_COMPLEX) {
1238 /* this is a complex object */
1239 skip_size = safe_object_get_size ((MonoObject*)start);
1240 skip_size += (ALLOC_ALIGN - 1);
1241 skip_size &= ~(ALLOC_ALIGN - 1);
1242 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1246 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1247 /* this is an array of complex structs */
1248 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1249 skip_size *= mono_array_length ((MonoArray*)start);
1250 skip_size += sizeof (MonoArray);
1251 skip_size += (ALLOC_ALIGN - 1);
1252 skip_size &= ~(ALLOC_ALIGN - 1);
1253 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1254 if (type == DESC_TYPE_ARRAY) {
1255 /* account for the bounds */
1264 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1265 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1266 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
1269 static void __attribute__((noinline))
1270 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1277 while (start < end) {
1278 if (!*(void**)start) {
1279 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1282 vt = (GCVTable*)LOAD_VTABLE (start);
1283 /* handle threads someway (maybe insert the root domain vtable?) */
1284 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1285 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1292 if (type == DESC_TYPE_STRING) {
1293 STRING_SIZE (skip_size, start);
1294 if (remove) memset (start, 0, skip_size);
1297 } else if (type == DESC_TYPE_RUN_LENGTH) {
1298 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1299 g_assert (skip_size);
1300 if (remove) memset (start, 0, skip_size);
1303 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1304 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1305 skip_size *= mono_array_length ((MonoArray*)start);
1306 skip_size += sizeof (MonoArray);
1307 skip_size += (ALLOC_ALIGN - 1);
1308 skip_size &= ~(ALLOC_ALIGN - 1);
1309 if (type == DESC_TYPE_ARRAY) {
1310 /* account for the bounds */
1312 if (remove) memset (start, 0, skip_size);
1315 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1316 OBJ_BITMAP_SIZE (skip_size, desc, start);
1317 g_assert (skip_size);
1318 if (remove) memset (start, 0, skip_size);
1321 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1322 skip_size = safe_object_get_size ((MonoObject*)start);
1323 skip_size += (ALLOC_ALIGN - 1);
1324 skip_size &= ~(ALLOC_ALIGN - 1);
1325 if (remove) memset (start, 0, skip_size);
1328 } else if (type == DESC_TYPE_COMPLEX) {
1329 /* this is a complex object */
1330 skip_size = safe_object_get_size ((MonoObject*)start);
1331 skip_size += (ALLOC_ALIGN - 1);
1332 skip_size &= ~(ALLOC_ALIGN - 1);
1333 if (remove) memset (start, 0, skip_size);
1336 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1337 /* this is an array of complex structs */
1338 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1339 skip_size *= mono_array_length ((MonoArray*)start);
1340 skip_size += sizeof (MonoArray);
1341 skip_size += (ALLOC_ALIGN - 1);
1342 skip_size &= ~(ALLOC_ALIGN - 1);
1343 if (type == DESC_TYPE_ARRAY) {
1344 /* account for the bounds */
1346 if (remove) memset (start, 0, skip_size);
1356 * When appdomains are unloaded we can easily remove objects that have finalizers,
1357 * but all the others could still be present in random places on the heap.
1358 * We need a sweep to get rid of them even though it's going to be costly
1360 * The reason we need to remove them is because we access the vtable and class
1361 * structures to know the object size and the reference bitmap: once the domain is
1362 * unloaded they point to random memory.
// mono_gc_clear_domain: sweep every memory section for objects of `domain`.
1365 mono_gc_clear_domain (MonoDomain * domain)
1367 GCMemSection *section;
// every section on section_list is scanned over its [data, end_data) range
1369 for (section = section_list; section; section = section->next) {
1370 scan_area_for_domain (domain, section->data, section->end_data);
1372 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
1378 * add_to_global_remset:
1380 * The global remset contains locations which point into newspace after
1381 * a minor collection. This can happen if the objects they point to are pinned.
// add_to_global_remset: append the location `ptr` to the global remembered set.
// Two record shapes are written below: a two-word record tagged REMSET_OTHER followed by
// REMSET_ROOT_LOCATION, and a plain one-word record.
// NOTE(review): the condition choosing between them is elided in this listing --
// presumably it is the `root` argument; confirm.
1384 add_to_global_remset (gpointer ptr, gboolean root)
1388 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
1391 * FIXME: If an object remains pinned, we need to add it at every minor collection.
1392 * To avoid uncontrolled growth of the global remset, only add each pointer once.
// fast path: the current buffer still has room beyond the slots this record needs
1394 if (global_remset->store_next + 3 < global_remset->end_set) {
1396 *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER;
1397 *(global_remset->store_next++) = (mword)REMSET_ROOT_LOCATION;
1399 *(global_remset->store_next++) = (mword)ptr;
// buffer full: allocate a remset with the same capacity and chain the old one behind it
// NOTE(review): the statement making `rs` the new global_remset is not visible here.
1403 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1404 rs->next = global_remset;
1407 *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER;
1408 *(global_remset->store_next++) = (mword)REMSET_ROOT_LOCATION;
1410 *(global_remset->store_next++) = (mword)ptr;
// debug statistics: total number of words stored across the whole chain
1414 int global_rs_size = 0;
1416 for (rs = global_remset; rs; rs = rs->next) {
1417 global_rs_size += rs->store_next - rs->data;
1419 DEBUG (4, fprintf (gc_debug_file, "Global remset now has size %d\n", global_rs_size));
1424 * This is how the copying happens from the nursery to the old generation.
1425 * We assume that at this time all the pinned objects have been identified and
1427 * We run scan_object() for each pinned object so that every object it
1428 * references is copied where possible. The new gray objects created can have
1429 * scan_object() run on them right away, too.
1430 * Then we run copy_object() for the precisely tracked roots. At this point
1431 * all the roots are either gray or black. We run scan_object() on the gray
1432 * objects until no more gray objects are created.
1433 * At the end of the process we walk again the pinned list and we unmark
1434 * the pinned flag. As we go we also create the list of free space for use
1435 * in the next allocation runs.
1437 * We need to remember objects from the old generation that point to the new one
1438 * (or just addresses?).
1440 * copy_object could be made into a macro once debugged (use inline for now).
1443 static char* __attribute__((noinline))
1444 copy_object (char *obj, char *from_space_start, char *from_space_end)
1446 static void *copy_labels [] = { &&LAB_0, &&LAB_1, &&LAB_2, &&LAB_3, &&LAB_4, &&LAB_5, &&LAB_6, &&LAB_7, &&LAB_8 };
1449 * FIXME: The second set of checks is only needed if we are called for tospace
1452 if (obj >= from_space_start && obj < from_space_end && (obj < to_space || obj >= to_space_end)) {
1456 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
1457 if ((forwarded = object_is_forwarded (obj))) {
1458 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1459 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
1462 if (object_is_pinned (obj)) {
1463 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1464 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1467 objsize = safe_object_get_size ((MonoObject*)obj);
1468 objsize += ALLOC_ALIGN - 1;
1469 objsize &= ~(ALLOC_ALIGN - 1);
1470 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %zd)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1471 /* FIXME: handle pinned allocs:
1472 * Large objects are simple, at least until we always follow the rule:
1473 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1474 * At the end of major collections, we walk the los list and if
1475 * the object is pinned, it is marked, otherwise it can be freed.
1477 if (G_UNLIKELY (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj)))) {
1478 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %zd\n", obj, safe_name (obj), objsize));
1482 /* ok, the object is not pinned, we can move it */
1483 /* use a optimized memcpy here */
1484 if (objsize <= sizeof (gpointer) * 8) {
1485 mword *dest = (mword*)gray_objects;
1486 goto *copy_labels [objsize / sizeof (gpointer)];
1488 (dest) [7] = ((mword*)obj) [7];
1490 (dest) [6] = ((mword*)obj) [6];
1492 (dest) [5] = ((mword*)obj) [5];
1494 (dest) [4] = ((mword*)obj) [4];
1496 (dest) [3] = ((mword*)obj) [3];
1498 (dest) [2] = ((mword*)obj) [2];
1500 (dest) [1] = ((mword*)obj) [1];
1502 (dest) [0] = ((mword*)obj) [0];
1510 char* edi = gray_objects;
1511 __asm__ __volatile__(
1513 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1514 : "0" (objsize/4), "1" (edi),"2" (esi)
1519 memcpy (gray_objects, obj, objsize);
1522 /* adjust array->bounds */
1523 vt = ((MonoObject*)obj)->vtable;
1524 g_assert (vt->gc_descr);
1525 if (G_UNLIKELY (vt->rank && ((MonoArray*)obj)->bounds)) {
1526 MonoArray *array = (MonoArray*)gray_objects;
1527 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1528 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %zd, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1530 /* set the forwarding pointer */
1531 forward_object (obj, gray_objects);
1533 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
1534 gray_objects += objsize;
1535 DEBUG (8, g_assert (gray_objects <= to_space_end));
1542 #define HANDLE_PTR(ptr,obj) do { \
1543 void *__old = *(ptr); \
1545 *(ptr) = copy_object (__old, from_start, from_end); \
1546 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1547 if (G_UNLIKELY (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) && !ptr_in_nursery (ptr)) \
1548 add_to_global_remset ((ptr), FALSE); \
1553 * Scan the object pointed to by @start for references to
1554 * other objects between @from_start and @from_end and copy
1555 * them to the gray_objects area.
1556 * Returns a pointer to the end of the object.
1559 scan_object (char *start, char* from_start, char* from_end)
1565 vt = (GCVTable*)LOAD_VTABLE (start);
1566 //type = vt->desc & 0x7;
1568 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1570 switch (desc & 0x7) {
1571 //if (type == DESC_TYPE_STRING) {
1572 case DESC_TYPE_STRING:
1573 STRING_SIZE (skip_size, start);
1574 return start + skip_size;
1575 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1576 case DESC_TYPE_RUN_LENGTH:
1577 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1578 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1579 g_assert (skip_size);
1580 return start + skip_size;
1581 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1582 case DESC_TYPE_ARRAY:
1583 case DESC_TYPE_VECTOR:
1584 OBJ_VECTOR_FOREACH_PTR (vt, start);
1585 skip_size = safe_object_get_size ((MonoObject*)start);
1587 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1588 skip_size *= mono_array_length ((MonoArray*)start);
1589 skip_size += sizeof (MonoArray);
1591 skip_size += (ALLOC_ALIGN - 1);
1592 skip_size &= ~(ALLOC_ALIGN - 1);
1593 return start + skip_size;
1594 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1595 case DESC_TYPE_SMALL_BITMAP:
1596 OBJ_BITMAP_FOREACH_PTR (desc,start);
1597 OBJ_BITMAP_SIZE (skip_size, desc, start);
1598 return start + skip_size;
1599 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1600 case DESC_TYPE_LARGE_BITMAP:
1601 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1602 skip_size = safe_object_get_size ((MonoObject*)start);
1603 skip_size += (ALLOC_ALIGN - 1);
1604 skip_size &= ~(ALLOC_ALIGN - 1);
1605 return start + skip_size;
1606 //} else if (type == DESC_TYPE_COMPLEX) {
1607 case DESC_TYPE_COMPLEX:
1608 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1609 /* this is a complex object */
1610 skip_size = safe_object_get_size ((MonoObject*)start);
1611 skip_size += (ALLOC_ALIGN - 1);
1612 skip_size &= ~(ALLOC_ALIGN - 1);
1613 return start + skip_size;
1614 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1615 case DESC_TYPE_COMPLEX_ARR:
1616 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1617 /* this is an array of complex structs */
1618 skip_size = safe_object_get_size ((MonoObject*)start);
1620 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1621 skip_size *= mono_array_length ((MonoArray*)start);
1622 skip_size += sizeof (MonoArray);
1624 skip_size += (ALLOC_ALIGN - 1);
1625 skip_size &= ~(ALLOC_ALIGN - 1);
1626 return start + skip_size;
1628 g_assert_not_reached ();
1635 * Scan objects in the gray stack until the stack is empty. This should be called
1636 * frequently after each object is copied, to achieve better locality and cache
// drain_gray_stack: scan every object between gray_first and gray_objects.
// start_addr/end_addr are handed to scan_object () as the from-space bounds.
1640 drain_gray_stack (char *start_addr, char *end_addr)
1642 char *gray_start = gray_first;
// gray_objects can advance while we scan: scan_object () copies referenced objects via
// copy_object (), which appends them at gray_objects. The loop ends once scanning catches up.
1644 while (gray_start < gray_objects) {
1645 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
// scan_object () returns the end of the object it scanned, i.e. the start of the next one
1646 gray_start = scan_object (gray_start, start_addr, end_addr);
// remember how far we got so the next call resumes from here
1649 gray_first = gray_start;
1655 * Scan the valuetype pointed to by START, described by DESC for references to
1656 * other objects between @from_start and @from_end and copy them to the gray_objects area.
1657 * Returns a pointer to the end of the object.
// scan_vtype: scan a value type at `start` described by `desc`.
// from_start/from_end are not used directly here; they are read by the HANDLE_PTR
// macro that the OBJ_*_FOREACH_PTR macros expand, so the parameter names matter.
1660 scan_vtype (char *start, mword desc, char* from_start, char* from_end)
1664 /* The descriptors include info about the MonoObject header as well */
// rewind so descriptor offsets, which count from an object header, line up with the data
1665 start -= sizeof (MonoObject);
// dispatch on the descriptor type tag held in the low three bits
1667 switch (desc & 0x7) {
1668 case DESC_TYPE_RUN_LENGTH:
1669 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1670 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1671 g_assert (skip_size);
// note: the returned end pointer is computed from the rewound start
1672 return start + skip_size;
1673 case DESC_TYPE_SMALL_BITMAP:
1674 OBJ_BITMAP_FOREACH_PTR (desc,start);
1675 OBJ_BITMAP_SIZE (skip_size, desc, start);
1676 return start + skip_size;
// these two descriptor types are not handled for value types and abort
1677 case DESC_TYPE_LARGE_BITMAP:
1678 case DESC_TYPE_COMPLEX:
1680 g_assert_not_reached ();
1683 // The other descriptors can't happen with vtypes
1684 g_assert_not_reached ();
1691 * Addresses from start to end are already sorted. This function finds the object header
1692 * for each address and pins the object. The addresses must be inside the passed section.
1693 * Return the number of pinned objects.
1696 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1701 void *last_obj = NULL;
1702 size_t last_obj_size = 0;
1705 void **definitely_pinned = start;
1706 while (start < end) {
1708 /* the range check should be redundant */
1709 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1710 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1711 /* multiple pointers to the same object */
1712 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
1716 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1717 search_start = (void*)section->scan_starts [idx];
1718 if (!search_start || search_start > addr) {
1721 search_start = section->scan_starts [idx];
1722 if (search_start && search_start <= addr)
1725 if (!search_start || search_start > addr)
1726 search_start = start_nursery;
1728 if (search_start < last_obj)
1729 search_start = (char*)last_obj + last_obj_size;
1730 /* now addr should be in an object a short distance from search_start
1731 * Note that search_start must point to zeroed mem or point to an object.
1734 if (!*(void**)search_start) {
1735 mword p = (mword)search_start;
1736 p += sizeof (gpointer);
1737 p += ALLOC_ALIGN - 1;
1738 p &= ~(ALLOC_ALIGN - 1);
1739 search_start = (void*)p;
1742 last_obj = search_start;
1743 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1744 last_obj_size += ALLOC_ALIGN - 1;
1745 last_obj_size &= ~(ALLOC_ALIGN - 1);
1746 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %zd\n", last_obj, safe_name (last_obj), last_obj_size));
1747 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1748 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1749 pin_object (search_start);
1750 definitely_pinned [count] = search_start;
1754 /* skip to the next object */
1755 search_start = (void*)((char*)search_start + last_obj_size);
1756 } while (search_start <= addr);
1757 /* we either pinned the correct object or we ignored the addr because
1758 * it points to unused zeroed memory.
1764 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
1768 static void** pin_queue;
1769 static int pin_queue_size = 0;
1770 static int next_pin_slot = 0;
/*
 * new_gap:
 * Next comb-sort gap: shrink @gap by a factor of 1.3. Gaps of 9 and 10 are
 * bumped to 11 and the result is never smaller than 1.
 */
static int
new_gap (int gap)
{
	gap = (gap * 10) / 13;
	if (gap == 9 || gap == 10)
		return 11;
	if (gap < 1)
		return 1;
	return gap;
}

/*
 * compare_addr:
 * qsort ()-style comparator for an array of addresses: @a and @b point to the
 * array slots. Returns a negative, zero or positive value.
 * The addresses are compared rather than subtracted, because a pointer
 * difference does not fit in an int once the addresses are 2^31 or more apart.
 * Only referenced from the commented-out qsort () call in sort_addresses ().
 */
static __attribute__((unused)) int
compare_addr (const void *a, const void *b)
{
	const char *pa = (const char *)*(const void * const *)a;
	const char *pb = (const char *)*(const void * const *)b;

	return (pa > pb) - (pa < pb);
}

/* sort the addresses in array in increasing order */
static void
sort_addresses (void **array, int size)
{
	/*
	 * qsort is slower as predicted.
	 * qsort (array, size, sizeof (gpointer), compare_addr);
	 * return;
	 */
	int gap = size;
	int swapped, end;

	/* comb sort: bubble passes over elements `gap` apart, with a shrinking gap */
	for (;;) {
		int i;
		gap = new_gap (gap);
		swapped = 0;
		end = size - gap;
		for (i = 0; i < end; i++) {
			int j = i + gap;
			if (array [i] > array [j]) {
				void* val = array [i];
				array [i] = array [j];
				array [j] = val;
				swapped = 1;
			}
		}
		/* a pass with gap 1 and no swap means the array is sorted */
		if (gap == 1 && !swapped)
			break;
	}
}
1821 static G_GNUC_UNUSED void
1822 print_nursery_gaps (void* start_nursery, void *end_nursery)
1825 gpointer first = start_nursery;
1827 for (i = 0; i < next_pin_slot; ++i) {
1828 next = pin_queue [i];
1829 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1833 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1836 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
1838 optimize_pin_queue (int start_slot)
1840 void **start, **cur, **end;
1841 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1842 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1843 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1844 if ((next_pin_slot - start_slot) > 1)
1845 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1846 start = cur = pin_queue + start_slot;
1847 end = pin_queue + next_pin_slot;
1850 while (*start == *cur && cur < end)
1854 next_pin_slot = start - pin_queue;
1855 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1856 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
1861 realloc_pin_queue (void)
1863 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1864 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1865 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1866 free_internal_mem (pin_queue);
1867 pin_queue = new_pin;
1868 pin_queue_size = new_size;
1869 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1873 * Scan the memory between start and end and queue values which could be pointers
1874 * to the area between start_nursery and end_nursery for later consideration.
1875 * Typically used for thread stacks.
/*
 * Conservative scan: reads every pointer-sized word in [start, end) and, for each
 * value that lies in [start_nursery, end_nursery), appends that value (rounded
 * down to the allocation alignment) to pin_queue. Duplicates are not filtered
 * here; per the note inside the loop the queue is sorted and uniqued afterwards.
 */
1878 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1881 while (start < end) {
1882 if (*start >= start_nursery && *start < end_nursery) {
1884 * *start can point to the middle of an object
1885 * note: should we handle pointing at the end of an object?
1886 * pinning in C# code disallows pointing at the end of an object
1887 * but there is some small chance that an optimizing C compiler
1888 * may keep the only reference to an object by pointing
1889 * at the end of it. We ignore this small chance for now.
1890 * Pointers to the end of an object are indistinguishable
1891 * from pointers to the start of the next object in memory
1892 * so if we allow that we'd need to pin two objects...
1893 * We queue the pointer in an array, the
1894 * array will then be sorted and uniqued. This way
1895 * we can coalesce several pinning pointers and it should
1896 * be faster since we'd do a memory scan with increasing
1897 * addresses. Note: we can align the address to the allocation
1898 * alignment, so the unique process is more effective.
1900 mword addr = (mword)*start;
/* Clear the low bits; this assumes ALLOC_ALIGN is a power of two - TODO confirm. */
1901 addr &= ~(ALLOC_ALIGN - 1);
/* Grow the queue first when it is full, then append the aligned address. */
1902 if (next_pin_slot >= pin_queue_size)
1903 realloc_pin_queue ();
1904 pin_queue [next_pin_slot++] = (void*)addr;
1905 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1910 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1912 #ifdef HAVE_VALGRIND_MEMCHECK_H
1914 * The pinning addresses might come from undefined memory, this is normal. Since they
1915 * are used in lots of functions, we make the memory defined here instead of having
1916 * to add a supression for those functions.
/* Marks the used part of pin_queue (next_pin_slot entries) as defined for Memcheck. */
1918 VALGRIND_MAKE_MEM_DEFINED (pin_queue, next_pin_slot * sizeof (pin_queue [0]));
1923 * If generation is 0, just mark objects in the nursery, the others we don't care,
1924 * since they are not going to move anyway.
1925 * There are different areas that are scanned for pinned pointers:
1926 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1927 * *) the pinned handle table
1928 * *) the pinned roots
1930 * Note: when we'll use a write barrier for old to new gen references, we need to
1931 * keep track of old gen objects that point to pinned new gen objects because in that
1932 * case the referenced object will be moved maybe at the next collection, but there
1933 * is no write in the old generation area where the pinned object is referenced
1934 * and we may not consider it as reachable.
/*
 * NOTE(review): only the signature is visible here, so nothing can be said about
 * the body. G_GNUC_UNUSED is the GLib macro for the compiler "unused" attribute;
 * on a static function it avoids the unused-function warning.
 */
1936 static G_GNUC_UNUSED void
1937 mark_pinned_objects (int generation)
1942 * Debugging function: find in the conservative roots where @obj is being pinned.
/*
 * Debugging aid: for every record in roots_hash [0] that has no root_desc, walks
 * the record word by word and logs (at debug level 0) each slot whose value lies
 * in [obj, obj + size). Afterwards the same object/size is passed to
 * find_pinning_ref_from_thread.
 * obj:  start address of the object being looked for.
 * size: its size in bytes; the end of the searched-for range is exclusive.
 * NOTE(review): this indexes roots_hash with a literal 0 while pin_from_roots
 * uses ROOT_TYPE_PINNED, and the log text says "pinned roots" - confirm the
 * literal selects the intended root table.
 */
1944 static G_GNUC_UNUSED void
1945 find_pinning_reference (char *obj, size_t size)
1949 char *endobj = obj + size;
1950 for (i = 0; i < roots_hash_size [0]; ++i) {
1951 for (root = roots_hash [0][i]; root; root = root->next) {
1952 /* if desc is non-null it has precise info */
1953 if (!root->root_desc) {
1954 char ** start = (char**)root->start_root;
1955 while (start < (char**)root->end_root) {
1956 if (*start >= obj && *start < endobj) {
1957 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
1964 find_pinning_ref_from_thread (obj, size);
1968 * The first thing we do in a collection is to identify pinned objects.
1969 * This function considers all the areas of memory that need to be
1970 * conservatively scanned.
/*
 * Queues candidate pinning addresses for the range [start_nursery, end_nursery):
 * first every record of the ROOT_TYPE_PINNED root table is scanned with
 * conservatively_pin_objects_from, then scan_thread_data is called for the same
 * range with FALSE as its last argument (presumably selecting the conservative
 * pass - confirm against scan_thread_data).
 * Despite the parameter names the range is not always the nursery:
 * major_collection also passes section, large-object and pinned-chunk bounds.
 */
1973 pin_from_roots (void *start_nursery, void *end_nursery)
1977 DEBUG (2, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d/%d entries)\n", (int)roots_size, num_roots_entries [ROOT_TYPE_NORMAL], num_roots_entries [ROOT_TYPE_PINNED]));
1978 /* objects pinned from the API are inside these roots */
1979 for (i = 0; i < roots_hash_size [ROOT_TYPE_PINNED]; ++i) {
1980 for (root = roots_hash [ROOT_TYPE_PINNED][i]; root; root = root->next) {
1981 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1982 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1985 /* now deal with the thread stacks
1986 * in the future we should be able to conservatively scan only:
1987 * *) the cpu registers
1988 * *) the unmanaged stack frames
1989 * *) the _last_ managed stack frame
1990 * *) pointers slots in managed frames
1992 scan_thread_data (start_nursery, end_nursery, FALSE);
1995 /* Copy function called from user defined mark functions */
/*
 * Range bounds read by user_copy. precisely_scan_objects_from stores n_start and
 * n_end here right before it invokes a user-defined marker, because the callback
 * handed to the marker only takes the address.
 */
1996 static char *user_copy_n_start;
1997 static char *user_copy_n_end;
/*
 * Callback given to user mark functions: forwards addr to copy_object with the
 * bounds stashed above and returns what copy_object returns.
 */
2000 user_copy (void *addr)
2003 return copy_object (addr, user_copy_n_start, user_copy_n_end);
2009 * The memory area from start_root to end_root contains pointers to objects.
2010 * Their position is precisely described by @desc (this means that the pointer
2011 * can be either NULL or the pointer to the start of an object).
2012 * This function copies them to to_space and updates them.
/*
 * Precise root scan, dispatched on the type bits of desc
 * (desc & ROOT_DESC_TYPE_MASK):
 *   ROOT_DESC_BITMAP  - the bits above ROOT_DESC_TYPE_SHIFT are an inline bitmap;
 *                       a slot whose bit is set and whose value is non-NULL is
 *                       overwritten with the copy_object result, then the gray
 *                       stack is drained.
 *   ROOT_DESC_COMPLEX - the bitmap lives in complex_descriptors at index
 *                       desc >> ROOT_DESC_TYPE_SHIFT; its first word is a count
 *                       (the number of bitmap words plus one) and each bitmap
 *                       word describes GC_BITS_PER_WORD consecutive slots.
 *   ROOT_DESC_USER    - the marker stored in user_descriptors is called with
 *                       start_root and user_copy as the copy callback.
 *   ROOT_DESC_RUN_LEN - not supported for roots: g_assert_not_reached.
 * A second g_assert_not_reached follows (presumably the default case).
 * n_start / n_end are passed through to copy_object and drain_gray_stack.
 */
2015 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
2017 switch (desc & ROOT_DESC_TYPE_MASK) {
2018 case ROOT_DESC_BITMAP:
/* Drop the type bits so bit 0 of desc describes the slot at start_root. */
2019 desc >>= ROOT_DESC_TYPE_SHIFT;
2021 if ((desc & 1) && *start_root) {
2022 *start_root = copy_object (*start_root, n_start, n_end);
2023 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root));
2024 drain_gray_stack (n_start, n_end);
2030 case ROOT_DESC_COMPLEX: {
2031 gsize *bitmap_data = complex_descriptors + (desc >> ROOT_DESC_TYPE_SHIFT);
/* The leading word counts itself, hence the -1 to get the number of bitmap words. */
2032 int bwords = (*bitmap_data) - 1;
2033 void **start_run = start_root;
2035 while (bwords-- > 0) {
2036 gsize bmap = *bitmap_data++;
2037 void **objptr = start_run;
2039 if ((bmap & 1) && *objptr) {
2040 *objptr = copy_object (*objptr, n_start, n_end);
2041 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", objptr, *objptr));
2042 drain_gray_stack (n_start, n_end);
/* Advance to the slots covered by the next bitmap word. */
2047 start_run += GC_BITS_PER_WORD;
2051 case ROOT_DESC_USER: {
2052 MonoGCMarkFunc marker = user_descriptors [desc >> ROOT_DESC_TYPE_SHIFT];
/* Publish the bounds for user_copy before handing control to the marker. */
2054 user_copy_n_start = n_start;
2055 user_copy_n_end = n_end;
2056 marker (start_root, user_copy);
2059 case ROOT_DESC_RUN_LEN:
2060 g_assert_not_reached ();
2062 g_assert_not_reached ();
/*
 * Provides a Fragment record: the visible lines take the head of
 * fragment_freelist (advancing the list to frag->next) and otherwise obtain
 * sizeof (Fragment) bytes from get_internal_mem.
 * NOTE(review): the branch conditions and the return statement are not visible
 * here; callers (alloc_nursery, add_nursery_frag) use the result as a Fragment*.
 */
2067 alloc_fragment (void)
2069 Fragment *frag = fragment_freelist;
2071 fragment_freelist = frag->next;
2075 frag = get_internal_mem (sizeof (Fragment));
2081 * Allocate and setup the data structures needed to be able to allocate objects
2082 * in the nursery. The nursery is stored in nursery_section.
/*
 * One-time nursery setup (there is an early check on nursery_section at the top).
 * Allocates the GCMemSection record and the nursery memory, sets nursery_start,
 * nursery_real_end and nursery_next, fills in the section fields (role
 * MEMORY_ROLE_GEN0), links the section at the head of section_list, publishes it
 * as nursery_section, and prepares one Fragment spanning the whole nursery.
 * Allocation results are not checked (see the FIXME about OOM below).
 */
2085 alloc_nursery (void)
2087 GCMemSection *section;
2093 if (nursery_section)
2095 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %zd\n", nursery_size));
2096 /* later we will alloc a larger area for the nursery but only activate
2097 * what we need. The rest will be used as expansion if we have too many pinned
2098 * objects in the existing nursery.
2100 /* FIXME: handle OOM */
2101 section = get_internal_mem (sizeof (GCMemSection));
2103 #ifdef ALIGN_NURSERY
2104 /* Allocate twice the memory to be able to put the nursery at an aligned address */
2105 g_assert (nursery_size == DEFAULT_NURSERY_SIZE);
2107 alloc_size = nursery_size * 2;
2108 data = get_os_memory (alloc_size, TRUE);
/* Round data up to the next multiple of 1 << DEFAULT_NURSERY_BITS. */
2109 nursery_start = (void*)(((mword)data + (1 << DEFAULT_NURSERY_BITS) - 1) & ~((1 << DEFAULT_NURSERY_BITS) - 1));
2110 g_assert ((char*)nursery_start + nursery_size <= ((char*)data + alloc_size));
2111 /* FIXME: Use the remaining size for something else, if it is big enough */
/* Presumably the non-ALIGN_NURSERY branch (the directive lines are not visible): the nursery starts at the mapping. */
2113 alloc_size = nursery_size;
2114 data = get_os_memory (alloc_size, TRUE);
2115 nursery_start = data;
2117 nursery_real_end = nursery_start + nursery_size;
2118 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
2119 nursery_next = nursery_start;
2120 total_alloc += alloc_size;
2121 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", nursery_size, total_alloc));
/* The section records the raw mapping (data, alloc_size) but ends at nursery_real_end. */
2122 section->data = section->next_data = data;
2123 section->size = alloc_size;
2124 section->end_data = nursery_real_end;
/* One scan-start slot per SCAN_START_SIZE bytes of the mapping. */
2125 scan_starts = alloc_size / SCAN_START_SIZE;
2126 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2127 section->num_scan_start = scan_starts;
2128 section->role = MEMORY_ROLE_GEN0;
2130 /* add to the section list */
2131 section->next = section_list;
2132 section_list = section;
2134 nursery_section = section;
2136 /* Setup the single first large fragment */
2137 frag = alloc_fragment ();
2138 frag->fragment_start = nursery_start;
2139 frag->fragment_limit = nursery_start;
2140 frag->fragment_end = nursery_real_end;
2141 nursery_frag_real_end = nursery_real_end;
2142 /* FIXME: frag here is lost */
/*
 * Walks the FinalizeEntry list starting at `list` and replaces each entry's
 * object pointer with the result of copy_object (object, start, end), logging
 * the object at debug level 5 first.
 */
2146 scan_finalizer_entries (FinalizeEntry *list, char *start, char *end) {
2149 for (fin = list; fin; fin = fin->next) {
2152 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2153 fin->object = copy_object (fin->object, start, end);
2158 * Update roots in the old generation. Since we currently don't have the
2159 * info from the write barriers, we just scan all the objects.
/*
 * Scans everything outside the nursery with scan_object for the range
 * [start, end): each section in section_list other than nursery_section (from
 * its data up to next_data), every object in los_object_list, and finally the
 * fin_ready_list and critical_fin_list finalizer entries.
 * Marked G_GNUC_UNUSED; the only call visible in this file (in
 * finish_gray_stack) is commented out.
 */
2161 static G_GNUC_UNUSED void
2162 scan_old_generation (char *start, char* end)
2164 GCMemSection *section;
2165 LOSObject *big_object;
2168 for (section = section_list; section; section = section->next) {
2169 if (section == nursery_section)
2171 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
2172 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
2174 while (p < section->next_data) {
2179 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
/* scan_object returns the address at which scanning continues. */
2180 p = scan_object (p, start, end);
2183 /* scan the old object space, too */
2184 for (big_object = los_object_list; big_object; big_object = big_object->next) {
2185 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
2186 scan_object (big_object->data, start, end);
2188 /* scan the list of objects ready for finalization */
2189 scan_finalizer_entries (fin_ready_list, start, end);
2190 scan_finalizer_entries (critical_fin_list, start, end);
/*
 * Bytes added to nursery_fragments by add_nursery_frag; collect_nursery prints
 * it as the amount available after fragment creation.
 */
2193 static mword fragment_total = 0;
2195 * We found a fragment of free memory in the nursery: memzero it and if
2196 * it is big enough, add it to the list of fragments that can be used for
/*
 * frag_size:  size of the free range in bytes.
 * frag_start: first byte of the free range.
 * frag_end:   end of the free range (stored as fragment_end).
 * Ranges of at least FRAGMENT_MIN_SIZE bytes are pushed on the head of
 * nursery_fragments and counted in fragment_total; the last memset below is
 * presumably the branch for smaller ranges, which are zeroed and not listed.
 * NOTE(review): if that is right, a small range is zeroed twice when
 * nursery_clear_policy is CLEAR_AT_GC - harmless but redundant; confirm.
 */
2200 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
2203 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
2204 /* memsetting just the first chunk start is bound to provide better cache locality */
2205 if (nursery_clear_policy == CLEAR_AT_GC)
2206 memset (frag_start, 0, frag_size);
2207 /* Not worth dealing with smaller fragments: need to tune */
2208 if (frag_size >= FRAGMENT_MIN_SIZE) {
2209 fragment = alloc_fragment ();
2210 fragment->fragment_start = frag_start;
2211 fragment->fragment_limit = frag_start;
2212 fragment->fragment_end = frag_end;
2213 fragment->next = nursery_fragments;
2214 nursery_fragments = fragment;
2215 fragment_total += frag_size;
2217 /* Clear unused fragments, pinning depends on this */
2218 memset (frag_start, 0, frag_size);
/*
 * Walks los_object_list and, for each large object that is pinned and not yet
 * flagged as scanned, runs scan_object on it for [start_addr, end_addr) and sets
 * its scanned flag so it is not scanned again.
 * finish_gray_stack stores the return value in bigo_scanned_num and keeps
 * looping while it is non-zero; the counting and return lines are not visible
 * here - presumably the number of objects scanned by this call.
 */
2223 scan_needed_big_objects (char *start_addr, char *end_addr)
2225 LOSObject *big_object;
2227 for (big_object = los_object_list; big_object; big_object = big_object->next) {
2228 if (!big_object->scanned && object_is_pinned (big_object->data)) {
2229 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
2230 scan_object (big_object->data, start_addr, end_addr);
2231 big_object->scanned = TRUE;
/*
 * Completes the copy phase for the range [start_addr, end_addr):
 *   1. drains the gray area from gray_first up to gray_objects with scan_object
 *      (scan_object returns where to continue; gray_objects may move as more
 *      objects get copied);
 *   2. repeatedly calls finalize_in_range and scan_needed_big_objects and drains
 *      the gray area again, until num_ready_finalizers did not change during the
 *      pass and no big object was scanned;
 *   3. publishes the end of the copied data as to_space and as
 *      to_space_section->next_data;
 *   4. calls null_link_in_range for the disappearing links.
 */
2239 finish_gray_stack (char *start_addr, char *end_addr)
2243 int fin_ready, bigo_scanned_num;
2247 * We copied all the reachable objects. Now it's the time to copy
2248 * the objects that were not referenced by the roots, but by the copied objects.
2249 * we built a stack of objects pointed to by gray_start: they are
2250 * additional roots and we may add more items as we go.
2251 * We loop until gray_start == gray_objects which means no more objects have
2252 * been added. Note this is iterative: no recursion is involved.
2253 * We need to walk the LO list as well in search of marked big objects
2254 * (use a flag since this is needed only on major collections). We need to loop
2255 * here as well, so keep a counter of marked LO (increasing it in copy_object).
2256 * To achieve better cache locality and cache usage, we drain the gray stack
2257 * frequently, after each object is copied, and just finish the work here.
2259 gray_start = gray_first;
2260 while (gray_start < gray_objects) {
2261 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2262 gray_start = scan_object (gray_start, start_addr, end_addr);
2265 //scan_old_generation (start_addr, end_addr);
2266 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
2267 /* walk the finalization queue and move also the objects that need to be
2268 * finalized: use the finalized objects as new roots so the objects they depend
2269 * on are also not reclaimed. As with the roots above, only objects in the nursery
2270 * are marked/copied.
2271 * We need a loop here, since objects ready for finalizers may reference other objects
2272 * that are fin-ready. Speedup with a flag?
2275 fin_ready = num_ready_finalizers;
2276 finalize_in_range (start_addr, end_addr);
2277 bigo_scanned_num = scan_needed_big_objects (start_addr, end_addr);
2279 /* drain the new stack that might have been created */
2280 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2281 while (gray_start < gray_objects) {
2282 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2283 gray_start = scan_object (gray_start, start_addr, end_addr);
2285 } while (fin_ready != num_ready_finalizers || bigo_scanned_num);
2287 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
2288 to_space = gray_start;
2289 to_space_section->next_data = to_space;
2292 * handle disappearing links
2293 * Note we do this after checking the finalization queue because if an object
2294 * survives (at least long enough to be finalized) we don't clear the link.
2295 * This also deals with a possible issue with the monitor reclamation: with the Boehm
2296 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
/* Runs after the finalization loop above, for the reason given in the note. */
2299 null_link_in_range (start_addr, end_addr);
2301 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/*
 * Number of pinned objects left by the last nursery collection; set by
 * collect_nursery and read by minor_collect_or_expand_inner for diagnostics.
 */
2304 static int last_num_pinned = 0;
/*
 * Rebuilds the list of free nursery fragments around the pinned objects
 * pin_queue [start_pin .. end_pin):
 *   - the previous nursery_fragments list becomes fragment_freelist;
 *   - the nursery scan_starts table is cleared;
 *   - for each pinned object: it is unpinned, recorded in scan_starts, the gap
 *     in front of it is passed to add_nursery_frag, and frag_start moves past
 *     the object (its size rounded up to ALLOC_ALIGN);
 *   - the tail up to nursery_real_end is passed to add_nursery_frag;
 *   - when no fragment could be listed, the pinned objects are logged;
 *   - nursery_next and nursery_frag_real_end are reset to NULL.
 * nursery_last_pinned_end is left at the end of the last pinned object.
 */
2307 build_nursery_fragments (int start_pin, int end_pin)
2309 char *frag_start, *frag_end;
2313 /* FIXME: handle non-NULL fragment_freelist */
2314 fragment_freelist = nursery_fragments;
2315 nursery_fragments = NULL;
2316 frag_start = nursery_start;
2318 /* clear scan starts */
2319 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
2320 for (i = start_pin; i < end_pin; ++i) {
2321 frag_end = pin_queue [i];
2322 /* remove the pin bit from pinned objects */
2323 unpin_object (frag_end);
/* The table slot is chosen by the object's offset from the section start, in SCAN_START_SIZE units. */
2324 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
2325 frag_size = frag_end - frag_start;
2327 add_nursery_frag (frag_size, frag_start, frag_end);
/* frag_size is reused here for the pinned object's own size, rounded up to ALLOC_ALIGN. */
2328 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2329 frag_size += ALLOC_ALIGN - 1;
2330 frag_size &= ~(ALLOC_ALIGN - 1);
2331 frag_start = (char*)pin_queue [i] + frag_size;
2333 * pin_queue [i] might point to a half-constructed string or vector whose
2334 * length field is not set. In that case, frag_start points inside the
2335 * (zero initialized) object. Find the end of the object by scanning forward.
2338 if (is_maybe_half_constructed (pin_queue [i])) {
2341 /* This is also hit for zero length arrays/strings */
2343 /* Find the end of the TLAB which contained this allocation */
2344 tlab_end = find_tlab_next_from_address (pin_queue [i]);
2347 while ((frag_start < tlab_end) && *(mword*)frag_start == 0)
2348 frag_start += sizeof (mword);
2351 * FIXME: The object is either not allocated in a TLAB, or it isn't a
2352 * half constructed object.
2357 nursery_last_pinned_end = frag_start;
2358 frag_end = nursery_real_end;
2359 frag_size = frag_end - frag_start;
2361 add_nursery_frag (frag_size, frag_start, frag_end);
2362 if (!nursery_fragments) {
2363 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
2364 for (i = start_pin; i < end_pin; ++i) {
2365 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2370 nursery_next = nursery_frag_real_end = NULL;
2372 /* Clear TLABs for all threads */
2376 /* FIXME: later reduce code duplication here with the above
2377 * We don't keep track of section fragments for non-nursery sections yet, so
/*
 * Old-generation counterpart of build_nursery_fragments for one section: clears
 * the section's scan_starts, then walks the pinned objects
 * pin_queue [pin_queue_start .. pin_queue_end), unpinning each, zeroing the gap
 * in front of it and raising section->next_data to the end of the furthest
 * pinned object. The tail up to section->end_data is zeroed as well. No fragment
 * list is built here; free ranges are only cleared.
 * NOTE(review): unpin_object is applied to the queued address before the check
 * that clamps frag_end to the section end - confirm entries past the section
 * cannot occur or are safe to unpin.
 */
2381 build_section_fragments (GCMemSection *section)
2384 char *frag_start, *frag_end;
2387 /* clear scan starts */
2388 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
2389 frag_start = section->data;
2390 section->next_data = section->data;
2391 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
2392 frag_end = pin_queue [i];
2393 /* remove the pin bit from pinned objects */
2394 unpin_object (frag_end);
2395 if (frag_end >= section->data + section->size) {
2396 frag_end = section->data + section->size;
2398 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
2400 frag_size = frag_end - frag_start;
2402 memset (frag_start, 0, frag_size);
/* frag_size is reused for the pinned object's size, rounded up to ALLOC_ALIGN. */
2403 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2404 frag_size += ALLOC_ALIGN - 1;
2405 frag_size &= ~(ALLOC_ALIGN - 1);
2406 frag_start = (char*)pin_queue [i] + frag_size;
2407 section->next_data = MAX (section->next_data, frag_start);
2409 frag_end = section->end_data;
2410 frag_size = frag_end - frag_start;
2412 memset (frag_start, 0, frag_size);
/*
 * Calls precisely_scan_objects_from on every root record registered in the
 * table selected by root_type, passing the record's bounds and descriptor.
 * addr_start / addr_end are forwarded unchanged as the range arguments.
 */
2416 scan_from_registered_roots (char *addr_start, char *addr_end, int root_type)
2420 for (i = 0; i < roots_hash_size [root_type]; ++i) {
2421 for (root = roots_hash [root_type][i]; root; root = root->next) {
2422 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2423 precisely_scan_objects_from ((void**)root->start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc);
2429 * Collect objects in the nursery.
/*
 * Minor collection. Steps visible here, in order:
 *   1. nursery_next is raised to at least nursery_last_pinned_end and
 *      nursery_real_end, so the range collected is nursery_start..nursery_next;
 *   2. optional check_consistency;
 *   3. under CLEAR_AT_TLAB_CREATION the unused nursery memory is zeroed;
 *   4. to_space is set up, with a new section allocated when the room left is
 *      smaller than the size of the range being collected;
 *   5. pinning: pin_from_roots, optimize_pin_queue, pin_objects_from_addresses;
 *   6. roots: remsets, the pinned objects themselves, ROOT_TYPE_NORMAL
 *      registered roots, thread data and pinned-alloc objects;
 *   7. finish_gray_stack, then build_nursery_fragments;
 *   8. statistics, last_num_pinned, and a finalizer-thread notification when
 *      either finalizer list is non-empty.
 * The caller must already have stopped the world (see the note below).
 * requested_size is not referenced in the lines visible here.
 */
2432 collect_nursery (size_t requested_size)
2434 GCMemSection *section;
2435 size_t max_garbage_amount;
2437 char *orig_nursery_next;
2439 TV_DECLARE (all_atv);
2440 TV_DECLARE (all_btv);
/* Keep the pre-collection value: step 3 zeroes from it, not from the raised nursery_next. */
2445 orig_nursery_next = nursery_next;
2446 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2447 /* FIXME: optimize later to use the higher address where an object can be present */
2448 nursery_next = MAX (nursery_next, nursery_real_end);
2450 if (consistency_check_at_minor_collection)
2451 check_consistency ();
2453 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
2454 max_garbage_amount = nursery_next - nursery_start;
2456 /* Clear all remaining nursery fragments, pinning depends on this */
2457 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2458 g_assert (orig_nursery_next <= nursery_frag_real_end);
2459 memset (orig_nursery_next, 0, nursery_frag_real_end - orig_nursery_next);
2460 for (frag = nursery_fragments; frag; frag = frag->next) {
2461 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2466 * not enough room in the old generation to store all the possible data from
2467 * the nursery in a single continuous space.
2468 * We reset to_space if we allocated objects in degraded mode.
2470 if (to_space_section)
2471 to_space = gray_objects = gray_first = to_space_section->next_data;
2472 if ((to_space_end - to_space) < max_garbage_amount) {
/* NOTE(review): the assert below runs only after alloc_section has been called. */
2473 section = alloc_section (nursery_section->size * 4);
2474 g_assert (nursery_section->size >= max_garbage_amount);
2475 to_space = gray_objects = gray_first = section->next_data;
2476 to_space_end = section->end_data;
2477 to_space_section = section;
2479 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2480 nursery_section->next_data = nursery_next;
2483 mono_stats.minor_gc_count ++;
2484 /* world must be stopped already */
2485 TV_GETTIME (all_atv);
2487 /* pin from pinned handles */
2488 pin_from_roots (nursery_start, nursery_next);
2489 /* identify pinned objects */
2490 optimize_pin_queue (0);
/* The return value replaces next_pin_slot, i.e. it is the number of queue entries kept. */
2491 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2493 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2494 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2497 * walk all the roots and copy the young objects to the old generation,
2498 * starting from to_space
2501 scan_from_remsets (nursery_start, nursery_next);
2502 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2504 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2506 /* the pinned objects are roots */
2507 for (i = 0; i < next_pin_slot; ++i) {
2508 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2509 scan_object (pin_queue [i], nursery_start, nursery_next);
2511 /* registered roots, this includes static fields */
2512 scan_from_registered_roots (nursery_start, nursery_next, ROOT_TYPE_NORMAL);
2513 scan_thread_data (nursery_start, nursery_next, TRUE);
2514 /* alloc_pinned objects */
2515 scan_from_pinned_objects (nursery_start, nursery_next);
2517 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2519 finish_gray_stack (nursery_start, nursery_next);
2521 /* walk the pin_queue, build up the fragment list of free memory, unmark
2522 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2525 build_nursery_fragments (0, next_pin_slot);
2527 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %zd bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2529 TV_GETTIME (all_btv);
2530 mono_stats.minor_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2532 /* prepare the pin queue for the next collection */
2533 last_num_pinned = next_pin_slot;
2535 if (fin_ready_list || critical_fin_list) {
2536 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2537 mono_gc_finalize_notify ();
/*
 * Major collection, implemented as a copy of everything movable into one new
 * section. Steps visible here, in order:
 *   1. under CLEAR_AT_TLAB_CREATION the unused nursery memory is zeroed;
 *   2. if the MONO_GC_NO_MAJOR environment variable is set, collect_nursery (0)
 *      is run (presumably instead of the rest - the return is not visible);
 *   3. pinning, per section, per large object and per pinned chunk, using
 *      pin_from_roots on each address range;
 *   4. a to-space section sized for the sum of all section contents is allocated;
 *   5. roots are scanned over the whole address range (heap_start = NULL,
 *      heap_end = (char*)-1): pinned objects, NORMAL and WBARRIER registered
 *      roots, thread data, pinned-alloc objects and both finalizer lists;
 *   6. scan_needed_big_objects and finish_gray_stack complete the copy;
 *   7. sweep: unreferenced large objects are freed, pinned chunks are swept,
 *      sections without pinned objects are freed and the others get
 *      build_section_fragments; the nursery fragments are rebuilt;
 *   8. statistics, and a finalizer-thread notification when either finalizer
 *      list is non-empty.
 */
2542 major_collection (void)
2544 GCMemSection *section, *prev_section;
2545 LOSObject *bigobj, *prevbo;
2550 TV_DECLARE (all_atv);
2551 TV_DECLARE (all_btv);
2554 /* FIXME: only use these values for the precise scan
2555 * note that to_space pointers should be excluded anyway...
2557 char *heap_start = NULL;
2558 char *heap_end = (char*)-1;
2559 size_t copy_space_required = 0;
2562 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2564 mono_stats.major_gc_count ++;
2566 /* Clear all remaining nursery fragments, pinning depends on this */
2567 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2568 g_assert (nursery_next <= nursery_frag_real_end);
2569 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
2570 for (frag = nursery_fragments; frag; frag = frag->next) {
2571 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2576 * FIXME: implement Mark/Compact
2577 * Until that is done, we can just apply mostly the same alg as for the nursery:
2578 * this means we need a big section to potentially copy all the other sections, so
2579 * it is not ideal specially with large heaps.
2581 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2582 collect_nursery (0);
2585 TV_GETTIME (all_atv);
2586 /* FIXME: make sure the nursery next_data ptr is updated */
2587 nursery_section->next_data = nursery_real_end;
2588 /* we should also coalesce scanning from sections close to each other
2589 * and deal with pointers outside of the sections later.
2591 /* The remsets are not useful for a major collection */
2593 /* world must be stopped already */
2595 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2596 for (section = section_list; section; section = section->next) {
/* Each section gets its own slice of pin_queue, starting at the current end; it is empty until entries are found. */
2597 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2598 pin_from_roots (section->data, section->next_data);
2599 if (count != next_pin_slot) {
2601 optimize_pin_queue (count);
2602 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2603 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2604 section->pin_queue_end = next_pin_slot = count + reduced_to;
/* Room for every byte in use across all sections, in case all of it survives. */
2606 copy_space_required += (char*)section->next_data - (char*)section->data;
2608 /* identify possible pointers to the inside of large objects */
2609 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2610 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2611 count = next_pin_slot;
2612 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2613 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2614 if (next_pin_slot != count) {
/* Any hit pins the whole large object; the addresses just queued are dropped again. */
2615 next_pin_slot = count;
2616 pin_object (bigobj->data);
2617 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %zd from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2620 /* look for pinned addresses for pinned-alloc objects */
2621 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2622 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2623 count = next_pin_slot;
2624 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2625 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2626 if (next_pin_slot != count) {
/* The chunk consumes the queued addresses, after which they are dropped from the queue. */
2627 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2628 next_pin_slot = count;
2633 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2634 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2636 /* allocate the big to space */
2637 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %zd\n", copy_space_required));
2638 section = alloc_section (copy_space_required);
2639 to_space = gray_objects = gray_first = section->next_data;
2640 to_space_end = section->end_data;
2641 to_space_section = section;
2643 /* the old generation doesn't need to be scanned (no remembered sets or card
2644 * table needed either): the only objects that must survive are those pinned and
2645 * those referenced by the precise roots.
2646 * mark any section without pinned objects, so we can free it since we will be able to
2647 * move all the objects.
2649 /* the pinned objects are roots (big objects are included in this list, too) */
2650 for (i = 0; i < next_pin_slot; ++i) {
2651 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2652 scan_object (pin_queue [i], heap_start, heap_end);
2654 /* registered roots, this includes static fields */
2655 scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_NORMAL);
2656 scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_WBARRIER);
2658 scan_thread_data (heap_start, heap_end, TRUE);
2659 /* alloc_pinned objects */
2660 scan_from_pinned_objects (heap_start, heap_end);
2661 /* scan the list of objects ready for finalization */
2662 scan_finalizer_entries (fin_ready_list, heap_start, heap_end);
2663 scan_finalizer_entries (critical_fin_list, heap_start, heap_end);
2665 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2667 /* we need to go over the big object list to see if any was marked and scan it
2668 * And we need to make this in a loop, considering that objects referenced by finalizable
2669 * objects could reference big objects (this happens in finish_gray_stack ())
2671 scan_needed_big_objects (heap_start, heap_end);
2672 /* all the objects in the heap */
2673 finish_gray_stack (heap_start, heap_end);
2675 /* sweep the big objects list */
2677 for (bigobj = los_object_list; bigobj;) {
/* A pinned large object survived: clear its pin bit and scanned flag for the next collection. */
2678 if (object_is_pinned (bigobj->data)) {
2679 unpin_object (bigobj->data);
2680 bigobj->scanned = FALSE;
2683 /* not referenced anywhere, so we can free it */
2685 prevbo->next = bigobj->next;
2687 los_object_list = bigobj->next;
2689 bigobj = bigobj->next;
2690 free_large_object (to_free);
2694 bigobj = bigobj->next;
2696 /* unpin objects from the pinned chunks and free the unmarked ones */
2697 sweep_pinned_objects ();
2699 /* free the unused sections */
2700 prev_section = NULL;
2701 for (section = section_list; section;) {
2702 /* to_space doesn't need handling here and the nursery is special */
2703 if (section == to_space_section || section == nursery_section) {
2704 prev_section = section;
2705 section = section->next;
2708 /* no pinning object, so the section is free */
2709 if (section->pin_queue_start == section->pin_queue_end) {
2710 GCMemSection *to_free;
/* Unlink from section_list before freeing; free_mem_section does not unlink. */
2712 prev_section->next = section->next;
2714 section_list = section->next;
2716 section = section->next;
2717 free_mem_section (to_free);
2720 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2721 build_section_fragments (section);
2723 prev_section = section;
2724 section = section->next;
2727 /* walk the pin_queue, build up the fragment list of free memory, unmark
2728 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2731 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2733 TV_GETTIME (all_btv);
2734 mono_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2735 /* prepare the pin queue for the next collection */
2737 if (fin_ready_list || critical_fin_list) {
2738 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2739 mono_gc_finalize_notify ();
2744 * Allocate a new section of memory to be used as old generation.
/*
 * Creates a new old-generation section and links it at the head of section_list.
 * size: number of bytes the caller needs. The section size starts from
 *       next_section_size; when size is larger, the visible lines round the size
 *       up to a multiple of pagesize (the assignment of size to new_size is
 *       presumably on a line not visible here).
 * section_size_used is incremented and, once it exceeds 3, reset to 0 while
 * next_section_size is doubled, capped at max_section_size.
 * The section record and its scan_starts table come from get_internal_mem, the
 * data from get_os_memory; none of the results is checked for failure.
 * Callers use the returned GCMemSection* (the return line is not visible).
 */
2746 static GCMemSection*
2747 alloc_section (size_t size)
2749 GCMemSection *section;
2752 size_t new_size = next_section_size;
2754 if (size > next_section_size) {
/* Round up to a whole number of pages; assumes pagesize is a power of two - TODO confirm. */
2756 new_size += pagesize - 1;
2757 new_size &= ~(pagesize - 1);
2759 section_size_used++;
2760 if (section_size_used > 3) {
2761 section_size_used = 0;
2762 next_section_size *= 2;
2763 if (next_section_size > max_section_size)
2764 next_section_size = max_section_size;
2766 section = get_internal_mem (sizeof (GCMemSection));
2767 data = get_os_memory (new_size, TRUE);
2768 section->data = section->next_data = data;
2769 section->size = new_size;
2770 section->end_data = data + new_size;
2771 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2772 total_alloc += new_size;
2773 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", new_size, total_alloc));
/* NOTE(review): the next two assignments repeat the ones made a few lines above and look redundant. */
2774 section->data = data;
2775 section->size = new_size;
/* One scan-start slot per SCAN_START_SIZE bytes of the section. */
2776 scan_starts = new_size / SCAN_START_SIZE;
2777 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2778 section->num_scan_start = scan_starts;
2779 section->role = MEMORY_ROLE_GEN1;
2781 /* add to the section list */
2782 section->next = section_list;
2783 section_list = section;
// free_mem_section: release one section. The data area goes back to the OS via
// free_os_memory, the descriptor goes back via free_internal_mem, and
// total_alloc is reduced by the section size.
2789 free_mem_section (GCMemSection *section)
// Copy both fields out first: size is still needed after the descriptor is freed.
2791 char *data = section->data;
2792 size_t size = section->size;
2793 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %zd\n", data, size));
2794 free_os_memory (data, size);
2795 free_internal_mem (section);
2796 total_alloc -= size;
2800 * When deciding if it's better to collect or to expand, keep track
2801 * of how much garbage was reclaimed with the last collection: if it's too
2803 * This is called when we could not allocate a small object.
// minor_collect_or_expand_inner: slow path for a small allocation of size bytes
// that could not be satisfied. In the visible lines it runs collect_nursery and
// then asks search_fragment_for_size for room. The attribute requests that the
// function is never inlined.
// NOTE(review): the branches that handle a missing nursery and a failed search
// are only partly present in this listing.
2805 static void __attribute__((noinline))
2806 minor_collect_or_expand_inner (size_t size)
2808 int do_minor_collection = 1;
2810 if (!nursery_section) {
2814 if (do_minor_collection) {
2816 collect_nursery (size);
2817 DEBUG (2, fprintf (gc_debug_file, "Heap size: %zd, LOS size: %zd\n", total_alloc, los_memory_usage));
2819 /* this also sets the proper pointers for the next allocation */
2820 if (!search_fragment_for_size (size)) {
2822 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2823 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned));
// Diagnostic only: log each of the last_num_pinned entries of pin_queue.
2824 for (i = 0; i < last_num_pinned; ++i) {
2825 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2830 //report_internal_mem_usage ();
2834 * ######################################################################
2835 * ######## Memory allocation from the OS
2836 * ######################################################################
2837 * This section of code deals with getting memory from the OS and
2838 * allocating memory for GC-internal data structures.
2839 * Internal memory can be handled with a freelist for small objects.
2843 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2844 * This must not require any lock.
// get_os_memory: map a page-rounded block of at least size bytes through mono_valloc.
// size:     requested byte count; rounded up to a multiple of pagesize here.
// activate: non-zero asks for read and write protection, zero for MONO_MMAP_NONE.
// NOTE(review): the return statement is not visible in this listing; callers
// treat the result as the mapped address, presumably ptr.
2847 get_os_memory (size_t size, int activate)
2850 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
// The mapping is always private and anonymous.
2852 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
// Round up to whole pages; the mask form is only exact for a power-of-two pagesize.
2853 size += pagesize - 1;
2854 size &= ~(pagesize - 1);
2855 ptr = mono_valloc (0, size, prot_flags);
2860 * Free the memory returned by get_os_memory (), returning it to the OS.
// free_os_memory: unmap size bytes starting at addr.
// NOTE(review): get_os_memory obtains the mapping through mono_valloc, while
// this side calls munmap directly and ignores its result. Confirm whether the
// matching wrapper (mono_vfree, if available in this tree) should be used.
// Callers in this file pass the unrounded section size or a page-rounded size;
// presumably both unmap the same pages, but verify.
2863 free_os_memory (void *addr, size_t size)
2865 munmap (addr, size);
// report_pinned_chunk: print usage statistics for one PinnedChunk to stdout.
// chunk: the chunk to describe.
// seq:   used only as the chunk number in the first printed line.
2872 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2874 int i, free_pages, num_free, free_mem;
// First pass over the pages: a zero page_sizes entry is tested here,
// presumably to count the page into free_pages (the counting line is not visible).
2876 for (i = 0; i < chunk->num_pages; ++i) {
2877 if (!chunk->page_sizes [i])
2880 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
// Free memory starts with the wholly free pages ...
2881 free_mem = FREELIST_PAGESIZE * free_pages;
// ... then adds, per slot, the slot size times the number of free-list items.
2882 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2883 if (!chunk->free_list [i])
2886 p = chunk->free_list [i];
2891 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2892 free_mem += freelist_sizes [i] * num_free;
2894 printf ("\tfree memory in chunk: %d\n", free_mem);
// report_internal_mem_usage: debugging aid that prints a report for every chunk
// on internal_chunk_list and then for every chunk on pinned_chunk_list.
// Marked G_GNUC_UNUSED; the only call visible in this part of the file is commented out.
2900 static G_GNUC_UNUSED void
2901 report_internal_mem_usage (void) {
2904 printf ("Internal memory usage:\n");
// i supplies the sequence number shown for each chunk.
2906 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2907 report_pinned_chunk (chunk, i++);
2909 printf ("Pinned memory usage:\n");
2911 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2912 report_pinned_chunk (chunk, i++);
2917 * the array of pointers from @start to @end contains conservative
2918 * pointers to objects inside @chunk: mark each referenced object
// mark_pinned_from_addresses: for each conservative pointer in the array
// from start up to (not including) end, find the slot of chunk that contains
// it and mark the object living there.
// NOTE(review): the statement that performs the marking is not visible in this
// listing; only the surrounding test and the debug message are.
2922 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2924 for (; start < end; start++) {
2925 char *addr = *start;
// Byte offset of the pointer inside the chunk, then the page it falls in.
// NOTE(review): the pointer difference is stored in an int.
2926 int offset = (char*)addr - (char*)chunk;
2927 int page = offset / FREELIST_PAGESIZE;
// Page 0 starts with the chunk bookkeeping, so its offsets are measured from start_data.
2928 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2929 int slot_size = chunk->page_sizes [page];
2931 /* the page is not allocated */
2934 /* would be faster if we restrict the sizes to power of two,
2935 * but that's a waste of memory: need to measure. it could reduce
2936 * fragmentation since there are less pages needed, if for example
2937 * someone interns strings of each size we end up with one page per
2938 * interned string (still this is just ~40 KB): with more fine-grained sizes
2939 * this increases the number of used pages.
// Round the offset down to the start of its slot and rebuild the slot address.
// The first variant is relative to start_data, the second to the page start.
2942 obj_offset /= slot_size;
2943 obj_offset *= slot_size;
2944 addr = (char*)chunk->start_data + obj_offset;
2946 obj_offset /= slot_size;
2947 obj_offset *= slot_size;
2948 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2951 /* if the vtable is inside the chunk it's on the freelist, so skip */
// NOTE(review): this range test uses start_data as the lower bound and a strict
// greater-than on the chunk end, while the sweep and scan functions in this file
// test against the chunk base and use greater-or-equal. Confirm which is intended.
2952 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2954 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
// sweep_pinned_objects: walk every slot of every page of every chunk on
// pinned_chunk_list. A slot whose first word is non-zero and points outside the
// chunk is treated as a live object header; pinned ones are reported as unmarked,
// the rest are reported as about to be freed.
// NOTE(review): the statements that actually unmark or free are not visible in
// this listing, and the FIXME below says freed slots are not yet returned to the freelist.
2960 sweep_pinned_objects (void)
2967 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2968 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2969 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2970 for (i = 0; i < chunk->num_pages; ++i) {
2971 obj_size = chunk->page_sizes [i];
// Page 0 begins at start_data (after the bookkeeping); other pages begin at
// their page boundary. endp is the end of the current page in both cases.
2974 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2975 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2976 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
// Only slots that fit completely inside the page are visited.
2977 while (p + obj_size <= endp) {
2979 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2980 /* if the first word (the vtable) is outside the chunk we have an object */
2981 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2982 if (object_is_pinned (ptr)) {
2984 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2986 /* FIXME: add to freelist */
2987 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
// scan_from_pinned_objects: walk every slot of every page of every chunk on
// pinned_chunk_list and call scan_object on each slot that holds an object.
// addr_start, addr_end: passed through unchanged to scan_object.
2997 scan_from_pinned_objects (char *addr_start, char *addr_end)
3004 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
3005 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
3006 DEBUG (6, fprintf (gc_debug_file, "Scanning pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
3007 for (i = 0; i < chunk->num_pages; ++i) {
3008 obj_size = chunk->page_sizes [i];
// Same page geometry as the sweep: page 0 starts at start_data, later pages
// at their page boundary.
3011 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
3012 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
3013 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
3014 while (p + obj_size <= endp) {
3016 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
3017 /* if the first word (the vtable) is outside the chunk we have an object */
3018 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
3019 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of alloc_pinned %p (%s)\n", i, ptr, safe_name (ptr)));
3020 // FIXME: Put objects without references into separate chunks
3021 // which do not need to be scanned
3022 scan_object ((char*)ptr, addr_start, addr_end);
3031 * Find the slot number in the freelist for memory chunks that
3032 * can contain @size objects.
// slot_for_size: linear search of freelist_sizes for the first slot whose size
// is at least size. Reaching the end of the table without a match is treated
// as a programming error via g_assert_not_reached.
// NOTE(review): the statement executed on a match is not visible in this
// listing; presumably it returns slot.
3035 slot_for_size (size_t size)
3038 /* do a binary search or lookup table later. */
3039 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
3040 if (freelist_sizes [slot] >= size)
3043 g_assert_not_reached ();
3048 * Build a free list for @size memory chunks from the memory area between
3049 * start_page and end_page.
// build_freelist: thread a singly linked free list through the memory between
// start_page and end_page, one node every size bytes, and install its head in
// chunk->free_list [slot].
// chunk: owner of the free list; slot: index that must currently be empty.
// size: distance in bytes between consecutive nodes.
3052 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
3056 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
3057 p = (void**)start_page;
// end is the last address at which a whole item of size bytes still fits.
3058 end = (void**)(end_page - size);
// The slot must not already have a list: the head is overwritten just below.
3059 g_assert (!chunk->free_list [slot]);
3060 chunk->free_list [slot] = p;
// Each node stores the address of the node size bytes further on.
// NOTE(review): the lines that advance p and terminate the list are not visible here.
3061 while ((char*)p + size <= (char*)end) {
3063 *p = (void*)((char*)p + size);
3067 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
// alloc_pinned_chunk: map a new PinnedChunk from the OS, lay out its
// bookkeeping arrays at the start of the mapping and seed the first page with a
// free list of PINNED_FIRST_SLOT_SIZE items.
// size: payload size hint; one page is added, the result is page-rounded and
//       raised to at least twice PINNED_CHUNK_MIN_SIZE.
// NOTE(review): the result of get_os_memory is used without a check, and the
// return statement is not visible in this listing.
3071 alloc_pinned_chunk (size_t size)
3076 size += pagesize; /* at least one page */
3077 size += pagesize - 1;
3078 size &= ~(pagesize - 1);
3079 if (size < PINNED_CHUNK_MIN_SIZE * 2)
3080 size = PINNED_CHUNK_MIN_SIZE * 2;
3081 chunk = get_os_memory (size, TRUE);
3082 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
3083 total_alloc += size;
3085 /* setup the bookkeeping fields */
3086 chunk->num_pages = size / FREELIST_PAGESIZE;
// Layout inside the mapping, each part aligned up to ALLOC_ALIGN:
// page_sizes (one int per page), then free_list (one pointer per slot),
// then start_data where allocatable memory begins.
3087 offset = G_STRUCT_OFFSET (PinnedChunk, data);
3088 chunk->page_sizes = (void*)((char*)chunk + offset);
3089 offset += sizeof (int) * chunk->num_pages;
3090 offset += ALLOC_ALIGN - 1;
3091 offset &= ~(ALLOC_ALIGN - 1);
3092 chunk->free_list = (void*)((char*)chunk + offset);
3093 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
3094 offset += ALLOC_ALIGN - 1;
3095 offset &= ~(ALLOC_ALIGN - 1);
3096 chunk->start_data = (void*)((char*)chunk + offset);
3098 /* allocate the first page to the freelist */
3099 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
// Only the part of page 0 after the bookkeeping (from start_data) is threaded.
3100 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
3101 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %zd\n", chunk, size));
// Widen the global address range covered by pinned chunks.
3102 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
3103 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
3107 /* assumes freelist for slot is empty, so try to alloc a new page */
// get_chunk_freelist: take one item for slot out of chunk. If the slot list has
// an item it is popped; otherwise the pages are searched for one whose
// page_sizes entry is zero, that page is dedicated to this slot size, a free
// list is built over it and its first item is popped.
// NOTE(review): the guards around the two pops and the return statements are
// not visible in this listing.
3109 get_chunk_freelist (PinnedChunk *chunk, int slot)
3113 p = chunk->free_list [slot];
3115 chunk->free_list [slot] = *p;
3118 for (i = 0; i < chunk->num_pages; ++i) {
// A non-zero page_sizes entry is tested here, presumably to skip pages already in use.
3120 if (chunk->page_sizes [i])
3122 size = freelist_sizes [slot];
3123 chunk->page_sizes [i] = size;
3124 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
3128 p = chunk->free_list [slot];
3130 chunk->free_list [slot] = *p;
// alloc_from_freelist: allocate size bytes from the chunks on pinned_chunk_list.
// Three attempts are visible, in order:
//   1. pop from an existing free list for the slot in any chunk,
//   2. ask each chunk for an item via get_chunk_freelist,
//   3. allocate a new chunk, push it on pinned_chunk_list and take the item from it.
// NOTE(review): the success tests and returns between the attempts are not
// visible in this listing.
3137 alloc_from_freelist (size_t size)
3141 PinnedChunk *pchunk;
3142 slot = slot_for_size (size);
3143 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
3144 g_assert (size <= freelist_sizes [slot]);
3145 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
3146 void **p = pchunk->free_list [slot];
3148 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
3149 pchunk->free_list [slot] = *p;
3153 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
3154 res = get_chunk_freelist (pchunk, slot);
3158 pchunk = alloc_pinned_chunk (size);
3159 /* FIXME: handle OOM */
3160 pchunk->next = pinned_chunk_list;
3161 pinned_chunk_list = pchunk;
3162 res = get_chunk_freelist (pchunk, slot);
3166 /* used for the GC-internal data structures */
3167 /* FIXME: add support for bigger sizes by allocating more than one page
// get_internal_mem: allocate size bytes for GC-internal data structures.
// The first visible statement returns zero-filled memory from calloc.
// NOTE(review): the freelist code after that return mirrors alloc_from_freelist
// but works on internal_chunk_list. As shown it cannot be reached unless
// preprocessor guards, not visible in this listing, select between the two paths.
3171 get_internal_mem (size_t size)
3173 return calloc (1, size);
3177 PinnedChunk *pchunk;
3178 slot = slot_for_size (size);
3179 g_assert (size <= freelist_sizes [slot]);
3180 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3181 void **p = pchunk->free_list [slot];
3183 pchunk->free_list [slot] = *p;
3187 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3188 res = get_chunk_freelist (pchunk, slot);
3192 pchunk = alloc_pinned_chunk (size);
3193 /* FIXME: handle OOM */
3194 pchunk->next = internal_chunk_list;
3195 internal_chunk_list = pchunk;
3196 res = get_chunk_freelist (pchunk, slot);
// free_internal_mem: give addr back to the internal allocator. The visible code
// finds the chunk on internal_chunk_list whose address range contains addr,
// derives the slot from the size recorded for the page, and pushes the item on
// the front of that slot free list. An address found in no chunk is reported on
// stdout and then trips g_assert_not_reached.
// NOTE(review): lines before the loop are not visible in this listing. Since
// get_internal_mem visibly returns calloc memory, presumably a matching free
// path exists there; verify.
3202 free_internal_mem (void *addr)
3206 PinnedChunk *pchunk;
3207 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3208 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
3209 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
3210 int offset = (char*)addr - (char*)pchunk;
3211 int page = offset / FREELIST_PAGESIZE;
3212 int slot = slot_for_size (pchunk->page_sizes [page]);
// Push on the list head: the freed item stores the old head.
3214 *p = pchunk->free_list [slot];
3215 pchunk->free_list [slot] = p;
3219 printf ("free of %p failed\n", addr);
3220 g_assert_not_reached ();
3225 * ######################################################################
3226 * ######## Object allocation
3227 * ######################################################################
3228 * This section of code deals with allocating memory for objects.
3229 * There are several ways:
3230 * *) allocate large objects
3231 * *) allocate normal objects
3232 * *) fast lock-free allocation
3233 * *) allocation of pinned objects
// free_large_object: release the OS mapping that holds one large object and
// update the accounting counters.
// los_memory_usage drops by the object size; total_alloc drops by the mapped
// size, which is the object size plus the LOSObject header rounded up to whole
// pages (the same rounding alloc_large_inner applies).
3237 free_large_object (LOSObject *obj)
3239 size_t size = obj->size;
3240 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %zd\n", obj->data, obj->size));
3242 los_memory_usage -= size;
3243 size += sizeof (LOSObject);
3244 size += pagesize - 1;
3245 size &= ~(pagesize - 1);
3246 total_alloc -= size;
// The mapping starts at the LOSObject header, so obj itself is the address to unmap.
3248 free_os_memory (obj, size);
3252 * Objects with size >= 64KB are allocated in the large object space.
3253 * They are currently kept track of with a linked list.
3254 * They don't move, so there is no need to pin them during collection
3255 * and we avoid the memcpy overhead.
// alloc_large_inner: allocate an object of size bytes in its own OS mapping and
// link it at the head of los_object_list.
// vtable: used for the debug message; presumably also stored into the object
//         through vtslot (that store is not visible in this listing).
// size:   object size in bytes, without the LOSObject header.
3257 static void* __attribute__((noinline))
3258 alloc_large_inner (MonoVTable *vtable, size_t size)
3263 int just_did_major_gc = FALSE;
// Crossing the large-object threshold triggers a major collection first.
3265 if (los_memory_usage > next_los_collection) {
3266 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %zd (los already: %zu, limit: %zu)\n", size, los_memory_usage, next_los_collection));
3267 just_did_major_gc = TRUE;
3269 major_collection ();
3271 /* later increase based on a percent of the heap size */
// The next threshold is the current usage plus 5 MB.
3272 next_los_collection = los_memory_usage + 5*1024*1024;
// Mapped size: header plus object, rounded up to whole pages.
3275 alloc_size += sizeof (LOSObject);
3276 alloc_size += pagesize - 1;
3277 alloc_size &= ~(pagesize - 1);
3278 /* FIXME: handle OOM */
3279 obj = get_os_memory (alloc_size, TRUE);
3281 vtslot = (void**)obj->data;
// total_alloc counts mapped bytes; los_memory_usage counts object bytes only.
3283 total_alloc += alloc_size;
3284 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
3285 obj->next = los_object_list;
3286 los_object_list = obj;
3287 los_memory_usage += size;
3289 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %zd\n", obj->data, vtable, vtable->klass->name, size));
3293 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
3294 * an object of size @size
3295 * Return FALSE if not found (which means we need a collection)
// search_fragment_for_size: first-fit search of nursery_fragments for a
// fragment of at least size bytes. On a hit the fragment is unlinked, it
// becomes the current allocation window (nursery_next to nursery_frag_real_end)
// and its Fragment record is pushed on fragment_freelist for reuse.
// NOTE(review): the return statements are not visible in this listing.
3298 search_fragment_for_size (size_t size)
3300 Fragment *frag, *prev;
3301 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size));
// Before leaving the current window, zero whatever is left of it when memory
// is cleared at TLAB creation time.
3303 if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3304 /* Clear the remaining space, pinning depends on this */
3305 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
3308 for (frag = nursery_fragments; frag; frag = frag->next) {
3309 if (size <= (frag->fragment_end - frag->fragment_start)) {
3310 /* remove from the list */
3312 prev->next = frag->next;
3314 nursery_fragments = frag->next;
3315 nursery_next = frag->fragment_start;
3316 nursery_frag_real_end = frag->fragment_end;
3318 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %zd (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
3319 frag->next = fragment_freelist;
3320 fragment_freelist = frag;
3329 * size is already rounded up and we hold the GC lock.
// alloc_degraded: bump-allocate size bytes directly in an old-generation
// section instead of the nursery.
// vtable: used for the debug message; presumably also stored in the new object
//         (that store is not visible in this listing).
// size:   already aligned by the caller, per the comment preceding this function.
3332 alloc_degraded (MonoVTable *vtable, size_t size)
3334 GCMemSection *section;
// Look for any section other than the nursery with enough room left.
3336 for (section = section_list; section; section = section->next) {
3337 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
3338 p = (void**)section->next_data;
// Otherwise create a fresh section, requesting four times the nursery size.
3343 section = alloc_section (nursery_section->size * 4);
3344 /* FIXME: handle OOM */
3345 p = (void**)section->next_data;
3347 section->next_data += size;
// degraded_mode accumulates the number of bytes allocated this way.
3348 degraded_mode += size;
3349 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section));
3355 * Provide a variant that takes just the vtable for small fixed-size objects.
3356 * The aligned size is already computed and stored in vt->gc_descr.
3357 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
3358 * processing. We can keep track of where objects start, for example,
3359 * so when we scan the thread stacks for pinned objects, we can start
3360 * a search for the pinned object in SCAN_START_SIZE chunks.
// mono_gc_alloc_obj: main object allocation entry point.
// vtable: type of the new object; its gc_descr must be set (asserted below).
// size:   requested size in bytes; rounded up to a multiple of ALLOC_ALIGN here.
// Visible strategy, in order:
//   1. fast path: bump tlab_next and succeed if the new end is below tlab_temp_end,
//   2. objects larger than MAX_SMALL_OBJ_SIZE go to alloc_large_inner,
//   3. otherwise allocate from the nursery directly (size above tlab_size),
//      retire the TLAB and carve a new one, or just advance tlab_temp_end,
//   4. alloc_degraded is used while degraded_mode is active.
// NOTE(review): many lines of this function (locking, else branches, returns,
// the vtable store) are not present in this listing; the comments below cover
// only the statements that are visible.
3363 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
3365 /* FIXME: handle OOM */
3370 size += ALLOC_ALIGN - 1;
3371 size &= ~(ALLOC_ALIGN - 1);
3373 g_assert (vtable->gc_descr);
// Debug aid: when collect_before_allocs is set, run a nursery collection
// before allocating.
3375 if (G_UNLIKELY (collect_before_allocs)) {
3378 if (nursery_section) {
3381 update_current_thread_stack (&dummy);
3383 collect_nursery (0);
3385 if (!degraded_mode && !search_fragment_for_size (size)) {
3387 g_assert_not_reached ();
3393 /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */
// Fast path: tlab_next is advanced unconditionally and checked afterwards.
3395 p = (void**)tlab_next;
3396 /* FIXME: handle overflow */
3397 new_next = (char*)p + size;
3398 tlab_next = new_next;
3400 if (G_LIKELY (new_next < tlab_temp_end)) {
3404 * FIXME: We might need a memory barrier here so the change to tlab_next is
3405 * visible before the vtable store.
3408 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3416 /* there are two cases: the object is too big or we run out of space in the TLAB */
3417 /* we also reach here when the thread does its first allocation after a minor
3418 * collection, since the tlab_ variables are initialized to NULL.
3419 * there can be another case (from ORP), if we cooperate with the runtime a bit:
3420 * objects that need finalizers can have the high bit set in their size
3421 * so the above check fails and we can readily add the object to the queue.
3422 * This avoids taking again the GC lock when registering, but this is moot when
3423 * doing thread-local allocation, so it may not be a good idea.
// Slow path, case 1: large objects bypass the nursery entirely.
3426 if (size > MAX_SMALL_OBJ_SIZE) {
3427 /* get ready for possible collection */
3428 update_current_thread_stack (&dummy);
3430 p = alloc_large_inner (vtable, size);
// Slow path, case 2: the bumped tlab_next reached or passed the real TLAB end.
3432 if (tlab_next >= tlab_real_end) {
3434 * Run out of space in the TLAB. When this happens, some amount of space
3435 * remains in the TLAB, but not enough to satisfy the current allocation
3436 * request. Currently, we retire the TLAB in all cases, later we could
3437 * keep it if the remaining space is above a treshold, and satisfy the
3438 * allocation directly from the nursery.
3441 /* when running in degraded mode, we continue allocing that way
3442 * for a while, to decrease the number of useless nursery collections.
// Stay degraded until DEFAULT_NURSERY_SIZE bytes have been allocated that way.
3444 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
3445 p = alloc_degraded (vtable, size);
// Requests bigger than a whole TLAB are served straight from the nursery window.
3450 if (size > tlab_size) {
3451 /* Allocate directly from the nursery */
3452 if (nursery_next + size >= nursery_frag_real_end) {
3453 if (!search_fragment_for_size (size)) {
3454 /* get ready for possible collection */
3455 update_current_thread_stack (&dummy);
3456 minor_collect_or_expand_inner (size);
3457 if (degraded_mode) {
3458 p = alloc_degraded (vtable, size);
3465 p = (void*)nursery_next;
3466 nursery_next += size;
3467 if (nursery_next > nursery_frag_real_end) {
3472 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3473 memset (p, 0, size);
// Otherwise retire the current TLAB and carve a new one of tlab_size bytes.
3476 DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", tlab_start, tlab_real_end, (long)(tlab_real_end - tlab_next - size)));
3478 if (nursery_next + tlab_size >= nursery_frag_real_end) {
3479 res = search_fragment_for_size (tlab_size);
3481 /* get ready for possible collection */
3482 update_current_thread_stack (&dummy);
3483 minor_collect_or_expand_inner (tlab_size);
3484 if (degraded_mode) {
3485 p = alloc_degraded (vtable, size);
3492 /* Allocate a new TLAB from the current nursery fragment */
3493 tlab_start = nursery_next;
3494 nursery_next += tlab_size;
3495 tlab_next = tlab_start;
3496 tlab_real_end = tlab_start + tlab_size;
// tlab_temp_end is a soft limit at most SCAN_START_SIZE ahead, so the slow
// path is revisited regularly to record scan starts.
3497 tlab_temp_end = tlab_start + MIN (SCAN_START_SIZE, tlab_size);
3499 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3500 memset (tlab_start, 0, tlab_size);
3502 /* Allocate from the TLAB */
3503 p = (void*)tlab_next;
3505 g_assert (tlab_next <= tlab_real_end);
// Record this object as the scan start of the SCAN_START_SIZE bucket it falls in.
3507 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3510 /* Reached tlab_temp_end */
3512 /* record the scan start so we can find pinned objects more easily */
3513 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3514 /* we just bump tlab_temp_end as well */
3515 tlab_temp_end = MIN (tlab_real_end, tlab_next + SCAN_START_SIZE);
3516 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", tlab_next, tlab_temp_end));
3520 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3529 * To be used for interned strings and possibly MonoThread, reflection handles.
3530 * We may want to explicitly free these objects.
// mono_gc_alloc_pinned_obj: allocate an object that will not be moved.
// vtable: type of the new object (used for the debug message; the store into
//         the object is not visible in this listing).
// size:   requested size in bytes; rounded up to a multiple of ALLOC_ALIGN here.
// Sizes above MAX_FREELIST_SIZE are delegated to alloc_large_inner; smaller
// ones come from the pinned-chunk free lists and are zeroed here.
3533 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
3535 /* FIXME: handle OOM */
3537 size += ALLOC_ALIGN - 1;
3538 size &= ~(ALLOC_ALIGN - 1);
3540 if (size > MAX_FREELIST_SIZE) {
// The address of the local p is passed as the current stack position.
3541 update_current_thread_stack (&p);
3542 /* large objects are always pinned anyway */
3543 p = alloc_large_inner (vtable, size);
3545 p = alloc_from_freelist (size);
// Freelist memory is cleared explicitly after it is handed out.
3546 memset (p, 0, size);
3548 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3555 * ######################################################################
3556 * ######## Finalization support
3557 * ######################################################################
3561 * this is valid for the nursery: if the object has been forwarded it means it's
3562 * still referenced from a root. If it is pinned it's still alive as well.
3563 * Return TRUE if @obj is ready to be finalized.
// object_is_fin_ready: true when obj is neither pinned nor forwarded.
// Note that obj is expanded twice inside the macro.
3565 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
// is_critical_finalizer: tell whether the class of the object in entry has
// mono_defaults.critical_finalizer_object as a parent.
// NOTE(review): the statement taken when critical_finalizer_object is unset is
// not visible in this listing; presumably it returns FALSE.
3568 is_critical_finalizer (FinalizeEntry *entry)
3573 if (!mono_defaults.critical_finalizer_object)
3576 obj = entry->object;
// The class is reached through the vtable loaded from the object header.
3577 class = ((MonoVTable*)LOAD_VTABLE (obj))->klass;
3579 return mono_class_has_parent (class, mono_defaults.critical_finalizer_object);
// queue_finalization_entry: push entry on the front of critical_fin_list when
// is_critical_finalizer says so, otherwise on the front of fin_ready_list.
3583 queue_finalization_entry (FinalizeEntry *entry) {
3584 if (is_critical_finalizer (entry)) {
3585 entry->next = critical_fin_list;
3586 critical_fin_list = entry;
3588 entry->next = fin_ready_list;
3589 fin_ready_list = entry;
// finalize_in_range: visit every registered finalizable object located in the
// address range from start up to (not including) end.
// Objects that are finalization-ready are moved from the hash table to a ready
// list and then copied so they survive until their finalizer runs; for the
// others only the stored pointer is refreshed through copy_object.
// NOTE(review): the lines that maintain prev and advance past a removed entry
// are not visible in this listing.
3594 finalize_in_range (char *start, char *end)
3596 FinalizeEntry *entry, *prev;
3600 for (i = 0; i < finalizable_hash_size; ++i) {
3602 for (entry = finalizable_hash [i]; entry;) {
// In range, and not located between to_space and to_space_end.
3603 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3604 if (object_is_fin_ready (entry->object)) {
3606 FinalizeEntry *next;
3607 /* remove and put in fin_ready_list */
3609 prev->next = entry->next;
3611 finalizable_hash [i] = entry->next;
3613 num_ready_finalizers++;
3614 num_registered_finalizers--;
// queue_finalization_entry overwrites entry->next when it links the entry.
3615 queue_finalization_entry (entry);
3616 /* Make it survive */
3617 from = entry->object;
3618 entry->object = copy_object (entry->object, start, end);
3619 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3623 /* update pointer */
3624 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
3625 entry->object = copy_object (entry->object, start, end);
3629 entry = entry->next;
// null_link_in_range: visit every disappearing link whose target object lies in
// the address range from start up to (not including) end.
// Links to finalization-ready objects are removed from the hash table and their
// entry freed; the debug message says the link is nullified, but the store that
// clears it is not visible in this listing. Links to other objects are
// rewritten with the hidden form of the pointer returned by copy_object.
3635 null_link_in_range (char *start, char *end)
3637 DisappearingLink *entry, *prev;
3639 for (i = 0; i < disappearing_link_hash_size; ++i) {
3641 for (entry = disappearing_link_hash [i]; entry;) {
3642 char *object = DISLINK_OBJECT (entry);
// In range, and not located between to_space and to_space_end.
3643 if (object >= start && object < end && (object < to_space || object >= to_space_end)) {
3644 if (object_is_fin_ready (object)) {
3645 void **p = entry->link;
3646 DisappearingLink *old;
3648 /* remove from list */
3650 prev->next = entry->next;
3652 disappearing_link_hash [i] = entry->next;
3653 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, object));
3655 free_internal_mem (entry);
3657 num_disappearing_links--;
3660 /* update pointer if it's moved
3661 * FIXME: what if an object is moved earlier?
3663 *entry->link = HIDE_POINTER (copy_object (object, start, end));
3664 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->link, DISLINK_OBJECT (entry)));
3668 entry = entry->next;
3674 * mono_gc_finalizers_for_domain:
3675 * @domain: the unloading appdomain
3676 * @out_array: output array
3677 * @out_size: size of output array
3679 * Store inside @out_array up to @out_size objects that belong to the unloading
3680 * appdomain @domain. Returns the number of stored items. Can be called repeatedly
3681 * until it returns 0.
3682 * The items are removed from the finalizer data structure, so the caller is supposed
3684 * @out_array should be on the stack to allow the GC to know the objects are still alive.
// mono_gc_finalizers_for_domain: move registered finalizable objects that
// belong to domain out of the hash table and into out_array.
// domain:    compared against mono_object_domain of each registered object.
// out_array: receives the objects.
// out_size:  capacity of out_array; the walk is cut short once count reaches it.
// NOTE(review): the return statements and the handling of the removed entry
// are not visible in this listing.
3687 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3689 FinalizeEntry *entry, *prev;
// Nothing to do when finalization is disabled or no output space was given.
3691 if (no_finalize || !out_size || !out_array)
3695 for (i = 0; i < finalizable_hash_size; ++i) {
3697 for (entry = finalizable_hash [i]; entry;) {
3698 if (mono_object_domain (entry->object) == domain) {
3699 FinalizeEntry *next;
3700 /* remove and put in out_array */
3702 prev->next = entry->next;
3704 finalizable_hash [i] = entry->next;
3706 num_registered_finalizers--;
3707 out_array [count ++] = entry->object;
3708 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
3710 if (count == out_size) {
3717 entry = entry->next;
// rehash_fin_table: rebuild finalizable_hash with a bucket count chosen by
// g_spaced_primes_closest from the current number of registered finalizers.
// Entries are relinked into the new table rather than copied; the old bucket
// array is released afterwards.
3725 rehash_fin_table (void)
3729 FinalizeEntry **new_hash;
3730 FinalizeEntry *entry, *next;
3731 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3733 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3734 for (i = 0; i < finalizable_hash_size; ++i) {
// next carries the iteration because entry->next is overwritten below.
3735 for (entry = finalizable_hash [i]; entry; entry = next) {
3736 hash = mono_object_hash (entry->object) % new_size;
3738 entry->next = new_hash [hash];
3739 new_hash [hash] = entry;
3742 free_internal_mem (finalizable_hash);
3743 finalizable_hash = new_hash;
3744 finalizable_hash_size = new_size;
// mono_gc_register_for_finalization: add or remove the finalizer registration
// of obj in finalizable_hash.
// obj:       the object whose registration changes.
// user_data: must be NULL or mono_gc_run_finalize (asserted below).
// NOTE(review): the tests on user_data that choose between removing and adding
// are not visible in this listing; judging by the comments, presumably NULL
// means deregister. Verify against the full source.
3748 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3750 FinalizeEntry *entry, *prev;
3754 g_assert (user_data == NULL || user_data == mono_gc_run_finalize);
3755 hash = mono_object_hash (obj);
// Grow the table once the load factor reaches 2; the bucket index is computed
// afterwards so it matches the possibly resized table.
3757 if (num_registered_finalizers >= finalizable_hash_size * 2)
3758 rehash_fin_table ();
3759 hash %= finalizable_hash_size;
// Look for an existing registration of obj in its bucket.
3761 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3762 if (entry->object == obj) {
3764 /* remove from the list */
3766 prev->next = entry->next;
3768 finalizable_hash [hash] = entry->next;
3769 num_registered_finalizers--;
3770 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3771 free_internal_mem (entry);
3779 /* request to deregister, but already out of the list */
// No existing entry: create one and push it on the front of the bucket.
3783 entry = get_internal_mem (sizeof (FinalizeEntry));
3784 entry->object = obj;
3785 entry->next = finalizable_hash [hash];
3786 finalizable_hash [hash] = entry;
3787 num_registered_finalizers++;
3788 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
// rehash_dislink: rebuild disappearing_link_hash with a bucket count chosen by
// g_spaced_primes_closest from the current number of links. Entries are keyed
// by the address of the link, relinked into the new table, and the old bucket
// array is released.
3793 rehash_dislink (void)
3797 DisappearingLink **new_hash;
3798 DisappearingLink *entry, *next;
3799 int new_size = g_spaced_primes_closest (num_disappearing_links);
3801 new_hash = get_internal_mem (new_size * sizeof (DisappearingLink*));
3802 for (i = 0; i < disappearing_link_hash_size; ++i) {
// next carries the iteration because entry->next is overwritten below.
3803 for (entry = disappearing_link_hash [i]; entry; entry = next) {
3804 hash = mono_aligned_addr_hash (entry->link) % new_size;
3806 entry->next = new_hash [hash];
3807 new_hash [hash] = entry;
3810 free_internal_mem (disappearing_link_hash);
3811 disappearing_link_hash = new_hash;
3812 disappearing_link_hash_size = new_size;
// mono_gc_register_disappearing_link: register, retarget or remove a
// disappearing link.
// obj:  target object; per the comment below, NULL means the link is removed.
// link: location that stores the hidden pointer; also the hash key.
// NOTE(review): the rehash call, the NULL test and the returns are not visible
// in this listing.
3816 mono_gc_register_disappearing_link (MonoObject *obj, void **link)
3818 DisappearingLink *entry, *prev;
// Load factor check; the statement it guards is not visible here.
3822 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3824 /* FIXME: add check that link is not in the heap */
3825 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3826 entry = disappearing_link_hash [hash];
3828 for (; entry; entry = entry->next) {
3829 /* link already added */
3830 if (link == entry->link) {
3831 /* NULL obj means remove */
3834 prev->next = entry->next;
3836 disappearing_link_hash [hash] = entry->next;
3837 num_disappearing_links--;
3838 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3839 free_internal_mem (entry);
3842 *link = HIDE_POINTER (obj); /* we allow the change of object */
// Link not registered yet: create an entry and push it on the front of the bucket.
3849 entry = get_internal_mem (sizeof (DisappearingLink));
3850 *link = HIDE_POINTER (obj);
3852 entry->next = disappearing_link_hash [hash];
3853 disappearing_link_hash [hash] = entry;
3854 num_disappearing_links++;
3855 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
// mono_gc_invoke_finalizers: run pending finalizers until both fin_ready_list
// and critical_fin_list are empty.
// Each round first unlinks and frees the entry finalized in the previous round,
// then picks the first entry that still has an object, looking at fin_ready_list
// before critical_fin_list, clears entry->object and calls mono_gc_run_finalize.
// NOTE(review): locking, the loop exit and the return value are not visible in
// this listing.
3860 mono_gc_invoke_finalizers (void)
3862 FinalizeEntry *entry = NULL;
3863 gboolean entry_is_critical;
3866 /* FIXME: batch to reduce lock contention */
3867 while (fin_ready_list || critical_fin_list) {
// list points at the head of whichever list the previous entry came from.
3871 FinalizeEntry **list = entry_is_critical ? &critical_fin_list : &fin_ready_list;
3873 /* We have finalized entry in the last
3874 iteration, now we need to remove it from
3877 *list = entry->next;
3879 FinalizeEntry *e = *list;
3880 while (e->next != entry)
3882 e->next = entry->next;
3884 free_internal_mem (entry);
3888 /* Now look for the first non-null entry. */
3889 for (entry = fin_ready_list; entry && !entry->object; entry = entry->next)
3892 entry_is_critical = FALSE;
3894 entry_is_critical = TRUE;
3895 for (entry = critical_fin_list; entry && !entry->object; entry = entry->next)
3900 g_assert (entry->object);
3901 num_ready_finalizers--;
// Clearing entry->object marks the entry as handled; obj keeps the reference.
3902 obj = entry->object;
3903 entry->object = NULL;
3904 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
3912 g_assert (entry->object == NULL);
3914 /* the object is on the stack so it is pinned */
3915 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
3916 mono_gc_run_finalize (obj, NULL);
// mono_gc_pending_finalizers: non-zero when either fin_ready_list or
// critical_fin_list has at least one entry.
3923 mono_gc_pending_finalizers (void)
3925 return fin_ready_list || critical_fin_list;
3928 /* Negative value to remove */
// mono_gc_add_memory_pressure: add value to the global memory_pressure counter.
// value may be negative to take pressure away.
// The update is a plain read-modify-write; the FIXME below notes that
// interlocked functions should be used.
3930 mono_gc_add_memory_pressure (gint64 value)
3932 /* FIXME: Use interlocked functions */
3934 memory_pressure += value;
3939 * ######################################################################
3940 * ######## registered roots support
3941 * ######################################################################
/*
 * Rebuild the root hash table selected by PINNED (used as the index into
 * roots_hash, roots_hash_size and num_roots_entries). A new bucket array of
 * g_spaced_primes_closest (num_roots_entries [pinned]) slots is allocated,
 * every RootRecord is pushed onto the bucket chosen by
 * mono_aligned_addr_hash (start_root) % new_size, and the old array is freed.
 */
3945 rehash_roots (gboolean pinned)
3949 RootRecord **new_hash;
3950 RootRecord *entry, *next;
3953 new_size = g_spaced_primes_closest (num_roots_entries [pinned]);
3954 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3955 for (i = 0; i < roots_hash_size [pinned]; ++i) {
3956 for (entry = roots_hash [pinned][i]; entry; entry = next) {
3957 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
/* push the record on the front of its new bucket */
3959 entry->next = new_hash [hash];
3960 new_hash [hash] = entry;
3963 free_internal_mem (roots_hash [pinned]);
3964 roots_hash [pinned] = new_hash;
3965 roots_hash_size [pinned] = new_size;
/*
 * Search the hash table of class ROOT_TYPE for a record whose start_root is
 * START. ADDR_HASH is the already computed hash of START; it is reduced
 * modulo the table size here.
 * NOTE(review): presumably returns the matching record, or NULL when there is
 * none - confirm against the return statements.
 */
3969 find_root (int root_type, char *start, guint32 addr_hash)
3971 RootRecord *new_root;
3973 guint32 hash = addr_hash % roots_hash_size [root_type];
3974 for (new_root = roots_hash [root_type][hash]; new_root; new_root = new_root->next) {
3975 /* we allow changing the size and the descriptor (for thread statics etc) */
3976 if (new_root->start_root == start) {
3985 * We do not coalesce roots.
/*
 * Register the range [start, start + size) as a GC root of class ROOT_TYPE,
 * described by DESCR.
 * If START is already registered in any root class, that record is updated
 * in place (new end, new descriptor) and the accounted size is adjusted.
 * Otherwise a new RootRecord is allocated and pushed on the front of its
 * bucket in the ROOT_TYPE table.
 */
3988 mono_gc_register_root_inner (char *start, size_t size, void *descr, int root_type)
3990 RootRecord *new_root;
3991 unsigned int hash, addr_hash = mono_aligned_addr_hash (start);
3994 for (i = 0; i < ROOT_TYPE_NUM; ++i) {
/* load-factor test: at least two entries per bucket */
3995 if (num_roots_entries [i] >= roots_hash_size [i] * 2)
3998 for (i = 0; i < ROOT_TYPE_NUM; ++i) {
3999 new_root = find_root (i, start, addr_hash);
4000 /* we allow changing the size and the descriptor (for thread statics etc) */
4002 size_t old_size = new_root->end_root - new_root->start_root;
4003 new_root->end_root = new_root->start_root + size;
/* a root may not switch between having a descriptor and having none */
4004 g_assert (((new_root->root_desc != 0) && (descr != NULL)) ||
4005 ((new_root->root_desc == 0) && (descr == NULL)));
4006 new_root->root_desc = (mword)descr;
4008 roots_size -= old_size;
4013 new_root = get_internal_mem (sizeof (RootRecord));
4015 new_root->start_root = start;
4016 new_root->end_root = new_root->start_root + size;
4017 new_root->root_desc = (mword)descr;
4019 hash = addr_hash % roots_hash_size [root_type];
4020 num_roots_entries [root_type]++;
4021 new_root->next = roots_hash [root_type] [hash];
4022 roots_hash [root_type][hash] = new_root;
4023 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
/*
 * Register a root range. A non-NULL DESCR selects ROOT_TYPE_NORMAL, a NULL
 * DESCR selects ROOT_TYPE_PINNED. The result of
 * mono_gc_register_root_inner () is returned unchanged.
 */
4033 mono_gc_register_root (char *start, size_t size, void *descr)
4035 return mono_gc_register_root_inner (start, size, descr, descr ? ROOT_TYPE_NORMAL : ROOT_TYPE_PINNED);
/* Register a root range of class ROOT_TYPE_WBARRIER. */
4039 mono_gc_register_root_wbarrier (char *start, size_t size, void *descr)
4041 return mono_gc_register_root_inner (start, size, descr, ROOT_TYPE_WBARRIER);
/*
 * Remove the root that starts at ADDR. The bucket for ADDR is searched in
 * every root class; a matching record is unlinked from its chain, roots_size
 * and num_roots_entries are decreased accordingly, and the record is freed.
 */
4045 mono_gc_deregister_root (char* addr)
4047 RootRecord *tmp, *prev;
4048 unsigned int hash, addr_hash = mono_aligned_addr_hash (addr);
4052 for (root_type = 0; root_type < ROOT_TYPE_NUM; ++root_type) {
4053 hash = addr_hash % roots_hash_size [root_type];
4054 tmp = roots_hash [root_type][hash];
4057 if (tmp->start_root == (char*)addr) {
/* unlink: either bypass TMP in the chain or make its successor the bucket head */
4059 prev->next = tmp->next;
4061 roots_hash [root_type][hash] = tmp->next;
4062 roots_size -= (tmp->end_root - tmp->start_root);
4063 num_roots_entries [root_type]--;
4064 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
4065 free_internal_mem (tmp);
4076 * ######################################################################
4077 * ######## Thread handling (stop/start code)
4078 * ######################################################################
4081 /* eventually share with MonoThread? */
4082 typedef struct _SgenThreadInfo SgenThreadInfo;
/*
 * Per-thread record kept by the collector. Records live in thread_table and
 * are chained per bucket through NEXT.
 */
4084 struct _SgenThreadInfo {
4085 SgenThreadInfo *next; /* next record in the same thread_table bucket */
4086 ARCH_THREAD_TYPE id; /* thread id used for hashing and lookup */
4087 unsigned int stop_count; /* to catch duplicate signals */
/* Addresses of the TLAB variables, filled in by gc_register_current_thread ()
   so that they can be read and reset through this record. */
4092 char **tlab_next_addr;
4093 char **tlab_start_addr;
4094 char **tlab_temp_end_addr;
4095 char **tlab_real_end_addr;
4096 RememberedSet *remset; /* head of this thread's remembered-set chain */
4097 gpointer runtime_data; /* value returned by gc_callbacks.thread_attach_func, handed back to the other callbacks */
4100 /* FIXME: handle large/small config */
/* Number of buckets in thread_table. */
4101 #define THREAD_HASH_SIZE 11
/* Hash of a thread id: the id is cast to unsigned int, the low 4 bits are
   dropped and the result is multiplied by 2654435761u. */
4102 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
/* Registered threads, bucketed by HASH_PTHREAD_T (id) % THREAD_HASH_SIZE. */
4104 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
4106 #if USE_SIGNAL_BASED_START_STOP_WORLD
/* Posted by suspend_handler (), waited on by thread_handshake (). */
4108 static sem_t suspend_ack_semaphore;
/* Incremented each time the world is stopped; copied into info->stop_count by suspend_handler (). */
4109 static unsigned int global_stop_count = 0;
/* Signals used to suspend and to restart the other threads. */
4110 static int suspend_signal_num = SIGPWR;
4111 static int restart_signal_num = SIGXCPU;
/* Mask handed to sigsuspend () while a suspended thread waits for the restart signal. */
4112 static sigset_t suspend_signal_mask;
/* Register snapshot written by update_current_thread_stack () and scanned
   conservatively by scan_thread_data (). */
4113 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
4115 /* LOCKING: assumes the GC lock is held */
/*
 * Look for the record of thread ID in the thread_table bucket selected by
 * HASH_PTHREAD_T (id) % THREAD_HASH_SIZE; the walk stops at the first record
 * whose id matches or at the end of the chain.
 * NOTE(review): presumably returns that record, i.e. NULL for an
 * unregistered thread - confirm.
 */
4116 static SgenThreadInfo*
4117 thread_info_lookup (ARCH_THREAD_TYPE id)
4119 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4120 SgenThreadInfo *info;
4122 info = thread_table [hash];
4123 while (info && !ARCH_THREAD_EQUALS (info->id, id)) {
/*
 * Record the state of the calling thread: its stack_start becomes the aligned
 * address of a local variable, the registers are saved into cur_thread_regs
 * through ARCH_STORE_REGS, and the runtime's thread_suspend_func callback,
 * when installed, is called with a NULL context.
 * The lookup result is dereferenced unchecked, so the calling thread must be
 * registered.
 */
4130 update_current_thread_stack (void *start)
4132 void *ptr = cur_thread_regs;
4133 SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ());
4134 info->stack_start = align_pointer (&ptr);
4135 ARCH_STORE_REGS (ptr);
4136 if (gc_callbacks.thread_suspend_func)
4137 gc_callbacks.thread_suspend_func (info->runtime_data, NULL);
/*
 * Describe SIGNUM for debug output (it feeds a %s in thread_handshake ());
 * the suspend and the restart signal are told apart.
 */
4141 signal_desc (int signum)
4143 if (signum == suspend_signal_num)
4145 if (signum == restart_signal_num)
4150 /* LOCKING: assumes the GC lock is held */
/*
 * Send SIGNUM with pthread_kill () to every registered thread except the
 * caller, then wait on suspend_ack_semaphore COUNT times. A sem_wait ()
 * interrupted with EINTR is retried; any other failure ends in g_error ().
 * NOTE(review): COUNT is presumably the number of threads signalled
 * successfully (callers log it as the number of threads) - confirm.
 */
4152 thread_handshake (int signum)
4154 int count, i, result;
4155 SgenThreadInfo *info;
4156 pthread_t me = pthread_self ();
4159 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4160 for (info = thread_table [i]; info; info = info->next) {
4161 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
4162 if (ARCH_THREAD_EQUALS (info->id, me)) {
4163 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
4166 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
4168 result = pthread_kill (info->id, signum);
4170 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
4173 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
4179 for (i = 0; i < count; ++i) {
4180 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
4181 if (errno != EINTR) {
4182 g_error ("sem_wait ()");
4189 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
/*
 * Handler run in a thread that receives the suspend signal. It publishes the
 * thread's remembered set and stack start in its SgenThreadInfo, notifies the
 * runtime callback, acknowledges on suspend_ack_semaphore, and then sleeps in
 * sigsuspend () until restart_handler () has stored restart_signal_num in
 * info->signal. A second acknowledgement is posted after waking up.
 */
4191 suspend_handler (int sig, siginfo_t *siginfo, void *context)
4193 SgenThreadInfo *info;
4196 int old_errno = errno;
4198 id = pthread_self ();
4199 info = thread_info_lookup (id);
4200 stop_count = global_stop_count;
4201 /* duplicate signal */
/* the leading constant 0 disables this duplicate check */
4202 if (0 && info->stop_count == stop_count) {
4206 /* update the remset info in the thread data structure */
4207 info->remset = remembered_set;
4209 * this includes the register values that the kernel put on the stack.
4210 * Write arch-specific code to only push integer regs and a more accurate
4213 info->stack_start = align_pointer (&id);
4215 /* Notify the JIT */
4216 if (gc_callbacks.thread_suspend_func)
4217 gc_callbacks.thread_suspend_func (info->runtime_data, context);
4219 /* notify the waiting thread */
4220 sem_post (&suspend_ack_semaphore);
4221 info->stop_count = stop_count;
4223 /* wait until we receive the restart signal */
4226 sigsuspend (&suspend_signal_mask);
4227 } while (info->signal != restart_signal_num);
4229 /* notify the waiting thread */
4230 sem_post (&suspend_ack_semaphore);
/*
 * Handler for the restart signal: stores restart_signal_num in the calling
 * thread's info->signal, which is the value the wait loop in
 * suspend_handler () tests for.
 */
4236 restart_handler (int sig)
4238 SgenThreadInfo *info;
4239 int old_errno = errno;
4241 info = thread_info_lookup (pthread_self ());
4242 info->signal = restart_signal_num;
/* Time stamp taken when the world is stopped; restart_world () measures the pause from it. */
4247 static TV_DECLARE (stop_world_time);
/* Longest pause recorded by restart_world (), in microseconds. */
4248 static unsigned long max_pause_usec = 0;
4250 /* LOCKING: assumes the GC lock is held */
/*
 * World-stop path: bump global_stop_count, take the stop_world_time stamp and
 * suspend the other threads with thread_handshake (suspend_signal_num).
 */
4256 global_stop_count++;
4257 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ()));
4258 TV_GETTIME (stop_world_time);
4259 count = thread_handshake (suspend_signal_num);
4260 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
4264 /* LOCKING: assumes the GC lock is held */
/*
 * Resume the other threads with thread_handshake (restart_signal_num), then
 * compute the time elapsed since stop_world_time and fold it into
 * max_pause_usec.
 */
4266 restart_world (void)
4269 TV_DECLARE (end_sw);
4272 count = thread_handshake (restart_signal_num);
4273 TV_GETTIME (end_sw);
4274 usec = TV_ELAPSED (stop_world_time, end_sw);
4275 max_pause_usec = MAX (usec, max_pause_usec);
4276 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
4280 #endif /* USE_SIGNAL_BASED_START_STOP_WORLD */
/* Install the runtime callbacks: *CALLBACKS is copied by value into gc_callbacks. */
4283 mono_gc_set_gc_callbacks (MonoGCCallbacks *callbacks)
4285 gc_callbacks = *callbacks;
4288 /* Variables holding start/end nursery so it won't have to be passed at every call */
4289 static void *scan_area_arg_start, *scan_area_arg_end;
/*
 * Conservatively pin the objects referenced from [start, end), using the
 * nursery bounds stored in scan_area_arg_start/scan_area_arg_end (they are
 * set by scan_thread_data ()).
 */
4292 mono_gc_conservatively_scan_area (void *start, void *end)
4294 conservatively_pin_objects_from (start, end, scan_area_arg_start, scan_area_arg_end);
/*
 * Pass OBJ to copy_object () with the nursery bounds stored in
 * scan_area_arg_start/scan_area_arg_end and return its result.
 */
4298 mono_gc_scan_object (void *obj)
4300 return copy_object (obj, scan_area_arg_start, scan_area_arg_end);
4304 * Mark from thread stacks and registers.
/*
 * Scan the stacks of all registered threads. The nursery bounds are first
 * stored in scan_area_arg_start/scan_area_arg_end for the callback entry
 * points. A stack range [stack_start, stack_end) is then either handed to
 * gc_callbacks.thread_mark_func, when the runtime installed one, or pinned
 * conservatively. The register snapshot in cur_thread_regs is pinned
 * conservatively as well.
 * NOTE(review): which threads are skipped as dead, and whether the
 * conservative calls are limited to the non-precise pass, depends on
 * conditions that should be confirmed.
 */
4307 scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise)
4310 SgenThreadInfo *info;
4312 scan_area_arg_start = start_nursery;
4313 scan_area_arg_end = end_nursery;
4315 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4316 for (info = thread_table [i]; info; info = info->next) {
4318 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
4321 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %zd, pinned=%d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, next_pin_slot));
4322 if (gc_callbacks.thread_mark_func)
4323 gc_callbacks.thread_mark_func (info->runtime_data, info->stack_start, info->stack_end, precise);
4325 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
4328 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers, pinned=%d\n", next_pin_slot));
4330 conservatively_pin_objects_from ((void*)cur_thread_regs, (void*)(cur_thread_regs + ARCH_NUM_REGS), start_nursery, end_nursery);
/*
 * Debugging aid: walk the stack range of every registered thread one pointer
 * at a time and log each slot whose value lies inside [obj, obj + size).
 */
4334 find_pinning_ref_from_thread (char *obj, size_t size)
4337 SgenThreadInfo *info;
4338 char *endobj = obj + size;
4340 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4341 for (info = thread_table [i]; info; info = info->next) {
4342 char **start = (char**)info->stack_start;
4345 while (start < (char**)info->stack_end) {
4346 if (*start >= obj && *start < endobj) {
4347 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, (gpointer)info->id, start, info->stack_start, info->stack_end));
4353 /* FIXME: check register */
4356 /* return TRUE if ptr points inside the managed heap */
/* The only test made is the range check against lowest_heap_address and
   highest_heap_address (upper bound exclusive). */
4358 ptr_in_heap (void* ptr)
4360 mword p = (mword)ptr;
4361 if (p < lowest_heap_address || p >= highest_heap_address)
4363 /* FIXME: more checks */
/*
 * Process the remembered-set entry at P for a nursery collection bounded by
 * [start_nursery, end_nursery). The bits of *p selected by REMSET_TYPE_MASK
 * give the entry type; the remaining bits are an address. Depending on the
 * type the entry names a single slot, a run of slots, a whole object
 * (scan_object), a value type with its descriptor (scan_vtype) or a root
 * location. Slots are updated with the result of copy_object ().
 * When GLOBAL is FALSE and an updated slot still points into the nursery, the
 * slot is recorded with add_to_global_remset ().
 * Callers use the return value as the position of the next entry.
 */
4368 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
4374 /* FIXME: exclude stack locations */
4375 switch ((*p) & REMSET_TYPE_MASK) {
4376 case REMSET_LOCATION:
4378 //__builtin_prefetch (ptr);
/* only slots outside the nursery but inside the heap are processed */
4379 if (((void*)ptr < start_nursery || (void*)ptr >= end_nursery) && ptr_in_heap (ptr)) {
4380 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4381 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
4382 if (!global && *ptr >= start_nursery && *ptr < end_nursery) {
4384 * If the object is pinned, each reference to it from nonpinned objects
4385 * becomes part of the global remset, which can grow very large.
4387 DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr)));
4388 add_to_global_remset (ptr, FALSE);
4391 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
4395 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4396 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4399 while (count-- > 0) {
4400 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4401 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
4402 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
4403 add_to_global_remset (ptr, FALSE);
4408 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4409 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4411 scan_object (*ptr, start_nursery, end_nursery);
4413 case REMSET_OTHER: {
4414 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4418 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4421 scan_vtype ((char*)ptr, desc, start_nursery, end_nursery);
4423 case REMSET_ROOT_LOCATION:
4424 /* Same as REMSET_LOCATION, but the address is not required to be in the heap */
4425 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4426 DEBUG (9, fprintf (gc_debug_file, "Overwrote root location remset at %p with %p\n", ptr, *ptr));
4427 if (!global && *ptr >= start_nursery && *ptr < end_nursery) {
4429 * If the object is pinned, each reference to it from nonpinned objects
4430 * becomes part of the global remset, which can grow very large.
4432 DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr)));
4433 add_to_global_remset (ptr, TRUE);
4437 g_assert_not_reached ();
4442 g_assert_not_reached ();
/*
 * Process all remembered sets for a nursery collection.
 * Global chain: every entry is handled with GLOBAL set to TRUE. Entries whose
 * slot still points into the nursery afterwards are copied down to STORE_POS,
 * and each buffer is truncated to what was kept.
 * Per-thread chains: every entry is handled with GLOBAL set to FALSE. Each
 * buffer is then emptied and detached, and all buffers except the head
 * (info->remset) are freed.
 */
4448 scan_from_remsets (void *start_nursery, void *end_nursery)
4451 SgenThreadInfo *info;
4452 RememberedSet *remset, *next;
4453 mword *p, *next_p, *store_pos;
4455 /* the global one */
4456 for (remset = global_remset; remset; remset = remset->next) {
4457 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4458 store_pos = remset->data;
4459 for (p = remset->data; p < remset->store_next; p = next_p) {
4462 next_p = handle_remset (p, start_nursery, end_nursery, TRUE);
4465 * Clear global remsets of locations which no longer point to the
4466 * nursery. Otherwise, they could grow indefinitely between major
4469 ptr = (p [0] & ~REMSET_TYPE_MASK);
4470 if ((p [0] & REMSET_TYPE_MASK) == REMSET_LOCATION) {
4471 if (ptr_in_nursery (*(void**)ptr))
4472 *store_pos ++ = p [0];
/* the only other entry kind accepted in the global remset is the
   two-word root location entry */
4474 g_assert ((p [0] & REMSET_TYPE_MASK) == REMSET_OTHER);
4475 g_assert (p [1] == REMSET_ROOT_LOCATION);
4476 if (ptr_in_nursery (*(void**)ptr)) {
4477 *store_pos ++ = p [0];
4478 *store_pos ++ = p [1];
4483 /* Truncate the remset */
4484 remset->store_next = store_pos;
4487 /* the per-thread ones */
4488 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4489 for (info = thread_table [i]; info; info = info->next) {
4490 for (remset = info->remset; remset; remset = next) {
4491 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4492 for (p = remset->data; p < remset->store_next;) {
4493 p = handle_remset (p, start_nursery, end_nursery, FALSE);
/* empty the buffer and detach it; NEXT is saved first because the
   buffer may be freed below */
4495 remset->store_next = remset->data;
4496 next = remset->next;
4497 remset->next = NULL;
4498 if (remset != info->remset) {
4499 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4500 free_internal_mem (remset);
4508 * Clear the info in the remembered sets: we're doing a major collection, so
4509 * the per-thread ones are not needed and the global ones will be reconstructed
/*
 * Empty every remembered set without processing its entries. In the global
 * chain and in each thread's chain, every buffer is reset (store_next back to
 * data) and detached, and every buffer except the head of its chain is freed.
 */
4513 clear_remsets (void)
4516 SgenThreadInfo *info;
4517 RememberedSet *remset, *next;
4519 /* the global list */
4520 for (remset = global_remset; remset; remset = next) {
4521 remset->store_next = remset->data;
4522 next = remset->next;
4523 remset->next = NULL;
4524 if (remset != global_remset) {
4525 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4526 free_internal_mem (remset);
4529 /* the per-thread ones */
4530 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4531 for (info = thread_table [i]; info; info = info->next) {
4532 for (remset = info->remset; remset; remset = next) {
4533 remset->store_next = remset->data;
4534 next = remset->next;
4535 remset->next = NULL;
4536 if (remset != info->remset) {
4537 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4538 free_internal_mem (remset);
4546 * Clear the thread local TLAB variables for all threads.
/* Walk every registered thread and set its four TLAB variables to NULL
   through the addresses stored in its SgenThreadInfo. */
4551 SgenThreadInfo *info;
4554 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4555 for (info = thread_table [i]; info; info = info->next) {
4556 /* A new TLAB will be allocated when the thread does its first allocation */
4557 *info->tlab_start_addr = NULL;
4558 *info->tlab_next_addr = NULL;
4559 *info->tlab_temp_end_addr = NULL;
4560 *info->tlab_real_end_addr = NULL;
4566 * Find the tlab_next value of the TLAB which contains ADDR.
/*
 * Search the registered threads for one whose TLAB range
 * [*tlab_start_addr, *tlab_next_addr) contains ADDR and return that thread's
 * *tlab_next_addr.
 * NOTE(review): the result when no thread matches should be confirmed.
 */
4569 find_tlab_next_from_address (char *addr)
4571 SgenThreadInfo *info;
4574 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4575 for (info = thread_table [i]; info; info = info->next) {
4576 if (addr >= *info->tlab_start_addr && addr < *info->tlab_next_addr)
4577 return *info->tlab_next_addr;
4584 /* LOCKING: assumes the GC lock is held */
/*
 * Create and register the SgenThreadInfo of the calling thread.
 * The record is allocated with malloc (), filled with the thread id and the
 * addresses of the TLAB variables, given a stack end, linked into
 * thread_table, given a fresh remembered set of DEFAULT_REMSET_SIZE entries
 * (also stored in remembered_set and under remembered_set_key), and finally
 * passed through gc_callbacks.thread_attach_func when that is installed.
 * ADDR is an address on the caller's stack; it is used for the stack end only
 * by the variant that cannot query the stack through pthread calls.
 */
4585 static SgenThreadInfo*
4586 gc_register_current_thread (void *addr)
4589 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
4592 info->id = ARCH_GET_THREAD ();
4593 info->stop_count = -1;
4596 info->stack_start = NULL;
4597 info->tlab_start_addr = &tlab_start;
4598 info->tlab_next_addr = &tlab_next;
4599 info->tlab_temp_end_addr = &tlab_temp_end;
4600 info->tlab_real_end_addr = &tlab_real_end;
4602 tlab_next_addr = &tlab_next;
4604 /* try to get it with attributes first */
4605 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
4609 pthread_attr_t attr;
4610 pthread_getattr_np (pthread_self (), &attr);
4611 pthread_attr_getstack (&attr, &sstart, &size);
/* stack end = reported stack address plus reported stack size */
4612 info->stack_end = (char*)sstart + size;
4613 pthread_attr_destroy (&attr);
4615 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
4616 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
4619 /* FIXME: we assume the stack grows down */
/* round ADDR up to the next multiple of 4096 and use that as the stack end */
4620 gsize stack_bottom = (gsize)addr;
4621 stack_bottom += 4095;
4622 stack_bottom &= ~4095;
4623 info->stack_end = (char*)stack_bottom;
4627 /* hash into the table */
4628 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
4629 info->next = thread_table [hash];
4630 thread_table [hash] = info;
4632 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
4633 pthread_setspecific (remembered_set_key, remembered_set);
4634 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
4636 if (gc_callbacks.thread_attach_func)
4637 info->runtime_data = gc_callbacks.thread_attach_func ();
/*
 * Remove the calling thread from thread_table: its record is located in the
 * bucket for its id and unlinked from the chain, and the buffers of a
 * remembered-set chain are released with free_internal_mem ().
 * The debug message is printed before the search, so it shows the first
 * record of the bucket, which is not necessarily the one being removed.
 */
4643 unregister_current_thread (void)
4646 SgenThreadInfo *prev = NULL;
4648 RememberedSet *rset;
4649 ARCH_THREAD_TYPE id = ARCH_GET_THREAD ();
4651 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4652 p = thread_table [hash];
4654 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
4655 while (!ARCH_THREAD_EQUALS (p->id, id)) {
4660 thread_table [hash] = p->next;
4662 prev->next = p->next;
4665 /* FIXME: transfer remsets if any */
4667 RememberedSet *next = rset->next;
4668 free_internal_mem (rset);
/*
 * Calls unregister_current_thread () for the calling thread.
 * NOTE(review): the void* parameter suggests this is installed as a
 * thread-specific-data destructor - confirm where it is registered.
 */
4675 unregister_thread (void *k)
4678 unregister_current_thread ();
/*
 * Make sure the calling thread is known to the collector: its record is
 * looked up and, when needed, created with
 * gc_register_current_thread (baseptr). Returns non-zero when a record
 * exists afterwards.
 */
4683 mono_gc_register_thread (void *baseptr)
4685 SgenThreadInfo *info;
4687 info = thread_info_lookup (ARCH_GET_THREAD ());
4689 info = gc_register_current_thread (baseptr);
4691 return info != NULL;
4694 #if USE_PTHREAD_INTERCEPT
4696 #undef pthread_create
4698 #undef pthread_detach
/* Start-up data handed from mono_gc_pthread_create () to gc_start_thread (). */
4701 void *(*start_routine) (void *); /* the thread function supplied by the caller */
4705 } SgenThreadStartInfo;
/*
 * Thread entry point used by mono_gc_pthread_create (). ARG is the
 * SgenThreadStartInfo prepared by the creator. The new thread registers
 * itself with the collector, posts start_info->registered so the creator can
 * continue, and then runs the caller's start routine with the caller's
 * argument. Both are copied out of START_INFO before the semaphore is
 * posted.
 */
4708 gc_start_thread (void *arg)
4710 SgenThreadStartInfo *start_info = arg;
4711 SgenThreadInfo* info;
4712 void *t_arg = start_info->arg;
4713 void *(*start_func) (void*) = start_info->start_routine;
/* the address of a local serves as the stack address for registration */
4717 info = gc_register_current_thread (&result);
4719 sem_post (&(start_info->registered));
4720 result = start_func (t_arg);
4722 * this is done by the pthread key dtor
4724 unregister_current_thread ();
/*
 * pthread_create () replacement that starts the new thread in
 * gc_start_thread (), so that it registers with the collector before running
 * START_ROUTINE (ARG). The creator waits on the start_info->registered
 * semaphore, retrying sem_wait () while it fails, and destroys the semaphore
 * afterwards.
 */
4732 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
4734 SgenThreadStartInfo *start_info;
4737 start_info = malloc (sizeof (SgenThreadStartInfo));
4740 sem_init (&(start_info->registered), 0, 0);
4741 start_info->arg = arg;
4742 start_info->start_routine = start_routine;
4744 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
4746 while (sem_wait (&(start_info->registered)) != 0) {
4747 /*if (EINTR != errno) ABORT("sem_wait failed"); */
4750 sem_destroy (&(start_info->registered));
/* Forwards to pthread_join () and returns its result. */
4756 mono_gc_pthread_join (pthread_t thread, void **retval)
4758 return pthread_join (thread, retval);
/* Forwards to pthread_detach () and returns its result. */
4762 mono_gc_pthread_detach (pthread_t thread)
4764 return pthread_detach (thread);
4767 #endif /* USE_PTHREAD_INTERCEPT */
4770 * ######################################################################
4771 * ######## Write barriers
4772 * ######################################################################
/*
 * Allocate a remembered-set buffer with room for SIZE pointer-sized entries.
 * The storage is one block of internal memory: the RememberedSet header
 * followed by the entries. store_next starts at data (empty buffer) and
 * end_set marks the end of the capacity. ID is only used in the debug
 * message.
 */
4775 static RememberedSet*
4776 alloc_remset (int size, gpointer id) {
4777 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
4778 res->store_next = res->data;
4779 res->end_set = res->data + size;
4781 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
4786 * Note: the write barriers first do the needed GC work and then do the actual store:
4787 * this way the value is visible to the conservative GC scan after the write barrier
4788 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
4789 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
4790 * happens when we need to record which pointers contain references to the new generation.
4791 * The write barrier will be executed, but the pointer is still not stored.
/*
 * Write barrier for storing VALUE into the field at FIELD_PTR.
 * A slot located in the nursery is written without being recorded. Otherwise
 * the slot address is appended to the thread's remembered set before the
 * store. When the current buffer is full, a new buffer of the same capacity
 * is put in front of the chain and published both in remembered_set and in
 * the thread's SgenThreadInfo.
 */
4794 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
4797 if (ptr_in_nursery (field_ptr)) {
4798 *(void**)field_ptr = value;
4801 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
4802 rs = remembered_set;
4803 if (rs->store_next < rs->end_set) {
4804 *(rs->store_next++) = (mword)field_ptr;
4805 *(void**)field_ptr = value;
/* buffer full: chain a new one of the same capacity in front */
4808 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4809 rs->next = remembered_set;
4810 remembered_set = rs;
4811 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4812 *(rs->store_next++) = (mword)field_ptr;
4813 *(void**)field_ptr = value;
/*
 * Write barrier for storing VALUE into the array slot at SLOT_PTR. Same
 * scheme as the field barrier: a slot in the nursery is written directly,
 * any other slot is first recorded in the thread's remembered set, with a
 * new buffer chained in front when the current one is full.
 */
4817 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
4819 RememberedSet *rs = remembered_set;
4820 if (ptr_in_nursery (slot_ptr)) {
4821 *(void**)slot_ptr = value;
4824 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
4825 if (rs->store_next < rs->end_set) {
4826 *(rs->store_next++) = (mword)slot_ptr;
4827 *(void**)slot_ptr = value;
4830 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4831 rs->next = remembered_set;
4832 remembered_set = rs;
4833 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4834 *(rs->store_next++) = (mword)slot_ptr;
4835 *(void**)slot_ptr = value;
/*
 * Write barrier for COUNT array slots starting at SLOT_PTR. A two-word entry
 * is recorded: the start address tagged with REMSET_RANGE, then COUNT. The
 * capacity test therefore asks for room for two words. No slot is written
 * by the statements below; only the entry is recorded.
 */
4839 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
4841 RememberedSet *rs = remembered_set;
4842 if (ptr_in_nursery (slot_ptr))
4844 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
4845 if (rs->store_next + 1 < rs->end_set) {
4846 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4847 *(rs->store_next++) = count;
4850 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4851 rs->next = remembered_set;
4852 remembered_set = rs;
4853 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4854 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4855 *(rs->store_next++) = count;
/*
 * Write barrier for storing VALUE into an arbitrary location PTR. A location
 * in the nursery is written without being recorded. Any other location is
 * appended to the thread's remembered set before the store, with a new
 * buffer chained in front when the current one is full.
 */
4859 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4862 if (ptr_in_nursery (ptr)) {
4863 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4864 *(void**)ptr = value;
4867 rs = remembered_set;
4868 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
4869 /* FIXME: ensure it is on the heap */
4870 if (rs->store_next < rs->end_set) {
4871 *(rs->store_next++) = (mword)ptr;
4872 *(void**)ptr = value;
4875 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4876 rs->next = remembered_set;
4877 remembered_set = rs;
4878 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4879 *(rs->store_next++) = (mword)ptr;
4880 *(void**)ptr = value;
/*
 * Write barrier for storing VALUE into the root location PTR. A two-word
 * entry is recorded (PTR tagged with REMSET_OTHER, then
 * REMSET_ROOT_LOCATION) and the store is done afterwards.
 * NOTE(review): the capacity test asks for store_next + 2 < end_set although
 * only two words are written; this is stricter than necessary and merely
 * leaves a slot unused - confirm whether that is intended.
 */
4884 mono_gc_wbarrier_set_root (gpointer ptr, MonoObject *value)
4886 RememberedSet *rs = remembered_set;
4887 if (ptr_in_nursery (ptr))
4889 DEBUG (8, fprintf (gc_debug_file, "Adding root remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
4891 if (rs->store_next + 2 < rs->end_set) {
4892 *(rs->store_next++) = (mword)ptr | REMSET_OTHER;
4893 *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION;
4894 *(void**)ptr = value;
4897 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4898 rs->next = remembered_set;
4899 remembered_set = rs;
4900 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4901 *(rs->store_next++) = (mword)ptr | REMSET_OTHER;
4902 *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION;
4904 *(void**)ptr = value;
/*
 * Write barrier for a value-type copy into DEST. A three-word entry is
 * recorded: DEST tagged with REMSET_OTHER, REMSET_VTYPE, and the GC
 * descriptor of KLASS.
 * NOTE(review): SRC is unused and COUNT appears only in the debug message in
 * the statements below, so the entry itself describes a single value -
 * confirm how COUNT > 1 is meant to be covered.
 */
4908 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
4910 RememberedSet *rs = remembered_set;
4911 if (ptr_in_nursery (dest))
4913 DEBUG (8, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
/* room for three words is needed */
4915 if (rs->store_next + 2 < rs->end_set) {
4916 *(rs->store_next++) = (mword)dest | REMSET_OTHER;
4917 *(rs->store_next++) = (mword)REMSET_VTYPE;
4918 *(rs->store_next++) = (mword)klass->gc_descr;
4921 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4922 rs->next = remembered_set;
4923 remembered_set = rs;
4924 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4925 *(rs->store_next++) = (mword)dest | REMSET_OTHER;
4926 *(rs->store_next++) = (mword)REMSET_VTYPE;
4927 *(rs->store_next++) = (mword)klass->gc_descr;
4931 * mono_gc_wbarrier_object:
4933 * Write barrier to call when obj is the result of a clone or copy of an object.
/*
 * Record the whole object OBJ in the thread's remembered set as a one-word
 * entry (the object address tagged with REMSET_OBJECT). A new buffer of the
 * same capacity is chained in front when the current one is full.
 */
4936 mono_gc_wbarrier_object (MonoObject* obj)
4938 RememberedSet *rs = remembered_set;
4939 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
4940 if (rs->store_next < rs->end_set) {
4941 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4944 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4945 rs->next = remembered_set;
4946 remembered_set = rs;
4947 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4948 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4952 * ######################################################################
4953 * ######## Collector debugging
4954 * ######################################################################
/* Names of the GC descriptor types, indexed by the descriptor type value;
   describe_ptr () prints descriptor_types [type]. */
4957 const char*descriptor_types [] = {
/*
 * Debugging aid: print to stdout what the collector knows about PTR - where
 * it lies (nursery, an old-space section, a pinned chunk, or unknown),
 * whether the object is pinned or forwarded, and its vtable, class name and
 * GC descriptor. PTR is expected to be the start of an object (see the FIXME
 * below).
 * NOTE(review): the message "Object is forwared." misspells "forwarded", and
 * "%lx" assumes that the descriptor has the size of a long - both are
 * runtime strings and were left untouched.
 */
4969 describe_ptr (char *ptr)
4971 GCMemSection *section;
4976 if (ptr_in_nursery (ptr)) {
4977 printf ("Pointer inside nursery.\n");
/* otherwise look for the old-space section that contains PTR */
4979 for (section = section_list; section;) {
4980 if (ptr >= section->data && ptr < section->data + section->size)
4982 section = section->next;
4986 printf ("Pointer inside oldspace.\n");
4987 } else if (obj_is_from_pinned_alloc (ptr)) {
4988 printf ("Pointer is inside a pinned chunk.\n");
4990 printf ("Pointer unknown.\n");
4995 if (object_is_pinned (ptr))
4996 printf ("Object is pinned.\n");
4998 if (object_is_forwarded (ptr))
4999 printf ("Object is forwared.\n");
5001 // FIXME: Handle pointers to the inside of objects
5002 vtable = (MonoVTable*)LOAD_VTABLE (ptr);
5004 printf ("VTable: %p\n", vtable);
5005 if (vtable == NULL) {
5006 printf ("VTable is invalid (empty).\n");
5009 if (ptr_in_nursery (vtable)) {
5010 printf ("VTable is invalid (points inside nursery).\n");
5013 printf ("Class: %s\n", vtable->klass->name);
5015 desc = ((GCVTable*)vtable)->desc;
5016 printf ("Descriptor: %lx\n", desc);
5019 printf ("Descriptor type: %d (%s)\n", type, descriptor_types [type]);
5023 find_in_remset_loc (mword *p, char *addr, gboolean *found)
5029 switch ((*p) & REMSET_TYPE_MASK) {
5030 case REMSET_LOCATION:
5031 if (*p == (mword)addr)
5035 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
5037 if ((void**)addr >= ptr && (void**)addr < ptr + count)
5041 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
5042 count = safe_object_get_size ((MonoObject*)ptr);
5043 count += (ALLOC_ALIGN - 1);
5044 count &= (ALLOC_ALIGN - 1);
5045 count /= sizeof (mword);
5046 if ((void**)addr >= ptr && (void**)addr < ptr + count)
5049 case REMSET_OTHER: {
5052 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
5055 switch (desc & 0x7) {
5056 case DESC_TYPE_RUN_LENGTH:
5057 OBJ_RUN_LEN_SIZE (skip_size, desc, ptr);
5058 /* The descriptor includes the size of MonoObject */
5059 skip_size -= sizeof (MonoObject);
5060 if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer)))
5065 g_assert_not_reached ();
5069 case REMSET_ROOT_LOCATION:
5072 g_assert_not_reached ();
5077 g_assert_not_reached ();
5083 * Return whether ADDR occurs in the remembered sets
/*
 * Debugging aid: run find_in_remset_loc () over every entry of the global
 * remembered-set chain and of each registered thread's chain, looking for an
 * entry that covers ADDR. FOUND starts as FALSE and is set by
 * find_in_remset_loc ().
 */
5086 find_in_remsets (char *addr)
5089 SgenThreadInfo *info;
5090 RememberedSet *remset;
5092 gboolean found = FALSE;
5094 /* the global one */
5095 for (remset = global_remset; remset; remset = remset->next) {
5096 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
5097 for (p = remset->data; p < remset->store_next;) {
5098 p = find_in_remset_loc (p, addr, &found);
5103 /* the per-thread ones */
5104 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
5105 for (info = thread_table [i]; info; info = info->next) {
5106 for (remset = info->remset; remset; remset = remset->next) {
5107 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
5108 for (p = remset->data; p < remset->store_next;) {
5109 p = find_in_remset_loc (p, addr, &found);
/*
 * Per-reference check used by the object walkers in
 * check_remsets_for_area (): when the slot PTR of object OBJ holds a non-NULL
 * pointer into [nursery_start, nursery_next) and the slot address is not
 * found in any remembered set, the reference is logged and the check aborts
 * with g_assert_not_reached ().
 */
5121 #define HANDLE_PTR(ptr,obj) do { \
5122 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
5123 if (!find_in_remsets ((char*)(ptr))) { \
5124 fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %zd in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \
5125 g_assert_not_reached (); \
5131 * Check that each object reference inside the area which points into the nursery
5132 * can be found in the remembered sets.
/*
 * Walk the objects laid out in [start, end). A NULL first word is skipped one
 * pointer at a time. For every object the size (SKIP_SIZE) is computed
 * according to its descriptor type, and the matching *_FOREACH_PTR walker is
 * run so that HANDLE_PTR checks each reference.
 */
5134 static void __attribute__((noinline))
5135 check_remsets_for_area (char *start, char *end)
5140 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
5142 new_obj_references = 0;
5143 obj_references_checked = 0;
5144 while (start < end) {
5145 if (!*(void**)start) {
5146 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
5149 vt = (GCVTable*)LOAD_VTABLE (start);
5150 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
5152 MonoObject *obj = (MonoObject*)start;
5153 g_print ("found at %p (0x%lx): %s.%s\n", start, (long)vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
5157 if (type == DESC_TYPE_STRING) {
5158 STRING_SIZE (skip_size, start);
5162 } else if (type == DESC_TYPE_RUN_LENGTH) {
5163 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
5164 g_assert (skip_size);
5165 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
5169 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
/* element size from the descriptor times the length, plus the array
   header, rounded up to ALLOC_ALIGN */
5170 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
5171 skip_size *= mono_array_length ((MonoArray*)start);
5172 skip_size += sizeof (MonoArray);
5173 skip_size += (ALLOC_ALIGN - 1);
5174 skip_size &= ~(ALLOC_ALIGN - 1);
5175 OBJ_VECTOR_FOREACH_PTR (vt, start);
5176 if (((MonoArray*)start)->bounds) {
5177 /* account for the bounds */
5178 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
5183 } else if (type == DESC_TYPE_SMALL_BITMAP) {
5184 OBJ_BITMAP_SIZE (skip_size, desc, start);
5185 g_assert (skip_size);
5186 OBJ_BITMAP_FOREACH_PTR (desc,start);
5190 } else if (type == DESC_TYPE_LARGE_BITMAP) {
5191 skip_size = safe_object_get_size ((MonoObject*)start);
5192 skip_size += (ALLOC_ALIGN - 1);
5193 skip_size &= ~(ALLOC_ALIGN - 1);
5194 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
5198 } else if (type == DESC_TYPE_COMPLEX) {
5199 /* this is a complex object */
5200 skip_size = safe_object_get_size ((MonoObject*)start);
5201 skip_size += (ALLOC_ALIGN - 1);
5202 skip_size &= ~(ALLOC_ALIGN - 1);
5203 OBJ_COMPLEX_FOREACH_PTR (vt, start);
5207 } else if (type == DESC_TYPE_COMPLEX_ARR) {
5208 /* this is an array of complex structs */
5209 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
5210 skip_size *= mono_array_length ((MonoArray*)start);
5211 skip_size += sizeof (MonoArray);
5212 skip_size += (ALLOC_ALIGN - 1);
5213 skip_size &= ~(ALLOC_ALIGN - 1);
5214 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
5215 if (((MonoArray*)start)->bounds) {
5216 /* account for the bounds */
5217 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
5229 * Perform consistency check of the heap.
5231 * Assumes the world is stopped.
// Entry point of the heap consistency check: iterates over section_list and passes the
// used range [data, next_data) of a section to check_remsets_for_area, logging progress
// through DEBUG at levels 1 and 2.
// NOTE(review): the MEMORY_ROLE_GEN0 test presumably makes the loop skip nursery
// sections, since the log text speaks of old sections only - confirm.
5234 check_consistency (void)
5236 GCMemSection *section;
5238 // Need to add more checks
5239 // FIXME: Create a general heap enumeration function and use that
5241 DEBUG (1, fprintf (gc_debug_file, "Begin heap consistency check...\n"));
5243 // Check that oldspace->newspace pointers are registered with the collector
5244 for (section = section_list; section; section = section->next) {
5245 if (section->role == MEMORY_ROLE_GEN0)
5247 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
5248 check_remsets_for_area (section->data, section->next_data);
5251 DEBUG (1, fprintf (gc_debug_file, "Heap consistency check done.\n"));
5254 /* Check that the reference is valid */
// HANDLE_PTR flavour for check_object: asserts that safe_name returns a non-NULL result
// for the value stored in the slot ptr. The obj argument is not used by this line.
5256 #define HANDLE_PTR(ptr,obj) do { \
5258 g_assert (safe_name (*(ptr)) != NULL); \
5265 * Perform consistency check on an object. Currently we only check that the
5266 * reference fields are valid.
// Checks the reference fields of the object located at start.
// The low three bits of the GC descriptor select the object layout; every case except
// the string one runs the matching *_FOREACH_PTR macro and all cases return the address
// just past the object (start + skip_size).
// Cases that size the object with safe_object_get_size round the result up to a
// multiple of ALLOC_ALIGN; the string, run-length and small-bitmap cases rely on their
// dedicated *_SIZE macros instead.
5269 check_object (char *start)
5278 vt = (GCVTable*)LOAD_VTABLE (start);
5279 //type = vt->desc & 0x7;
5282 switch (desc & 0x7) {
// Strings: no reference macro is run, only the size is needed.
5283 case DESC_TYPE_STRING:
5284 STRING_SIZE (skip_size, start);
5285 return start + skip_size;
5286 case DESC_TYPE_RUN_LENGTH:
5287 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
5288 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
5289 g_assert (skip_size);
5290 return start + skip_size;
// Arrays and vectors share one implementation.
5291 case DESC_TYPE_ARRAY:
5292 case DESC_TYPE_VECTOR:
5293 OBJ_VECTOR_FOREACH_PTR (vt, start);
5294 skip_size = safe_object_get_size ((MonoObject*)start);
5295 skip_size += (ALLOC_ALIGN - 1);
5296 skip_size &= ~(ALLOC_ALIGN - 1);
5297 return start + skip_size;
// Unlike the run-length case, the size is not asserted to be non-zero here.
5298 case DESC_TYPE_SMALL_BITMAP:
5299 OBJ_BITMAP_FOREACH_PTR (desc,start);
5300 OBJ_BITMAP_SIZE (skip_size, desc, start);
5301 return start + skip_size;
5302 case DESC_TYPE_LARGE_BITMAP:
5303 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
5304 skip_size = safe_object_get_size ((MonoObject*)start);
5305 skip_size += (ALLOC_ALIGN - 1);
5306 skip_size &= ~(ALLOC_ALIGN - 1);
5307 return start + skip_size;
5308 case DESC_TYPE_COMPLEX:
5309 OBJ_COMPLEX_FOREACH_PTR (vt, start);
5310 /* this is a complex object */
5311 skip_size = safe_object_get_size ((MonoObject*)start);
5312 skip_size += (ALLOC_ALIGN - 1);
5313 skip_size &= ~(ALLOC_ALIGN - 1);
5314 return start + skip_size;
5315 case DESC_TYPE_COMPLEX_ARR:
5316 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
5317 /* this is an array of complex structs */
5318 skip_size = safe_object_get_size ((MonoObject*)start);
5319 skip_size += (ALLOC_ALIGN - 1);
5320 skip_size &= ~(ALLOC_ALIGN - 1);
5321 return start + skip_size;
// NOTE(review): presumably only reached for a descriptor type not listed above - confirm.
5323 g_assert_not_reached ();
5328 * ######################################################################
5329 * ######## Other mono public interface functions.
5330 * ######################################################################
// Public entry point that forces a collection of the given generation: generation 0
// goes through collect_nursery (0), otherwise major_collection is used.
// The address of the generation argument is handed to update_current_thread_stack;
// NOTE(review): presumably it serves as a marker for the current stack position - confirm.
5334 mono_gc_collect (int generation)
5337 update_current_thread_stack (&generation);
5339 if (generation == 0) {
5340 collect_nursery (0);
5342 major_collection ();
5349 mono_gc_max_generation (void)
// Returns how many collections ran so far: num_minor_gcs for generation 0 and
// num_major_gcs for any other generation value.
5355 mono_gc_collection_count (int generation)
5357 if (generation == 0)
5358 return num_minor_gcs;
5359 return num_major_gcs;
// Estimates the number of bytes in use: starts from los_memory_usage and adds the used
// span (next_data - data) of every section on section_list.
// The figure is approximate and pinned objects are not accounted for yet.
5363 mono_gc_get_used_size (void)
5366 GCMemSection *section;
5368 tot = los_memory_usage;
5369 for (section = section_list; section; section = section->next) {
5370 /* this is approximate... */
5371 tot += section->next_data - section->data;
5373 /* FIXME: account for pinned objects */
5379 mono_gc_get_heap_size (void)
5385 mono_gc_disable (void)
5393 mono_gc_enable (void)
5401 mono_object_is_alive (MonoObject* o)
// Generation query for obj; the answer depends on whether ptr_in_nursery accepts obj.
5407 mono_gc_get_generation (MonoObject *obj)
5409 if (ptr_in_nursery (obj))
5415 mono_gc_enable_events (void)
// Makes link_addr a weak link to obj by registering the pair through
// mono_gc_register_disappearing_link.
5420 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
5422 mono_gc_register_disappearing_link (obj, link_addr);
// Removes the weak link at link_addr: the same registration call used by
// mono_gc_weak_link_add is issued with a NULL object.
5426 mono_gc_weak_link_remove (void **link_addr)
5428 mono_gc_register_disappearing_link (NULL, link_addr);
// Reads the weak link at link_addr: the stored word is decoded with REVEAL_POINTER and
// the decoded value is compared with HIDE_POINTER (NULL).
// NOTE(review): presumably that comparison detects a cleared (zero) link word, in which
// case NULL is returned instead of obj - confirm against the macro definitions.
5432 mono_gc_weak_link_get (void **link_addr)
5434 MonoObject *obj = REVEAL_POINTER (*link_addr);
5436 if (obj == HIDE_POINTER (NULL))
// Builds a root descriptor from a bitmap holding numbits bits.
// When numbits is smaller than the bit width of one bitmap word minus
// ROOT_DESC_TYPE_SHIFT, the first bitmap word is embedded directly in a ROOT_DESC_BITMAP
// descriptor. Otherwise the bitmap is stored with alloc_complex_descriptor (called with
// numbits + 1) and the result is wrapped in a ROOT_DESC_COMPLEX descriptor.
5442 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
5444 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
5445 return (void*)MAKE_ROOT_DESC (ROOT_DESC_BITMAP, bitmap [0]);
5447 mword complex = alloc_complex_descriptor (bitmap, numbits + 1);
5448 return (void*)MAKE_ROOT_DESC (ROOT_DESC_COMPLEX, complex);
// Registers marker as a user supplied root marking callback.
// The callback is stored in the next free slot of user_descriptors and the index of
// that slot is encoded in a ROOT_DESC_USER descriptor.
// Asserts that the table still has room (user_descriptors_next < MAX_USER_DESCRIPTORS).
5453 mono_gc_make_root_descr_user (MonoGCMarkFunc marker)
5457 g_assert (user_descriptors_next < MAX_USER_DESCRIPTORS);
5458 descr = (void*)MAKE_ROOT_DESC (ROOT_DESC_USER, (mword)user_descriptors_next);
5459 user_descriptors [user_descriptors_next ++] = marker;
// Allocates size bytes of zero-filled memory with calloc and registers the block as a
// GC root described by descr.
// NOTE(review): the if on mono_gc_register_root covers a failed registration; the
// cleanup it performs should be confirmed.
5465 mono_gc_alloc_fixed (size_t size, void *descr)
5467 /* FIXME: do a single allocation */
5468 void *res = calloc (1, size);
5471 if (!mono_gc_register_root (res, size, descr)) {
// Counterpart of mono_gc_alloc_fixed: removes addr from the registered GC roots.
5479 mono_gc_free_fixed (void* addr)
5481 mono_gc_deregister_root (addr);
// Tells whether the calling thread is known to the GC: result is true when
// thread_info_lookup finds an entry for ARCH_GET_THREAD ().
5486 mono_gc_is_gc_thread (void)
5490 result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL;
// GC initialization.
//  - initializes gc_mutex and tests gc_initialized so that the setup is not repeated
//  - records the page size and defaults gc_debug_file to stderr
//  - parses the MONO_GC_DEBUG environment variable, a comma separated list in which an
//    entry starting with a digit sets gc_debug_level and may name a log file, and the
//    words collect-before-allocs, check-at-minor-collections and clear-at-gc switch on
//    the matching settings; any other entry prints a usage message to stderr
//  - installs suspend_handler and restart_handler for the suspend and restart signals
//    and prepares suspend_signal_mask (everything blocked except the restart signal)
//  - creates the global remembered set and the pthread key for per-thread remsets
//  - sets gc_initialized and registers the calling thread
5496 mono_gc_base_init (void)
5500 struct sigaction sinfo;
5502 LOCK_INIT (gc_mutex);
5504 if (gc_initialized) {
5508 pagesize = mono_pagesize ();
5509 gc_debug_file = stderr;
5510 if ((env = getenv ("MONO_GC_DEBUG"))) {
5511 opts = g_strsplit (env, ",", -1);
5512 for (ptr = opts; ptr && *ptr; ptr ++) {
// A leading digit selects the debug level.
5514 if (opt [0] >= '0' && opt [0] <= '9') {
5515 gc_debug_level = atoi (opt);
// The log file name gets the process id appended so that processes do not share a file.
5520 char *rf = g_strdup_printf ("%s.%d", opt, getpid ());
5521 gc_debug_file = fopen (rf, "wb");
5523 gc_debug_file = stderr;
5526 } else if (!strcmp (opt, "collect-before-allocs")) {
5527 collect_before_allocs = TRUE;
5528 } else if (!strcmp (opt, "check-at-minor-collections")) {
5529 consistency_check_at_minor_collection = TRUE;
5530 } else if (!strcmp (opt, "clear-at-gc")) {
5531 nursery_clear_policy = CLEAR_AT_GC;
5533 fprintf (stderr, "Invalid format for the MONO_GC_DEBUG env variable: '%s'\n", env);
5534 fprintf (stderr, "The format is: MONO_GC_DEBUG=[l[:filename]|<option>]+ where l is a debug level 0-9.\n");
5535 fprintf (stderr, "Valid options are: collect-before-allocs, check-at-minor-collections, clear-at-gc.\n");
5542 sem_init (&suspend_ack_semaphore, 0, 0);
// All signals are blocked while the suspend handler runs.
5544 sigfillset (&sinfo.sa_mask);
5545 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
5546 sinfo.sa_sigaction = suspend_handler;
5547 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
5548 g_error ("failed sigaction");
// NOTE(review): sa_flags still carries SA_SIGINFO from the suspend setup while the
// restart handler is stored through sa_handler - confirm that this is intended.
5551 sinfo.sa_handler = restart_handler;
5552 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
5553 g_error ("failed sigaction");
5556 sigfillset (&suspend_signal_mask);
5557 sigdelset (&suspend_signal_mask, restart_signal_num);
5559 global_remset = alloc_remset (1024, NULL);
5560 global_remset->next = NULL;
// unregister_thread is installed as the destructor of the thread-specific key.
5562 pthread_key_create (&remembered_set_key, unregister_thread);
5563 gc_initialized = TRUE;
// NOTE(review): the address of the local sinfo is passed, presumably as a stack
// position marker for the registering thread - confirm.
5565 mono_gc_register_thread (&sinfo);
5573 /* FIXME: Do this in the JIT, where specialized allocation sequences can be created
5574 * for each class. This is currently not easy to do, as it is hard to generate basic
5575 * blocks + branches, but it is easy with the linear IL codebase.
// Builds the IL method that implements the allocation fast path for allocator type
// atype. Only ATYPE_NORMAL is accepted (asserted below) and both TLS offsets must be
// available. The emitted method receives a vtable and:
//   1. reads vtable->klass->instance_size and rounds it up to a multiple of ALLOC_ALIGN
//   2. reads tlab_next through the tlab_next_addr TLS variable and advances it by size
//   3. if the advanced pointer is below tlab_temp_end, writes the vtable into the first
//      word of the new object and returns the object
//   4. otherwise calls mono_gc_alloc_obj (vtable, size) and returns its result
// Note that tlab_next is written back before the limit comparison is made.
5578 create_allocator (int atype)
5580 int tlab_next_addr_offset = -1;
5581 int tlab_temp_end_offset = -1;
5582 int p_var, size_var, tlab_next_addr_var, new_next_var;
5583 guint32 slowpath_branch;
5584 MonoMethodBuilder *mb;
5586 MonoMethodSignature *csig;
5587 static gboolean registered = FALSE;
5589 MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
5590 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5592 g_assert (tlab_next_addr_offset != -1);
5593 g_assert (tlab_temp_end_offset != -1);
5595 g_assert (atype == ATYPE_NORMAL);
// Makes mono_gc_alloc_obj callable from the generated code as a JIT icall.
5598 mono_register_jit_icall (mono_gc_alloc_obj, "mono_gc_alloc_obj", mono_create_icall_signature ("object ptr int"), FALSE);
// Signature of the generated method: one int_class parameter, object return value.
5602 csig = mono_metadata_signature_alloc (mono_defaults.corlib, 1);
5603 csig->ret = &mono_defaults.object_class->byval_arg;
5604 csig->params [0] = &mono_defaults.int_class->byval_arg;
5606 mb = mono_mb_new (mono_defaults.object_class, "Alloc", MONO_WRAPPER_ALLOC);
5607 size_var = mono_mb_add_local (mb, &mono_defaults.int32_class->byval_arg);
5608 /* size = vtable->klass->instance_size; */
5609 mono_mb_emit_ldarg (mb, 0);
5610 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoVTable, klass));
5611 mono_mb_emit_byte (mb, CEE_ADD);
5612 mono_mb_emit_byte (mb, CEE_LDIND_I);
5613 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoClass, instance_size));
5614 mono_mb_emit_byte (mb, CEE_ADD);
5615 /* FIXME: assert instance_size stays a 4 byte integer */
5616 mono_mb_emit_byte (mb, CEE_LDIND_U4);
5617 mono_mb_emit_stloc (mb, size_var);
5619 /* size += ALLOC_ALIGN - 1; */
5620 mono_mb_emit_ldloc (mb, size_var);
5621 mono_mb_emit_icon (mb, ALLOC_ALIGN - 1);
5622 mono_mb_emit_byte (mb, CEE_ADD);
5623 /* size &= ~(ALLOC_ALIGN - 1); */
5624 mono_mb_emit_icon (mb, ~(ALLOC_ALIGN - 1));
5625 mono_mb_emit_byte (mb, CEE_AND);
5626 mono_mb_emit_stloc (mb, size_var);
5629 * We need to modify tlab_next, but the JIT only supports reading, so we read
5630 * another tls var holding its address instead.
5633 /* tlab_next_addr (local) = tlab_next_addr (TLS var) */
5634 tlab_next_addr_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5635 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5636 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5637 mono_mb_emit_i4 (mb, tlab_next_addr_offset);
5638 mono_mb_emit_stloc (mb, tlab_next_addr_var);
5640 /* p = (void**)tlab_next; */
5641 p_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5642 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5643 mono_mb_emit_byte (mb, CEE_LDIND_I);
5644 mono_mb_emit_stloc (mb, p_var);
5646 /* new_next = (char*)p + size; */
5647 new_next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5648 mono_mb_emit_ldloc (mb, p_var);
5649 mono_mb_emit_ldloc (mb, size_var);
5650 mono_mb_emit_byte (mb, CEE_CONV_I);
5651 mono_mb_emit_byte (mb, CEE_ADD);
5652 mono_mb_emit_stloc (mb, new_next_var);
5654 /* tlab_next = new_next */
5655 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5656 mono_mb_emit_ldloc (mb, new_next_var);
5657 mono_mb_emit_byte (mb, CEE_STIND_I);
5659 /* if (G_LIKELY (new_next < tlab_temp_end)) */
5660 mono_mb_emit_ldloc (mb, new_next_var);
5661 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5662 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5663 mono_mb_emit_i4 (mb, tlab_temp_end_offset);
// Despite its name, this branch is the one taken when new_next is below tlab_temp_end
// (unsigned compare); it is patched further down to land on the fast path, so the code
// emitted right after it is the slow path.
5664 slowpath_branch = mono_mb_emit_short_branch (mb, MONO_CEE_BLT_UN_S);
5668 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5669 mono_mb_emit_byte (mb, CEE_MONO_NOT_TAKEN);
5671 /* FIXME: mono_gc_alloc_obj takes a 'size_t' as an argument, not an int32 */
5672 mono_mb_emit_ldarg (mb, 0);
5673 mono_mb_emit_ldloc (mb, size_var);
5674 mono_mb_emit_icall (mb, mono_gc_alloc_obj);
5675 mono_mb_emit_byte (mb, CEE_RET);
// Fast path starts here.
5678 mono_mb_patch_short_branch (mb, slowpath_branch);
5680 /* FIXME: Memory barrier */
// Store the vtable (argument 0) into the first word of the new object.
5683 mono_mb_emit_ldloc (mb, p_var);
5684 mono_mb_emit_ldarg (mb, 0);
5685 mono_mb_emit_byte (mb, CEE_STIND_I);
// Return the new object.
5688 mono_mb_emit_ldloc (mb, p_var);
5689 mono_mb_emit_byte (mb, CEE_RET);
5691 res = mono_mb_create_method (mb, csig, 8);
// The generated method does not ask for zero-initialized locals.
5693 mono_method_get_header (res)->init_locals = FALSE;
// Generated allocator methods, one slot per allocator type (ATYPE_NUM slots); read and
// filled by mono_gc_get_managed_allocator_by_type.
5697 static MonoMethod* alloc_method_cache [ATYPE_NUM];
5700 * Generate an allocator method implementing the fast path of mono_gc_alloc_obj ().
5701 * The signature of the called method is:
5702 * object allocate (MonoVTable *vtable)
// Selects the managed allocator to use for objects described by vtable.
// Before the allocator of type 0 is handed out, these conditions are tested: a missing
// TLS offset for tlab_next or tlab_temp_end, an instance_size above tlab_size, a class
// with a finalizer or marked marshalbyref, enabled allocation profiling, the string
// type, and the collect_before_allocs debug setting.
// NOTE(review): each of these presumably makes the function decline the managed fast
// path - confirm the returned value. for_box is not consulted by the visible checks.
5705 mono_gc_get_managed_allocator (MonoVTable *vtable, gboolean for_box)
5707 int tlab_next_offset = -1;
5708 int tlab_temp_end_offset = -1;
5709 MonoClass *klass = vtable->klass;
5710 MONO_THREAD_VAR_OFFSET (tlab_next, tlab_next_offset);
5711 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5713 if (tlab_next_offset == -1 || tlab_temp_end_offset == -1)
5715 if (klass->instance_size > tlab_size)
5717 if (klass->has_finalize || klass->marshalbyref || (mono_profiler_get_events () & MONO_PROFILE_ALLOCATIONS))
5721 if (klass->byval_arg.type == MONO_TYPE_STRING)
5723 if (collect_before_allocs)
5726 return mono_gc_get_managed_allocator_by_type (0);
5730 mono_gc_get_managed_allocator_type (MonoMethod *managed_alloc)
// Returns the allocator method for atype, using alloc_method_cache as storage and
// create_allocator to build a missing entry. The cache is accessed with the loader
// lock held. atype is used as an array index without a range check here.
5736 mono_gc_get_managed_allocator_by_type (int atype)
5740 mono_loader_lock ();
5741 res = alloc_method_cache [atype];
5743 res = alloc_method_cache [atype] = create_allocator (atype);
5744 mono_loader_unlock ();
5749 mono_gc_get_managed_allocator_types (void)
// Cached IL write barrier method; created and published by mono_gc_get_write_barrier.
5754 static MonoMethod *write_barrier_method;
// Returns the IL write barrier method, building it on first use and caching it in
// write_barrier_method. The generated method has the shape void wbarrier (ptr, value)
// and is the IL version of mono_gc_barrier_generic_store:
//   - when ptr lies inside the nursery it only performs the store
//   - otherwise, while the thread remembered set has room (store_next below end_set),
//     it appends ptr to the remembered set and then performs the store
//   - when the remembered set is full it calls mono_gc_wbarrier_generic_store
// The finished method is published under the loader lock; if another thread published
// one first, the freshly built method is freed and the published one is returned.
5757 mono_gc_get_write_barrier (void)
5760 int remset_offset = -1;
5761 int remset_var, next_var;
5762 MonoMethodBuilder *mb;
5763 MonoMethodSignature *sig;
5766 MONO_THREAD_VAR_OFFSET (remembered_set, remset_offset);
5768 // FIXME: Maybe create a separate version for ctors (the branch would be
5769 // correctly predicted more times)
5770 if (write_barrier_method)
5771 return write_barrier_method;
5773 /* Create the IL version of mono_gc_barrier_generic_store () */
5774 sig = mono_metadata_signature_alloc (mono_defaults.corlib, 2);
5775 sig->ret = &mono_defaults.void_class->byval_arg;
5776 sig->params [0] = &mono_defaults.int_class->byval_arg;
5777 sig->params [1] = &mono_defaults.object_class->byval_arg;
5779 mb = mono_mb_new (mono_defaults.object_class, "wbarrier", MONO_WRAPPER_WRITE_BARRIER);
5781 /* ptr_in_nursery () check */
5782 #ifdef ALIGN_NURSERY
5784 * Masking out the bits might be faster, but we would have to use 64 bit
5785 * immediates, which might be slower.
// Compares ptr >> DEFAULT_NURSERY_BITS with the same shift of nursery_start; the
// nursery start address is baked into the method as a constant at generation time.
5787 mono_mb_emit_ldarg (mb, 0);
5788 mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS);
5789 mono_mb_emit_byte (mb, CEE_SHR_UN);
5790 mono_mb_emit_icon (mb, (mword)nursery_start >> DEFAULT_NURSERY_BITS);
// label1 is taken when ptr is outside the nursery, which is the case needing a barrier.
5791 label1 = mono_mb_emit_branch (mb, CEE_BNE_UN);
5794 g_assert_not_reached ();
5797 /* Don't need write barrier case */
5798 /* do the assignment */
5799 mono_mb_emit_ldarg (mb, 0);
5800 mono_mb_emit_ldarg (mb, 1);
5801 /* Don't use STIND_REF, as it would cause infinite recursion */
5802 mono_mb_emit_byte (mb, CEE_STIND_I);
5803 mono_mb_emit_byte (mb, CEE_RET);
5805 /* Need write barrier case */
5806 mono_mb_patch_branch (mb, label1);
// The remembered set is reached through TLS, so a known offset is required.
5808 if (remset_offset == -1)
5810 g_assert_not_reached ();
5812 // remset_var = remembered_set;
5813 remset_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5814 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5815 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5816 mono_mb_emit_i4 (mb, remset_offset);
5817 mono_mb_emit_stloc (mb, remset_var);
5819 // next_var = rs->store_next
5820 next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5821 mono_mb_emit_ldloc (mb, remset_var);
5822 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, store_next));
5823 mono_mb_emit_byte (mb, CEE_LDIND_I);
5824 mono_mb_emit_stloc (mb, next_var);
5826 // if (rs->store_next < rs->end_set) {
5827 mono_mb_emit_ldloc (mb, next_var);
5828 mono_mb_emit_ldloc (mb, remset_var);
5829 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, end_set));
5830 mono_mb_emit_byte (mb, CEE_LDIND_I);
// label2 is taken when store_next has reached end_set, i.e. the remembered set is full.
// NOTE(review): CEE_BGE compares as signed while both operands are addresses; an
// unsigned branch may be intended - confirm.
5831 label2 = mono_mb_emit_branch (mb, CEE_BGE);
5833 /* write barrier fast path */
5834 // *(rs->store_next++) = (mword)ptr;
5835 mono_mb_emit_ldloc (mb, next_var);
5836 mono_mb_emit_ldarg (mb, 0);
5837 mono_mb_emit_byte (mb, CEE_STIND_I);
// Advance the local cursor by one pointer.
5839 mono_mb_emit_ldloc (mb, next_var);
5840 mono_mb_emit_icon (mb, sizeof (gpointer));
5841 mono_mb_emit_byte (mb, CEE_ADD);
5842 mono_mb_emit_stloc (mb, next_var);
// Write the advanced cursor back into rs->store_next.
5844 mono_mb_emit_ldloc (mb, remset_var);
5845 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, store_next));
5846 mono_mb_emit_ldloc (mb, next_var);
5847 mono_mb_emit_byte (mb, CEE_STIND_I);
5849 // *(void**)ptr = value;
5850 mono_mb_emit_ldarg (mb, 0);
5851 mono_mb_emit_ldarg (mb, 1);
5852 mono_mb_emit_byte (mb, CEE_STIND_I);
5853 mono_mb_emit_byte (mb, CEE_RET);
5855 /* write barrier slow path */
5856 mono_mb_patch_branch (mb, label2);
5858 mono_mb_emit_ldarg (mb, 0);
5859 mono_mb_emit_ldarg (mb, 1);
5860 mono_mb_emit_icall (mb, mono_gc_wbarrier_generic_store);
5861 mono_mb_emit_byte (mb, CEE_RET);
5863 res = mono_mb_create_method (mb, sig, 16);
// Publish the method; the cache is checked again now that the loader lock is held.
5866 mono_loader_lock ();
5867 if (write_barrier_method) {
5868 /* Already created */
5869 mono_free_method (res);
5871 /* double-checked locking */
5872 mono_memory_barrier ();
5873 write_barrier_method = res;
5875 mono_loader_unlock ();
5877 return write_barrier_method;
5880 #endif /* HAVE_SGEN_GC */